def train(n_epochs, epsilon, gamma, load_model, filename, random_opponent,
          n_games_test, freq_test, n_skip_games=0, verbose=False):
    """ Train 2 agents by making them play and learn together. Save the
    learned Q-function into CSV file. It is possible to confront 1 of the
    agents (against either the user or a Random Agent) during training, as
    often as one wants. It is also possible to train an already trained
    model.

    Parameters
    ----------
    n_epochs: int
        Number of games used for training.
    epsilon: float (in [0,1])
        Fraction of greedy decisions during training of the 2 RL Agents.
    gamma: float (in [0,1])
        Factor of significance of first actions over last ones for the
        2 RL Agents.
    load_model: string
        CSV filename in which is stored the learned Q-function of an agent.
        If load_model = 'model', the function loads the model
        './Models/model.csv'. If load_model is not None, the previous
        parameters epsilon and gamma are used for a second training.
    filename: string
        Name of the CSV file that will store the learned Q-function of one
        of the agents. The path to CSV file is then ./Models/filename.csv.
        The counter of state-action pairs is also stored at
        ./Models/data/count_filename.csv for future training.
    random_opponent: boolean
        If set to true, the function trains 1 RL Agent by making it play
        against a Random Agent. Otherwise, the RL agent is trained by
        playing against another version of itself.
    n_games_test: int
        Number of games one of the RL Agent plays against a Random Agent
        for testing. If set to 0, the RL Agents will not be tested by a
        Random Agent.
    freq_test: int
        Number of epochs after which one of the RL Agents plays
        n_games_test games against a Random Agent. If set to 1000, each
        1000 epochs of training, one of the RL Agents is tested against a
        Random Agent. If set to 0, test occurs at the last epoch of
        training only. If set to -1, none of the agents is tested during
        training.
    n_skip_games: int
        Number of epochs after which the user can choose to play against
        one of the learning agents. If set to 1000, each 1000 games, the
        user can choose to play against one agent. If set to 0, the user
        can choose to play against one agent at the last epoch only. If
        set to -1, no choice is offered and the user cannot test any agent.
    verbose: boolean
        If set to True, each game action during training has a written
        explanation.

    Return
    ------
    learning_results: list
        Only significant with n_games_test > 0 (otherwise, empty list by
        default). List of each n_epochs // freq_test epoch test results
        against a Random Agent. Each test result is a list:
        [current epoch, score of RL Agent, number of finished games,
        n_games test].
    """
    # Learning agent
    agent1 = RLAgent(epsilon, gamma)
    if load_model is not None:
        agent1.load_model(load_model)

    # Choose opponent
    if random_opponent:
        agent2 = RandomAgent()
        time_limit = None
        print('Training vs Random')
    else:
        agent2 = RLAgent(epsilon, gamma)
        if load_model is not None:
            agent2.load_model(load_model)
        time_limit = None
        print('Training vs Self')

    start_idx = 0
    scores = [0, 0]

    # If the user only confronts the agent at the last epoch
    # or if no confrontation: -1 maps to n_epochs + 1 (never triggers),
    # 0 maps to n_epochs (triggers at last epoch only).
    if n_skip_games in [-1, 0]:
        n_skip_games = n_epochs - n_skip_games

    # Boolean for game between the user and agent1 preceding a game
    # between agent1 and agent2
    play_checkpoint_usr = False

    # If there is a test of agent1 at the last epoch only or no test
    # (same -1 / 0 remapping as n_skip_games above)
    if freq_test in [-1, 0]:
        freq_test = n_epochs - freq_test

    # Number of games between agent1 and a Random Agent for testing
    n_games_test_mem = n_games_test
    learning_results = []

    # Progress is printed ~10 times over the run. max(1, ...) fixes a
    # ZeroDivisionError that occurred whenever n_epochs < 10.
    progress_step = max(1, n_epochs // 10)

    # Start training
    print('Training epoch:')
    for epoch in range(1, n_epochs + 1):
        if epoch % progress_step == 0:
            print(epoch, '/', n_epochs)

        # Update boolean for playing with user
        play_checkpoint_usr = bool(epoch % n_skip_games == 0)
        if play_checkpoint_usr:
            # Print training status
            print('Number of games: ', epoch)
            print('Scores: ', scores)
            # Ask user to play
            play = int(input('Play ? (1 Yes | 0 No)\n'))
            play_checkpoint_usr = bool(play)

        # Update boolean for test (0 disables testing this epoch)
        n_games_test = int(epoch % freq_test == 0) * n_games_test_mem

        # Start game
        game_over, winner, test_results = game_2Agents(
            agent1, agent2, start_idx=start_idx, train=True,
            time_limit=time_limit, n_games_test=n_games_test,
            play_checkpoint_usr=play_checkpoint_usr, verbose=verbose)
        assert game_over, ('Game not over but new game'
                           ' beginning during training')
        if winner in [0, 1]:
            scores[winner] += 1

        # Save test games of agent1 against a Random Agent
        if bool(n_games_test):
            assert len(test_results) != 0, \
                'Agent1 has been tested but there is no result of that.'
            learning_results.append(
                [epoch, test_results[2], test_results[0], test_results[1]])

        # Next round: alternate which agent starts
        start_idx = 1 - start_idx

    # Save Q-function of agent1
    np.savetxt(str('Models/' + filename + '.csv'), agent1.Q, delimiter=',')
    # Save stats for learning rate of agent1
    np.savetxt(str('Models/data/count_' + filename + '.csv'),
               agent1.count_state_action, delimiter=',')

    return learning_results
def game_mngr():
    """ Game manager, used for navigation among different choices
    offered to user. """
    # Main menu: play a game or read the rules
    choice = int(options('PLAY', 'RULES',
                         'Tap 1 to play or 2 to read the rules'))

    # Rules page
    if choice == 2:
        print_rules()
        # Offer to return to the main menu
        print('Tap 1 to come back to the main menu\n')
        if int(tap_valid_digits([1])):
            game_mngr()

    # Game page
    if choice == 1:
        # Ask for number of players (0 means "go back")
        n_players = int(options('PLAYER', 'PLAYERS', 'How many players ?',
                                comeback=True))

        # Back to the main menu
        if n_players == 0:
            game_mngr()

        # 2 players: human vs human
        if n_players == 2:
            player1, player2 = input_names(n_players=2)
            scores = [0, 0]
            tapnswap = TapnSwap()
            keep_playing = True
            while keep_playing:
                game_over, winner = game_1vs1(tapnswap, player1, player2)
                scores[winner] += 1
                if game_over:
                    # Show scores and ask whether to restart
                    restart = display_endgame(scores, player1, player2)
                    if not restart:
                        keep_playing = False
                        game_mngr()

        # 1 player: human vs agent
        if n_players == 1:
            # Ask for difficulty (0 means "go back")
            level = int(options('EASY', 'DIFFICULT', 'Which level ?',
                                comeback=True))
            if level == 0:
                game_mngr()
            elif level == 1:
                agent = RandomAgent()  # easy
            else:
                agent = RLAgent()
                agent.load_model('greedy0_2_vsRandomvsSelf')  # difficult

            player = input_names(n_players=1)
            scores = [0, 0]
            tapnswap = TapnSwap()
            keep_playing = True
            while keep_playing:
                game_over, winner = game_1vsAgent(tapnswap, player, agent,
                                                  greedy=False)
                scores[winner] += 1
                if game_over:
                    # Show scores and ask whether to restart
                    restart = display_endgame(scores, player, 'Computer')
                    if not restart:
                        keep_playing = False
                        game_mngr()