Example #1

import numpy as np

# Project-level names assumed importable from the original repository:
# RLAgent, RandomAgent, game_2Agents
def train(n_epochs,
          epsilon,
          gamma,
          load_model,
          filename,
          random_opponent,
          n_games_test,
          freq_test,
          n_skip_games=0,
          verbose=False):
    """
  Train 2 agents by making them play and learn together. Save the
  learned Q-function into CSV file. It is possible to confront 1 of 
  the agents (against either the user or a Random Agent) during 
  training, as often as one wants. It is also possible to train an already 
  trained model.

  Parameters
  ----------
  n_epochs: int
    Number of games used for training.
  epsilon: float (in [0,1])
    Fraction of greedy decisions during training of the 2 RL Agents.
  gamma: float (in [0,1])
    Factor of significance of first actions over last ones for the 
    2 RL Agents.
  load_model: string
    CSV filename in which is stored the learned Q-function of an 
    agent. If load_model = 'model', the function loads the model 
    './Models/model.csv'. If load_model is not None, the previous 
    parameters epsilon and gamma are used for a second training.
  filename: string
    Name of the CSV file that will store the learned Q-function 
    of one of the agents. The path to CSV file is 
    then ./Models/filename.csv. The counter of state-action
    pairs is also stored at ./Models/data/count_filename.csv for
    future training.
  random_opponent: boolean
    If set to true, the function trains 1 RL Agent by making it 
    play against a Random Agent. Otherwise, the RL agent is
    trained by playing against another version of itself.
  n_games_test: int
    Number of games one of the RL Agent plays against a Random Agent
    for testing. If set to 0, the RL Agents will not be tested by a 
    Random Agent. 
  freq_test: int
    Number of epochs after which one of the RL Agents plays n_games_test
    games against a Random Agent. If set to 1000, each 1000 epochs of
    training, one of the RL Agents is tested against a Random Agent.
    If set to 0, test occurs at the last epoch of training only.
    If set to -1, none of the agents is tested during training.
  n_skip_games: int 
    Number of epochs after which the user can choose to play 
    against one of the learning agents. If set to 1000, 
    each 1000 games, the user can choose to play against 
    one agent. If set to 0, the user can choose to play against one 
    agent at the last epoch only. If set to -1, no choice is offered 
    and the user cannot test any agent.
  verbose: boolean
    If set to True, each game action during training has a 
    written explanation.

  Return
  ------
  learning_results: list
    Only significant with n_games_test > 0 (otherwise, empty list 
    by default). List of each n_epochs // freq_test epoch test results 
    against a Random Agent. Each test result is a list: 
    [current epoch, score of RL Agent, number of finished games, 
    n_games test].
  """

    # Learning agent
    agent1 = RLAgent(epsilon, gamma)
    if load_model is not None:
        agent1.load_model(load_model)

    # Choose opponent
    if random_opponent:
        agent2 = RandomAgent()
        time_limit = None
        print('Training vs Random')
    else:
        agent2 = RLAgent(epsilon, gamma)
        if load_model is not None:
            agent2.load_model(load_model)
        time_limit = None
        print('Training vs Self')

    # Index of the agent that starts the game; alternates every epoch
    start_idx = 0
    # Win counts of [agent1, agent2]
    scores = [0, 0]

    # Map sentinel values: 0 -> confront the agent at the last epoch
    # only (n_skip_games becomes n_epochs), -1 -> never (n_skip_games
    # becomes n_epochs + 1, which is never reached)
    if n_skip_games in [-1, 0]:
        n_skip_games = n_epochs - n_skip_games

    # Boolean for game between the user and agent1 preceding a game
    # between agent1 and agent2
    play_checkpoint_usr = False

    # Same sentinel mapping for tests: 0 -> test agent1 at the last
    # epoch only, -1 -> never test
    if freq_test in [-1, 0]:
        freq_test = n_epochs - freq_test

    # Number of games between agent1 and a Random Agent for testing
    n_games_test_mem = n_games_test
    learning_results = []

    # Start training
    print('Training epoch:')
    for epoch in range(1, n_epochs + 1):

        # Print progress roughly every 10% of training (the max() guard
        # avoids a division by zero when n_epochs < 10)
        if epoch % max(1, n_epochs // 10) == 0:
            print(epoch, '/', n_epochs)

        # Update boolean for playing with user
        play_checkpoint_usr = (epoch % n_skip_games == 0)
        if play_checkpoint_usr:
            # Print training status
            print('Number of games: ', epoch)
            print('Scores: ', scores)
            # Ask user to play
            play = int(input('Play ? (1 Yes | 0 No)\n'))
            play_checkpoint_usr = bool(play)

        # Update number of test games: n_games_test_mem when epoch is a
        # multiple of freq_test, 0 otherwise
        n_games_test = int(epoch % freq_test == 0) * n_games_test_mem

        # Start game
        game_over, winner, test_results = game_2Agents(
            agent1,
            agent2,
            start_idx=start_idx,
            train=True,
            time_limit=time_limit,
            n_games_test=n_games_test,
            play_checkpoint_usr=play_checkpoint_usr,
            verbose=verbose)

        assert game_over, ('Game not over but a new game is '
                           'beginning during training')

        if winner in [0, 1]:
            scores[winner] += 1

        # Save test games of agent1 against a Random Agent
        if n_games_test:
            assert len(test_results) != 0, \
                'Agent1 has been tested but no test results were returned.'
            learning_results.append(
                [epoch, test_results[2], test_results[0], test_results[1]])

        # Next round
        start_idx = 1 - start_idx

    # Save Q-function of agent1
    np.savetxt('Models/' + filename + '.csv', agent1.Q, delimiter=',')
    # Save stats for learning rate of agent1
    np.savetxt('Models/data/count_' + filename + '.csv',
               agent1.count_state_action,
               delimiter=',')

    return learning_results
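
A minimal usage sketch (all argument values hypothetical; assumes the
Models/ and Models/data/ directories already exist, since np.savetxt does
not create them):

results = train(n_epochs=10000,
                epsilon=0.2,
                gamma=0.9,
                load_model=None,
                filename='demo_model',
                random_opponent=False,
                n_games_test=100,
                freq_test=1000,
                n_skip_games=-1)
# Each entry of results is:
# [epoch, score of RL Agent, number of finished games, n_games_test]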
Example #2

# Project-level names assumed importable from the original repository:
# options, print_rules, tap_valid_digits, input_names, TapnSwap,
# game_1vs1, game_1vsAgent, display_endgame, RandomAgent, RLAgent
def game_mngr():
    """
  Game manager, used for navigation among different choices 
  offered to user.
  """

    # Options
    command = options('PLAY', 'RULES', 'Tap 1 to play or 2 to read the rules')

    # Rules page
    if int(command) == 2:
        print_rules()
        # Go back
        print('Tap 1 to come back to the main menu\n')
        comeback = tap_valid_digits([1])
        if int(comeback):
            game_mngr()

    # Game page
    if int(command) == 1:
        # Options
        players = options('PLAYER',
                          'PLAYERS',
                          'How many players ?',
                          comeback=True)

        # Go back
        if int(players) == 0:
            game_mngr()

        # 2 players
        if int(players) == 2:

            # Ask players' name
            player1, player2 = input_names(n_players=2)

            # Init scores
            scores = [0, 0]

            # Games
            tapnswap = TapnSwap()
            over = False
            while not over:
                game_over, winner = game_1vs1(tapnswap, player1, player2)
                scores[winner] += 1
                if game_over:
                    # Display scores
                    restart = display_endgame(scores, player1, player2)
                    # Go back
                    if not restart:
                        over = True
                        game_mngr()

        # 1 player
        if int(players) == 1:

            # Options
            level = options('EASY',
                            'DIFFICULT',
                            'Which level ?',
                            comeback=True)

            # Go back
            if int(level) == 0:
                game_mngr()

            # Define agent
            elif int(level) == 1:
                agent = RandomAgent()  # easy
            else:
                # Load agent
                agent = RLAgent()
                agent.load_model('greedy0_2_vsRandomvsSelf')  # difficult

            # Ask player's name
            player = input_names(n_players=1)

            # Init scores
            scores = [0, 0]

            # Games
            tapnswap = TapnSwap()
            over = False
            while not over:
                game_over, winner = game_1vsAgent(tapnswap,
                                                  player,
                                                  agent,
                                                  greedy=False)
                scores[winner] += 1
                if game_over:
                    # Display scores
                    restart = display_endgame(scores, player, 'Computer')
                    # Go back
                    if not restart:
                        over = True
                        game_mngr()
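
A plausible entry point for running the game manager as a script (an
assumption; the original repository may launch it differently):

if __name__ == '__main__':
    game_mngr()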