Example #1
def play(model_path, is_max_entropy):
    """
    Play a game against a model
    :param model_path: String. Path to the model
    :param is_max_entropy: Boolean. Whether the model uses entropy maximization
    """
    random.seed(int(time()))

    p1 = players.QPlayer(hidden_layers_size=layers_size,
                         learning_batch_size=batch_size,
                         gamma=gamma,
                         tau=tau,
                         batches_to_q_target_switch=batches_to_q_target_switch,
                         memory_size=memory_size,
                         session=tf.Session(),
                         maximize_entropy=is_max_entropy)
    p1.restore(model_path)

    p2 = players.Human()

    for g in range(1):
        print('STARTING NEW GAME (#{})\n-------------'.format(g))
        if g % 2 == 0:
            game = Game(p1, p2)
            print("Computer is X (1)")
        else:
            game = Game(p2, p1)
            print("Computer is O (-1)")
        while not game.game_status()['game_over']:
            game.print_field()
            if isinstance(game.active_player, players.Human):
                print("{}'s turn:".format(game.current_player))
            state = np.copy(game.board)
            # Force Q-Network to select different starting positions if it plays first
            if np.count_nonzero(game.board) == 0 and isinstance(game.active_player, players.QPlayer):
                action = random.randint(0, 399)
            else:
                action = int(game.active_player.select_cell(state, epsilon=0.0))
            print(game.current_player, action)
            game.play(action)
            if not game.game_status()['game_over']:
                game.next_player()
            if game._invalid_move_played:
                print("*")  # flag the invalid move and stop this game
                break
        print('-------------\nGAME OVER!')
        game.print_board()
        print(game.game_status())
        print('-------------')
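
A minimal invocation sketch; the checkpoint path is hypothetical, and the module-level hyperparameters (layers_size, batch_size, gamma, tau, batches_to_q_target_switch, memory_size) are assumed to be defined elsewhere in the module:

if __name__ == '__main__':
    # Hypothetical path; point this at wherever the trained weights were saved
    play(model_path='./models/q.ckpt', is_max_entropy=False)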
Example #2
def play():
    random.seed(int(time()))
    p1 = players.QPlayer([100, 160, 160, 100],
                         learning_batch_size=100,
                         gamma=0.95,
                         tau=0.95,
                         batches_to_q_target_switch=100,
                         memory_size=100000)
    p1.restore('./models/q.ckpt')
    p2 = players.Human()
    for g in range(4):
        print('STARTING NEW GAME (#{})\n-------------'.format(g))
        if g % 2 == 0:
            game = Game(p1, p2)
            print("Computer is X (1)")
        else:
            game = Game(p2, p1)
            print("Computer is O (-1)")
        while not game.game_status()['game_over']:
            if isinstance(game.active_player(), players.Human):
                game.print_board()
                print("{}'s turn:".format(game.current_player))
            state = np.copy(game.board)
            # Force Q-Network to select different starting positions if it plays first
            if np.count_nonzero(game.board) == 0 and isinstance(game.active_player(), players.QPlayer):
                action = random.randint(0, 8)
            else:
                action = int(game.active_player().select_cell(state, epsilon=0.0))
            game.play(action)
            if not game.game_status()['game_over']:
                game.next_player()
        print('-------------\nGAME OVER!')
        game.print_board()
        print(game.game_status())
        print('-------------')


#train()
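
This variant hard-codes its configuration, so a minimal (hypothetical) entry point is just:

if __name__ == '__main__':
    play()  # restores './models/q.ckpt' and plays four games against a human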
Example #3
def face_off(paths, rng=3, p1_name='Q', p2_name='E'):
    """
    Test different models against each other
    :param paths: List(String). Paths to the models
    :param rng: Integer. Number of models stored under each supplied path
    :param p1_name: String. Name of player 1
    :param p2_name: String. Name of player 2
    :return: Dict. Number of games won by each player
    """
    tie = 'TIE'
    results = {p1_name: 0, p2_name: 0, tie: 0}

    for path1 in paths:
        for i in range(rng):
            p1_dir = '{}/{}'.format(path1, i)
            print('Loading player {} [{}]...'.format(p1_name, p1_dir))
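            # A separate tf.Graph per checkpoint keeps the restored variable sets from colliding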
            graph1 = tf.Graph()
            with graph1.as_default():
                p1 = players.QPlayer(
                    hidden_layers_size=layers_size,
                    learning_batch_size=batch_size,
                    gamma=gamma,
                    tau=tau,
                    batches_to_q_target_switch=batches_to_q_target_switch,
                    memory_size=memory_size,
                    session=tf.Session(),
                    maximize_entropy=False)
                p1.restore('{}/{}.ckpt'.format(p1_dir, p1_name))
                p1.name = p1_name

            for path2 in paths:
                for j in range(rng):
                    p2_dir = '{}/{}'.format(path2, j)
                    print('Loading player {} [{}]...'.format(p2_name, p2_dir))
                    graph2 = tf.Graph()
                    with graph2.as_default():
                        p2 = players.QPlayer(
                            hidden_layers_size=layers_size,
                            learning_batch_size=batch_size,
                            gamma=gamma,
                            tau=tau,
                            batches_to_q_target_switch=batches_to_q_target_switch,
                            memory_size=memory_size,
                            session=tf.Session(),
                            maximize_entropy=True)
                        p2.restore('{}/{}.ckpt'.format(p2_dir, p2_name))
                        p2.name = p2_name

                    print('Playing...')
                    print('----------')
                    for g in range(18):
                        if g % 2 == 0:
                            game = Game(p1, p2)
                        else:
                            game = Game(p2, p1)
                        first_cell = g // 2  # 18 games: each of the 9 opening cells, once per seating
                        while not game.game_status()['game_over']:
                            state = np.copy(game.board)
                            if np.count_nonzero(game.board) == 0:
                                action = first_cell
                            else:
                                action = int(game.active_player.select_cell(state, epsilon=0.0))
                            game.play(action)
                            if not game.game_status()['game_over']:
                                game.next_player()
                        winner = game.game_status()['winner']
                        winner_name = game.player1.name if winner == 1 else (
                            game.player2.name if winner == -1 else tie)
                        print(
                            'GAME - player X: {p1}, player O: {p2} | First cell: {c} | Winner: {w}'
                            .format(p1=game.player1.name,
                                    p2=game.player2.name,
                                    c=first_cell,
                                    w=winner_name))
                        results[winner_name] += 1
                    print('----------')

    print('Final results: {}'.format(results))
    s = sum(results.values())
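    # Integer truncation (not rounding) to two decimal places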
    pct = {k: int(10000 * v / s) / 100 for k, v in results.items()}
    print('Percents: {}'.format(pct))
    return results
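
A hedged usage sketch; the directory layout '<path>/<i>/<name>.ckpt' is implied by the loader above, and the paths themselves are placeholders:

results = face_off(paths=['./models/run_a', './models/run_b'], rng=3, p1_name='Q', p2_name='E')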
Example #4
def train(p1_name,
          p2_name,
          p1_max_ent,
          p2_max_ent,
          p2_novice,
          num_of_games=1e6,
          savedir='./models'):
    """
    Initiate a single training process
    :param p1_name: String. Name of player 1 (will be used as file-name)
    :param p2_name: String. Name of player 2 (will be used as file-name)
    :param p1_max_ent: Boolean. Should player 1 use maximum-entropy learning
    :param p2_max_ent: Boolean. Should player 2 use maximum-entropy learning
    :param p2_novice: Boolean. Should player 2 be an instance of players.Novice
    :param num_of_games: Number. Number of games to train on
    :param savedir: String. Path to save trained weights
    """
    random.seed(int(time() * 1000))
    tf.reset_default_graph()
    logging.basicConfig(level=logging.INFO, format='%(message)s')

    # Initialize players
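    # (one tf.Graph and tf.Session per player, so the two Q-networks stay fully independent)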
    graph1 = tf.Graph()
    graph2 = tf.Graph()

    with graph1.as_default():
        p1 = players.QPlayer(
            tf.Session(),
            hidden_layers_size=layers_size,
            learning_batch_size=batch_size,
            gamma=gamma,
            batches_to_q_target_switch=batches_to_q_target_switch,
            tau=tau,
            memory_size=memory_size,
            maximize_entropy=p1_max_ent)
    p1.name = p1_name

    if p2_novice:
        p2 = players.Novice()
    else:
        with graph2.as_default():
            p2 = players.QPlayer(
                tf.Session(),
                hidden_layers_size=layers_size,
                learning_batch_size=batch_size,
                gamma=gamma,
                batches_to_q_target_switch=batches_to_q_target_switch,
                tau=tau,
                memory_size=memory_size,
                maximize_entropy=p2_max_ent)
    p2.name = p2_name

    total_rewards = {p1.name: 0, p2.name: 0}
    costs = {p1.name: [], p2.name: []}  # stores training costs, so we can plot them later
    rewards = {p1.name: [], p2.name: []}  # same, but for the players' total rewards

    # Start playing
    num_of_games = int(num_of_games)
    train_start_time = time()
    for g in range(1, num_of_games + 1):
        # Alternate seats so both players get to play X and O
        game = Game(p1, p2) if g % 2 == 0 else Game(p2, p1)
        # The last state each player acted from (None until their first move)
        last_phases = {p1.name: None, p2.name: None}
        while not game.game_status()['game_over']:
            if isinstance(game.active_player, players.Human):
                game.print_board()
                print("{}'s turn:".format(game.active_player.name))

            # If this is not the first move, store in memory the transition from the last state
            # the active player saw to this one
            state = np.copy(game.board)
            if last_phases[game.active_player.name] is not None:
                memory_element = last_phases[game.active_player.name]
                memory_element['next_state'] = state
                memory_element['game_over'] = False
                game.active_player.add_to_memory(memory_element)

            # Calculate annealed epsilon
            if g <= num_of_games // 4:
                max_eps = 0.6
            elif g <= num_of_games // 2:
                max_eps = 0.1
            else:
                max_eps = 0.05
            min_eps = 0.01
            eps = round(
                max(max_eps - round(g * (max_eps - min_eps) / num_of_games, 3),
                    min_eps), 3)
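            # Worked example, assuming the default num_of_games = 1e6: at g = 100000,
            # max_eps = 0.6, so eps = max(0.6 - round(100000 * 0.59 / 1e6, 3), 0.01) = 0.541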

            # Play and receive reward
            action = int(game.active_player.select_cell(state, epsilon=eps))
            play_status = game.play(action)
            game_over = play_status['game_over']
            if play_status['invalid_move']:
                r = game.invalid_move_reward
            elif game_over:
                if play_status['winner'] == 0:
                    r = game.tie_reward
                else:
                    r = game.winning_reward
            else:
                r = 0

            # Store the current state in temporary memory
            last_phases[game.active_player.name] = {
                'state': state,
                'action': action,
                'reward': r
            }
            total_rewards[game.active_player.name] += r
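            # When a move wins, the opponent's running total receives the losing reward as well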
            if r == game.winning_reward:
                total_rewards[game.inactive_player.name] += game.losing_reward

            # Activate learning procedure
            cost = game.active_player.learn(learning_rate=learning_rate)
            if cost is not None:
                costs[game.active_player.name].append(cost)

            # Next player's turn, if game hasn't ended
            if not game_over:
                game.next_player()

        # Adding last phase for winning (active) player
        memory_element = last_phases[game.active_player.name]
        memory_element['next_state'] = np.zeros(9)
        memory_element['game_over'] = True
        game.active_player.add_to_memory(memory_element)

        # Adding last phase for losing (inactive) player
        memory_element = last_phases[game.inactive_player.name]
        memory_element['next_state'] = np.zeros(9)
        memory_element['game_over'] = True
        # r still holds the last move's reward; if it won the game, the inactive
        # player takes the losing reward, otherwise the game ended in a tie
        memory_element['reward'] = game.losing_reward if r == game.winning_reward else game.tie_reward
        game.inactive_player.add_to_memory(memory_element)

        # Print statistics
        period = 100.0
        if g % int(period) == 0:
            print(
                'Game: {g} | Number of Trainings: {t1},{t2} | Epsilon: {e} | Average Rewards - {p1}: {r1}, {p2}: {r2}'
                .format(g=g,
                        p1=p1.name,
                        r1=total_rewards[p1.name] / period,
                        p2=p2.name,
                        r2=total_rewards[p2.name] / period,
                        t1=len(costs[p1.name]),
                        t2=len(costs[p2.name]),
                        e=eps))
            rewards[p1.name].append(total_rewards[p1.name] / period)
            rewards[p2.name].append(total_rewards[p2.name] / period)
            total_rewards = {p1.name: 0, p2.name: 0}

    # Save trained model and shutdown Tensorflow sessions
    training_time = time() - train_start_time
    minutes = int(training_time // 60)
    seconds = int(training_time % 60)
    print('Training took {m}:{s:02d} minutes'.format(m=minutes, s=seconds))

    # Plot graphs and close sessions
    cost_colors = {p1.name: 'b', p2.name: 'k'}
    reward_colors = {p1.name: 'g', p2.name: 'r'}
    graphs = {p1.name: graph1, p2.name: graph2}
    for pp in [p1, p2]:
        with graphs[pp.name].as_default():
            pp.save('{dir}/{name}.ckpt'.format(dir=savedir, name=pp.name))
            pp.shutdown()

        plt.scatter(range(len(costs[pp.name])),
                    costs[pp.name],
                    c=cost_colors[pp.name])
        plt.title('Cost of player {}'.format(pp.name))
        plt.show()
        plt.scatter(range(len(rewards[pp.name])),
                    rewards[pp.name],
                    c=reward_colors[pp.name])
        plt.title('Average rewards of player {}'.format(pp.name))
        plt.show()

        plt.scatter(range(len(costs[pp.name])),
                    costs[pp.name],
                    c=cost_colors[pp.name])
        plt.title('Cost of player {} [0,1]'.format(pp.name))
        plt.ylim(0, 1)
        plt.show()
        plt.scatter(range(len(rewards[pp.name])),
                    rewards[pp.name],
                    c=reward_colors[pp.name])
        plt.title('Average rewards of player {} [-1,1]'.format(pp.name))
        plt.ylim(-1, 1)
        plt.show()
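
A hypothetical call, mirroring the module's defaults; the player names and flags are placeholders:

train(p1_name='Q', p2_name='E', p1_max_ent=False, p2_max_ent=True,
      p2_novice=False, num_of_games=1e5)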
Example #5
def train():
    costs = []  # this will store the costs, so we can plot them later
    r1 = []  # same, but for the players' total rewards
    r2 = []
    random.seed(int(time() * 1000))
    tf.reset_default_graph()
    logging.basicConfig(level=logging.WARN, format='%(message)s')

    # Initialize players
    p1 = players.QPlayer([100, 160, 160, 100],
                         learning_batch_size=150,
                         batches_to_q_target_switch=1000,
                         gamma=0.95,
                         tau=0.95,
                         memory_size=100000)
    p1.restore('./models/q.ckpt')
    p1.name = 'Q'

    p2 = players.Novice()
    p2.name = 'N'

    total_rewards = {p1.name: 0, p2.name: 0}

    # Start playing
    num_of_games = 400000
    for g in range(1, num_of_games + 1):
        # Alternate seats so both players get to play X and O
        game = Game(p1, p2) if g % 2 == 0 else Game(p2, p1)
        # The last state each player acted from (None until their first move)
        last_phases = {p1.name: None, p2.name: None}
        while not game.game_status()['game_over']:
            if isinstance(game.active_player(), players.Human):
                game.print_board()
                print("{}'s turn:".format(game.active_player().name))

            # If this is not the first move, store in memory the transition from the last state
            # the active player saw to this one
            state = np.copy(game.board)
            if last_phases[game.active_player().name] is not None:
                memory_element = last_phases[game.active_player().name]
                memory_element['next_state'] = state
                memory_element['game_over'] = False
                game.active_player().add_to_memory(memory_element)

            # Calculate annealed epsilon
            if g <= num_of_games // 4:
                max_eps = 0.6
            elif g <= num_of_games // 2:
                max_eps = 0.01
            else:
                max_eps = 0.001
            min_eps = 0.01 if g <= num_of_games // 2 else 0.0
            eps = round(
                max(max_eps - round(g * (max_eps - min_eps) / num_of_games, 3),
                    min_eps), 3)
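            # Worked example: at g = 300000 of 400000, max_eps = 0.001 and min_eps = 0.0,
            # so eps = max(0.001 - round(300000 * 0.001 / 400000, 3), 0.0) = 0.0 (fully greedy)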

            # Play and receive reward
            action = int(game.active_player().select_cell(state, epsilon=eps))
            play_status = game.play(action)
            game_over = play_status['game_over']
            if play_status['invalid_move']:
                r = game.invalid_move_reward
            elif game_over:
                if play_status['winner'] == 0:
                    r = game.tie_reward
                else:
                    r = game.winning_reward
            else:
                r = 0

            # Store the current state in temporary memory
            last_phases[game.active_player().name] = {
                'state': state,
                'action': action,
                'reward': r
            }
            total_rewards[game.active_player().name] += r

            # Activate learning procedure
            cost = game.active_player().learn(learning_rate=0.0001)
            if cost is not None:
                costs.append(cost)

            # Next player's turn, if game hasn't ended
            if not game_over:
                game.next_player()

        # Adding last phase for winning (active) player
        memory_element = last_phases[game.active_player().name]
        memory_element['next_state'] = np.zeros(9)
        memory_element['game_over'] = True
        game.active_player().add_to_memory(memory_element)

        # Adding last phase for losing (inactive) player
        memory_element = last_phases[game.inactive_player().name]
        memory_element['next_state'] = np.zeros(9)
        memory_element['game_over'] = True
        memory_element['reward'] = game.losing_reward
        game.inactive_player().add_to_memory(memory_element)

        # Print statistics
        if g % 100 == 0:
            print(
                'Game: {g} | Number of Trainings: {t} | Epsilon: {e} | Average Rewards - {p1}: {r1}, {p2}: {r2}'
                .format(g=g,
                        p1=p1.name,
                        r1=total_rewards[p1.name] / 100.0,
                        p2=p2.name,
                        r2=total_rewards[p2.name] / 100.0,
                        t=len(costs),
                        e=eps))
            r1.append(total_rewards[p1.name] / 100.0)
            r2.append(total_rewards[p2.name] / 100.0)
            total_rewards = {p1.name: 0, p2.name: 0}

    # Save trained model and shutdown Tensorflow sessions
    p1.save('./models/q.ckpt')
    for pp in [p1, p2]:
        pp.shutdown()

    # Plot graphs
    plt.scatter(range(len(costs)), costs)
    plt.show()
    plt.scatter(range(len(r1)), r1, c='g')
    plt.show()
    plt.scatter(range(len(r2)), r2, c='r')
    plt.show()