Example #1
    def test_features(self):
        players = [
            LearningPlayer(name='random',
                           estimation_mode=LearningPlayer.ACTUAL_Q)
            for _ in range(3)
        ]
        game = LandlordGame(players=players)
        while not game.is_round_over():
            curr_player = game.get_current_player()
            curr_position = game.get_current_position()
            curr_features = curr_player._derive_features(game)
            curr_hand_vector = curr_player.get_hand_vector(game, curr_position)
            move = curr_player.make_move(game, curr_position)
            curr_move_vector = curr_player.compute_move_vector(
                curr_position, game.get_landlord_position(), move)

            game.play_move(move)

            # The vectors computed above must match what the player recorded
            # when it made the move.
            self.assertTrue(
                np.allclose(curr_features,
                            curr_player.record_history_matrices[-1]))
            self.assertTrue(
                np.allclose(curr_move_vector,
                            curr_player.record_move_vectors[-1]))
            self.assertTrue(
                np.allclose(curr_hand_vector,
                            curr_player.record_hand_vectors[-1]))
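The pattern this test exercises, recompute the features just before acting and compare them to the player's own records after acting, can be shown in isolation. A minimal sketch, assuming nothing from the landlordai API; Recorder and its methods are hypothetical stand-ins:

import numpy as np

class Recorder:
    # Hypothetical stand-in: records the features seen at each move.
    def __init__(self):
        self.record_history = []

    def derive_features(self, state):
        # toy featurization: the raw state as an int8 vector
        return np.asarray(state, dtype=np.int8)

    def make_move(self, state):
        # record exactly what was derived at decision time
        self.record_history.append(self.derive_features(state))

player = Recorder()
state = [1, 0, 2]
expected = player.derive_features(state)  # computed before the move
player.make_move(state)
assert np.allclose(expected, player.record_history[-1])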
Example #2
    def test_features_v2(self):
        players = [
            LearningPlayer_v2(name='random',
                              epsilon=0,
                              estimation_mode=LearningPlayer.ACTUAL_Q,
                              learning_rate=1) for _ in range(3)
        ]
        game = LandlordGame(players=players)
        while not game.is_round_over():
            curr_player = game.get_current_player()
            curr_position = game.get_current_position()
            curr_features = curr_player._derive_features(game)

            best_move, best_move_q = curr_player.decide_best_move(game)
            curr_move_vector = curr_player.compute_move_vector(
                curr_position, game.get_landlord_position(), best_move)
            curr_hand_vector = curr_player.compute_remaining_hand_vector(
                game, curr_move_vector, curr_position)

            curr_player.record_move(game, best_move, best_move_q,
                                    curr_position)
            game.play_move(best_move)

            self.assertTrue(
                np.allclose(curr_features,
                            curr_player.record_history_matrices[-1]))
            self.assertTrue(
                np.allclose(curr_move_vector,
                            curr_player.record_move_vectors[-1]))
            self.assertTrue(
                np.allclose(curr_hand_vector,
                            curr_player.record_hand_vectors[-1]))

        players[0].compute_future_q(game)

        if game.has_winners():
            # a decided round pins the largest absolute estimated Q at
            # exactly the terminal reward of 1
            self.assertEqual(np.max(np.abs(players[0].get_estimated_qs())), 1)

        self.assertEqual(players[0].record_history_matrices[0][0].dtype,
                         np.int8)
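Why the maximum is exactly 1: under ACTUAL_Q estimation the recorded Qs are backed up from the terminal result, and with a discount factor of 1 (as the loaders in example #3 use) every backed-up value equals the terminal reward itself. A minimal sketch of that backup, assuming a terminal reward of +/-1; backed_up_qs is illustrative, not the library's code:

import numpy as np

def backed_up_qs(n_moves, terminal_reward, discount=1.0):
    # q_t = discount * q_{t+1}, with the final move's q equal to the
    # terminal reward
    qs = np.zeros(n_moves)
    q = terminal_reward
    for t in reversed(range(n_moves)):
        qs[t] = q
        q *= discount
    return qs

qs = backed_up_qs(n_moves=5, terminal_reward=1.0)
assert np.max(np.abs(qs)) == 1  # the check test_features_v2 makes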
Example #3
    def test_record_actual_q(self):
        def load_best_sim_net(net):
            return LearningPlayer(name=net,
                                  net_dir='../models/' + net,
                                  estimation_mode=LearningPlayer.ACTUAL_Q,
                                  epsilon=0,
                                  discount_factor=1)

        players = [load_best_sim_net('4_2_sim4_model10') for _ in range(3)]
        player_0_scores = []
        game = LandlordGame(players=players)
        while not game.is_round_over():
            curr_player = game.get_current_player()

            best_move, best_move_q = curr_player.decide_best_move(game)
            if curr_player == players[0]:
                player_0_scores.append(best_move_q)

            # make_move records the move internally; with epsilon=0 it should
            # pick the same greedy move decide_best_move returned above
            curr_player.make_move(game)

            game.play_move(best_move)

        for player in players:
            player.compute_future_q(game)

        record_state = players[0]._record_state_q
        future_q = players[0].get_estimated_qs()
        # each smoothed estimate must lie strictly between the two raw Q
        # records it interpolates, whichever direction the update moved
        for i in range(len(record_state) - 1):
            if record_state[i + 1] < record_state[i]:
                self.assertTrue(
                    record_state[i + 1] < future_q[i] < record_state[i])
            elif record_state[i + 1] > record_state[i]:
                self.assertTrue(
                    record_state[i + 1] > future_q[i] > record_state[i])

        self.assertEqual(len(players[0].get_record_hand_vectors()),
                         len(players[0].get_estimated_qs()))
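The bounds asserted above follow if compute_future_q applies an exponential-smoothing step q_i + lr * (q_{i+1} - q_i) with 0 < lr < 1: the result is then a convex combination of consecutive records. A minimal sketch of that reasoning; the update rule is an assumption read off the test's own comment, and smoothed is illustrative:

def smoothed(record_state, lr=0.5):
    # q_i <- q_i + lr * (q_{i+1} - q_i): a convex combination, so the
    # result lies strictly between the two records whenever they differ
    return [q + lr * (q_next - q)
            for q, q_next in zip(record_state, record_state[1:])]

record_state = [0.2, 0.8, 0.5]
future_q = smoothed(record_state)  # [0.5, 0.65]
for i in range(len(record_state) - 1):
    lo = min(record_state[i], record_state[i + 1])
    hi = max(record_state[i], record_state[i + 1])
    assert lo < future_q[i] < hi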
Example #4
def human_game(player_names, perspective):
    perspective_hand = None
    players = []
    for player_name in player_names:
        if player_name == perspective:
            perspective_hand = manual_hand()
        player_is_perspective = (player_name == perspective)
        # reference_player is assumed to be defined at module scope
        players.append(
            HumanPlayer(name=player_name,
                        reference_player=reference_player,
                        known_hand=player_is_perspective,
                        ai_before=player_is_perspective))

    game = LandlordGame(players, kitty_callback=manual_kitty)

    first_player = get_first_player(game)

    game.force_current_position(first_player)
    game.force_hand(perspective_position(game, perspective), perspective_hand)

    while not game.is_round_over():
        current_player = game.get_current_player()
        current_position = game.get_current_position()

        best_move, best_move_q = current_player.decide_best_move(game)

        print(f"{current_player.get_name()} "
              f"({game.get_position_role_name(current_position)}, "
              f"{len(game.get_hand(current_position))}): "
              f"{best_move} ({best_move_q})")

        # play with a known hand only when the current player is the perspective
        game.play_move(best_move,
                       hand_known=current_player.get_name() == perspective)

        if isinstance(game.get_last_played(), KittyReveal):
            print(game.get_last_played())

    if game.has_winners():
        for winner in game.get_winners():
            print('WINNERS:', game.get_ai_players()[winner].get_name())
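human_game relies on manual_hand and manual_kitty callbacks to feed real-world cards into the simulated game. A hypothetical sketch of the first, assuming hands are plain lists of card-name strings; the card representation and prompt are assumptions, not landlordai's actual types:

def manual_hand():
    # e.g. "3 3 4 5 5 J Q K A 2 2"; parsing into the project's real
    # card type would happen here
    raw = input('Enter the perspective hand, space-separated: ')
    return raw.split()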
Example #5
def play_against_two(players, show_q=True):
    game = LandlordGame(players)
    while not game.is_round_over():
        current_player = game.get_current_player()
        current_position = game.get_current_position()

        best_move, best_move_q = current_player.decide_best_move(game)

        best_move_q_str = f'({best_move_q})' if show_q else ''

        print(f"{current_player.get_name()} "
              f"({game.get_position_role_name(current_position)}, "
              f"{len(game.get_hand(current_position))}): "
              f"{best_move} {best_move_q_str}")
        game.play_move(best_move)

        if isinstance(game.get_last_played(), KittyReveal):
            print(game.get_last_played())

    if game.has_winners():
        for winner in game.get_winners():
            print('WINNERS:', game.get_ai_players()[winner].get_name())
Example #6
    def test_self_feed(self):
        players = [self.load_v2_net("4_8_actualq1_model20") for _ in range(3)]
        #players = [self.load_v2_net("4_2_sim4_model15") for _ in range(3)]
        game = LandlordGame(players=players)
        best_move_qs = []
        all_history_features = []
        history_vectors = []
        all_hand_vectors = []
        all_move_vectors = []
        while not game.is_round_over():
            curr_player = game.get_current_player()
            curr_position = game.get_current_position()

            best_move, best_move_q = curr_player.decide_best_move(
                game, curr_position)

            curr_player.record_move(game, best_move, best_move_q,
                                    curr_position)

            if curr_player == players[0]:
                # moves played from here on cannot change the history that
                # led to this state, so assess it now and keep a copy
                history_features = players[0]._derive_features(game)
                all_history_features.append(history_features)

                history_vectors.append(players[0].history_net.predict(
                    np.array([history_features]), batch_size=1)[0])

                # build the feature vectors for the move actually chosen
                all_move_vectors.append(players[0].compute_move_vector(
                    curr_position, game.get_landlord_position(), best_move))

                all_hand_vectors.append(
                    players[0].compute_remaining_hand_vector(
                        game, all_move_vectors[-1], curr_position))

                predicted_q = players[0].position_net.predict([
                    np.array([history_vectors[-1]]),
                    np.array([all_move_vectors[-1]]),
                    np.array([all_hand_vectors[-1]])
                ])[0][0]

                # feeding the recorded features back through the nets must
                # reproduce the Q that decide_best_move reported
                self.assertAlmostEqual(predicted_q, best_move_q, places=4)

                best_move_qs.append(best_move_q)

            game.play_move(best_move)

        players[0].compute_future_q(game)

        history_matrices = players[0].get_record_history_matrices()

        for i, j in zip(all_history_features, history_matrices):
            self.assertTrue(np.allclose(i, j))

        move_vectors = players[0].get_record_move_vectors()

        for i, j in zip(all_move_vectors, move_vectors):
            self.assertTrue(np.allclose(i, j))

        hand_vectors = players[0].get_record_hand_vectors()

        for i, j in zip(all_hand_vectors, hand_vectors):
            self.assertTrue(np.allclose(i, j))

        qs = players[0].get_estimated_qs()
        pred_qs = []
        # recreate each stored Q with a manual two-stage forward pass
        for i, records in enumerate(
                zip(history_matrices, move_vectors, hand_vectors, qs)):
            history_matrix, move_vector, hand_vector, q = records

            history_vector = players[0].history_net.predict(
                np.array([history_matrix]))[0]
            self.assertTrue(np.allclose(history_vector, history_vectors[i]))

            pred_qs.append(players[0].position_net.predict([[history_vector],
                                                            [move_vector],
                                                            [hand_vector]
                                                            ])[0][0])

        # the recreated Qs equal the stored estimates only when the learning
        # rate is 0, i.e. compute_future_q applied no smoothing
        self.assertTrue(np.allclose(qs, pred_qs))
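What test_self_feed verifies end to end is that the two-stage prediction is deterministic and replayable: an embedding net over the history feeds a head that also takes the move and hand vectors. A numpy sketch of that composition; the stand-in nets and all shapes are assumptions, only the structure mirrors the test:

import numpy as np

rng = np.random.default_rng(0)

# Stand-ins for history_net and position_net: fixed random linear maps.
W_hist = rng.standard_normal((64, 16))    # flattened history -> embedding
w_pos = rng.standard_normal(16 + 8 + 8)   # [embedding, move, hand] -> Q

def history_net(history_matrix):
    return history_matrix.reshape(-1) @ W_hist

def position_net(history_vector, move_vector, hand_vector):
    return np.concatenate([history_vector, move_vector, hand_vector]) @ w_pos

history_matrix = rng.standard_normal((8, 8))
move_vector = rng.standard_normal(8)
hand_vector = rng.standard_normal(8)

# Q computed at decision time must equal Q recreated later from the
# recorded features, because both forward passes are deterministic.
q_at_decision = position_net(history_net(history_matrix),
                             move_vector, hand_vector)
q_recreated = position_net(history_net(history_matrix),
                           move_vector, hand_vector)
assert np.isclose(q_at_decision, q_recreated)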