def test_get_value_and_state_returns_current_value_if_state_known(self):
     """Seed two states with values and check they are reported back.

     Each row below is (pieces, stored value, piece passed to the helper).
     The third helper argument is None in both calls (presumably "no
     winner" - confirm against assert_get_value_and_state_is).
     """
     known_states = (
         ("X--|---|---", 0.6, Board.X),
         ("XO-|---|---", 0.3, Board.O),
     )
     # Build every state tuple first, then store the values, then assert.
     seeded = [(get_board_state_tuple(pieces), stored)
               for pieces, stored, _ in known_states]
     for state, stored in seeded:
         self.player.values[state] = stored
     for pieces, stored, piece in known_states:
         self.assert_get_value_and_state_is(stored, pieces, None, piece)
    def test_get_move_chooses_random_best_available_move_if_random_gte_epsilon_and_multiple_bests(
            self, random_mock, choice_mock):
        """Check that every tied best move is offered to the random choice.

        Only two successor states of "---|-XO|---" are seeded, both with the
        same value. The other two expected candidates (positions 6 and 8)
        correspond to the symmetric boards listed in the comments below.
        """
        random_mock.return_value = 0.1
        choice_mock.side_effect = MockRandom(1).choice

        tied_value = 0.501
        seeded_pieces = (
            "X--|-XO|---",  # position 0
            "--X|-XO|---",  # position 2
        )
        for pieces in seeded_pieces:
            self.player.values[get_board_state_tuple(pieces)] = tied_value

        # Symmetries for X--|-XO|---:
        #                -O-|-X-|X-- (Rotated by 90 degrees)
        #                ---|OX-|--X (Rotated by 180 degrees)
        #                --X|-X-|-O- (Rotated by 270 degrees)
        #                ---|-XO|X-- (Reflected horizontally) - position 6
        #                --X|OX-|--- (Reflected vertically)
        #                X--|-X-|-O- (Reflected on left diagonal)
        #                -O-|-X-|--X (Reflected on right diagonal)
        #
        # Symmetries for --X|-XO|---:
        #                XO-|-X-|--- (Rotated by 90 degrees)
        #                ---|OX-|X-- (Rotated by 180 degrees)
        #                ---|-X-|-OX (Rotated by 270 degrees)
        #                ---|-XO|--X (Reflected horizontally) - position 8
        #                X--|OX-|--- (Reflected vertically)
        #                ---|-X-|XO- (Reflected on left diagonal)
        #                -OX|-X-|--- (Reflected on right diagonal)

        tied_positions = [0, 2, 6, 8]
        assert_get_move_is(self, self.player, self.board, 2, Board.X,
                           "---|-XO|---")
        choice_mock.assert_called_once_with(tied_positions)
 def test_get_move_chooses_random_best_available_move_if_random_gte_epsilon_and_multiple_bests(
         self, random_mock, choice_mock):
     """Check that all equally valued best moves reach the random choice.

     Three successor states of "---|-XO|---" (X placed at positions 0, 2
     and 6) are seeded with the same value, and the choice mock is expected
     to be called exactly once with those three positions.
     """
     random_mock.return_value = 0.1
     choice_mock.side_effect = MockRandom(1).choice

     tied_value = 0.501
     for pieces in ("X--|-XO|---", "--X|-XO|---", "---|-XO|X--"):
         self.player.values[get_board_state_tuple(pieces)] = tied_value

     tied_positions = [0, 2, 6]
     assert_get_move_is(self, self.player, self.board, 2, Board.X,
                        "---|-XO|---")
     choice_mock.assert_called_once_with(tied_positions)
 def test_set_reward_does_not_update_values_for_each_state_if_learning_disabled(
         self):
     """Check that stored values are unchanged after a reward when learning is off.

     The same values that are seeded for the five states are passed to
     assert_values_after_reward_are as the expected values.
     """
     pieces_list = [
         "---|-X-|---", "-O-|-X-|---", "-O-|-X-|--X", "-OO|-X-|--X",
         "XOO|-X-|--X"
     ]
     values = [0.6, 0.55, 0.7, 0.85, 1.0]

     self.player.disable_learning()
     for pieces, stored in zip(pieces_list, values):
         self.player.values[get_board_state_tuple(pieces)] = stored
     self.assert_values_after_reward_are(values, pieces_list, Board.X)
 def assert_get_value_and_state_symmetric_is(self, value, pieces,
                                             symmetric_pieces, winner,
                                             piece):
     """Assert the player's lookup result for a symmetric board.

     Sets the player's piece to ``piece``, calls
     ``player._get_value_and_state`` with the state built from
     ``symmetric_pieces`` and ``winner``, and asserts that:

     * the returned value is almost equal to ``value``;
     * the returned state equals the state built from ``pieces``;
     * that state is a key of ``player.values``;
     * the symmetric state, when it differs, is not a key of
       ``player.values``.
     """
     self.player.set_piece(piece)
     expected_state = get_board_state_tuple(pieces)
     queried_state = get_board_state_tuple(symmetric_pieces)

     result = self.player._get_value_and_state(queried_state, winner)
     actual_value, actual_state = result

     self.assertAlmostEqual(value, actual_value)
     self.assertEqual(expected_state, actual_state)
     self.assertIn(expected_state, self.player.values)
     if queried_state == expected_state:
         # The queried board is the stored one; nothing more to check.
         return
     self.assertNotIn(queried_state, self.player.values)
 def test_get_move_chooses_best_available_move_if_random_gte_epsilon(
         self, random_mock, choice_mock):
     """Check that the single seeded best move is the one chosen.

     Only the state reached by placing O at position 5 on "---|-X-|---"
     is seeded, and the choice mock must be called once with ``[5]``.
     """
     random_mock.return_value = 0.1
     choice_mock.side_effect = MockRandom(0).choice

     best_state = get_board_state_tuple("---|-XO|---")
     self.player.values[best_state] = 0.501

     only_best = [5]
     assert_get_move_is(self, self.player, self.board, 5, Board.O,
                        "---|-X-|---")
     choice_mock.assert_called_once_with(only_best)
Ejemplo n.º 7
0
 def assert_game_info_is(self, winner, winning_positions, turn, pieces, game_info):
     """Assert that ``game_info`` equals the dict built from the arguments.

     The expected dict has the keys "winner", "winning_positions", "turn"
     and "board"; "board" is the state tuple for ``pieces`` converted to a
     list.
     """
     expected = {
         "winner": winner,
         "winning_positions": winning_positions,
         "turn": turn,
         "board": list(get_board_state_tuple(pieces)),
     }
     self.assertEqual(expected, game_info)
 def test_get_move_chooses_best_available_move_if_learning_disabled(
         self, random_mock, choice_mock):
     """Check the best move is chosen without calling random when learning is off.

     After learning is disabled, the random mock must not be called at all
     and the choice mock must be called once with ``[4]``.
     """
     self.player.disable_learning()
     random_mock.return_value = 0.099
     choice_mock.side_effect = MockRandom(0).choice

     best_state = get_board_state_tuple("---|-O-|--X")
     self.player.values[best_state] = 0.501

     only_best = [4]
     assert_get_move_is(self, self.player, self.board, 4, Board.O,
                        "---|---|--X")
     random_mock.assert_not_called()
     choice_mock.assert_called_once_with(only_best)
 def assert_values_after_reward_are(self, values, pieces_list, winner):
     """Replay a sequence of boards, apply a reward and check stored values.

     The player's piece is set to ``Board.X``. For each entry of
     ``pieces_list`` the board is set to those pieces and
     ``player.store_state()`` is called. After ``player.set_reward(winner)``
     the keys of ``player.values`` must match the states built from
     ``pieces_list``, and each stored value must be almost equal to the
     corresponding entry of ``values``.

     Args:
         values: expected values, paired positionally with ``pieces_list``.
         pieces_list: board strings to replay, in order.
         winner: value passed to ``player.set_reward``.
     """
     self.player.set_piece(Board.X)
     values_dict = {}
     for pieces, value in zip(pieces_list, values):
         set_board(self.board, pieces)
         self.player.store_state()
         values_dict[get_board_state_tuple(pieces)] = value
     self.player.set_reward(winner)
     self.assertEqual(sorted(values_dict.keys()),
                      sorted(self.player.values.keys()))
     # Compare against the expected values. The earlier version compared
     # each stored value with itself, so this check could never fail.
     for key, expected in values_dict.items():
         self.assertAlmostEqual(expected, self.player.values[key])
    def test_get_move_chooses_best_available_move_if_random_gte_epsilon(
            self, random_mock, choice_mock):
        """Check that only the highest-valued move is offered to the choice.

        The state for O at position 5 gets the highest value; the other
        three seeded states are its symmetric counterparts and are given a
        lower value so that they cannot tie with it.
        """
        random_mock.return_value = 0.1
        choice_mock.side_effect = MockRandom(0).choice

        best_value, lower_value = 0.501, 0.499
        seeded = (
            # Original; also reflected horizontally
            ("---|-XO|---", best_value),
            # Force other symmetric choices to be of lower value
            # Rotated by 90 degrees; also reflected on right diagonal
            ("-O-|-X-|---", lower_value),
            # Rotated by 180 degrees; also reflected vertically
            ("---|OX-|---", lower_value),
            # Rotated by 270 degrees; also reflected on left diagonal
            ("---|-X-|-O-", lower_value),
        )
        for pieces, stored in seeded:
            self.player.values[get_board_state_tuple(pieces)] = stored

        only_best = [5]
        assert_get_move_is(self, self.player, self.board, 5, Board.O,
                           "---|-X-|---")
        choice_mock.assert_called_once_with(only_best)
 def test_get_move_values_returns_move_values_for_available_moves(self):
     """Check that each open position maps to the value of its resulting state.

     Each row is (position, board after O is placed there, stored value)
     for the starting board "XO-|---|-X-".
     """
     moves = (
         (2, "XOO|---|-X-", 0.7),
         (3, "XO-|O--|-X-", 0.65),
         (4, "XO-|-O-|-X-", 0.75),
         (5, "XO-|--O|-X-", 0.62),
         (6, "XO-|---|OX-", 0.72),
         (8, "XO-|---|-XO", 0.67),
     )
     for _, pieces, stored in moves:
         self.player.values[get_board_state_tuple(pieces)] = stored

     expected_move_values = {position: stored for position, _, stored in moves}
     assert_get_move_values_are(self, self.player, self.board,
                                expected_move_values, Board.O, "XO-|---|-X-")
 def test_get_num_states_returns_correct_num_of_states(self):
     """Check that get_num_states() reports 3 after three states are seeded."""
     seeded = (
         ("---|-X-|---", 0.9),
         ("---|-X-|--O", 0.75),
         ("---|XX-|--O", 0.7),
     )
     for pieces, stored in seeded:
         self.player.values[get_board_state_tuple(pieces)] = stored
     self.assertEqual(3, self.player.get_num_states())
    def test_get_value_and_state_returns_current_value_if_symmetric_state_known(
            self):
        """Check that every symmetric form of a known state maps back to it.

        Two states are seeded. For the original board and each of its seven
        rotations and reflections, the helper must report the seeded value
        and the originally seeded state.
        """
        x_pieces, x_value = "XO-|X--|---", 0.56
        o_pieces, o_value = "XO-|X--|O--", 0.45

        x_state = get_board_state_tuple(x_pieces)
        o_state = get_board_state_tuple(o_pieces)
        self.player.values[x_state] = x_value
        self.player.values[o_state] = o_value

        # One row per symmetry: (form of x_pieces, form of o_pieces).
        symmetric_forms = (
            ("XO-|X--|---", "XO-|X--|O--"),  # Original
            ("---|O--|XX-", "---|O--|XXO"),  # Rotated by 90 degrees
            ("---|--X|-OX", "--O|--X|-OX"),  # Rotated by 180 degrees
            ("-XX|--O|---", "OXX|--O|---"),  # Rotated by 270 degrees
            ("---|X--|XO-", "O--|X--|XO-"),  # Reflected horizontally
            ("-OX|--X|---", "-OX|--X|--O"),  # Reflected vertically
            ("XX-|O--|---", "XXO|O--|---"),  # Reflected on left diagonal
            ("---|--O|-XX", "---|--O|OXX"),  # Reflected on right diagonal
        )
        for x_form, o_form in symmetric_forms:
            self.assert_get_value_and_state_symmetric_is(
                x_value, x_pieces, x_form, None, Board.X)
            self.assert_get_value_and_state_symmetric_is(
                o_value, o_pieces, o_form, None, Board.O)