def test_get_value_and_state_returns_current_value_if_state_known(self):
    # Seed a value for each of two states, then check through
    # assert_get_value_and_state_is that the seeded value is reported for
    # that state (third argument None, last argument the piece constant).
    known_states = (
        ("X--|---|---", 0.6, Board.X),
        ("XO-|---|---", 0.3, Board.O),
    )
    for pieces, seeded_value, _ in known_states:
        self.player.values[get_board_state_tuple(pieces)] = seeded_value

    for pieces, seeded_value, piece in known_states:
        self.assert_get_value_and_state_is(seeded_value, pieces, None, piece)
def test_get_move_chooses_random_best_available_move_if_random_gte_epsilon_and_multiple_bests(
        self, random_mock, choice_mock):
    # Two states are seeded with the same value (0.501). The choice mock is
    # expected to be offered positions 0, 2, 6 and 8, and move 2 is the
    # expected result.
    random_mock.return_value = 0.1
    choice_mock.side_effect = MockRandom(1).choice

    # Symmetries for X--|-XO|---:
    # -O-|-X-|X-- (Rotated by 90 degrees)
    # ---|OX-|--X (Rotated by 180 degrees)
    # --X|-X-|-O- (Rotated by 270 degrees)
    # ---|-XO|X-- (Reflected horizontally) - position 6
    # --X|OX-|--- (Reflected vertically)
    # X--|-X-|-O- (Reflected on left diagonal)
    # -O-|-X-|--X (Reflected on right diagonal)
    #
    # Symmetries for --X|-XO|---:
    # XO-|-X-|--- (Rotated by 90 degrees)
    # ---|OX-|X-- (Rotated by 180 degrees)
    # ---|-X-|-OX (Rotated by 270 degrees)
    # ---|-XO|--X (Reflected horizontally) - position 8
    # X--|OX-|--- (Reflected vertically)
    # ---|-X-|XO- (Reflected on left diagonal)
    # -OX|-X-|--- (Reflected on right diagonal)
    tied_best_states = (
        "X--|-XO|---",  # position 0
        "--X|-XO|---",  # position 2
    )
    for pieces in tied_best_states:
        self.player.values[get_board_state_tuple(pieces)] = 0.501

    assert_get_move_is(self, self.player, self.board, 2, Board.X,
                       "---|-XO|---")
    choice_mock.assert_called_once_with([0, 2, 6, 8])
def test_get_move_chooses_random_best_available_move_if_random_gte_epsilon_and_multiple_bests(
        self, random_mock, choice_mock):
    # Three states are seeded with the same value (0.501). The choice mock
    # is expected to be offered positions 0, 2 and 6, and move 2 is the
    # expected result.
    random_mock.return_value = 0.1
    choice_mock.side_effect = MockRandom(1).choice

    tied_best_states = ("X--|-XO|---", "--X|-XO|---", "---|-XO|X--")
    for pieces in tied_best_states:
        self.player.values[get_board_state_tuple(pieces)] = 0.501

    assert_get_move_is(self, self.player, self.board, 2, Board.X,
                       "---|-XO|---")
    choice_mock.assert_called_once_with([0, 2, 6])
def test_set_reward_does_not_update_values_for_each_state_if_learning_disabled(
        self):
    # With learning disabled, the values seeded here are expected to be
    # unchanged after the reward for an X win is applied.
    self.player.disable_learning()

    seeded_history = (
        ("---|-X-|---", 0.6),
        ("-O-|-X-|---", 0.55),
        ("-O-|-X-|--X", 0.7),
        ("-OO|-X-|--X", 0.85),
        ("XOO|-X-|--X", 1.0),
    )
    for pieces, seeded_value in seeded_history:
        self.player.values[get_board_state_tuple(pieces)] = seeded_value

    expected_values = [seeded_value for _, seeded_value in seeded_history]
    visited_pieces = [pieces for pieces, _ in seeded_history]
    self.assert_values_after_reward_are(expected_values, visited_pieces,
                                        Board.X)
def assert_get_value_and_state_symmetric_is(self, value, pieces,
                                            symmetric_pieces, winner, piece):
    """Check the value and state returned for a symmetric board.

    Sets the player's piece to ``piece``, calls
    ``self.player._get_value_and_state`` with the state built from
    ``symmetric_pieces`` and ``winner``, then asserts that:

    * the returned value is almost equal to ``value``;
    * the returned state equals the state built from ``pieces``;
    * that state is a key of ``self.player.values``;
    * the queried symmetric state, when it differs from that state, is
      not a key of ``self.player.values``.
    """
    self.player.set_piece(piece)
    expected_state = get_board_state_tuple(pieces)
    queried_state = get_board_state_tuple(symmetric_pieces)

    result = self.player._get_value_and_state(queried_state, winner)
    actual_value, actual_state = result

    self.assertAlmostEqual(value, actual_value)
    self.assertEqual(expected_state, actual_state)
    self.assertIn(expected_state, self.player.values)

    if queried_state == expected_state:
        # The queried board is the stored one, so there is no separate
        # symmetric key to rule out.
        return
    self.assertNotIn(queried_state, self.player.values)
def test_get_move_chooses_best_available_move_if_random_gte_epsilon(
        self, random_mock, choice_mock):
    # Only one state has a seeded value, so the choice mock is expected to
    # be offered just position 5, which is also the expected move.
    random_mock.return_value = 0.1
    choice_mock.side_effect = MockRandom(0).choice

    seeded_state = get_board_state_tuple("---|-XO|---")
    self.player.values[seeded_state] = 0.501

    best_move = 5
    assert_get_move_is(self, self.player, self.board, best_move, Board.O,
                       "---|-X-|---")
    choice_mock.assert_called_once_with([best_move])
def assert_game_info_is(self, winner, winning_positions, turn, pieces,
                        game_info):
    """Assert that ``game_info`` equals the dict built from the arguments.

    The expected dict has the keys ``winner``, ``winning_positions``,
    ``turn`` and ``board``, where ``board`` is the list form of
    ``get_board_state_tuple(pieces)``.
    """
    expected_board = list(get_board_state_tuple(pieces))
    self.assertEqual(
        {
            "winner": winner,
            "winning_positions": winning_positions,
            "turn": turn,
            "board": expected_board,
        },
        game_info)
def test_get_move_chooses_best_available_move_if_learning_disabled(
        self, random_mock, choice_mock):
    # With learning disabled, the random mock must not be called at all.
    # The choice mock is expected to be offered only position 4, which is
    # also the expected move.
    self.player.disable_learning()
    random_mock.return_value = 0.099
    choice_mock.side_effect = MockRandom(0).choice

    seeded_state = get_board_state_tuple("---|-O-|--X")
    self.player.values[seeded_state] = 0.501

    best_move = 4
    assert_get_move_is(self, self.player, self.board, best_move, Board.O,
                       "---|---|--X")
    random_mock.assert_not_called()
    choice_mock.assert_called_once_with([best_move])
def assert_values_after_reward_are(self, values, pieces_list, winner):
    """Replay a game, apply the reward, and check the stored values.

    The player's piece is set to ``Board.X``. For each entry of
    ``pieces_list`` the board is set to that position and
    ``self.player.store_state()`` is called; ``self.player.set_reward``
    is then called with ``winner``.

    Afterwards the keys of ``self.player.values`` must be exactly the
    states built from ``pieces_list``, and the value stored for each
    state must be almost equal to the matching entry of ``values``.

    Args:
        values: Expected values, parallel to ``pieces_list``.
        pieces_list: Board strings to replay, in order.
        winner: Passed through to ``self.player.set_reward``.
    """
    self.player.set_piece(Board.X)
    values_dict = {}
    for pieces, value in zip(pieces_list, values):
        set_board(self.board, pieces)
        self.player.store_state()
        values_dict[get_board_state_tuple(pieces)] = value
    self.player.set_reward(winner)

    self.assertEqual(sorted(values_dict.keys()),
                     sorted(self.player.values.keys()))
    # Compare against the EXPECTED values. Iterating self.player.values
    # here would compare each stored value with itself and never fail.
    for key, expected_value in values_dict.items():
        self.assertAlmostEqual(expected_value, self.player.values[key])
def test_get_move_chooses_best_available_move_if_random_gte_epsilon(
        self, random_mock, choice_mock):
    # One state is seeded with the highest value (0.501) and three others
    # with a lower one (0.499). The choice mock is expected to be offered
    # only position 5, which is also the expected move.
    random_mock.return_value = 0.1
    choice_mock.side_effect = MockRandom(0).choice

    seeded_values = (
        # Original
        # Reflected horizontally
        ("---|-XO|---", 0.501),
        # Force other symmetric choices to be of lower value
        # Rotated by 90 degrees
        # Reflected on right diagonal
        ("-O-|-X-|---", 0.499),
        # Rotated by 180 degrees
        # Reflected vertically
        ("---|OX-|---", 0.499),
        # Rotated by 270 degrees
        # Reflected on left diagonal
        ("---|-X-|-O-", 0.499),
    )
    for pieces, seeded_value in seeded_values:
        self.player.values[get_board_state_tuple(pieces)] = seeded_value

    assert_get_move_is(self, self.player, self.board, 5, Board.O,
                       "---|-X-|---")
    choice_mock.assert_called_once_with([5])
def test_get_move_values_returns_move_values_for_available_moves(self):
    # Each row: (move position, board after O plays there, seeded value).
    # The same table drives both the seeding and the expected mapping.
    seeded_moves = (
        (2, "XOO|---|-X-", 0.7),
        (3, "XO-|O--|-X-", 0.65),
        (4, "XO-|-O-|-X-", 0.75),
        (5, "XO-|--O|-X-", 0.62),
        (6, "XO-|---|OX-", 0.72),
        (8, "XO-|---|-XO", 0.67),
    )
    for _, pieces, seeded_value in seeded_moves:
        self.player.values[get_board_state_tuple(pieces)] = seeded_value

    expected_move_values = {
        position: seeded_value for position, _, seeded_value in seeded_moves
    }
    assert_get_move_values_are(self, self.player, self.board,
                               expected_move_values, Board.O, "XO-|---|-X-")
def test_get_num_states_returns_correct_num_of_states(self):
    # Seed three distinct states and expect get_num_states() to report 3.
    seeded_states = (
        ("---|-X-|---", 0.9),
        ("---|-X-|--O", 0.75),
        ("---|XX-|--O", 0.7),
    )
    for pieces, seeded_value in seeded_states:
        self.player.values[get_board_state_tuple(pieces)] = seeded_value

    self.assertEqual(len(seeded_states), self.player.get_num_states())
def test_get_value_and_state_returns_current_value_if_symmetric_state_known(
        self):
    # Seed values for two base states, then query each of their eight
    # symmetric views and expect the base state and its seeded value back.
    base_for_x = "XO-|X--|---"
    base_for_o = "XO-|X--|O--"
    self.player.values[get_board_state_tuple(base_for_x)] = 0.56
    self.player.values[get_board_state_tuple(base_for_o)] = 0.45

    # Each row: (view of base_for_x, view of base_for_o).
    symmetric_views = (
        # Original
        ("XO-|X--|---", "XO-|X--|O--"),
        # Rotated by 90 degrees
        ("---|O--|XX-", "---|O--|XXO"),
        # Rotated by 180 degrees
        ("---|--X|-OX", "--O|--X|-OX"),
        # Rotated by 270 degrees
        ("-XX|--O|---", "OXX|--O|---"),
        # Reflected horizontally
        ("---|X--|XO-", "O--|X--|XO-"),
        # Reflected vertically
        ("-OX|--X|---", "-OX|--X|--O"),
        # Reflected on left diagonal
        ("XX-|O--|---", "XXO|O--|---"),
        # Reflected on right diagonal
        ("---|--O|-XX", "---|--O|OXX"),
    )
    for view_for_x, view_for_o in symmetric_views:
        self.assert_get_value_and_state_symmetric_is(
            0.56, base_for_x, view_for_x, None, Board.X)
        self.assert_get_value_and_state_symmetric_is(
            0.45, base_for_o, view_for_o, None, Board.O)