class TestDiagonalVictory(unittest.TestCase):
    """Tests for ``TicTacToe.is_diagonal_victory`` on a 4x4 board."""

    def setUp(self):
        """Replay six moves: X on (0,0), (1,1), (2,2); O on (3,0), (3,1), (3,2)."""
        self.example_ticTacToe_game = TicTacToe(4)
        for move in [(0, 0), (3, 0), (1, 1), (3, 1), (2, 2), (3, 2)]:
            self.example_ticTacToe_game.make_move(move)

    def test_diagonal_victory_top_left_to_bottom_right(self):
        """The seventh move on (3, 3) completes the (0,0)-(3,3) diagonal."""
        self.example_ticTacToe_game.make_move((3, 3))
        self.assertTrue(self.example_ticTacToe_game.is_diagonal_victory())

    def test_digital_victory_top_right_to_bottom_left(self):
        """The first player fills (0,3), (1,2), (2,1), (3,0) and must win diagonally."""
        example_tictactoe = TicTacToe(4)
        for move in [(0, 3), (0, 0), (1, 2), (1, 0), (2, 1), (2, 0), (3, 0)]:
            example_tictactoe.make_move(move)
        self.assertTrue(example_tictactoe.is_diagonal_victory())

    def test_no_digital_victory(self):
        """The fixture position holds only three tokens on the diagonal."""
        # Fixed: the original asserted on is_vertical_victory(), so this test
        # never exercised the diagonal check it is named after.
        self.assertFalse(self.example_ticTacToe_game.is_diagonal_victory())
def test_count_tokens_in_pure_connection_4(self):
    """Expect a count of 0 for 'X' on the connection through (2, 2) and (3, 2)."""
    moves = [(0, 0), (3, 0), (1, 1), (2, 1),
             (2, 2), (1, 2), (3, 2), (0, 3)]
    board = TicTacToe(4)
    board.initialize_game_matrix_with_action_sequence(moves)
    counted_tokens = board.count_tokens_in_pure_connection(
        (2, 2), (3, 2), 'X')
    self.assertEqual(0, counted_tokens)
def create_random_tictactoe_training_example():
    """Play one random 4x4 game and label its outcome.

    Moves are drawn uniformly from ``get_possible_moves()`` until the game
    is decided. Rounds with an even index belong to the player who moves
    first; a victory in such a round is labelled ``'win'``, a victory in an
    odd round ``'lost'``. If the last cell is filled without a victory the
    label is ``'draw'``.

    Returns
    -------
    list
        ``[action_sequence, target_value, tictactoe]``: the list of played
        actions, the outcome label and the final game object.
    """
    dimension = 4
    # Index of the round that fills the last free cell of the board.
    last_round = dimension * dimension - 1

    tictactoe = TicTacToe(dimension)
    round_count = 0
    target_value = ''
    action_sequence = []
    while target_value == '':
        actions = tictactoe.get_possible_moves()
        chosen_action = actions[randint(0, len(actions) - 1)]
        action_sequence.append(chosen_action)
        # The original branched on the round parity here, but both branches
        # performed the very same call.
        tictactoe.make_move(chosen_action)
        # Evaluate the victory condition once per round instead of three times.
        if tictactoe.is_victory():
            target_value = 'win' if round_count % 2 == 0 else 'lost'
        elif round_count == last_round:
            target_value = 'draw'
        round_count += 1
    return [action_sequence, target_value, tictactoe]
def setUp(self):
    """Create the shared 4x4 board by replaying a fixed 14-move sequence."""
    replayed_moves = [
        (0, 0), (3, 2), (0, 3), (1, 0), (1, 3), (2, 0), (3, 0),
        (0, 2), (3, 1), (2, 2), (2, 1), (1, 1), (0, 1), (1, 2),
    ]
    self.tictactoe = TicTacToe(4)
    self.tictactoe.initialize_game_matrix_with_action_sequence(replayed_moves)
def setUp(self):
    """Create a 4x4 board and play six opening moves, one call per move."""
    self.example_ticTacToe_game = TicTacToe(4)
    opening_moves = ((0, 0), (3, 0), (1, 1), (3, 1), (2, 2), (3, 2))
    for position in opening_moves:
        self.example_ticTacToe_game.make_move(position)
def test_three_times_three_printable_game_matrix(self):
    """A full 3x3 board must render as three rows split by dashed lines."""
    moves = [(0, 0), (1, 0), (2, 0), (0, 2), (2, 2),
             (2, 1), (1, 1), (0, 1), (1, 2)]
    expected_rendering = 'X | O | O\n---------\nO | X | X\n---------\nX | O | X\n'
    board = TicTacToe(3)
    board.initialize_game_matrix_with_action_sequence(moves)
    actual_rendering = board.printable_game_matrix()
    self.assertEqual(actual_rendering, expected_rendering)
def test_initialize_game_matrix_with_action_sequenz(self):
    """Replaying 14 moves must yield the expected 4x4 token matrix."""
    moves = [
        (0, 0), (3, 2), (0, 3), (1, 0), (1, 3), (2, 0), (3, 0),
        (0, 2), (3, 1), (2, 2), (2, 1), (1, 1), (0, 1), (1, 2),
    ]
    board = TicTacToe(4)
    board.initialize_game_matrix_with_action_sequence(moves)
    expected_rows = [
        ['X', 'X', 'O', 'X'],
        ['O', 'O', 'O', 'X'],
        ['O', 'X', 'O', ' '],
        ['X', 'X', 'O', ' '],
    ]
    expected_game_matrix = np.matrix(expected_rows)
    cells_match = expected_game_matrix == board.game_matrix
    self.assertTrue(cells_match.all())
def getRandomNonTerminalTicTacToeState():
    """Return a 4x4 game reached by up to 15 random moves without a victory.

    A random move that produces a victory is taken back immediately, so the
    returned board never contains a winning line.
    """
    board = TicTacToe(4)
    moves_to_try = randint(0, 15)
    attempt = 0
    while attempt < moves_to_try:
        attempt += 1
        if board.is_victory():
            continue
        board.make_move(getRandomAction(board.get_possible_moves()))
        if board.is_victory():
            # Revert the winning move to keep the state non-terminal.
            board.undo_move()
    return board
def test_10_possible_moves(self):
    """After six moves exactly the ten untouched cells remain available."""
    board = TicTacToe(4)
    for occupied in ((0, 0), (0, 1), (0, 2), (0, 3), (1, 0), (2, 0)):
        board.make_move(occupied)
    remaining_cells = [
        (1, 1), (1, 2), (1, 3),
        (2, 1), (2, 2), (2, 3),
        (3, 0), (3, 1), (3, 2), (3, 3),
    ]
    self.assertEqual(remaining_cells, board.get_possible_moves())
def test_undo_move_2(self):
    """Undoing five of eight replayed moves leaves only the first three tokens."""
    moves = [(0, 0), (3, 0), (1, 1), (2, 1),
             (2, 2), (1, 2), (3, 2), (0, 3)]
    board = TicTacToe(4)
    board.initialize_game_matrix_with_action_sequence(moves)
    for _ in range(5):
        board.undo_move()
    expected_game_matrix = np.matrix([
        ['X', ' ', ' ', ' '],
        [' ', 'X', ' ', ' '],
        [' ', ' ', ' ', ' '],
        ['O', ' ', ' ', ' '],
    ])
    cells_match = expected_game_matrix == board.game_matrix
    self.assertTrue(cells_match.all())
class TestVerticalVictory(unittest.TestCase):
    """Tests for ``TicTacToe.is_vertical_victory`` on a 4x4 board."""

    # Six opening moves: the first player takes (0,0), (1,0), (2,0); the
    # second player takes (3,3), (2,3), (1,3).
    OPENING_MOVES = ((0, 0), (3, 3), (1, 0), (2, 3), (2, 0), (1, 3))

    def setUp(self):
        """Play the opening moves on a fresh board."""
        self.example_ticTacToe_game = TicTacToe(4)
        for position in self.OPENING_MOVES:
            self.example_ticTacToe_game.make_move(position)

    def test_vertical_victory(self):
        """Adding (3, 0) gives the first player four tokens at column index 0."""
        game = self.example_ticTacToe_game
        game.make_move((3, 0))
        self.assertTrue(game.is_vertical_victory())

    def test_no_vertical_victory(self):
        """The opening position alone must not count as a vertical victory."""
        game = self.example_ticTacToe_game
        self.assertFalse(game.is_vertical_victory())
def qLearnIteration(self, sPrime, rPrime, alpha, gamma):
    """Perform one TD Q-learning step and choose the next action.

    The Q-value of the previously remembered state/action pair is moved
    towards ``r + gamma * max_a' Q(sPrime, a')`` and the agent then
    remembers ``sPrime``, the newly chosen action and ``rPrime`` for the
    next call.

    Parameters
    ----------
    sPrime : TicTacToe
        A Tic Tac Toe game situation.
    rPrime : float
        A reward for an action in a state.
    alpha : float
        The learning rate for stronger or weaker TD update of Q-Values.
    gamma: float
        The discounting factor for future rewards.

    Returns
    -------
    tuple
        An action tuple which should be explored next.
    """
    # NOTE(review): the nesting below was reconstructed from a flattened
    # line; confirm the else-branch and the trailing statements against the
    # original file.
    # A terminal state is stored with action None and its reward as value.
    if sPrime.is_terminal():
        self.insertActionValueInQ(sPrime, None, rPrime)
    if self.__s is not None:
        # Temporal-difference update for the remembered pair (s, a).
        qActionValue = self.getActionValueFromQ(self.__s, self.__a)
        maxActionValue = self.maxActionValueForAllActionsInSPrime(sPrime)
        qValueUpdate = qActionValue + alpha * (self.__r + gamma * maxActionValue - qActionValue)
        self.insertActionValueInQ(self.__s, self.__a, qValueUpdate)
        logging.info('\nUpdate Q-Value:')
        logging.info(
            'Q(' + str(self.__zobristHash.get_hash(self.__s.game_matrix)) + ', ' + str(self.__a) + ') <-- ' + str(
                qActionValue) + ' + ' + str(alpha) + '(' + str(self.__r) + ' + ' + str(gamma) + ' * ' + str(
                maxActionValue) + ' - ' + str(qActionValue) + ') = ' + str(qValueUpdate))
        logging.info('\n' + sPrime.printable_game_matrix())
        logging.info(self.__zobristHash.get_hash(sPrime.game_matrix))
    else:
        # First call of an episode: no previous state is remembered yet.
        self.__s = TicTacToe(self.__dimension)
    # Remember s <- s', a <- chosen action, r <- r' for the next iteration.
    self.__s.initialize_game_with_another_game(sPrime)
    self.__a = self.explorationStrategy(sPrime)
    self.__r = rPrime
    return self.__a
def alpha_beta_search(state=None):
    """Evaluate every legal action of ``state`` and return the best one.

    Each action is applied to ``state``, scored with ``max_value`` over the
    full ``(-sys.maxint, sys.maxint)`` window and then undone again.

    Parameters
    ----------
    state : TicTacToe, optional
        The position to search. When omitted, a fresh 4x4 game is created
        for this call.

    Returns
    -------
    tuple
        ``(best_action, utility_of_best_action)``. The highest utility is
        chosen when ``player(state)`` is ``'X'``, the lowest otherwise.
    """
    # Fixed: the former default ``TicTacToe(4)`` was built once at definition
    # time and shared (and mutated) by every call that omitted the argument.
    if state is None:
        state = TicTacToe(4)
    list_of_actions = actions(state)
    list_of_action_utilities = []
    for action in list_of_actions:
        state.make_move(action)
        try:
            list_of_action_utilities.append(
                max_value(state, -sys.maxint, sys.maxint))
        finally:
            # Always restore the caller's board, even if the search raises.
            state.undo_move()
    if player(state) == 'X':
        best_action_index = argmax(list_of_action_utilities)
    else:
        best_action_index = argmin(list_of_action_utilities)
    return list_of_actions[best_action_index], list_of_action_utilities[best_action_index]
def testAgainstFirstMoveRandomAgentIn9FieldTicTacToe(self):
    """Play 100 games on a 3x3 board: random agent first, heuristic agent second.

    The heuristic search agent is expected to win at least 60 of them.
    """
    randomAgentWins = 0
    heuristicSearchAgentWins = 0
    games_played = 0
    while games_played < 100:
        games_played += 1
        board = TicTacToe(3)
        while not board.is_terminal():
            RandomAgent.processTicTacToeAction(board)
            if not board.is_terminal():
                HeuristicSearchAgentTicTacToe.processAction(board)
        print(board.printable_game_matrix())
        if board.is_victory():
            # The last mover of a won game is the winner.
            last_mover = board.get_player_which_moved_last()
            if last_mover == 'X':
                randomAgentWins += 1
            elif last_mover == 'O':
                heuristicSearchAgentWins += 1
    random_summary = ('First Move random agent wins: ' + str(randomAgentWins) +
                      ' games against heuristic search agent in 9 field Tic Tac Toe!')
    heuristic_summary = ('Second Move heuristic search agent wins: ' + str(heuristicSearchAgentWins) +
                         ' games against random agent in 9 field Tic Tac Toe!')
    print(random_summary)
    print(heuristic_summary)
    self.assertTrue(heuristicSearchAgentWins >= 60)
def learnTictactoe(self, gamesToPlay, alpha=0.4, gamma=1):
    """Represents the self play and learning mode of the TD-Q-Agent.

    Parameters
    ----------
    gamesToPlay : int
        The amount of training games to play against itself.
    alpha : float, optional
        Learning rate handed to ``qLearnIteration`` (default 0.4, the value
        that was previously hard-coded).
    gamma : float, optional
        Discount factor handed to ``qLearnIteration`` (default 1, the value
        that was previously hard-coded).
    """
    for gameCount in range(gamesToPlay):
        ttt = TicTacToe(self.__dimension)
        logging.info('Learning against itself game: ' + str(gameCount))
        while not ttt.is_terminal():
            suggestedAction = self.qLearnIteration(ttt, ttt.getReward(), alpha, gamma)
            ttt.make_move(suggestedAction)
        # The loop only exits on a terminal state, so the final learning step
        # needs no extra is_terminal() check.
        self.qLearnIteration(ttt, ttt.getReward(), alpha, gamma)
        # Forget the remembered state/action/reward before the next game.
        self.__s = None
        self.__a = None
        self.__r = None
        if gameCount % 100 == 0:
            self.__random_factor += 1
class TestConnectionAnalysis(unittest.TestCase):
    """Tests for the connection helpers of ``TicTacToe`` on 4x4 boards."""

    # 14-move sequence behind the shared ``self.tictactoe`` fixture.
    FIXTURE_MOVES = (
        (0, 0), (3, 2), (0, 3), (1, 0), (1, 3), (2, 0), (3, 0),
        (0, 2), (3, 1), (2, 2), (2, 1), (1, 1), (0, 1), (1, 2),
    )
    # 7-move sequence used by test_is_connection_pure_7.
    SEVEN_MOVES = (
        (0, 0), (3, 0), (1, 1), (2, 1), (2, 2), (1, 2), (3, 3),
    )
    # 8-move sequence used by the remaining stand-alone boards.
    EIGHT_MOVES = (
        (0, 0), (3, 0), (1, 1), (2, 1), (2, 2), (1, 2), (3, 2), (0, 3),
    )

    @staticmethod
    def _board_after(moves):
        """Return a fresh 4x4 game initialised with a list copy of ``moves``."""
        board = TicTacToe(4)
        board.initialize_game_matrix_with_action_sequence(list(moves))
        return board

    def setUp(self):
        """Build the shared fixture board."""
        self.tictactoe = TicTacToe(4)
        self.tictactoe.initialize_game_matrix_with_action_sequence(
            list(self.FIXTURE_MOVES))

    def test_get_valid_positions_by_two_positions_1(self):
        found = self.tictactoe.get_victory_relevant_positions_by_two_given_positions(
            (0, 0), (1, 1))
        self.assertEqual([(0, 0), (1, 1), (2, 2), (3, 3)], found)

    def test_get_valid_positions_by_two_positions_2(self):
        found = self.tictactoe.get_victory_relevant_positions_by_two_given_positions(
            (3, 0), (1, 2))
        self.assertEqual([(3, 0), (2, 1), (1, 2), (0, 3)], found)

    def test_get_valid_positions_by_two_positions_3(self):
        found = self.tictactoe.get_victory_relevant_positions_by_two_given_positions(
            (1, 0), (1, 2))
        self.assertEqual([(1, 0), (1, 1), (1, 2), (1, 3)], found)

    def test_get_valid_positions_by_two_positions_4(self):
        found = self.tictactoe.get_victory_relevant_positions_by_two_given_positions(
            (2, 1), (0, 1))
        self.assertEqual([(0, 1), (1, 1), (2, 1), (3, 1)], found)

    def test_is_connection_pure_1(self):
        pure = self.tictactoe.is_connection_pure((0, 3), (1, 3), 'X')
        self.assertTrue(pure)

    def test_is_connection_pure_2(self):
        pure = self.tictactoe.is_connection_pure((0, 3), (3, 3), 'X')
        self.assertTrue(pure)

    def test_is_connection_pure_3(self):
        pure = self.tictactoe.is_connection_pure((1, 1), (1, 2), 'O')
        self.assertFalse(pure)

    def test_is_connection_pure_4(self):
        pure = self.tictactoe.is_connection_pure((1, 1), (1, 2), 'X')
        self.assertFalse(pure)

    def test_is_connection_pure_5(self):
        pure = self.tictactoe.is_connection_pure((1, 2), (3, 2), 'O')
        self.assertTrue(pure)

    def test_is_connection_pure_6(self):
        pure = self.tictactoe.is_connection_pure((1, 1), (2, 2), 'O')
        self.assertFalse(pure)

    def test_is_connection_pure_7(self):
        board = self._board_after(self.SEVEN_MOVES)
        self.assertTrue(board.is_connection_pure((1, 1), (2, 2), 'X'))

    def test_is_connection_pure_8(self):
        board = self._board_after(self.EIGHT_MOVES)
        self.assertTrue(board.is_connection_pure((3, 0), (1, 2), 'O'))

    def test_count_tokens_in_pure_connection_1(self):
        board = self._board_after(self.EIGHT_MOVES)
        self.assertEqual(
            3, board.count_tokens_in_pure_connection((0, 0), (1, 1), 'X'))

    def test_count_tokens_in_pure_connection_2(self):
        board = self._board_after(self.EIGHT_MOVES)
        self.assertEqual(
            4, board.count_tokens_in_pure_connection((3, 0), (1, 2), 'O'))

    def test_count_tokens_in_pure_connection_3(self):
        board = self._board_after(self.EIGHT_MOVES)
        self.assertEqual(
            0, board.count_tokens_in_pure_connection((0, 0), (3, 0), 'O'))

    def test_count_tokens_in_pure_connection_4(self):
        board = self._board_after(self.EIGHT_MOVES)
        self.assertEqual(
            0, board.count_tokens_in_pure_connection((2, 2), (3, 2), 'X'))
class TestVictory(unittest.TestCase):
    """Tests for the combined ``TicTacToe.is_victory`` check on a 4x4 board."""

    # Twelve opening moves, played in order by setUp before every test.
    OPENING_MOVES = (
        (0, 0), (0, 1), (0, 2), (0, 3),
        (1, 0), (2, 0), (1, 1), (2, 1),
        (1, 2), (3, 0), (2, 2), (3, 1),
    )

    def setUp(self):
        """Play the twelve opening moves on a fresh board."""
        self.example_ticTacToe_game = TicTacToe(4)
        for position in self.OPENING_MOVES:
            self.example_ticTacToe_game.make_move(position)

    def test_no_victory(self):
        """The opening position alone is not a victory."""
        game = self.example_ticTacToe_game
        self.assertFalse(game.is_victory())

    def test_only_horizontal_victory(self):
        """Playing (1, 3) must be reported as a victory."""
        game = self.example_ticTacToe_game
        game.make_move((1, 3))
        self.assertTrue(game.is_victory())

    def test_only_vertical_victory(self):
        """Playing (3, 2) must be reported as a victory."""
        game = self.example_ticTacToe_game
        game.make_move((3, 2))
        self.assertTrue(game.is_victory())

    def test_only_diagonal_victory(self):
        """Playing (3, 3) must be reported as a victory."""
        game = self.example_ticTacToe_game
        game.make_move((3, 3))
        self.assertTrue(game.is_victory())
def test_digital_victory_top_right_to_bottom_left(self):
    """The first player fills (0,3), (1,2), (2,1), (3,0) and must win diagonally."""
    board = TicTacToe(4)
    alternating_moves = (
        (0, 3), (0, 0),
        (1, 2), (1, 0),
        (2, 1), (2, 0),
        (3, 0),
    )
    for position in alternating_moves:
        board.make_move(position)
    self.assertTrue(board.is_diagonal_victory())
def __calculate_player_turn(self, tictactoe_state=None):
    """Return the token of the player whose turn it is in ``tictactoe_state``.

    Parameters
    ----------
    tictactoe_state : TicTacToe, optional
        The position to inspect. When omitted, a fresh 4x4 game is used.

    Returns
    -------
    str
        ``'X'`` when an even number of tokens is on the board, ``'O'``
        otherwise.
    """
    # Fixed: the former default ``TicTacToe(4)`` was constructed once at
    # definition time and shared by every call.
    if tictactoe_state is None:
        tictactoe_state = TicTacToe(4)
    # Query the board once; an explicit else replaces the implicit None
    # fall-through of the old if/elif chain.
    token_count = tictactoe_state.count_of_game_tokens_in_game()
    if token_count % 2 == 0:
        return 'X'
    return 'O'
def getRandomTerminalTicTacToeState():
    """Play random moves on a 4x4 board until it is terminal and return it."""
    board = TicTacToe(4)
    while True:
        if isTicTacToeStateTerminal(board):
            return board
        candidate_moves = board.get_possible_moves()
        board.make_move(getRandomAction(candidate_moves))
def test_16_possible_moves(self):
    """An empty 4x4 board offers all 16 cells, ordered by first then second index."""
    board = TicTacToe(4)
    every_cell = [(first, second)
                  for first in range(4)
                  for second in range(4)]
    self.assertEqual(every_cell, board.get_possible_moves())
def result(state, action):
    """Return a copy of ``state`` with ``action`` applied; ``state`` is not the object moved on.

    NOTE(review): the copy is always created as ``TicTacToe(3)``; confirm
    that ``initialize_game_with_another_game`` copes with other board sizes.
    """
    successor = TicTacToe(3)
    successor.initialize_game_with_another_game(state)
    successor.make_move(action)
    return successor
def test_initialize_4x4_game_matrix(self):
    """A new 4x4 game starts with a matrix of single-space cells."""
    board = TicTacToe(4)
    blank_rows = [[' ' for _ in range(4)] for _ in range(4)]
    test_matrix = np.matrix(blank_rows)
    cells_match = test_matrix == board.game_matrix
    self.assertTrue(cells_match.all())
def testAgainstSecondMoveHeuristikAgentIn100Testgames(self):
    """Play 100 games on a 3x3 board: TD-Q-Agent-100 first, heuristic agent second.

    The TD-Q agent is expected to win at least 50 of them.
    """
    heuristicSearchAgentWins = 0
    tdqAgent100Wins = 0
    games_played = 0
    while games_played < 100:
        games_played += 1
        board = TicTacToe(3)
        tdqAgent100 = TicTacToeTDQLearningAgent(TICTACTOE_3x3_TDQ_AGENT_100_NAME, 3)
        while not board.is_terminal():
            action = tdqAgent100.suggestAction(board)
            print(action)
            board.make_move(action)
            if not board.is_terminal():
                HeuristicSearchAgentTicTacToe.processAction(board)
        print(board.printable_game_matrix())
        if board.is_victory():
            # The last mover of a won game is the winner.
            last_mover = board.get_player_which_moved_last()
            if last_mover == 'X':
                tdqAgent100Wins += 1
            elif last_mover == 'O':
                heuristicSearchAgentWins += 1
    heuristic_summary = ('Second move heuristic search agent wins: ' + str(heuristicSearchAgentWins) +
                         ' games against TD-Q-Agent-100 in 9 field Tic Tac Toe!')
    tdq_summary = ('First move TD-Q-Agent-100 wins: ' + str(tdqAgent100Wins) +
                   ' games against heuristic search agent in 9 field Tic Tac Toe!')
    print(heuristic_summary)
    print(tdq_summary)
    self.assertTrue(tdqAgent100Wins >= 50)
def testAgainstFirstMoveRandomAgentIn100Testgames(self):
    """Play 100 games on a 3x3 board: random agent first, TD-Q-Agent-1000 second.

    The TD-Q agent is expected to win at least 50 of them.
    """
    randomAgentWins = 0
    tdqAgent1000Wins = 0
    games_played = 0
    while games_played < 100:
        games_played += 1
        board = TicTacToe(3)
        tdqAgent1000 = TicTacToeTDQLearningAgent(TICTACTOE_3x3_TDQ_AGENT_1000_NAME, 3)
        while not board.is_terminal():
            RandomAgent.processTicTacToeAction(board)
            if not board.is_terminal():
                board.make_move(tdqAgent1000.suggestAction(board))
        print(board.printable_game_matrix())
        if board.is_victory():
            # The last mover of a won game is the winner.
            last_mover = board.get_player_which_moved_last()
            if last_mover == 'X':
                randomAgentWins += 1
            elif last_mover == 'O':
                tdqAgent1000Wins += 1
    random_summary = ('First Move random agent wins: ' + str(randomAgentWins) +
                      ' games against TD-Q-Agent-1000 in 9 field Tic Tac Toe!')
    tdq_summary = ('Second Move TD-Q-Agent-1000 wins: ' + str(tdqAgent1000Wins) +
                   ' games against random agent in 9 field Tic Tac Toe!')
    print(random_summary)
    print(tdq_summary)
    self.assertTrue(tdqAgent1000Wins >= 50)
def test_is_connection_pure_8(self):
    """The connection through (3, 0) and (1, 2) must be pure for 'O'."""
    moves = [(0, 0), (3, 0), (1, 1), (2, 1),
             (2, 2), (1, 2), (3, 2), (0, 3)]
    board = TicTacToe(4)
    board.initialize_game_matrix_with_action_sequence(moves)
    is_pure = board.is_connection_pure((3, 0), (1, 2), 'O')
    self.assertTrue(is_pure)
# NOTE(review): the following return is the tail of a definition whose header
# lies outside this chunk; its indentation is a guess and must be confirmed.
    return 'O'


def terminal_test(state):
    """Return True when ``state`` is won or the board is completely filled."""
    if state.is_victory():
        return True
    elif not state.is_victory() and state.count_of_game_tokens_in_game() == state.get_maximal_amount_of_game_tokens():
        # No victory, but every field holds a token.
        return True
    else:
        return False


def utility(state):
    """Return the utility of ``state``: -1, 1 or 0.

    -1 when the state is a victory and ``player(state)`` is 'X', 1 when it
    is a victory and ``player(state)`` is not 'X', 0 when the board is full
    without a victory. For any other state no branch matches and the
    function implicitly returns None.
    """
    # presumably player(state) is the side to move, i.e. the side that did
    # NOT complete the winning line - verify against player()
    if player(state) == 'X' and state.is_victory():
        return -1
    elif player(state) != 'X' and state.is_victory():
        return 1
    elif state.count_of_game_tokens_in_game() == state.get_maximal_amount_of_game_tokens() and not state.is_victory():
        return 0


# Script section: build a small 3x3 position, run the minimax search on it and
# report the elapsed wall-clock time.
ttt_state = TicTacToe(3)
#state = result(result(result(result(result(result(ttt_state, (0, 0)), (0, 1)), (0,2)), (1,0)), (2, 2)), (1, 2))
#state = result(result(result(result(result(result(ttt_state, (0, 0)), (0, 1)), (0,2)), (1,0)), (1, 2)), (2, 0))
#state = result(result(result(result(ttt_state, (2, 1)), (2, 0)), (1, 2)), (0, 0))
state = result(result(ttt_state, (1, 1)), (2, 0))
print state.printable_game_matrix()
time_before_funciton_call = time.time()
print mini_max_decision(state)
print 'Time in milliseconds: ' + str(int((time.time() - time_before_funciton_call) * 1000))
# NOTE(review): ``count`` is not defined in the visible code; presumably a
# module-level counter updated by the search - confirm.
print count
def test_undo_move_3(self):
    """Five moves followed by two undos must equal replaying the first three moves."""
    played_moves = [(0, 0), (1, 0), (0, 1), (2, 0), (0, 3)]
    board = TicTacToe(4)
    for position in played_moves:
        board.make_move(position)
    for _ in range(2):
        board.undo_move()

    expected_board = TicTacToe(4)
    expected_board.initialize_game_matrix_with_action_sequence(
        [(0, 0), (1, 0), (0, 1)])
    cells_match = expected_board.game_matrix == board.game_matrix
    self.assertTrue(cells_match.all())
def processAction(ticTacToeState=None):
    """Let the search agent choose a move and play it on ``ticTacToeState``.

    Parameters
    ----------
    ticTacToeState : TicTacToe, optional
        The game to move on; it is modified in place. When omitted, a fresh
        4x4 game is created for this call.
    """
    # Fixed: the former default ``TicTacToe(4)`` was built once at definition
    # time, so calls without an argument kept stacking moves on the same
    # shared board.
    if ticTacToeState is None:
        ticTacToeState = TicTacToe(4)
    action = alpha_beta_iterative_deepening_search(ticTacToeState)
    # Only the first element of the search result is played as the move.
    ticTacToeState.make_move(action[0])
class TestMakeMove(unittest.TestCase):
    """Tests for ``TicTacToe.make_move`` on a 4x4 board."""

    # Seven moves shared by the fixture and by test_put_seven_game_tokens.
    SEVEN_MOVES = ((0, 0), (3, 3), (0, 1), (2, 2), (0, 2), (1, 1), (0, 3))

    def setUp(self):
        """Create an empty board and a reference board with seven tokens."""
        self.ticTacToe = TicTacToe(4)
        self.example_ticTacToe_game = TicTacToe(4)
        for position in self.SEVEN_MOVES:
            self.example_ticTacToe_game.make_move(position)

    def test_put_seven_game_tokens(self):
        """Replaying the same seven moves must reproduce the reference matrix."""
        for position in self.SEVEN_MOVES:
            self.ticTacToe.make_move(position)
        equality_matrix = self.example_ticTacToe_game.game_matrix == self.ticTacToe.game_matrix
        self.assertTrue(equality_matrix.all())

    def test_if_value_not_changes_when_position_is_taken(self):
        """Moving onto occupied cells must leave the game matrix untouched."""
        # Fixed: the original bound a second name to the same game object and
        # then compared its matrix with itself, so the test could never fail.
        # Snapshot the matrix before the moves instead.
        matrix_before = self.example_ticTacToe_game.game_matrix.copy()
        for taken_position in ((0, 0), (3, 3), (0, 1), (2, 2)):
            self.example_ticTacToe_game.make_move(taken_position)
        self.assertTrue(
            (matrix_before == self.example_ticTacToe_game.game_matrix).all())