def testAgainstSecondMoveHeuristikAgentIn100Testgames(self):
    # Plays 100 games on a 4x4 board. The TD-Q agent moves first and the
    # heuristic search agent answers. A victory is credited to the TD-Q agent
    # when the last mover is 'X' and to the heuristic agent when it is 'O'.
    # The test requires at least 50 TD-Q wins.
    heuristic_wins = 0
    tdq_wins = 0

    for _ in range(100):
        board = TicTacToe(4)
        tdq_agent = TicTacToeTDQLearningAgent(
            TICTACTOE_4x4_TDQ_AGENT_1000_NAME, 4)

        while not board.is_terminal():
            suggestion = tdq_agent.suggestAction(board)
            print(suggestion)
            board.make_move(suggestion)
            # The heuristic agent only answers while the game is still open.
            if board.is_terminal():
                break
            HeuristicSearchAgentTicTacToe.processAction(board)

        print(board.printable_game_matrix())

        # Draws leave both counters untouched.
        if board.is_victory():
            winner = board.get_player_which_moved_last()
            if winner == 'X':
                tdq_wins += 1
            elif winner == 'O':
                heuristic_wins += 1

    heuristic_summary = (
        'Second move heuristic search agent wins: '
        + str(heuristic_wins)
        + ' games against TD-Q-Agent-1000 in 16 field Tic Tac Toe!')
    tdq_summary = (
        'First move TD-Q-Agent-1000 wins: '
        + str(tdq_wins)
        + ' games against heuristic search agent in 16 field Tic Tac Toe!')
    print(heuristic_summary)
    print(tdq_summary)

    self.assertTrue(tdq_wins >= 50)
def test_three_times_three_printable_game_matrix(self):
    # Fills every cell of a 3x3 board from a fixed action sequence and checks
    # the text rendering produced by printable_game_matrix().
    expected_rendering = 'X | O | O\n---------\nO | X | X\n---------\nX | O | X\n'
    moves = [
        (0, 0), (1, 0), (2, 0),
        (0, 2), (2, 2), (2, 1),
        (1, 1), (0, 1), (1, 2),
    ]

    board = TicTacToe(3)
    board.initialize_game_matrix_with_action_sequence(moves)

    self.assertEqual(board.printable_game_matrix(), expected_rendering)
def testAgainstFirstMoveRandomAgentIn100Testgames(self):
    # Plays 100 games on a 3x3 board. The random agent moves first and the
    # TD-Q agent answers. A victory is credited to the random agent when the
    # last mover is 'X' and to the TD-Q agent when it is 'O'.
    # The test requires at least 50 TD-Q wins.
    random_wins = 0
    tdq_wins = 0

    for _ in range(100):
        board = TicTacToe(3)
        tdq_agent = TicTacToeTDQLearningAgent(
            TICTACTOE_3x3_TDQ_AGENT_1000_NAME, 3)

        while not board.is_terminal():
            RandomAgent.processTicTacToeAction(board)
            # The TD-Q agent only answers while the game is still open.
            if board.is_terminal():
                break
            board.make_move(tdq_agent.suggestAction(board))

        print(board.printable_game_matrix())

        # Draws leave both counters untouched.
        if board.is_victory():
            winner = board.get_player_which_moved_last()
            if winner == 'X':
                random_wins += 1
            elif winner == 'O':
                tdq_wins += 1

    random_summary = (
        'First Move random agent wins: '
        + str(random_wins)
        + ' games against TD-Q-Agent-1000 in 9 field Tic Tac Toe!')
    tdq_summary = (
        'Second Move TD-Q-Agent-1000 wins: '
        + str(tdq_wins)
        + ' games against random agent in 9 field Tic Tac Toe!')
    print(random_summary)
    print(tdq_summary)

    self.assertTrue(tdq_wins >= 50)
def testAgainstFirstMoveRandomAgentIn9FieldTicTacToe(self):
    # Plays 100 games on a 3x3 board. The random agent moves first and the
    # heuristic search agent answers. A victory is credited to the random
    # agent when the last mover is 'X' and to the heuristic agent when it is
    # 'O'. The test requires at least 60 heuristic-agent wins.
    random_wins = 0
    heuristic_wins = 0

    for _ in range(100):
        board = TicTacToe(3)

        while not board.is_terminal():
            RandomAgent.processTicTacToeAction(board)
            # The heuristic agent only answers while the game is still open.
            if board.is_terminal():
                break
            HeuristicSearchAgentTicTacToe.processAction(board)

        print(board.printable_game_matrix())

        # Draws leave both counters untouched.
        if board.is_victory():
            winner = board.get_player_which_moved_last()
            if winner == 'X':
                random_wins += 1
            elif winner == 'O':
                heuristic_wins += 1

    random_summary = (
        'First Move random agent wins: '
        + str(random_wins)
        + ' games against heuristic search agent in 9 field Tic Tac Toe!')
    heuristic_summary = (
        'Second Move heuristic search agent wins: '
        + str(heuristic_wins)
        + ' games against random agent in 9 field Tic Tac Toe!')
    print(random_summary)
    print(heuristic_summary)

    self.assertTrue(heuristic_wins >= 60)
score += 0.1 if value == 3: score += 0.3 if self.__calculate_player_turn(state) == player_token: score += score * 2 return score def __calculate_player_turn(self, tictactoe_state=TicTacToe(4)): if tictactoe_state.count_of_game_tokens_in_game() % 2 == 0: return 'X' elif tictactoe_state.count_of_game_tokens_in_game() % 2 == 1: return 'O' # state = result(result(result(result(result(result(ttt_state, (0, 0)), (0, 1)), (0,2)), (1,0)), (1,2)), (1,1)) ttt_state = TicTacToe(4) zobrist_hasing = TicTacToeZobrist() pvs = PrincipalVariationSearch(zobrist_hasing) # ttt_state = pvs.result( # pvs.result(pvs.result(pvs.result(pvs.result(pvs.result(ttt_state, (2, 1)), (2, 0)), (1, 2)), (0, 0)), (2, 2)), # (1, 1)) #ttt_state = pvs.result(pvs.result(pvs.result(pvs.result(ttt_state, (2, 1)), (2, 0)), (1, 2)), (0, 0)) ttt_state = pvs.result(pvs.result(pvs.result(pvs.result(pvs.result(pvs.result(pvs.result(pvs.result(ttt_state, (1, 1)), (2, 0)), (2, 1)), (0, 0)), (0, 1)), (3, 1)), (2, 2)), (1, 0)) #ttt_state = pvs.result(pvs.result(ttt_state, (1, 1)), (2, 0)) # print ttt_state.printable_game_matrix() # time_before_funciton_call = time.time() # print pvs.zobrist_alpha_beta_search(ttt_state) # print 'Time in milliseconds: ' + str(int((time.time() - time_before_funciton_call) * 1000)) # print count print ttt_state.printable_game_matrix() print pvs.evaluate(ttt_state)
elif state.count_of_game_tokens_in_game() % 2 == 1: return 'O' def terminal_test(state): if state.is_victory(): return True elif not state.is_victory() and state.count_of_game_tokens_in_game( ) == state.get_maximal_amount_of_game_tokens(): return True else: return False def utility(state): if player(state) == 'X' and state.is_victory(): return -1 elif player(state) != 'X' and state.is_victory(): return 1 elif state.count_of_game_tokens_in_game( ) == state.get_maximal_amount_of_game_tokens() and not state.is_victory(): return 0 ttt = TicTacToe(4) ttt.make_move((1, 1)) ttt.make_move((2, 1)) ttt.make_move((2, 2)) ttt.make_move((1, 2)) print ttt.printable_game_matrix() print iterative_deepening(ttt, 1)