def run(self):
    # Pit a DeepQLearningAgent against the opponent named by self.opponent and run the
    # match under a TensorboardTicTacToeRunner, logging to a timestamped directory.
    if self.opponent == "RandomAgent":
        log_dir1 = self.log_dir_root + "DeepQLearningAgent_VS_RandomAgent_" + self.time
        print(log_dir1)
        print(
            TensorboardTicTacToeRunner(
                DeepQLearningAgent(9, 9),
                RandomAgent(),
                log_and_reset_score_history_threshold=10000,
                log_dir=log_dir1).run(100000000))
    elif self.opponent == "TabularQLearningAgent":
        log_dir2 = self.log_dir_root + "DeepQLearningAgent_VS_TabularQLearningAgent_" + self.time
        print(log_dir2)
        print(
            TensorboardTicTacToeRunner(
                DeepQLearningAgent(9, 9),
                TabularQLearningAgent(),
                log_and_reset_score_history_threshold=10000,
                log_dir=log_dir2).run(100000000))
    elif self.opponent == "DeepQLearningAgent":
        log_dir3 = self.log_dir_root + "DeepQLearningAgent_VS_DeepQLearningAgent_" + self.time
        print(log_dir3)
        print(
            TensorboardTicTacToeRunner(
                DeepQLearningAgent(9, 9),
                DeepQLearningAgent(9, 9),
                log_and_reset_score_history_threshold=10000,
                log_dir=log_dir3).run(100000000))
    elif self.opponent == "ReinforceClassicAgent":
        log_dir4 = self.log_dir_root + "DeepQLearningAgent_VS_ReinforceClassicAgent_" + self.time
        print(log_dir4)
        print(
            TensorboardTicTacToeRunner(
                DeepQLearningAgent(9, 9),
                ReinforceClassicAgent(9, 9),
                log_and_reset_score_history_threshold=10000,
                log_dir=log_dir4).run(100000000))
    elif self.opponent == "ReinforceClassicWithMultipleTrajectoriesAgent":
        log_dir5 = self.log_dir_root + \
            "DeepQLearningAgent_VS_ReinforceClassicWithMultipleTrajectoriesAgent_" + self.time
        print(log_dir5)
        print(
            TensorboardTicTacToeRunner(
                DeepQLearningAgent(9, 9),
                ReinforceClassicWithMultipleTrajectoriesAgent(9, 9),
                log_and_reset_score_history_threshold=10000,
                log_dir=log_dir5).run(100000000))
    elif self.opponent == "PPOWithMultipleTrajectoriesMultiOutputsAgent":
        log_dir6 = self.log_dir_root + \
            "DeepQLearningAgent_VS_PPOWithMultipleTrajectoriesMultiOutputsAgent_" + self.time
        print(log_dir6)
        print(
            TensorboardTicTacToeRunner(
                DeepQLearningAgent(9, 9),
                PPOWithMultipleTrajectoriesMultiOutputsAgent(9, 9),
                log_and_reset_score_history_threshold=10000,
                log_dir=log_dir6).run(100000000))
    elif self.opponent == "MOISMCTSWithRandomRolloutsAgent":
        log_dir7 = self.log_dir_root + "DeepQLearningAgent_VS_MOISMCTSWithRandomRolloutsAgent_" + self.time
        print(log_dir7)
        print(
            TensorboardTicTacToeRunner(
                DeepQLearningAgent(9, 9),
                MOISMCTSWithRandomRolloutsAgent(
                    100, SafeTicTacToeRunner(RandomAgent(), RandomAgent())),
                log_and_reset_score_history_threshold=10000,
                log_dir=log_dir7).run(1000000000))
    elif self.opponent == "MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent":
        log_dir8 = self.log_dir_root + \
            "DeepQLearningAgent_VS_MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent_" + self.time
        print(log_dir8)
        print(
            TensorboardTicTacToeRunner(
                DeepQLearningAgent(9, 9),
                MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent(
                    100, SafeTicTacToeRunner(RandomAgent(), RandomAgent()), 9, 9),
                log_and_reset_score_history_threshold=10000,
                log_dir=log_dir8).run(1000000000))
    elif self.opponent == "MOISMCTSWithValueNetworkAgent":
        log_dir9 = self.log_dir_root + "DeepQLearningAgent_VS_MOISMCTSWithValueNetworkAgent_" + self.time
        print(log_dir9)
        print(
            TensorboardTicTacToeRunner(
                DeepQLearningAgent(9, 9),
                MOISMCTSWithValueNetworkAgent(
                    100, SafeTicTacToeRunner(RandomAgent(), RandomAgent())),
                log_and_reset_score_history_threshold=10000,
                log_dir=log_dir9).run(1000000000))
    elif self.opponent == "DoubleQLearningAgent":
        log_dir10 = self.log_dir_root + "DeepQLearningAgent_VS_DoubleQLearningAgent_" + self.time
        print(log_dir10)
        print(
            TensorboardTicTacToeRunner(
                DeepQLearningAgent(9, 9),
                DoubleQLearningAgent(),
                log_and_reset_score_history_threshold=10000,
                log_dir=log_dir10).run(1000000000))
    elif self.opponent == "RandomRolloutAgent":
        nb_rollouts = 3
        log_dir11 = self.log_dir_root + "DeepQLearningAgent_VS_RandomRolloutAgent(" + \
            str(nb_rollouts) + ")_" + self.time
        print(log_dir11)
        print(
            TensorboardTicTacToeRunner(
                DeepQLearningAgent(9, 9),
                RandomRolloutAgent(
                    nb_rollouts, SafeTicTacToeRunner(RandomAgent(), RandomAgent())),
                log_and_reset_score_history_threshold=10000,
                log_dir=log_dir11).run(1000000000))
    else:
        print("Unknown opponent")
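# NOTE: the branches above differ only in the opponent constructor, the label embedded
# in the log directory, and the round budget. The sketch below is a hypothetical,
# table-driven equivalent (not part of the original code); it assumes the same imports
# as this file and omits the nb_rollouts suffix that the RandomRolloutAgent branch
# adds to its log-directory label.
def _run_against(opponent_name, log_dir_root, time_suffix):
    """Hypothetical table-driven version of the if/elif dispatch in run()."""
    opponents = {
        "RandomAgent": (lambda: RandomAgent(), 100000000),
        "TabularQLearningAgent": (lambda: TabularQLearningAgent(), 100000000),
        "DoubleQLearningAgent": (lambda: DoubleQLearningAgent(), 1000000000),
        # ... remaining opponents follow the same (factory, round budget) pattern ...
    }
    entry = opponents.get(opponent_name)
    if entry is None:
        print("Unknown opponent")
        return
    make_opponent, max_rounds = entry
    log_dir = log_dir_root + "DeepQLearningAgent_VS_" + opponent_name + "_" + time_suffix
    print(log_dir)
    print(
        TensorboardTicTacToeRunner(
            DeepQLearningAgent(9, 9),
            make_opponent(),
            log_and_reset_score_history_threshold=10000,
            log_dir=log_dir).run(max_rounds))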
        # Every `print_and_reset_score_history_threshold` rounds, print the averaged
        # score tallies and check whether they have changed since the previous report.
        if self.print_and_reset_score_history_threshold is not None and \
                round_id % self.print_and_reset_score_history_threshold == 0:
            print(score_history / self.print_and_reset_score_history_threshold)
            if self.prev_history is not None and \
                    score_history[0] == self.prev_history[0] and \
                    score_history[1] == self.prev_history[1] and \
                    score_history[2] == self.prev_history[2]:
                self.stuck_on_same_score += 1
            else:
                self.prev_history = score_history
                self.stuck_on_same_score = 0
            # After enough consecutive identical reports, hand player 1 over to a human
            # via CommandLineAgent so the game can be inspected interactively.
            if (self.replace_player1_with_commandline_after_similar_results is not None
                    and self.stuck_on_same_score >=
                    self.replace_player1_with_commandline_after_similar_results):
                self.agents = (CommandLineAgent(), self.agents[1])
                self.stuck_on_same_score = 0
    return tuple(score_history)


if __name__ == "__main__":
    print("MOISMCTSWithRandomRolloutsAgent VS RandomAgent")
    print(
        BasicTicTacToeRunner(
            MOISMCTSWithRandomRolloutsAgent(
                100, SafeTicTacToeRunner(RandomAgent(), RandomAgent())),
            RandomAgent(),
            print_and_reset_score_history_threshold=1000).run(1000))
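# A minimal, hypothetical illustration of the stuck-detection above (window values are
# made up): the counter only grows while consecutive reporting windows show identical
# score tallies, and it resets as soon as a window differs.
#
#   windows = [(900, 50, 50), (900, 50, 50), (900, 50, 50), (870, 80, 50)]
#   prev, stuck = None, 0
#   for w in windows:
#       if prev is not None and w == prev:
#           stuck += 1
#       else:
#           prev, stuck = w, 0
#   # stuck ends at 0 because the last window differs; without that fourth window
#   # it would end at 2.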