Example #1
 def run(self):
     # Train a DeepQLearningAgent against the opponent selected by
     # self.opponent: each branch builds a TensorboardTicTacToeRunner
     # with its own timestamped log directory and a fixed round budget.
     if self.opponent == "RandomAgent":
         log_dir1 = self.log_dir_root + "DeepQLearningAgent_VS_RandomAgent_" + self.time
         print(log_dir1)
         print(
             TensorboardTicTacToeRunner(
                 DeepQLearningAgent(9, 9),
                 RandomAgent(),
                 log_and_reset_score_history_threshold=10000,
                 log_dir=log_dir1).run(100000000))
     elif self.opponent == "TabularQLearningAgent":
         log_dir2 = self.log_dir_root + "DeepQLearningAgent_VS_TabularQLearningAgent_" + self.time
         print(log_dir2)
         print(
             TensorboardTicTacToeRunner(
                 DeepQLearningAgent(9, 9),
                 TabularQLearningAgent(),
                 log_and_reset_score_history_threshold=10000,
                 log_dir=log_dir2).run(100000000))
     elif self.opponent == "DeepQLearningAgent":
         log_dir3 = self.log_dir_root + "DeepQLearningAgent_VS_DeepQLearningAgent_" + self.time
         print(log_dir3)
         print(
             TensorboardTicTacToeRunner(
                 DeepQLearningAgent(9, 9),
                 DeepQLearningAgent(9, 9),
                 log_and_reset_score_history_threshold=10000,
                 log_dir=log_dir3).run(100000000))
     elif self.opponent == "ReinforceClassicAgent":
         log_dir4 = self.log_dir_root + "DeepQLearningAgent_VS_ReinforceClassicAgent_" + self.time
         print(log_dir4)
         print(
             TensorboardTicTacToeRunner(
                 DeepQLearningAgent(9, 9),
                 ReinforceClassicAgent(9, 9),
                 log_and_reset_score_history_threshold=10000,
                 log_dir=log_dir4).run(100000000))
     elif self.opponent == "ReinforceClassicWithMultipleTrajectoriesAgent":
         log_dir5 = self.log_dir_root + "DeepQLearningAgent_VS_ReinforceClassicWithMultipleTrajectoriesAgent_" + self.time
         print(log_dir5)
         print(
             TensorboardTicTacToeRunner(
                 DeepQLearningAgent(9, 9),
                 ReinforceClassicWithMultipleTrajectoriesAgent(9, 9),
                 log_and_reset_score_history_threshold=10000,
                 log_dir=log_dir5).run(100000000))
     elif self.opponent == "PPOWithMultipleTrajectoriesMultiOutputsAgent":
         log_dir6 = self.log_dir_root + "DeepQLearningAgent_VS_PPOWithMultipleTrajectoriesMultiOutputsAgent_" + self.time
         print(log_dir6)
         print(
             TensorboardTicTacToeRunner(
                 DeepQLearningAgent(9, 9),
                 PPOWithMultipleTrajectoriesMultiOutputsAgent(9, 9),
                 log_and_reset_score_history_threshold=10000,
                 log_dir=log_dir6).run(100000000))
     elif self.opponent == "MOISMCTSWithRandomRolloutsAgent":
         log_dir7 = self.log_dir_root + "DeepQLearningAgent_VS_MOISMCTSWithRandomRolloutsAgent_" + self.time
         print(log_dir7)
         print(
             TensorboardTicTacToeRunner(
                 DeepQLearningAgent(9, 9),
                 MOISMCTSWithRandomRolloutsAgent(
                     100,
                     SafeTicTacToeRunner(RandomAgent(), RandomAgent())),
                 log_and_reset_score_history_threshold=10000,
                 log_dir=log_dir7).run(1000000000))
     elif self.opponent == "MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent":
         log_dir8 = self.log_dir_root + "DeepQLearningAgent_VS_MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent_" + self.time
         print(log_dir8)
         print(
             TensorboardTicTacToeRunner(
                 DeepQLearningAgent(9, 9),
                 MOISMCTSWithRandomRolloutsExpertThenApprenticeAgent(
                     100,
                     SafeTicTacToeRunner(RandomAgent(), RandomAgent()),
                     9, 9),
                 log_and_reset_score_history_threshold=10000,
                 log_dir=log_dir8).run(1000000000))
     elif self.opponent == "MOISMCTSWithValueNetworkAgent":
         log_dir9 = self.log_dir_root + "DeepQLearningAgent_VS_MOISMCTSWithValueNetworkAgent_" + self.time
         print(log_dir9)
         print(
             TensorboardTicTacToeRunner(
                 DeepQLearningAgent(9, 9),
                 MOISMCTSWithValueNetworkAgent(
                     100,
                     SafeTicTacToeRunner(RandomAgent(), RandomAgent())),
                 log_and_reset_score_history_threshold=10000,
                 log_dir=log_dir9).run(1000000000))
     elif self.opponent == "DoubleQLearningAgent":
         log_dir10 = self.log_dir_root + "DeepQLearningAgent_VS_DoubleQLearningAgent_" + self.time
         print(log_dir10)
         print(
             TensorboardTicTacToeRunner(
                 DeepQLearningAgent(9, 9),
                 DoubleQLearningAgent(),
                 log_and_reset_score_history_threshold=10000,
                 log_dir=log_dir10).run(1000000000))
     elif self.opponent == "RandomRolloutAgent":
         nb_rollouts = 3
         log_dir11 = self.log_dir_root + "DeepQLearningAgent_VS_RandomRolloutAgent(" + str(
             nb_rollouts) + ")_" + self.time
         print(log_dir11)
         print(
             TensorboardTicTacToeRunner(
                 DeepQLearningAgent(9, 9),
                 RandomRolloutAgent(
                     nb_rollouts,
                     SafeTicTacToeRunner(RandomAgent(), RandomAgent())),
                 log_and_reset_score_history_threshold=10000,
                 log_dir=log_dir11).run(1000000000))
     else:
         print("Unknown opponent")

Example #2

# Excerpt from the runner's run() method: this block executes once per
# round inside the game loop (the loop itself is elided here).
if self.print_and_reset_score_history_threshold is not None and \
        round_id % self.print_and_reset_score_history_threshold == 0:
    # Report the scores averaged over the last reporting window.
    print(score_history / self.print_and_reset_score_history_threshold)
    # Count consecutive identical reports; any change stores the new
    # history and resets the counter.
    if self.prev_history is not None and \
            score_history[0] == self.prev_history[0] and \
            score_history[1] == self.prev_history[1] and \
            score_history[2] == self.prev_history[2]:
        self.stuck_on_same_score += 1
    else:
        self.prev_history = score_history
        self.stuck_on_same_score = 0
    # After enough identical reports in a row, replace player 1 with an
    # interactive CommandLineAgent (e.g. to let a human inspect a
    # stagnating match).
    if (self.replace_player1_with_commandline_after_similar_results
            is not None and self.stuck_on_same_score >=
            self.replace_player1_with_commandline_after_similar_results):
        self.agents = (CommandLineAgent(), self.agents[1])
        self.stuck_on_same_score = 0

# Once the loop finishes, run() returns the accumulated score history.
return tuple(score_history)
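
The stuck-score bookkeeping above is self-contained enough to illustrate in
isolation. A minimal standalone sketch with made-up score tuples (the real
score_history is produced by the runner, which is not shown here):

# Standalone illustration (hypothetical data) of the "stuck score"
# counter: a report identical to the previous one increments the
# counter; any change stores the new report and resets the counter.
prev_history, stuck_on_same_score = None, 0
for score_history in ((600, 150, 250), (600, 150, 250), (700, 100, 200)):
    if prev_history is not None and \
            score_history[0] == prev_history[0] and \
            score_history[1] == prev_history[1] and \
            score_history[2] == prev_history[2]:
        stuck_on_same_score += 1
    else:
        prev_history = score_history
        stuck_on_same_score = 0
    print(stuck_on_same_score)  # prints 0, then 1, then 0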


if __name__ == "__main__":
    print("MOISMCTSWithRandomRolloutsAgent VS RandomAgent")
    print(
        BasicTicTacToeRunner(
            MOISMCTSWithRandomRolloutsAgent(
                100, SafeTicTacToeRunner(RandomAgent(), RandomAgent())),
            RandomAgent(),
            print_and_reset_score_history_threshold=1000).run(1000))
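
Because run() ends with return tuple(score_history) (see Example #2), the
final print above shows the accumulated score history. A hypothetical caller
could keep the tuple instead of printing it; note that the meaning and order
of its entries are defined by the runner class, which this example does not
show:

# Hypothetical: capture run()'s return value instead of printing it.
# The order of the three entries is defined by the runner class (not
# shown here), so interpret them against that class's source.
final_scores = BasicTicTacToeRunner(
    RandomAgent(),
    RandomAgent(),
    print_and_reset_score_history_threshold=1000).run(1000)
print("final score history:", final_scores)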