def test_BlockMid(self):
    player1 = ttt_players.ExperiencedPlayer(block_mid=True)
    player2 = ttt_players.ExperiencedPlayer(block_mid=False)
    simulation = TicTacToe([player1, player2])
    results, losses = simulation.run_simulations(self.TEST_EPISODES)
    print("Average Result Experienced(block) vs Experienced(): %s" % np.mean(results))
def run(self, lr, milestones=False, silent=False):
    self.player1 = self.pretrained_player if self.pretrained_player else FCBaseLinePlayer(lr=lr)

    # Player 2 has the same start conditions as Player 1 but does not train
    self.player2 = self.player1.copy(shared_weights=True)
    self.player2.strategy.train = False

    games_per_evaluation = self.games // self.evaluations
    start_time = datetime.now()
    for episode in range(1, self.evaluations + 1):
        # If milestones exist, use one of them as the opponent with probability p
        if self.milestones and random() < 0.2:
            self.player2 = choice(self.milestones)
        else:
            self.player2 = self.player1.copy(shared_weights=True)
        self.player2.strategy.train = False
        # Rebuild the simulation so it references the current player2
        self.simulation = TicTacToe([self.player1, self.player2])

        # train
        self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode
        results, losses = self.simulation.run_simulations(games_per_evaluation)
        self.add_loss(np.mean(losses))
        self.add_results(("Self", np.mean(results)))

        # evaluate
        self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
        score, results, overview = evaluate_against_base_players(self.player1)
        self.add_results(results)

        if not silent and Printer.print_episode(episode * games_per_evaluation, self.games, datetime.now() - start_time):
            self.plot_and_save(
                "%s vs SELF" % (self.player1.__str__() + (" milestones" if milestones else "")),
                "Train %s vs Self version of self\nGames: %s Evaluations: %s\nTime: %s"
                % (self.player1, episode * games_per_evaluation, self.evaluations, config.time_diff(start_time)))

        # Save a milestone copy whenever another fifth of the training games is completed
        if milestones and (episode * games_per_evaluation) % (self.games // 5) == 0:
            self.milestones.append(self.player1.copy(shared_weights=False))
            self.milestones[-1].strategy.train = False

    self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(self.player1, silent=False)
    return self
def evaluate_against_base_players(player, evaluation_players=None, silent=True):
    """ Standardized evaluation against base players.

    :param player: The player to be evaluated
    :param evaluation_players: A list of players against which the player should be evaluated
    :param silent: Flag controlling if output is written to console
    :return: a tuple (score, results, overview) where score is the average score over all
             evaluation games (a scalar in (-1, 1)), results is a list of tuples (name, score)
             where 'score' is the score in (-1, 1) achieved against the player named 'name',
             and overview is the corresponding list of raw outcome Counters.
    """
    # Avoid a mutable default argument: build the default opponent pool per call
    if evaluation_players is None:
        evaluation_players = [RandomPlayer(), NovicePlayer(), ExperiencedPlayer(), ExpertPlayer()]

    # Store original training values
    if issubclass(player.__class__, LearningPlayer):
        training_values = player.strategy.train, player.strategy.model.training
        player.strategy.train, player.strategy.model.training = False, False

    results = []
    for e_player in evaluation_players:
        simulation = TicTacToe([player, e_player])
        rewards, losses = simulation.run_simulations(config.EVALUATION_GAMES)
        results.append([e_player.__str__(), rewards])

        if not silent:
            print_results(player, e_player, rewards)

    # Restore original training values
    if issubclass(player.__class__, LearningPlayer):
        player.strategy.train, player.strategy.model.training = training_values

    avg_results = [(result[0], np.mean(result[1])) for result in results]
    # Insert average overall score as first element of results
    avg_results.insert(0, ("Total Score", np.mean([res[1] for res in avg_results])))

    results_overview = deepcopy(results)
    total = Counter(dict())
    for entry in results_overview:
        entry[1] = Counter(entry[1])
        total += entry[1]
    # Insert combined outcome counts as first element of the overview
    results_overview.insert(0, ("[Total Score]", total))

    if not silent:
        print("Overall score: %s" % avg_results[0][1])

    return avg_results[0][1], avg_results, results_overview
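# Usage sketch (illustrative, not part of the repository): evaluate a freshly
# created FCBaseLinePlayer against the default opponent pool. The lr value is
# a hypothetical choice.
player = FCBaseLinePlayer(lr=1e-3)
score, avg_results, overview = evaluate_against_base_players(player, silent=False)
print("Total score: %s" % score)   # scalar in (-1, 1)
for name, avg in avg_results[1:]:  # skip the leading ("Total Score", ...) entry
    print("%s: %s" % (name, avg))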
def test_Performance(self):
    p1 = ttt_players.RandomPlayer()
    p2 = ttt_players.RandomPlayer()
    simulation = TicTacToe([p1, p2])
    N = 15000

    start = datetime.now()
    simulation.run_simulations(N)
    print("Simulating %s random games took %s" % (N, datetime.now() - start))
def run(self, lr, silent=False):
    self.player1 = self.pretrained_player if self.pretrained_player else FCBaseLinePlayer(lr=lr)
    if self.opponent is not None:
        self.player2 = self.opponent
        self.simulation = TicTacToe([self.player1, self.player2])

    games_per_evaluation = self.games // self.evaluations
    start_time = datetime.now()
    for episode in range(1, self.evaluations + 1):
        if self.opponent is None:
            self.player2 = choice((RandomPlayer(), NovicePlayer(), ExperiencedPlayer()))  # choice((RandomPlayer(), ExpertPlayer()))
            self.simulation = TicTacToe([self.player1, self.player2])

        # train
        self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode
        results, losses = self.simulation.run_simulations(games_per_evaluation)
        self.add_loss(np.mean(losses))
        self.add_results(("Training Results", np.mean(results)))

        # evaluate
        self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
        if self.opponent is None:
            score, results, overview = evaluate_against_base_players(self.player1)
        else:
            score, results, overview = evaluate_against_base_players(self.player1, evaluation_players=[self.opponent])
        self.add_results(results)

        if not silent and Printer.print_episode(episode * games_per_evaluation, self.games, datetime.now() - start_time):
            overview = format_overview(overview)
            self.plot_and_save(
                "%s vs TRADITIONAL OPPONENT" % (self.player1),
                "Train %s vs %s\nGames: %s Evaluations: %s\nTime: %s"
                % (self.player1, self.opponent, episode * games_per_evaluation, self.evaluations, config.time_diff(start_time)))

    self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(self.player1, silent=False)
    return self
class TrainReinforcePlayerVsBest(TicTacToeBaseExperiment):

    def __init__(self, games, evaluations, pretrained_player=None):
        super(TrainReinforcePlayerVsBest, self).__init__()
        self.games = games
        self.evaluations = evaluations
        self.pretrained_player = pretrained_player.copy(shared_weights=False) if pretrained_player else None

    def reset(self):
        self.__init__(games=self.games, evaluations=self.evaluations, pretrained_player=self.pretrained_player)
        return self

    def run(self, lr, silent=False):
        self.player1 = self.pretrained_player if self.pretrained_player else FCReinforcePlayer(lr=lr)

        # Player 2 has the same start conditions as Player 1 but does not train
        self.player2 = self.player1.copy(shared_weights=False)
        self.player2.strategy.train = False

        games_per_evaluation = self.games // self.evaluations
        self.replacements = []
        start_time = datetime.now()
        for episode in range(1, self.evaluations + 1):
            # train
            self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode
            self.simulation = TicTacToe([self.player1, self.player2])
            results, losses = self.simulation.run_simulations(games_per_evaluation)
            self.add_loss(np.mean(losses))
            self.add_results(("Best", np.mean(results)))

            # evaluate
            self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
            score, results, overview = evaluate_against_base_players(self.player1)
            self.add_results(results)

            if not silent and Printer.print_episode(episode * games_per_evaluation, self.games, datetime.now() - start_time):
                self.plot_and_save(
                    "%s vs BEST" % (self.player1),
                    "Train %s vs Best version of self\nGames: %s Evaluations: %s\nTime: %s"
                    % (self.player1, episode * games_per_evaluation, self.evaluations, config.time_diff(start_time)))

            # Replace the "best" opponent whenever the training player beats it
            if evaluate_against_each_other(self.player1, self.player2):
                self.player2 = self.player1.copy(shared_weights=False)
                self.player2.strategy.train, self.player2.strategy.model.training = False, False
                self.replacements.append(episode)

        print("Best player replaced after episodes: %s" % self.replacements)
        self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(self.player1, silent=False)
        return self
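# Usage sketch (illustrative): run the reinforce vs-Best experiment end to end.
# GAMES, EVALUATIONS and LR are hypothetical values, not taken from the code.
GAMES, EVALUATIONS, LR = 100000, 100, 1e-5
experiment = TrainReinforcePlayerVsBest(games=GAMES, evaluations=EVALUATIONS)
experiment.run(lr=LR)
print("Final score: %s" % experiment.final_score)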
def testRandomPlayer(self):
    player1 = ttt_players.RandomPlayer()
    player2 = ttt_players.RandomPlayer()
    simulation = TicTacToe([player1, player2])
    results, losses = simulation.run_simulations(self.TEST_EPISODES)
    self.assertTrue(len(results) == self.TEST_EPISODES)
    self.assertTrue(None not in results)

    for i in range(4):
        simulation.__run__(player1, player2)
        black_stones, white_stones = simulation.board.count_stones()
        self.assertIn(black_stones, [white_stones - 1, white_stones, white_stones + 1])
        if not simulation.board.game_won():
            self.assertEqual(black_stones + white_stones, simulation.board.board_size ** 2)

    print("Average Result Random vs Random: %s" % np.mean(results))
def test_neverLose(self):
    GAMES = 10000
    player1 = SearchPlayer()
    player2 = SearchPlayer()
    random_player = RandomPlayer()

    simulation = TicTacToe([player1, player2])
    results, losses = simulation.run_simulations(GAMES // 100)
    self.assertEqual(len(results), results.count(0), "Perfect player mirror match resulted in a result other than draw")
    print("\nFirst 20 results: %s against self" % results[:20])

    simulation = TicTacToe([player1, random_player])
    results, losses = simulation.run_simulations(GAMES)
    self.assertEqual(0, results.count(-1), "Perfect player lost against random")
    print("First 20 results: %s against random player" % results[:20])
    # sum/len over results in {-1, 0, 1} is the mean result, i.e. win rate minus loss rate
    print("Average result: %s vs random player" % (sum(results) / len(results)))
class TrainBaselinePlayerVsBest(TicTacToeBaseExperiment):

    def __init__(self, games, evaluations, pretrained_player=None):
        super(TrainBaselinePlayerVsBest, self).__init__()
        self.games = games
        self.evaluations = evaluations
        self.pretrained_player = pretrained_player.copy(shared_weights=False) if pretrained_player else None
        self.milestones = []

    def reset(self):
        self.__init__(games=self.games, evaluations=self.evaluations, pretrained_player=self.pretrained_player)
        return self

    def run(self, lr, milestones=False, silent=False):
        self.player1 = self.pretrained_player if self.pretrained_player else FCBaseLinePlayer(lr=lr)

        # Player 2 has the same start conditions as Player 1 but does not train
        self.player2 = self.player1.copy(shared_weights=False)
        self.player2.strategy.train, self.player2.strategy.model.training = False, False  # eval mode

        games_per_evaluation = self.games // self.evaluations
        self.replacements = []
        start_time = datetime.now()
        for episode in range(1, self.evaluations + 1):
            # train
            self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode

            # If milestones exist, use one of them as the opponent with probability p
            if self.milestones and random() < 0.2:
                self.player2 = choice(self.milestones)

            self.simulation = TicTacToe([self.player1, self.player2])
            results, losses = self.simulation.run_simulations(games_per_evaluation)
            self.add_loss(np.mean(losses))
            self.add_results(("Best", np.mean(results)))

            # evaluate
            self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
            score, results, overview = evaluate_against_base_players(self.player1)
            self.add_results(results)

            if not silent and Printer.print_episode(episode * games_per_evaluation, self.games, datetime.now() - start_time):
                self.plot_and_save(
                    "%s vs BEST" % (self.player1.__str__() + (" milestones" if milestones else "")),
                    "Train %s vs Best version of self\nGames: %s Evaluations: %s\nTime: %s"
                    % (self.player1, episode * games_per_evaluation, self.evaluations, config.time_diff(start_time)))

            # Replace the "best" opponent whenever the training player beats it
            if evaluate_against_each_other(self.player1, self.player2):
                self.player2 = self.player1.copy(shared_weights=False)
                self.player2.strategy.train, self.player2.strategy.model.training = False, False
                self.replacements.append(episode)

            # Save a milestone copy whenever another fifth of the training games is completed
            if milestones and (episode * games_per_evaluation) % (self.games // 5) == 0:
                self.milestones.append(self.player1.copy(shared_weights=False))
                self.milestones[-1].strategy.train = False

        print("Best player replaced after episodes: %s" % self.replacements)
        self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(self.player1, silent=False)
        return self
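# Usage sketch (illustrative): the same experiment with the milestone pool
# enabled, so roughly every fifth of training a frozen copy of player1 joins
# the opponent pool. GAMES, EVALUATIONS and LR are hypothetical values.
GAMES, EVALUATIONS, LR = 100000, 100, 1e-5
experiment = TrainBaselinePlayerVsBest(games=GAMES, evaluations=EVALUATIONS)
experiment.run(lr=LR, milestones=True)
print("Best player was replaced at episodes: %s" % experiment.replacements)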
class TrainReinforcePlayerVsTraditionalOpponent(TicTacToeBaseExperiment):

    def __init__(self, games, evaluations, pretrained_player, opponent):
        super(TrainReinforcePlayerVsTraditionalOpponent, self).__init__()
        self.games = games
        self.evaluations = evaluations
        self.pretrained_player = pretrained_player.copy(shared_weights=False) if pretrained_player else None
        self.opponent = opponent
        self.milestones = []

    def reset(self):
        self.__init__(games=self.games, evaluations=self.evaluations, pretrained_player=self.pretrained_player, opponent=self.opponent)
        return self

    def run(self, lr, silent=False):
        self.player1 = self.pretrained_player if self.pretrained_player else FCReinforcePlayer(lr=lr)
        if self.opponent is not None:
            self.player2 = self.opponent
            self.simulation = TicTacToe([self.player1, self.player2])

        games_per_evaluation = self.games // self.evaluations
        start_time = datetime.now()
        for episode in range(1, self.evaluations + 1):
            if self.opponent is None:
                self.player2 = choice((RandomPlayer(), NovicePlayer(), ExperiencedPlayer()))  # choice((RandomPlayer(), ExpertPlayer()))
                self.simulation = TicTacToe([self.player1, self.player2])

            # train
            self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode
            results, losses = self.simulation.run_simulations(games_per_evaluation)
            self.add_loss(np.mean(losses))
            self.add_results(("Training Results", np.mean(results)))

            # evaluate
            self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
            if self.opponent is None:
                score, results, overview = evaluate_against_base_players(self.player1)
            else:
                score, results, overview = evaluate_against_base_players(self.player1, evaluation_players=[self.opponent])
            self.add_results(results)

            if not silent and Printer.print_episode(episode * games_per_evaluation, self.games, datetime.now() - start_time):
                self.plot_and_save(
                    "%s vs TRADITIONAL OPPONENT" % (self.player1),
                    "Train %s vs %s\nGames: %s Evaluations: %s\nTime: %s"
                    % (self.player1, self.opponent, episode * games_per_evaluation, self.evaluations, config.time_diff(start_time)))

        self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(self.player1, silent=False)
        return self
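# Usage sketch (illustrative): train against one fixed traditional opponent;
# passing opponent=None instead samples a Random/Novice/Experienced player
# each episode. GAMES, EVALUATIONS and LR are hypothetical values.
GAMES, EVALUATIONS, LR = 100000, 100, 1e-5
experiment = TrainReinforcePlayerVsTraditionalOpponent(
    games=GAMES, evaluations=EVALUATIONS, pretrained_player=None, opponent=ExperiencedPlayer())
experiment.run(lr=LR)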
def test_ConvReinforcePlayer(self):
    fc_player = FCReinforcePlayer(lr=1e-4)
    random_player = RandomPlayer()
    simulation = TicTacToe([fc_player, random_player])
    simulation.run_simulations(100)
def test_DummyTrainReinforcePlayer(self):
    player1 = FCReinforcePlayer(lr=0.001)
    player2 = RandomPlayer()
    simulation = TicTacToe([player1, player2])
    simulation.run_simulations(10)
def test_ExperiencedVsRandom(self):
    player1 = ttt_players.ExperiencedPlayer()
    player2 = ttt_players.RandomPlayer()
    simulation = TicTacToe([player1, player2])
    results, losses = simulation.run_simulations(self.TEST_EPISODES)
    print("Average Result Experienced vs Random: %s" % np.mean(results))