def test_Board_GameWon(self):
    # Case 1: Full board
    board = OthelloBoard()
    self.assertIsNone(board.game_won(), msg="Empty Board")
    board.apply_move((2, 3), config.BLACK)
    board.apply_move((2, 2), config.WHITE)
    board.apply_move((2, 1), config.BLACK)
    board.apply_move((1, 1), config.WHITE)
    board.apply_move((5, 4), config.BLACK)
    board.apply_move((5, 5), config.WHITE)
    board.apply_move((5, 6), config.BLACK)
    board.apply_move((6, 6), config.WHITE)
    self.assertIsNone(board.game_won(), msg="Game in progress")
    for col in range(len(board.board)):
        for tile in range(len(board.board)):
            if board.board[col, tile] == config.EMPTY:
                board.board[col, tile] = config.BLACK
    board.legal_moves = {}  # Required because tiles were written to the board directly instead of via apply_move
    self.assertEqual(board.game_won(), config.BLACK, msg="Black wins by stone count")

    # Case 2: No valid moves
    board = OthelloBoard()
    board.apply_move((3, 2), config.BLACK)
    board.apply_move((4, 5), config.BLACK)
    self.assertEqual(board.game_won(), config.BLACK,
                     msg="Black wins by stone count after neither player could perform any legal move")

    # Case 3: Regular, deterministic game
    for i in range(32):
        game = Othello((DeterministicPlayer(), DeterministicPlayer()))
        game.run_simulations(1)
        winner = game.board.game_won()
        self.assertEqual(winner, config.WHITE, "Winner of deterministic game was not White")
def test_ExperiencedPlayer(self):
    player1 = ExperiencedPlayer()
    player2 = ExperiencedPlayer()
    simulation = Othello([player1, player2])
    results, losses = simulation.run_simulations(self.TEST_EPISODES)
    self.assertEqual(len(results), self.TEST_EPISODES)
    self.assertNotIn(None, results)
    print("Average Result Experienced vs Experienced: %s" % np.mean(results))
def test_Performance(self):
    p1 = RandomPlayer()
    p2 = RandomPlayer()
    simulation = Othello([p1, p2])
    N = 500

    start = datetime.now()
    simulation.run_simulations(N)
    print("Simulating %s random games took %s" % (N, datetime.now() - start))
def test_RandomPlayer(self):
    player1 = RandomPlayer()
    player2 = RandomPlayer()
    simulation = Othello([player1, player2])
    results, losses = simulation.run_simulations(self.TEST_EPISODES)
    self.assertEqual(len(results), self.TEST_EPISODES)
    self.assertNotIn(None, results)
    print("Average Result Random vs Random: %s" % np.mean(results))
def evaluate_against_base_players(player, evaluation_players=None, games=config.EVALUATION_GAMES, silent=True):
    """ Standardized evaluation against base players.

    :param player: The player to be evaluated
    :param evaluation_players: A list of players against which the player should be evaluated
    :param games: The number of evaluation games to play against each evaluation player
    :param silent: Flag controlling if output is written to console
    :return: a tuple (score, results, results_overview) where score is the average score over all evaluation games
             (scalar in (-1, 1)), results is a list of tuples (name, score) with the score (-1, 1) achieved against
             the player named 'name', and results_overview contains the raw outcome counts per evaluation player.
    """
    # Instantiate the default opponents here instead of in the signature to avoid a shared mutable default argument
    if evaluation_players is None:
        evaluation_players = [RandomPlayer(), NovicePlayer(), ExperiencedPlayer()]

    # Store original training values
    if issubclass(player.__class__, LearningPlayer):
        training_values = player.strategy.train, player.strategy.model.training
        player.strategy.train, player.strategy.model.training = False, False

    results = []
    for e_player in evaluation_players:
        simulation = Othello([player, e_player])
        rewards, losses = simulation.run_simulations(games)
        results.append([str(e_player), rewards])

        if not silent:
            print_results(player, e_player, rewards)

    # Restore original training values
    if issubclass(player.__class__, LearningPlayer):
        player.strategy.train, player.strategy.model.training = training_values

    avg_results = [(result[0], np.mean(result[1])) for result in results]
    avg_results.insert(0, ("Total Score", np.mean([res[1] for res in avg_results])))  # Insert average overall score as first element of results

    results_overview = deepcopy(results)
    total = Counter(dict())
    for entry in results_overview:
        entry[1] = Counter(entry[1])
        total += entry[1]
    results_overview.insert(0, ("[Total Score]", total))  # Insert total outcome counts as first element of the overview

    if not silent:
        print("Overall score: %s" % avg_results[0][1])

    return avg_results[0][1], avg_results, results_overview
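# A minimal usage sketch of evaluate_against_base_players (illustrative only; the learning
# rate and game count are arbitrary example values, and FCBaselinePlayer stands in for any
# LearningPlayer subclass from this project):
if __name__ == '__main__':
    demo_player = FCBaselinePlayer(lr=0.001)
    score, avg_results, overview = evaluate_against_base_players(demo_player, games=100, silent=False)
    # score is the mean reward in (-1, 1) over all evaluation games;
    # avg_results pairs each opponent's name with the mean score achieved against it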
def run(self, lr, silent=False):
    self.player1 = self.pretrained_player if self.pretrained_player else FCBaselinePlayer(lr=lr)
    if self.opponent is None:
        self.player2 = SearchPlayer(search_depth=3, strategy=OthelloHeuristic.RGRUENER_STRATEGY)
    else:
        self.player2 = self.opponent
    self.simulation = Othello([self.player1, self.player2])

    games_per_evaluation = self.games // self.evaluations
    start_time = datetime.now()
    for episode in range(1, self.evaluations + 1):
        # train
        self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode
        results, losses = self.simulation.run_simulations(games_per_evaluation)
        self.add_loss(np.mean(losses))
        self.add_results((str(self.player2), np.mean(results)))

        # evaluate
        self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
        if self.opponent is None:
            score, results, overview = evaluate_against_base_players(self.player1)
        else:
            score, results, overview = evaluate_against_base_players(self.player1, evaluation_players=[self.opponent])
        self.add_results(results)

        if not silent:
            if Printer.print_episode(episode * games_per_evaluation, self.games, datetime.now() - start_time):
                overview = format_overview(overview)
                self.plot_and_save(
                    "%s vs TRADITIONAL OPPONENT" % self.player1,
                    "Train %s vs %s\nGames: %s Evaluations: %s\nTime: %s"
                    % (self.player1, self.player2, episode * games_per_evaluation, self.evaluations, config.time_diff(start_time)))

    self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(self.player1, silent=False)
    return self
def run(self, lr, silent=False):
    self.player1 = self.pretrained_player if self.pretrained_player else FCACPlayer(lr=lr)  # ConvACPlayer(lr=lr)
    if self.opponent is not None:
        self.player2 = self.opponent
        self.simulation = Othello([self.player1, self.player2])

    games_per_evaluation = self.games // self.evaluations
    start_time = datetime.now()
    for episode in range(1, self.evaluations + 1):
        if self.opponent is None:
            self.player2 = choice((RandomPlayer(), NovicePlayer(), ExperiencedPlayer(deterministic=False)))
            self.simulation = Othello([self.player1, self.player2])

        # train
        self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode
        results, losses = self.simulation.run_simulations(games_per_evaluation)
        self.add_loss(np.mean(losses))
        self.add_results(("Training Results", np.mean(results)))

        # evaluate
        self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
        if self.opponent is None:
            score, results, overview = evaluate_against_base_players(self.player1)
        else:
            score, results, overview = evaluate_against_base_players(self.player1, evaluation_players=[self.opponent])
        self.add_results(results)

        if not silent:
            if Printer.print_episode(episode * games_per_evaluation, self.games, datetime.now() - start_time):
                overview = format_overview(overview)
                self.plot_and_save(
                    "%s vs TRADITIONAL OPPONENT" % self.player1,
                    "Train %s vs %s\nGames: %s Evaluations: %s\nTime: %s"
                    % (self.player1, self.opponent, episode * games_per_evaluation, self.evaluations, config.time_diff(start_time)))

    self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(self.player1, silent=False)
    return self
def run(self, lr, milestones=False, silent=False):
    self.player1 = self.pretrained_player if self.pretrained_player else FCBaselinePlayer(lr=lr)

    # Player 2 has the same start conditions as Player 1 but does not train
    self.player2 = self.player1.copy(shared_weights=False)
    self.player2.strategy.train, self.player2.strategy.model.training = False, False  # eval mode

    games_per_evaluation = self.games // self.evaluations
    self.replacements = (0, 0)
    start_time = datetime.now()
    for episode in range(1, self.evaluations + 1):
        # If milestones exist, use them with probability p
        if self.milestones and random() < 0.2:
            self.player2 = choice(self.milestones)
        self.simulation = Othello([self.player1, self.player2])

        # train
        self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode
        results, losses = self.simulation.run_simulations(games_per_evaluation)
        self.add_loss(np.mean(losses))
        self.add_results(("Best", np.mean(results)))

        # evaluate
        if episode * games_per_evaluation % 1000 == 0:
            self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
            score, results, overview = evaluate_against_base_players(self.player1)
            self.add_results(results)
            if not silent and Printer.print_episode(episode * games_per_evaluation, self.games, datetime.now() - start_time):
                replacement_ratio = self.replacements[0] / self.replacements[1] if self.replacements[1] else 0  # guard against division by zero before the first comparison
                self.plot_and_save(
                    "%s vs BEST" % (str(self.player1) + (" milestones" if milestones else "")),
                    "Train %s vs Best version of self\nGames: %s Evaluations: %s Replacement ratio: %s\nTime: %s"
                    % (self.player1, episode * games_per_evaluation, self.evaluations,
                       replacement_ratio, config.time_diff(start_time)))

        # Replace the reference player whenever the training player beats it
        if evaluate_against_each_other(self.player1, self.player2, games=8):
            self.player2 = self.player1.copy(shared_weights=False)
            self.player2.strategy.train, self.player2.strategy.model.training = False, False
            self.replacements = self.replacements[0] + 1, self.replacements[1] + 1
        else:
            self.replacements = self.replacements[0], self.replacements[1] + 1

        # Save a milestone copy each time another fifth of training completes
        if milestones and (episode * games_per_evaluation) % (self.games // 5) == 0:
            self.milestones.append(self.player1.copy(shared_weights=False))
            self.milestones[-1].strategy.train = False

    self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(self.player1, silent=False)
    return self
class TrainBaselinePlayerVsSelf(OthelloBaseExperiment):

    def __init__(self, games, evaluations, pretrained_player=None):
        super(TrainBaselinePlayerVsSelf, self).__init__()
        self.games = games
        self.evaluations = evaluations
        self.pretrained_player = pretrained_player.copy(shared_weights=False) if pretrained_player else None
        self.milestones = []

    def reset(self):
        self.__init__(games=self.games, evaluations=self.evaluations, pretrained_player=self.pretrained_player)
        return self

    def run(self, lr, milestones=False, silent=False):
        self.player1 = self.pretrained_player if self.pretrained_player else FCBaselinePlayer(lr=lr)

        games_per_evaluation = self.games // self.evaluations
        start_time = datetime.now()
        for episode in range(1, self.evaluations + 1):
            # If milestones exist, use them with probability p
            if self.milestones and random() < 0.2:
                self.player2 = choice(self.milestones)
            else:
                self.player2 = self.player1.copy(shared_weights=True)

            # train
            self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode
            self.player2.strategy.train = False
            self.simulation = Othello([self.player1, self.player2])
            results, losses = self.simulation.run_simulations(games_per_evaluation)
            self.add_loss(np.mean(losses))
            self.add_results(("Self", np.mean(results)))

            # evaluate
            if episode * games_per_evaluation % 1000 == 0:
                self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
                score, results, overview = evaluate_against_base_players(self.player1)
                self.add_results(results)
                if not silent and Printer.print_episode(episode * games_per_evaluation, self.games, datetime.now() - start_time):
                    self.plot_and_save(
                        "%s vs SELF" % (str(self.player1) + (" milestones" if milestones else "")),
                        "Train %s vs Self\nGames: %s Evaluations: %s\nTime: %s"
                        % (self.player1, episode * games_per_evaluation, self.evaluations, config.time_diff(start_time)))

            # Save a milestone copy each time another fifth of training completes
            if milestones and (episode * games_per_evaluation) % (self.games // 5) == 0:
                self.milestones.append(self.player1.copy(shared_weights=False))

        self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(self.player1, silent=False)
        return self
class TrainReinforcePlayerVsTraditionalOpponent(OthelloBaseExperiment):

    def __init__(self, games, evaluations, pretrained_player, opponent):
        super(TrainReinforcePlayerVsTraditionalOpponent, self).__init__()
        self.games = games
        self.evaluations = evaluations
        self.pretrained_player = pretrained_player.copy(shared_weights=False) if pretrained_player else None
        self.opponent = opponent

    def reset(self):
        self.__init__(games=self.games, evaluations=self.evaluations,
                      pretrained_player=self.pretrained_player, opponent=self.opponent)
        return self

    def run(self, lr, silent=False):
        self.player1 = self.pretrained_player if self.pretrained_player else FCReinforcePlayer(lr=lr)
        if self.opponent is not None:
            self.player2 = self.opponent
            self.simulation = Othello([self.player1, self.player2])

        games_per_evaluation = self.games // self.evaluations
        start_time = datetime.now()
        for episode in range(1, self.evaluations + 1):
            if self.opponent is None:
                self.player2 = choice((RandomPlayer(), NovicePlayer(), ExperiencedPlayer(deterministic=False)))
                self.simulation = Othello([self.player1, self.player2])

            # train
            self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode
            results, losses = self.simulation.run_simulations(games_per_evaluation)
            self.add_loss(np.mean(losses))
            self.add_results(("Training Results", np.mean(results)))

            # evaluate
            self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
            if self.opponent is None:
                score, results, overview = evaluate_against_base_players(self.player1)
            else:
                score, results, overview = evaluate_against_base_players(self.player1, evaluation_players=[self.opponent])
            self.add_results(results)

            if not silent:
                if Printer.print_episode(episode * games_per_evaluation, self.games, datetime.now() - start_time):
                    self.plot_and_save(
                        "%s vs TRADITIONAL OPPONENT" % self.player1,
                        "Train %s vs %s\nGames: %s Evaluations: %s\nTime: %s"
                        % (self.player1, self.opponent, episode * games_per_evaluation, self.evaluations, config.time_diff(start_time)))

        self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(self.player1, silent=False)
        return self
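# A minimal usage sketch for these experiment classes (illustrative only; the game and
# evaluation counts, learning rate, and opponent choice are arbitrary example values):
if __name__ == '__main__':
    experiment = TrainReinforcePlayerVsTraditionalOpponent(
        games=10000, evaluations=100, pretrained_player=None, opponent=ExperiencedPlayer())
    experiment.run(lr=1e-4)
    print("Final score vs base players: %s" % experiment.final_score)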
def test_ExperiencedVsRandom(self):
    player1 = ExperiencedPlayer()
    player2 = RandomPlayer()
    simulation = Othello([player1, player2])
    results, losses = simulation.run_simulations(self.TEST_EPISODES)
    print("Average Result Experienced vs Random: %s" % np.mean(results))
def run(self):
    game = Othello((self.player1, self.player2), gui=self.gui)
    game.run_simulations(4)
    return self