def run(self, lr, silent=False):
    self.player1 = self.pretrained_player if self.pretrained_player else FCBaselinePlayer(lr=lr)

    if self.opponent is None:
        self.player2 = SearchPlayer(search_depth=3, strategy=OthelloHeuristic.RGRUENER_STRATEGY)
    else:
        self.player2 = self.opponent

    self.simulation = Othello([self.player1, self.player2])

    games_per_evaluation = self.games // self.evaluations
    start_time = datetime.now()
    for episode in range(1, self.evaluations + 1):
        # train
        self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode
        results, losses = self.simulation.run_simulations(games_per_evaluation)
        self.add_loss(np.mean(losses))
        self.add_results((str(self.player2), np.mean(results)))

        # evaluate
        self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
        if self.opponent is None:
            score, results, overview = evaluate_against_base_players(self.player1)
        else:
            score, results, overview = evaluate_against_base_players(self.player1, evaluation_players=[self.opponent])
        self.add_results(results)

        if not silent:
            if Printer.print_episode(episode * games_per_evaluation, self.games, datetime.now() - start_time):
                overview = format_overview(overview)
                self.plot_and_save(
                    "%s vs TRADITIONAL OPPONENT" % self.player1,
                    "Train %s vs %s\nGames: %s Evaluations: %s\nTime: %s"
                    % (self.player1, self.opponent, episode * games_per_evaluation,
                       self.evaluations, config.time_diff(start_time)))

    self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(self.player1, silent=False)
    return self
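
# --- Hedged sketch (not part of the repo): run() above toggles training mode by
# setting `strategy.train` and `strategy.model.training` by hand. If the model is
# a torch.nn.Module, the idiomatic equivalent is train()/eval(), which flips the
# same `training` flag recursively and also switches Dropout/BatchNorm behaviour.
import torch.nn as nn

def set_mode(model: nn.Module, training: bool) -> None:
    # train() sets module.training = True on every submodule; eval() sets it False.
    model.train() if training else model.eval()

if __name__ == "__main__":
    net = nn.Sequential(nn.Linear(64, 128), nn.Dropout(0.5), nn.Linear(128, 64))
    set_mode(net, True)    # before the training simulations
    set_mode(net, False)   # before the evaluation games
    assert net.training is False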
def run(self, lr, silent=False):
    self.player1 = self.pretrained_player if self.pretrained_player else FCACPlayer(lr=lr)  # ConvACPlayer(lr=lr)

    if self.opponent is not None:
        self.player2 = self.opponent
        self.simulation = Othello([self.player1, self.player2])

    games_per_evaluation = self.games // self.evaluations
    start_time = datetime.now()
    for episode in range(1, self.evaluations + 1):
        if self.opponent is None:
            # Sample a fresh reference opponent for every episode
            self.player2 = choice((RandomPlayer(), NovicePlayer(), ExperiencedPlayer(deterministic=False)))
            self.simulation = Othello([self.player1, self.player2])

        # train
        self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode
        results, losses = self.simulation.run_simulations(games_per_evaluation)
        self.add_loss(np.mean(losses))
        self.add_results(("Training Results", np.mean(results)))

        # evaluate
        self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
        if self.opponent is None:
            score, results, overview = evaluate_against_base_players(self.player1)
        else:
            score, results, overview = evaluate_against_base_players(self.player1, evaluation_players=[self.opponent])
        self.add_results(results)

        if not silent:
            if Printer.print_episode(episode * games_per_evaluation, self.games, datetime.now() - start_time):
                overview = format_overview(overview)
                self.plot_and_save(
                    "%s vs TRADITIONAL OPPONENT" % self.player1,
                    "Train %s vs %s\nGames: %s Evaluations: %s\nTime: %s"
                    % (self.player1, self.opponent, episode * games_per_evaluation,
                       self.evaluations, config.time_diff(start_time)))

    self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(self.player1, silent=False)
    return self
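
# --- Hedged sketch (illustrative only): the opponent-sampling pattern above in
# isolation. _StubPlayer stands in for the repo's RandomPlayer, NovicePlayer and
# ExperiencedPlayer; constructing a new instance per episode avoids carrying
# opponent state across episodes.
from random import choice

class _StubPlayer:
    def __init__(self, name):
        self.name = name

    def __str__(self):
        return self.name

def sample_opponent():
    return choice((_StubPlayer("Random"), _StubPlayer("Novice"), _StubPlayer("Experienced")))

if __name__ == "__main__":
    for episode in range(3):
        print("episode %d vs %s" % (episode, sample_opponent()))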
def run(self, lr, milestones=False, silent=False):
    self.player1 = self.pretrained_player if self.pretrained_player else FCBaselinePlayer(lr=lr)

    games_per_evaluation = self.games // self.evaluations
    start_time = datetime.now()
    for episode in range(1, self.evaluations + 1):
        # If milestones exist, use them with probability p
        if self.milestones and random() < 0.2:
            self.player2 = choice(self.milestones)
        else:
            self.player2 = self.player1.copy(shared_weights=True)

        # train
        self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode
        self.player2.strategy.train = False
        self.simulation = Othello([self.player1, self.player2])
        results, losses = self.simulation.run_simulations(games_per_evaluation)
        self.add_loss(np.mean(losses))
        self.add_results(("Self", np.mean(results)))

        # evaluate every 1000 games
        if (episode * games_per_evaluation) % 1000 == 0:
            self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
            score, results, overview = evaluate_against_base_players(self.player1)
            self.add_results(results)

            if not silent and Printer.print_episode(episode * games_per_evaluation, self.games, datetime.now() - start_time):
                self.plot_and_save(
                    "%s vs SELF" % (str(self.player1) + (" milestones" if milestones else "")),
                    "Train %s vs Self\nGames: %s Evaluations: %s\nTime: %s"
                    % (self.player1, episode * games_per_evaluation, self.evaluations, config.time_diff(start_time)))

        # Save a milestone whenever another fifth of the training games is completed
        if milestones and (episode * games_per_evaluation) % (self.games // 5) == 0:
            self.milestones.append(self.player1.copy(shared_weights=False))

    self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(self.player1, silent=False)
    return self
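
# --- Hedged sketch (illustrative only) of the milestone mechanism above: with
# probability p the self-play opponent is a frozen past snapshot, otherwise a
# weight-sharing copy of the live player. The snapshot objects are stubbed here;
# in the repo they come from Player.copy(shared_weights=...).
from random import random, choice

MILESTONE_PROBABILITY = 0.2  # the "p" referenced in the comment above

def pick_self_play_opponent(current, milestones):
    if milestones and random() < MILESTONE_PROBABILITY:
        return choice(milestones)  # frozen snapshot: adds diversity, resists forgetting
    return current                 # mirror match against the live policy

if __name__ == "__main__":
    snapshots = ["snapshot@20%", "snapshot@40%"]
    print(pick_self_play_opponent("live player", snapshots))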
def run(self, lr, silent=False):
    self.player1 = self.pretrained_player if self.pretrained_player else FCBaselinePlayer(lr=lr)

    # Player 2 has the same start conditions as Player 1 but does not train
    self.player2 = self.player1.copy(shared_weights=False)
    self.player2.strategy.train, self.player2.strategy.model.training = False, False  # eval mode

    games_per_evaluation = self.games // self.evaluations
    self.replacements = (0, 0)  # (accepted replacements, total comparisons)
    start_time = datetime.now()
    for episode in range(1, self.evaluations + 1):
        # If milestones exist, use them with probability p
        if self.milestones and random() < 0.2:
            self.player2 = choice(self.milestones)

        self.simulation = Othello([self.player1, self.player2])

        # train
        self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode
        results, losses = self.simulation.run_simulations(games_per_evaluation)
        self.add_loss(np.mean(losses))
        self.add_results(("Best", np.mean(results)))

        # evaluate every 1000 games
        if (episode * games_per_evaluation) % 1000 == 0:
            self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
            score, results, overview = evaluate_against_base_players(self.player1)
            self.add_results(results)

            if not silent and Printer.print_episode(episode * games_per_evaluation, self.games, datetime.now() - start_time):
                replacement_ratio = self.replacements[0] / max(1, self.replacements[1])  # avoid division by zero before the first comparison
                self.plot_and_save(
                    "%s vs BEST" % (str(self.player1) + (" milestones" if MILESTONES else "")),
                    "Train %s vs Best version of self\nGames: %s Evaluations: %s Replacement ratio: %s\nTime: %s"
                    % (self.player1, episode * games_per_evaluation, self.evaluations,
                       replacement_ratio, config.time_diff(start_time)))

        # Replace the incumbent "best" player whenever the current player beats it
        if evaluate_against_each_other(self.player1, self.player2, games=8):
            self.player2 = self.player1.copy(shared_weights=False)
            self.player2.strategy.train, self.player2.strategy.model.training = False, False
            self.replacements = self.replacements[0] + 1, self.replacements[1] + 1
        else:
            self.replacements = self.replacements[0], self.replacements[1] + 1

        # Save a milestone whenever another fifth of the training games is completed
        if MILESTONES and (episode * games_per_evaluation) % (self.games // 5) == 0:
            self.milestones.append(self.player1.copy(shared_weights=False))
            self.milestones[-1].strategy.train = False

    self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(self.player1, silent=False)
    return self
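
# --- Hedged sketch (illustrative only) of the replacement bookkeeping above:
# replacements is an (accepted, attempted) pair, so accepted / attempted is the
# "Replacement ratio" shown in the plot title. challenger_wins stands in for the
# boolean returned by evaluate_against_each_other().
def update_replacements(challenger_wins, replacements):
    accepted, attempted = replacements
    return accepted + int(challenger_wins), attempted + 1

if __name__ == "__main__":
    replacements = (0, 0)
    for outcome in (True, False, True):
        replacements = update_replacements(outcome, replacements)
    print("replacement ratio: %.2f" % (replacements[0] / replacements[1]))  # 0.67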