Example #1
0
    def run(self, lr, silent=False):
        """Train player 1 against one fixed opponent, evaluating after every batch.

        Player 1 is ``self.pretrained_player`` when one is set, otherwise a new
        ``FCBaselinePlayer``. Player 2 is ``self.opponent`` when one is set,
        otherwise a ``SearchPlayer`` with ``search_depth=3`` and
        ``OthelloHeuristic.RGRUENER_STRATEGY``.

        The ``self.games`` training games are split into ``self.evaluations``
        equally sized batches (integer division); after each batch the player
        is evaluated and the results are recorded via ``self.add_results``.

        Args:
            lr: Learning rate handed to ``FCBaselinePlayer`` when no pretrained
                player is available.
            silent: When true, progress printing and plotting are skipped.

        Returns:
            ``self``, with ``final_score``, ``final_results`` and
            ``results_overview`` set from a final, non-silent evaluation.
        """
        self.player1 = self.pretrained_player if self.pretrained_player else FCBaselinePlayer(
            lr=lr)

        if self.opponent is None:
            self.player2 = SearchPlayer(
                search_depth=3, strategy=OthelloHeuristic.RGRUENER_STRATEGY)
        else:
            self.player2 = self.opponent

        self.simulation = Othello([self.player1, self.player2])

        games_per_evaluation = self.games // self.evaluations
        start_time = datetime.now()
        for episode in range(1, self.evaluations + 1):

            # train
            self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode

            results, losses = self.simulation.run_simulations(
                games_per_evaluation)
            self.add_loss(np.mean(losses))
            self.add_results((str(self.player2), np.mean(results)))

            # evaluate
            self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
            if self.opponent is None:
                score, results, overview = evaluate_against_base_players(
                    self.player1)
            else:
                score, results, overview = evaluate_against_base_players(
                    self.player1, evaluation_players=[self.opponent])

            self.add_results(results)

            if not silent:
                if Printer.print_episode(episode * games_per_evaluation,
                                         self.games,
                                         datetime.now() - start_time):
                    # NOTE(review): the formatted overview is not used afterwards.
                    overview = format_overview(overview)
                    # Label the plot with the opponent actually played against.
                    # self.opponent is None when the default SearchPlayer is in
                    # use, which previously rendered as "vs None".
                    self.plot_and_save(
                        "%s vs TRADITIONAL OPPONENT" % (self.player1),
                        "Train %s vs %s\nGames: %s Evaluations: %s\nTime: %s" %
                        (self.player1, self.player2,
                         episode * games_per_evaluation, self.evaluations,
                         config.time_diff(start_time)))

        self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(
            self.player1, silent=False)
        return self
Example #2
0
    def run(self, lr, silent=False):
        """Alternate training and evaluation of player 1 against traditional opponents.

        Player 1 is ``self.pretrained_player`` when one is set, otherwise a new
        ``FCACPlayer``. With a fixed ``self.opponent`` every round is played
        against it; without one, each round draws a new opponent from a
        ``RandomPlayer``, a ``NovicePlayer`` and a non-deterministic
        ``ExperiencedPlayer``.

        Args:
            lr: Learning rate handed to ``FCACPlayer`` when no pretrained
                player is available.
            silent: When true, progress printing and plotting are skipped.

        Returns:
            ``self``, with ``final_score``, ``final_results`` and
            ``results_overview`` set from a final, non-silent evaluation.
        """
        self.player1 = self.pretrained_player or FCACPlayer(lr=lr)  # ConvACPlayer(lr=lr)

        if self.opponent is not None:
            # A fixed opponent needs only one simulation for the whole run.
            self.player2 = self.opponent
            self.simulation = Othello([self.player1, self.player2])

        batch_size = self.games // self.evaluations
        started_at = datetime.now()

        for round_no in range(1, self.evaluations + 1):
            games_so_far = round_no * batch_size

            if self.opponent is None:
                # No fixed opponent: draw a fresh one for this round.
                candidates = (RandomPlayer(), NovicePlayer(),
                              ExperiencedPlayer(deterministic=False))
                self.player2 = choice(candidates)
                self.simulation = Othello([self.player1, self.player2])

            # Training phase: switch strategy and model to training mode.
            self.player1.strategy.train = True
            self.player1.strategy.model.training = True

            train_results, train_losses = self.simulation.run_simulations(batch_size)
            self.add_loss(np.mean(train_losses))
            self.add_results(("Training Results", np.mean(train_results)))

            # Evaluation phase: switch strategy and model to eval mode.
            self.player1.strategy.train = False
            self.player1.strategy.model.training = False

            if self.opponent is None:
                eval_kwargs = {}
            else:
                eval_kwargs = {"evaluation_players": [self.opponent]}
            _score, eval_results, overview = evaluate_against_base_players(
                self.player1, **eval_kwargs)
            self.add_results(eval_results)

            if silent:
                continue
            if not Printer.print_episode(games_so_far, self.games,
                                         datetime.now() - started_at):
                continue

            format_overview(overview)  # return value is not used further
            title = "%s vs TRADITIONAL OPPONENT" % (self.player1)
            info = "Train %s vs %s\nGames: %s Evaluations: %s\nTime: %s" % (
                self.player1, self.opponent, games_so_far, self.evaluations,
                config.time_diff(started_at))
            self.plot_and_save(title, info)

        final_evaluation = evaluate_against_base_players(self.player1, silent=False)
        self.final_score, self.final_results, self.results_overview = final_evaluation
        return self
    def run(self, lr, milestones=False, silent=False):
        """Train player 1 through self-play, optionally against stored milestones.

        Each episode player 2 is either a random entry of ``self.milestones``
        (with probability 0.2 when any exist) or a weight-sharing copy of
        player 1. Player 2 never trains. Evaluation against the base players
        runs whenever the number of games played is a multiple of 1000.

        Args:
            lr: Learning rate handed to ``FCBaselinePlayer`` when no pretrained
                player is available.
            milestones: When true, a non-shared copy of player 1 is appended to
                ``self.milestones`` each time another fifth of ``self.games``
                has been played.
            silent: When true, progress printing and plotting are skipped.

        Returns:
            ``self``, with ``final_score``, ``final_results`` and
            ``results_overview`` set from a final, non-silent evaluation.
        """
        self.player1 = self.pretrained_player if self.pretrained_player else FCBaselinePlayer(lr=lr)

        games_per_evaluation = self.games // self.evaluations
        # Denominator for the milestone check; never zero.
        total_games = max(1, self.games)
        start_time = datetime.now()
        for episode in range(1, self.evaluations+1):
            games_played = episode * games_per_evaluation

            # If milestones exist, use them with probability p
            if self.milestones and random() < 0.2:
                self.player2 = choice(self.milestones)
            else:
                self.player2 = self.player1.copy(shared_weights=True)

            # train
            self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode
            self.player2.strategy.train = False

            self.simulation = Othello([self.player1, self.player2])
            results, losses = self.simulation.run_simulations(games_per_evaluation)
            self.add_loss(np.mean(losses))
            self.add_results(("Self", np.mean(results)))

            # evaluate
            if games_played % 1000 == 0:
                self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
                score, results, overview = evaluate_against_base_players(self.player1)
                self.add_results(results)

                if not silent and Printer.print_episode(games_played, self.games, datetime.now() - start_time):
                    self.plot_and_save(
                        "%s vs SELF" % (self.player1.__str__() + (" milestones" if milestones else "")),
                        "Train %s vs Self\nGames: %s Evaluations: %s\nTime: %s"
                        % (self.player1, games_played, self.evaluations, config.time_diff(start_time)))

            # Save a milestone whenever this episode completed another fifth of
            # the training games. The previous test,
            # (self.games / episode * games_per_evaluation) % 5 == 0, parsed as
            # ((games / episode) * games_per_evaluation) % 5 and did not track
            # training progress.
            fifths_before = 5 * (games_played - games_per_evaluation) // total_games
            fifths_done = 5 * games_played // total_games
            if milestones and fifths_done > fifths_before:
                self.milestones.append(self.player1.copy(shared_weights=False))

        self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(self.player1, silent=False)
        return self
    def run(self, lr, silent=False):
        """Train player 1 against the best previous version of itself.

        Player 2 starts as a non-shared, non-training copy of player 1. After
        every training batch the two are compared with
        ``evaluate_against_each_other``; when that call returns a truthy value,
        player 2 is replaced by a fresh copy of player 1.
        ``self.replacements`` holds ``(replacements, comparisons)``.

        When ``self.milestones`` is non-empty, a random milestone takes the
        place of player 2 with probability 0.2 at the start of an episode.

        Args:
            lr: Learning rate handed to ``FCBaselinePlayer`` when no pretrained
                player is available.
            silent: When true, progress printing and plotting are skipped.

        Returns:
            ``self``, with ``final_score``, ``final_results`` and
            ``results_overview`` set from a final, non-silent evaluation.
        """
        self.player1 = self.pretrained_player if self.pretrained_player else FCBaselinePlayer(
            lr=lr)

        # Player 2 has the same start conditions as Player 1 but does not train
        self.player2 = self.player1.copy(shared_weights=False)
        self.player2.strategy.train, self.player2.strategy.model.training = False, False  # eval mode

        games_per_evaluation = self.games // self.evaluations
        # Denominator for the milestone check; never zero.
        total_games = max(1, self.games)
        self.replacements = (0, 0)
        start_time = datetime.now()
        for episode in range(1, self.evaluations + 1):
            games_played = episode * games_per_evaluation

            # If milestones exist, use them with probability p
            # NOTE(review): a sampled milestone overwrites the current "best"
            # opponent and stays until player 1 beats it -- confirm intended.
            if self.milestones and random() < 0.2:
                self.player2 = choice(self.milestones)

            self.simulation = Othello([self.player1, self.player2])

            # train
            self.player1.strategy.train, self.player1.strategy.model.training = True, True  # training mode

            results, losses = self.simulation.run_simulations(
                games_per_evaluation)
            self.add_loss(np.mean(losses))
            self.add_results(("Best", np.mean(results)))

            # evaluate
            if games_played % 1000 == 0:
                self.player1.strategy.train, self.player1.strategy.model.training = False, False  # eval mode
                score, results, overview = evaluate_against_base_players(
                    self.player1)
                self.add_results(results)

                if not silent and Printer.print_episode(
                        games_played, self.games,
                        datetime.now() - start_time):
                    # The counters are updated after this plot, so in the first
                    # episode the comparison count is still 0; avoid dividing
                    # by zero in that case.
                    replaced, compared = self.replacements
                    replacement_ratio = replaced / compared if compared else 0.0
                    self.plot_and_save(
                        "%s vs BEST" % (self.player1.__str__() +
                                        (" milestones" if MILESTONES else "")),
                        "Train %s vs Best version of self\nGames: %s Evaluations: %s Replacement ratio: %s\nTime: %s"
                        % (self.player1, games_played,
                           self.evaluations, replacement_ratio,
                           config.time_diff(start_time)))

            replaced, compared = self.replacements
            if evaluate_against_each_other(self.player1, self.player2,
                                           games=8):
                self.player2 = self.player1.copy(shared_weights=False)
                self.player2.strategy.train, self.player2.strategy.model.training = False, False
                self.replacements = (replaced + 1, compared + 1)
            else:
                self.replacements = (replaced, compared + 1)

            # Save a milestone whenever this episode completed another fifth of
            # the training games. The previous test,
            # (self.games / episode * games_per_evaluation) % 5 == 0, parsed as
            # ((games / episode) * games_per_evaluation) % 5 and did not track
            # training progress.
            fifths_before = 5 * (games_played - games_per_evaluation) // total_games
            fifths_done = 5 * games_played // total_games
            if MILESTONES and fifths_done > fifths_before:
                self.milestones.append(self.player1.copy(shared_weights=False))
                self.milestones[-1].strategy.train = False

        self.final_score, self.final_results, self.results_overview = evaluate_against_base_players(
            self.player1, silent=False)
        return self