Example 1
    def run(self, lr, silent=False):

        EVALUATION_GAMES = 10

        player = FCReinforcePlayer(lr=lr)
        player.color = config.BLACK

        expert = ExperiencedPlayer(deterministic=True, block_mid=True)
        expert.color = config.BLACK

        generator = RandomPlayer()
        color_iterator = self.AlternatingColorIterator()

        validation_set = self.generate_supervised_training_data(
            EVALUATION_GAMES,
            ExperiencedPlayer(deterministic=True, block_mid=True))

        print("Training ReinforcedPlayer supervised continuously with LR: %s" %
              lr)
        start = datetime.now()
        for game in range(self.games):
            rewards = []
            board = TicTacToeBoard()

            for i in range(9):
                expert_move = expert.get_move(board)
                player_move = player.get_move(board)

                reward = config.LABEL_WIN if expert_move == player_move else config.LABEL_LOSS
                rewards.append(reward)

                # prepare for next sample
                move = generator.get_move(board)
                board.apply_move(move, next(color_iterator))

            average_reward = sum(rewards) / len(rewards)
            player.strategy.rewards = rewards
            loss = player.strategy.update()

            # clearing in place also empties player.strategy.rewards,
            # which still references this same list
            del rewards[:]
            self.add_results([("Losses", loss), ("Reward", average_reward)])

            if game % self.evaluation_period == 0:
                test_rewards = []
                for board, expert_move in validation_set:
                    # Evaluation mode: model.eval()/.train() propagate the
                    # training flag to submodules, unlike assigning .training
                    player.strategy.train = False
                    player.strategy.model.eval()
                    strategy_move = player.get_move(board)
                    player.strategy.train = True
                    player.strategy.model.train()

                    # use the same win/loss labels as the training reward
                    test_reward = config.LABEL_WIN if expert_move == strategy_move else config.LABEL_LOSS
                    test_rewards.append(test_reward)

                average_test_reward = sum(test_rewards) / len(test_rewards)
                del test_rewards[:]
                self.add_results(("Test reward", average_test_reward))

            if not silent:
                if Printer.print_episode(game + 1, self.games,
                                         datetime.now() - start):
                    plot_name = "Supervised Continuous training of %s" % (
                        player)
                    plot_info = "%s Games - Final reward: %s \nTime: %s" % (
                        game + 1, average_reward, config.time_diff(start))
                    self.plot_and_save(plot_name, plot_name + "\n" + plot_info)

        return average_reward
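Example 1 draws alternating colors with next(color_iterator); the iterator itself is defined elsewhere on the experiment class. Below is a minimal sketch of what AlternatingColorIterator presumably looks like, assuming the same config.BLACK and config.WHITE constants used above (the class name matches the call site, the body is an assumption):

import itertools

class AlternatingColorIterator:
    """Endlessly yields config.BLACK, config.WHITE, config.BLACK, ...
    so random filler moves are applied with alternating colors."""

    def __init__(self):
        # itertools.cycle repeats the two-color sequence forever
        self._colors = itertools.cycle([config.BLACK, config.WHITE])

    def __iter__(self):
        return self

    def __next__(self):
        return next(self._colors)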
Example 2
    def run(self, lr, silent=False):

        print(
            "Training PGStrategy supervised on %s games for %s Episodes - LR: %s"
            % (self.games, self.episodes, lr))
        TEST_GAMES = 1

        player = FCReinforcePlayer(lr=lr)
        player.color = config.BLACK

        expert = ExperiencedPlayer(deterministic=True, block_mid=True)
        expert.color = config.BLACK

        training_set = self.generate_supervised_training_data(
            self.games, expert)
        test_set = self.generate_supervised_training_data(TEST_GAMES, expert)

        start = datetime.now()
        for episode in range(self.episodes):
            rewards = []
            test_rewards = []

            for board, expert_move in training_set:
                # Training mode: model.train() propagates to submodules
                player.strategy.train = True
                player.strategy.model.train()

                strategy_move = player.get_move(board)
                reward = config.LABEL_WIN if expert_move == strategy_move else config.LABEL_LOSS
                rewards.append(reward)

            average_reward = sum(rewards) / len(rewards)
            player.strategy.rewards = rewards
            loss = player.strategy.update()

            for board, expert_move in test_set:
                # Evaluation mode: model.eval() propagates to submodules
                player.strategy.train = False
                player.strategy.model.eval()

                strategy_move = player.get_move(board)
                test_reward = config.LABEL_WIN if expert_move == strategy_move else config.LABEL_LOSS
                test_rewards.append(test_reward)

            average_test_reward = sum(test_rewards) / len(test_rewards)

            self.add_results([("Losses", loss),
                              ("Average reward", average_reward),
                              ("Average test reward", average_test_reward)])

            if not silent:
                if Printer.print_episode(episode + 1, self.episodes,
                                         datetime.now() - start):
                    plot_name = "Supervised on %s games lr: %s" % (self.games,
                                                                   lr)
                    plot_info = "Lr: %s - %s Games - %s Episodes\nFinal Scores: %s / %s \nTime: %s" % (
                        lr, self.games, episode + 1,
                        '{:.2f}'.format(average_reward),
                        '{:.2f}'.format(average_test_reward),
                        config.time_diff(start))
                    self.plot_and_save(plot_name, plot_name + "\n" + plot_info)

        return average_reward, average_test_reward
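Both examples hide the actual training step behind player.strategy: get_move() is assumed to record the log-probability of each sampled move, the caller assigns strategy.rewards, and strategy.update() takes one policy-gradient step and returns a scalar loss. The sketch below illustrates that contract in REINFORCE style; PGStrategySketch, its log_probs buffer, and the Adam optimizer are illustrative assumptions, not the repo's actual implementation:

import torch

class PGStrategySketch:
    """Minimal REINFORCE-style stand-in for the strategy used above."""

    def __init__(self, model, lr):
        self.model = model
        self.optimizer = torch.optim.Adam(model.parameters(), lr=lr)
        self.log_probs = []  # appended to by get_move() during the episode
        self.rewards = []    # assigned by the caller, as in the examples

    def update(self):
        # REINFORCE loss: -sum(log pi(a|s) * R) over the episode,
        # where R is the win/loss label recorded by the caller
        losses = [-lp * r for lp, r in zip(self.log_probs, self.rewards)]
        loss = torch.stack(losses).sum()
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        # reset episode buffers so the next episode starts clean
        self.log_probs, self.rewards = [], []
        return loss.item()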