コード例 #1
0
    def train_agent(self, random_agents_interval):
        """Play ``self.num_games`` training games with periodic evaluation.

        A fresh ``Wizard`` game is created from ``self.players`` and played
        for every round. After each round whose index is a positive multiple
        of ``random_agents_interval``, ``play_evaluation_games`` is called.

        Args:
            random_agents_interval: Number of rounds between two evaluation
                runs. Round 0 never triggers an evaluation.
        """
        self.log_training_info()
        for round_idx in range(self.num_games):
            # Emit a progress message on every 100th round.
            if not round_idx % 100:
                self.logger.info("Playing training round {}".format(round_idx))

            Wizard(num_players=self.num_players, players=self.players).play()

            # Skip the evaluation unless a full interval has just elapsed;
            # the first check keeps the modulo from running for round 0.
            if round_idx <= 0 or round_idx % random_agents_interval != 0:
                continue
            self.play_evaluation_games()
コード例 #2
0
 def test_play_with_RLAgent(self):
     """Check that a seeded game with an RLAgent yields a non-None result.

     The table consists of five ``RandomPlayer`` instances followed by one
     ``RLAgent``.
     """
     from Environment.Wizard import Wizard
     seed(2)
     table = [RandomPlayer() for _ in range(5)] + [RLAgent()]
     outcome = Wizard(players=table).play()
     self.assertIsNotNone(outcome)
コード例 #3
0
    def train_agent(self):
        """Play ``self.num_games`` games and record the scores of each one.

        Every round creates a fresh ``Wizard`` game from ``self.players``
        and hands the value returned by ``play()`` to ``update_scores``
        together with the round index.
        """
        self.log_training_info()
        for round_idx in range(self.num_games):
            # Emit a progress message on every 100th round.
            if not round_idx % 100:
                self.logger.info("Playing round {}".format(round_idx))

            game = Wizard(num_players=self.num_players, players=self.players)
            self.update_scores(game.play(), round_idx)
コード例 #4
0
    def collect_training_data(self, players):
        """Play ``self.training_rounds`` tracked games and gather their history.

        Each game is created with ``track_tricks=True``; after it has been
        played, ``get_history()`` supplies an ``(x, y)`` pair for that game.
        The per-game parts are buffered and joined along axis 0 once at the
        end, instead of re-concatenating (and therefore re-copying) the whole
        accumulated array after every single game.

        Args:
            players: Players handed to every ``Wizard`` game.

        Returns:
            A tuple ``(x, y)`` holding the per-game histories concatenated
            along axis 0 in play order, or ``(None, None)`` when no game is
            played.
        """
        x_parts = []
        y_parts = []

        for i in range(self.training_rounds):
            wizard = Wizard(players=players, track_tricks=True)
            wizard.play()

            game_x, game_y = wizard.get_history()
            x_parts.append(game_x)
            y_parts.append(game_y)

            # Temporary progress tracker.
            if i % 100 == 0:
                self.logger.info("Trick Prediction Initializer: Round {} finished".format(i))

        if not x_parts:
            return None, None
        if len(x_parts) == 1:
            # A single game is passed through untouched, exactly as before.
            return x_parts[0], y_parts[0]
        return np.concatenate(x_parts, axis=0), np.concatenate(y_parts, axis=0)
コード例 #5
0
 def test_several_RL_one_estimator(self):
     """Play 20000 games with four RLAgents that share one estimator.

     The first ``RLAgent`` loads its estimator; three further agents are
     built around that same estimator. An ``AverageRandomPlayer`` and a
     ``RandomPlayer`` complete the table. Once all games are played the
     first agent's estimator is saved and the collected scores are passed
     to ``plotting.plot_moving_average_scores`` with ``100`` as second
     argument.
     """
     from Environment.Wizard import Wizard
     games = 20000
     lead_agent = RLAgent()
     lead_agent.load_estimator()
     players = [lead_agent]
     players += [RLAgent(estimator=lead_agent.estimator) for _ in range(3)]
     players += [AverageRandomPlayer(), RandomPlayer()]
     scores = []
     for game_no in range(games):
         # Print progress on every 100th game.
         if not game_no % 100:
             print("{}/{}".format(game_no, games))
         scores.append(Wizard(players=players).play())
     lead_agent.save_estimator()
     plotting.plot_moving_average_scores(np.array(scores), 100)
コード例 #6
0
    def play_evaluation_games(self):
        """Evaluate ``self.players[0]`` against ``AverageRandomPlayer`` opponents.

        Training is disabled while ``self.evaluation_games`` games are played
        with ``len(self.players) - 1`` ``AverageRandomPlayer`` instances and
        ``self.players[0]`` seated last. The per-game scores and win flags
        are passed to ``update_scores``. Training is re-enabled afterwards,
        also when an evaluation game or the score update raises, so a failed
        evaluation cannot leave training switched off.
        """
        self.disable_training()
        try:
            self.t_eval += 1
            self.logger.info("Starting a train evaluation game with random players")
            players = [AverageRandomPlayer() for _ in range(len(self.players) - 1)]
            players.append(self.players[0])

            # One row per evaluation game, one column per player:
            # the raw scores and a 0/1 flag marking the winner(s).
            eval_scores = np.zeros((self.evaluation_games, len(players)))
            eval_wins = np.zeros((self.evaluation_games, len(players)))

            for i in range(self.evaluation_games):
                wiz = Wizard(num_players=len(players), players=players)
                scores = wiz.play()
                eval_scores[i] = scores
                # Every player that reached the top score is flagged, so a
                # tie marks more than one winner.
                eval_wins[i][scores == np.max(scores)] = 1

            self.update_scores(eval_scores, eval_wins)
        finally:
            # Restore training mode on every exit path.
            self.enable_training()
コード例 #7
0
 def test_play(self):
     """Check that a seeded game built with ``Wizard(4)`` yields a result."""
     from Environment.Wizard import Wizard
     seed(2)
     outcome = Wizard(4).play()
     self.assertIsNotNone(outcome)