def train_agent(self, random_agents_interval):
    """Train over self.num_games full games.

    Every ``random_agents_interval`` games (skipping game 0), training is
    paused and a batch of evaluation games against random players is run
    via self.play_evaluation_games().

    Args:
        random_agents_interval: number of training games between
            evaluation runs; must be a positive integer.
    """
    self.log_training_info()
    for round_idx in range(self.num_games):
        # Progress heartbeat every 100 rounds.
        if round_idx % 100 == 0:
            self.logger.info("Playing training round {}".format(round_idx))
        Wizard(num_players=self.num_players, players=self.players).play()
        if round_idx > 0 and round_idx % random_agents_interval == 0:
            self.play_evaluation_games()
def test_play_with_RLAgent(self):
    """A seeded game with five random players plus one RL agent completes
    and play() returns a non-None result."""
    from Environment.Wizard import Wizard
    seed(2)
    players = [RandomPlayer() for _ in range(5)]
    players.append(RLAgent())
    game = Wizard(players=players)
    self.assertIsNotNone(game.play())
def train_agent(self):
    """Run the full training schedule of self.num_games games, feeding
    each game's final scores into self.update_scores()."""
    self.log_training_info()
    for game_idx in range(self.num_games):
        # Progress heartbeat every 100 rounds.
        if game_idx % 100 == 0:
            self.logger.info("Playing round {}".format(game_idx))
        game = Wizard(num_players=self.num_players, players=self.players)
        self.update_scores(game.play(), game_idx)
def collect_training_data(self, players):
    """Play self.training_rounds tracked games and gather their histories.

    Each game is played with ``track_tricks=True`` and its recorded
    (x, y) history is collected; all histories are stacked along axis 0.

    Args:
        players: list of player objects handed to every Wizard game.

    Returns:
        Tuple ``(x, y)`` of numpy arrays containing the concatenated
        histories of all games, or ``(None, None)`` when
        self.training_rounds is 0 (matches the original behavior).
    """
    x_parts = []
    y_parts = []
    for i in range(self.training_rounds):
        wizard = Wizard(players=players, track_tricks=True)
        wizard.play()
        temp_x, temp_y = wizard.get_history()
        x_parts.append(temp_x)
        y_parts.append(temp_y)
        # temporary progress tracker
        if i % 100 == 0:
            self.logger.info("Trick Prediction Initializer: Round {} finished".format(i))
    if not x_parts:
        return None, None
    # Concatenate once at the end instead of per-iteration — repeated
    # np.concatenate in the loop copies all prior data every round (O(n^2)).
    return np.concatenate(x_parts, axis=0), np.concatenate(y_parts, axis=0)
def test_several_RL_one_estimator(self):
    """Long-running experiment: four RL agents sharing one estimator play
    20000 games against an average-random and a random player, then the
    estimator is saved and the moving-average scores are plotted."""
    from Environment.Wizard import Wizard
    games = 20000
    players = [RLAgent()]
    players[0].load_estimator()
    # Three additional RL agents all reuse the first agent's estimator.
    for _ in range(3):
        players.append(RLAgent(estimator=players[0].estimator))
    players.append(AverageRandomPlayer())
    players.append(RandomPlayer())
    scores = []
    for game_idx in range(games):
        if game_idx % 100 == 0:
            print("{}/{}".format(game_idx, games))
        scores.append(Wizard(players=players).play())
    players[0].save_estimator()
    plotting.plot_moving_average_scores(np.array(scores), 100)
def play_evaluation_games(self):
    """Evaluate the first tracked player against average-random opponents.

    Training is disabled for the duration of the evaluation. The lineup is
    (len(self.players) - 1) AverageRandomPlayers plus self.players[0];
    per-game scores and win indicators are accumulated over
    self.evaluation_games games and passed to self.update_scores().
    """
    self.disable_training()
    self.t_eval += 1
    self.logger.info("Starting a train evaluation game with random players")
    lineup = [AverageRandomPlayer() for _ in range(len(self.players) - 1)]
    lineup.append(self.players[0])
    # Per-game score matrix and one-hot (possibly tied) win matrix.
    eval_scores = np.zeros((self.evaluation_games, len(lineup)))
    eval_wins = np.zeros((self.evaluation_games, len(lineup)))
    for game_idx in range(self.evaluation_games):
        game = Wizard(num_players=len(lineup), players=lineup)
        round_scores = game.play()
        eval_scores[game_idx] = round_scores
        # Mark every player who achieved the top score as a winner.
        eval_wins[game_idx][round_scores == np.max(round_scores)] = 1
    self.update_scores(eval_scores, eval_wins)
    self.enable_training()
def test_play(self):
    """A seeded default four-player game completes and play() returns
    a non-None result."""
    from Environment.Wizard import Wizard
    seed(2)
    self.assertIsNotNone(Wizard(4).play())