def run_episode(self):
    """Play one game between two NNPlayer instances and collect examples.

    A random board is drawn, and a blue and a red ``NNPlayer`` are created
    that both start from the same initial ``MCTSNode`` and use
    ``self.predictor`` with ``self.n_simulations`` simulations. The game
    runs until ``game.is_finished(self.iter_max)`` is true.

    For every action, two examples are recorded before the action is
    applied to the board: one plain and one requested with
    ``data_augmentation=True``. Each example is a list
    ``[features, policy, color]``. Blue moves record the plain example
    first; red moves record the augmented example first.

    Returns:
        The list of examples, after ``set_winner(examples, winner)`` has
        been called on it with the result of ``game.get_winner()``.
    """
    examples = []
    board = get_random_board()
    initial_node = MCTSNode(is_initial=True)

    # Both players are configured identically apart from their color.
    shared_settings = {
        "n_simulations": self.n_simulations,
        "current_node": initial_node,
        "janggi_net": self.predictor,
        "temperature_start": 1,
        "temperature_threshold": 30,
        "temperature_end": 0.01,
    }
    player_blue = NNPlayer(Color.BLUE, **shared_settings)
    player_red = NNPlayer(Color.RED, **shared_settings)
    game = Game(player_blue, player_red, board)

    while not game.is_finished(self.iter_max):
        action = game.get_next_action()
        game.actions.append(action)

        # Pick whose search node supplies the policy, the color label to
        # store, and the order in which plain/augmented examples are added.
        if game.current_player == Color.BLUE:
            mover, label, augment_order = player_blue, Color.BLUE, (False, True)
        else:
            mover, label, augment_order = player_red, Color.RED, (True, False)

        for augmented in augment_order:
            # Only pass the keyword when augmentation is requested, so the
            # plain calls rely on the callee's own default.
            extra = {"data_augmentation": True} if augmented else {}
            features = board.get_features(game.current_player, game.round,
                                          **extra)
            policy = mover.current_node.get_policy(game.current_player,
                                                   **extra)
            examples.append([features, policy, label])

        game.board.apply_action(action)
        game.switch_player()
        # Try to reduce memory usage
        game.board.invalidate_action_cache(action)
        game.round += 1

    winner = game.get_winner()
    set_winner(examples, winner)
    return examples
def run_game(board, player_blue, player_red, iter_max):
    """Play a game between two players and return the finished game.

    Args:
        board: Board passed to ``Game`` as the starting position.
        player_blue: Player passed to ``Game`` as the first player.
        player_red: Player passed to ``Game`` as the second player.
        iter_max: Value forwarded to ``game.is_finished`` on every turn.

    Returns:
        The ``Game`` object once ``game.is_finished(iter_max)`` is true,
        with every played action appended to ``game.actions`` and
        ``game.round`` incremented once per action.
    """
    game = Game(player_blue, player_red, board)

    while True:
        if game.is_finished(iter_max):
            return game

        action = game.get_next_action()
        game.actions.append(action)
        game.board.apply_action(action)
        game.switch_player()
        # Try to reduce memory usage
        game.board.invalidate_action_cache(action)
        game.round += 1