def player_factory(name):
    """Return a new player object of the given type."""
    players = {
        "RANDOM": RandomPlayer(),
        "GREEDY": GreedyPlayer(),
        "MINIMAX": MinimaxPlayer(),
        "EXPECTIMAX": ExpectimaxPlayer(),
        "IDS": IterativeDeepeningPlayer(),
        "ALPHABETA": AlphaBetaPlayer(),
        "HEURISTIC": HeuristicPlayer(),
        "MINIMAX3": MinimaxPlayer(3),
        "MINIMAX1": MinimaxPlayer(1),
    }
    return players[name]
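# Illustrative usage sketch (not from the original file): the factory keys are
# uppercase, so caller input is normalized with .upper() before lookup. The
# helper name make_player_or_exit and the KeyError handling are assumptions
# added for clarity only.
def make_player_or_exit(name):
    try:
        return player_factory(name.upper())
    except KeyError:
        raise SystemExit("Unknown player type: " + name)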
def __init__(self, saved_weights=None):
    """Initialize attributes."""
    # board attributes
    self.size = 5                # board (go) size
    self.go = GO(self.size)      # initialize the board (go)

    # training params
    # mine; adjust these manually to tune the training process
    # ------------------------------------------------------------------
    self.R = 200                 # num of simulations (rollouts) per move
    self.check_freq = 50         # how often to check performance
    self.game_batch_num = 5000   # total number of self-play batches
    self.test_num = 50           # number of test games
    # ------------------------------------------------------------------

    # github preset, do not change
    self.lr = 2e-3               # learning rate of the whole process
    self.lr_coef = 1.0           # adaptively adjusts lr based on KL
    self.temp = 1.0              # temperature param controlling exploration
    self.C = 5                   # hyperparameter controlling the weight of prior probs
    self.buffer_size = 10000     # buffer size for data retrieval
    self.batch_size = 512        # mini-batch size for training
    self.data_buffer = deque(maxlen=self.buffer_size)  # data buffer
    self.play_batch_size = 1     # batch size for each round of self-play
    self.epochs = 5              # num of train_steps for each update
    self.kl_targ = 0.02          # KL target
    self.best_win_ratio = 0.0    # best win ratio used to compare models

    # set policy-value net
    self.pv_net = PolicyValueNet(self.size, saved_weights)

    # set my player
    self.mcts_player = MyPlayer(self.pv_net.policy, c=self.C, r=self.R,
                                is_selfplay=True, is_train=True)

    # set opponent players to evaluate performance
    self.op_player_n = 0
    self.op_players = [
        RandomPlayer(),
        GreedyPlayer(),
        AggressivePlayer(),
        SmartPlayer(),
    ]
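# Sketch (assumption, not in this file) of how lr, lr_coef, and kl_targ are
# commonly combined in this style of training loop: after each policy update,
# the KL divergence between the old and new policy is compared against
# kl_targ, and lr_coef is rescaled so the effective rate lr * lr_coef adapts.
# The helper name adjust_lr_coef and the 1.5 scale factor are assumptions.
def adjust_lr_coef(lr_coef, kl, kl_targ):
    """Hypothetical helper: rescale lr_coef when updates miss kl_targ."""
    if kl > kl_targ * 2 and lr_coef > 0.1:
        lr_coef /= 1.5   # updates overshooting the KL target: slow down
    elif kl < kl_targ / 2 and lr_coef < 10:
        lr_coef *= 1.5   # updates well under the KL target: speed up
    return lr_coef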
                    type=int, help="playing times.", default=1)
args = parser.parse_args()

n = args.size
times = args.times
players = [args.player1, args.player2]
player_objs = []
for player in players:
    if player.lower() == 'random':
        player_objs.append(RandomPlayer())
    elif player.lower() == 'manual':
        player_objs.append(ManualPlayer())
    elif player.lower() == 'greedy':
        player_objs.append(GreedyPlayer())
    elif player.lower() == 'aggressive':
        player_objs.append(AggressivePlayer())
    elif player.lower() == 'smart':
        player_objs.append(SmartPlayer())
    elif player.lower() == 'my':
        player_objs.append(MyPlayer())
    else:
        print('Wrong player type. Options: manual, random, greedy, '
              'aggressive, smart, my')
        sys.exit()

start = timeit.default_timer()
black_wins, white_wins = play_multiple(player_objs[0], player_objs[1],
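# Sketch (assumption, not in the original script): the if/elif chain above can
# be collapsed into a lookup table keyed by lowercase player names. Shown for
# comparison only; PLAYER_TYPES and build_player are hypothetical names.
PLAYER_TYPES = {
    'random': RandomPlayer,
    'manual': ManualPlayer,
    'greedy': GreedyPlayer,
    'aggressive': AggressivePlayer,
    'smart': SmartPlayer,
    'my': MyPlayer,
}

def build_player(kind):
    """Hypothetical helper: instantiate a player class by lowercase name."""
    cls = PLAYER_TYPES.get(kind.lower())
    if cls is None:
        raise SystemExit('Wrong player type. Options: ' +
                         ', '.join(sorted(PLAYER_TYPES)))
    return cls()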
def game_ends(self, win, lose, self_win=False, drain=False):
    Player.game_ends(self, win, lose, self_win, drain)
    if lose:
        training = self._prev_state is not None and self._mode == TRAIN
        if training:
            if self_win:
                final_reward = LOSE_REWARD / 2.0
            else:
                final_reward = LOSE_REWARD
            # print("Lose", final_reward, self._prev_action, "\n",
            #       self._prev_state.get(), "\n", self.game_state.get(), "\n")
            self._dqn_model.notify_reward(final_reward)
            self._dqn_model.append_memory_and_train(self._prev_state,
                                                    self._prev_action,
                                                    final_reward,
                                                    self.game_state, True)

    # Summary.
    if drain:
        self._drain_rounds += 1

    summary_dict = {
        "Win rate": self.rounds_won * 1.0 / self._total_rounds,
        "Lose rate": self.rounds_lost * 1.0 / self._total_rounds,
        "Drain rate": self._drain_rounds * 1.0 / self._total_rounds,
    }

    if self._mode == TRAIN and self._evaluate:
        if self._total_rounds % 6000 == 1:
            opponent = GreedyPlayer("Greedy BOT")
            eval_player = FullDQNPlayer("Full DQN BOT", MUTE,
                                        log_game_state=True)
            game = Game(100, [opponent, eval_player],
                        win_on_discard=True, disclose_all=False)
            game.play()
            summary_dict["Eval win rate"] = eval_player.rounds_won
            summary_dict["Eval lose rate"] = eval_player.rounds_lost
        self._dqn_model.episode_finished(summary_dict)

    if self._mode == PLAY:
        print(self.name + ":")
        if win:
            print("Won!")
    elif self._mode == EVAL:
        print(self.name + ":")
        print("Win rate:",
              str(self.rounds_won * 100.0 / self._total_rounds) +
              "%, Lose rate:",
              str(self.rounds_lost * 100.0 / self._total_rounds) + "%")
    elif self._mode == DEBUG:
        if win:
            if self_win:
                print(self.name, "self win!")
            else:
                print(self.name, "win on discard!")
        elif lose:
            if self_win:
                print(self.name, "lose, opponent self win.")
            else:
                print(self.name, "lose, opponent win on this discard.")
        else:
            print("Tile stack drained.")
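# Sketch (assumption): append_memory_and_train above implies a standard DQN
# replay transition of (state, action, reward, next_state, done). A minimal
# buffer with that shape could look like the following; the real _dqn_model
# is not shown in this excerpt, and ReplayMemory is a hypothetical name.
import random
from collections import deque

class ReplayMemory:
    def __init__(self, capacity=10000):
        self._buf = deque(maxlen=capacity)

    def append(self, state, action, reward, next_state, done):
        """Store one transition; oldest entries are evicted at capacity."""
        self._buf.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        """Draw a uniform random mini-batch for one training step."""
        return random.sample(self._buf, min(batch_size, len(self._buf)))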