Example #1
def player_factory(name):
    """
    Return a new player object of the given type.
    """
    # Map names to constructors so only the requested player is built
    # (the original built all nine players on every call).
    players = {
        "RANDOM": RandomPlayer,
        "GREEDY": GreedyPlayer,
        "MINIMAX": MinimaxPlayer,
        "EXPECTIMAX": ExpectimaxPlayer,
        "IDS": IterativeDeepeningPlayer,
        "ALPHABETA": AlphaBetaPlayer,
        "HEURISTIC": HeuristicPlayer,
        "MINIMAX3": lambda: MinimaxPlayer(3),
        "MINIMAX1": lambda: MinimaxPlayer(1)
    }
    if name not in players:
        raise ValueError("Unknown player type: %s" % name)
    return players[name]()
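A minimal usage sketch, assuming the player classes above are importable from the snippet's own repository; the particular pairing is illustrative only:

black = player_factory("ALPHABETA")
white = player_factory("RANDOM")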
Example #2
    def __init__(self, saved_weights=None):
        """ Initialize attributes. """
        # board attributes
        self.size = 5  # Go board size
        self.go = GO(self.size)  # initialize the Go board
        # training params
        # mine: adjust these manually to control the training process
        # --------------------------------------------------------------------
        self.R = 200  # number of simulations (rollouts) per move
        self.check_freq = 50  # how often (in batches) to check performance
        self.game_batch_num = 5000  # total number of self-play batches
        self.test_num = 50  # number of test games
        # --------------------------------------------------------------------
        # GitHub preset: do not change
        self.lr = 2e-3  # learning rate for the whole training process
        self.lr_coef = 1.0  # adaptively adjusts lr based on KL divergence
        self.temp = 1.0  # temperature parameter controlling exploration
        self.C = 5  # hyperparameter weighting the prior probabilities
        self.buffer_size = 10000  # replay buffer size for data retrieval
        self.batch_size = 512  # mini-batch size for training
        self.data_buffer = deque(maxlen=self.buffer_size)  # data buffer
        self.play_batch_size = 1  # number of self-play games per batch
        self.epochs = 5  # number of train steps per update
        self.kl_targ = 0.02  # KL divergence target
        self.best_win_ratio = 0.0  # best win ratio used to compare models
        # set policy-value net
        self.pv_net = PolicyValueNet(self.size, saved_weights)
        # set my player
        self.mcts_player = MyPlayer(self.pv_net.policy,
                                    c=self.C,
                                    r=self.R,
                                    is_selfplay=True,
                                    is_train=True)
        # set opponent players to evaluate performance
        self.op_player_n = 0
        self.op_players = [
            RandomPlayer(),
            GreedyPlayer(),
            AggressivePlayer(),
            SmartPlayer()
        ]
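The driver that consumes these attributes is not shown in the snippet. Below is a minimal sketch of the AlphaZero-style self-play loop they suggest; collect_selfplay_data, policy_update, policy_evaluate, and save_weights are hypothetical method names, not the repository's actual API:

    def run(self):
        """Hypothetical training driver over the attributes set in __init__."""
        for batch in range(self.game_batch_num):
            # self-play: push (state, mcts_probs, winner) tuples into data_buffer
            self.collect_selfplay_data(self.play_batch_size)
            if len(self.data_buffer) > self.batch_size:
                self.policy_update()  # runs self.epochs train steps on a mini-batch
            # periodically pit mcts_player against op_players
            if (batch + 1) % self.check_freq == 0:
                win_ratio = self.policy_evaluate(self.test_num)
                if win_ratio > self.best_win_ratio:
                    self.best_win_ratio = win_ratio
                    self.pv_net.save_weights("best_policy.model")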
Example #3
    parser.add_argument("--times",
                        type=int,
                        help="playing times.",
                        default=1)
    args = parser.parse_args()

    n = args.size
    times = args.times
    players = [args.player1, args.player2]
    player_objs = []
    for player in players:
        if player.lower() == 'random':
            player_objs.append(RandomPlayer())
        elif player.lower() == 'manual':
            player_objs.append(ManualPlayer())
        elif player.lower() == 'greedy':
            player_objs.append(GreedyPlayer())
        elif player.lower() == 'aggressive':
            player_objs.append(AggressivePlayer())
        elif player.lower() == 'smart':
            player_objs.append(SmartPlayer())
        elif player.lower() == 'my':
            player_objs.append(MyPlayer())
        else:
            print(
                'Wrong player type. Options: manual, random, greedy, aggressive, smart, my'
            )
            sys.exit()

    start = timeit.default_timer()

    black_wins, white_wins = play_multiple(player_objs[0], player_objs[1],
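The snippet is truncated at both ends. Grounded only in the args.* accesses above, the surrounding parser setup plausibly looks like the following sketch; every flag except --times (whose keyword arguments appear verbatim above) is an assumption:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--size", type=int, default=5)  # assumed: read as args.size
parser.add_argument("--player1", type=str)          # assumed: read as args.player1
parser.add_argument("--player2", type=str)          # assumed: read as args.player2
parser.add_argument("--times", type=int, help="playing times.", default=1)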
Example #4
    def game_ends(self, win, lose, self_win=False, drain=False):
        Player.game_ends(self, win, lose, self_win, drain)

        if lose:
            training = self._prev_state is not None and self._mode == TRAIN
            if training:
                if self_win:
                    final_reward = LOSE_REWARD / 2.0
                else:
                    final_reward = LOSE_REWARD
                # print("Lose", final_reward, self._prev_action, "\n",
                #       self._prev_state.get(), "\n", self.game_state.get(), "\n")
                self._dqn_model.notify_reward(final_reward)
                self._dqn_model.append_memory_and_train(
                    self._prev_state, self._prev_action, final_reward,
                    self.game_state, True)

        # Summary.
        if drain:
            self._drain_rounds += 1

        summary_dict = {
            "Win rate": self.rounds_won * 1.0 / self._total_rounds,
            "Lose rate": self.rounds_lost * 1.0 / self._total_rounds,
            "Drain rate": self._drain_rounds * 1.0 / self._total_rounds
        }
        if self._mode == TRAIN and self._evaluate:
            if self._total_rounds % 6000 == 1:
                opponent = GreedyPlayer("Greedy BOT")
                eval_player = FullDQNPlayer("Full DQN BOT",
                                            MUTE,
                                            log_game_state=True)

                game = Game(100, [opponent, eval_player],
                            win_on_discard=True,
                            disclose_all=False)
                game.play()

                summary_dict["Eval win rate"] = eval_player.rounds_won
                summary_dict["Eval lose rate"] = eval_player.rounds_lost

        self._dqn_model.episode_finished(summary_dict)

        if self._mode == PLAY:
            print(self.name + ":")
            if win:
                print("Won!")
        elif self._mode == EVAL:
            print(self.name + ":")
            print("Win rate: {}%, Lose rate: {}%".format(
                self.rounds_won * 100.0 / self._total_rounds,
                self.rounds_lost * 100.0 / self._total_rounds))
        elif self._mode == DEBUG:
            if win:
                if self_win:
                    print(self.name, "self win!")
                else:
                    print(self.name, "win on discard!")
            elif lose:
                if self_win:
                    print(self.name, "lose, opponent self win.")
                else:
                    print(self.name, "lose, opponent win on this discard.")
            else:
                print("Tile stack drained.")