Example 1
    def __init__(self, T: int, n: int, tau: float, rollout: Rollout,
                 eps: float):
        # initialize game config
        self.game_config = GameConfig(StartingSplit.StartingRandomSplit,
                                      prosperity=False,
                                      num_players=1,
                                      sandbox=True)
        self.supply = Supply(self.game_config)

        self.game = None
        # max number of turns in a game
        self.T = T
        self.expanded = False
        self.rollout_model = rollout
        self.data = MCTSData()
        self.player = None
        self.iter = 0
        self.iters = n

        if self.rollout_model == Rollout.Random:
            self.rollout = RandomRollout()
        elif rollout == Rollout.HistoryHeuristic:
            self.rollout_cards = []
            self.rollout = HistoryHeuristicRollout(tau=tau, train=True)
        elif rollout == Rollout.LinearRegression:
            self.rollout = LinearRegressionRollout(self.iters,
                                                   self.supply,
                                                   tau=tau,
                                                   train=True,
                                                   eps=eps)
        self.player = MCTSPlayer(rollout=self.rollout, train=True)
Example 2
def main(args):
    config = GameConfig(num_players=2,
                        sandbox=args.sandbox,
                        feature_type=args.ftype,
                        device=args.device)

    tree = GameTree(train=True)

    D_in = config.feature_size
    H = (config.feature_size + 1) // 2

    player = MCTSPlayer(rollout=init_rollouts(args.rollout, D_in=D_in, H=H)[0],
                        tree=tree)

    players = [player, player]

    env = DefaultEnvironment(config, players)

    train_mcts(env,
               tree,
               args.n,
               save_epochs=args.save_epochs,
               train_epochs=args.train_epochs,
               train_epochs_interval=args.train_epochs_interval,
               path=args.path,
               rollout_path=args.rollout_path,
               capacity=args.buffer_cap)
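
A hedged entry-point sketch for the excerpt above: the argparse setup is not part of this example, so the Namespace below only lists the fields main() actually reads, with illustrative values.

if __name__ == '__main__':
    from argparse import Namespace
    # hypothetical arguments; the real script presumably builds these with argparse
    main(Namespace(sandbox=True, ftype=None, device='cpu', rollout='random',
                   n=1000, save_epochs=100, train_epochs=1, train_epochs_interval=10,
                   path='out/tree.pkl', rollout_path='out/rollout.pkl',
                   buffer_cap=10000))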
Example 3
def run():
    parse = argparse.ArgumentParser(description="gomoku program")
    parse.add_argument("player1",
                       type=int,
                       choices=[1, 2, 3, 4],
                       help="1.Human; 2.MCTS; 3.Random; 4.Expert")

    parse.add_argument("player2",
                       type=int,
                       choices=[1, 2, 3, 4],
                       help="1.Human; 2.MCTS; 3.Random; 4.Expert")

    parse.add_argument("--size",
                       type=int,
                       default=8,
                       help="The Board size,default is 8*8 ")

    parse.add_argument("--simulate_time",
                       type=int,
                       default=2,
                       help="The MCTS playout simulation time,default is 2s ")

    args = parse.parse_args()
    chess = Gomoku(board_size=args.size)
    p1 = {
        1: HumanPlayer(chess),
        2: MCTSPlayer(chess, simulate_time=args.simulate_time),
        3: RandomPlayer(chess),
        4: ExpertPlayer(chess)
    }

    p2 = {
        1: HumanPlayer(chess),
        2: MCTSPlayer(chess, simulate_time=args.simulate_time),
        3: RandomPlayer(chess),
        4: ExpertPlayer(chess)
    }

    chess.play(p1[args.player1], p2[args.player2], isShow=True)
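
For completeness, a minimal entry point for the parser above (the script filename is an assumption). For example, "python gomoku.py 2 4 --size 9 --simulate_time 5" pits an MCTS player against the expert player on a 9x9 board with 5-second playouts.

if __name__ == '__main__':
    run()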
Example 4
 def __init__(self, game_state, policy_value_net):
     self.state = game_state
     self.figures = self.init_figures()
     self.buffer_value = []
     self.policy_value_net = policy_value_net
     self.p1 = MCTSPlayer(self.state,
                          'p1',
                          self.policy_value_net.policy_value_fn,
                          n_playout=100,
                          is_selfplay=1)  # self-play mode, used for training
     self.p2 = MCTSPlayer(self.state,
                          'p2',
                          self.policy_value_net.policy_value_fn,
                          n_playout=1000,
                          is_selfplay=0)  # used for real (non-self-play) games
     self.human = Human_Player('human')
     self.random_player = Player('random')
     self.pure_tree_playre = Pure_MCTS_Player(
         self.state,
         'pure_tree',
         self.policy_value_net.policy_value_fn,
         n_playout=1000,
         is_selfplay=0)
Example 5
 def construct_player_model(self, player_model_str):
     if player_model_str == 'random':
         return RandomPlayer(draft=self)
     elif player_model_str.startswith('mcts'):
         max_iters, c = parse_mcts_maxiter_c(player_model_str)
         return MCTSPlayer(name=player_model_str,
                           draft=self,
                           maxiters=max_iters,
                           c=c)
     elif player_model_str == 'assocrule':
         return AssocRulePlayer(draft=self)
     elif player_model_str == 'hwr':
         return HighestWinRatePlayer(draft=self)
     else:
         raise NotImplementedError
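
A hedged usage sketch for the factory above: the owning draft object builds opponents from strings, and the 'mcts...' string is decoded by parse_mcts_maxiter_c; its exact format is not shown here, so the value below is only an assumption.

# draft is assumed to be the object that exposes construct_player_model
players = [draft.construct_player_model(s)
           for s in ('random', 'mcts1000_1.4', 'assocrule', 'hwr')]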
Example 6
def generate_model(choice, param):
    if choice == 'player':
        from player import Player
        return Player(**param)
    elif choice == 'random':
        from player import RandomBot
        return RandomBot(**param)
    elif choice == 'mcts':
        from player import MCTSPlayer
        return MCTSPlayer(name=param['name'],
                          c_puct=5,
                          n_playout=1000,
                          max_step=1000)
    else:
        from player import MyPolicy
        return MyPolicy(**param)
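
A hedged usage sketch: generate_model is a string-keyed factory. Note that the 'mcts' branch forwards only param['name'] and fixes c_puct, n_playout and max_step, while the other branches pass param straight through as constructor kwargs (the kwargs below are assumptions).

mcts_bot = generate_model('mcts', {'name': 'mcts_bot'})
random_bot = generate_model('random', {'name': 'rand_bot'})  # assumes RandomBot accepts a name kwarg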
Example 7
 def policy_evaluate(self,
                     n_playout_ai=400,
                     n_playout_mcts=100,
                     n_games=10):
     """
     策略胜率评估:模型与纯MCTS玩家对战n局看胜率
         n_playout_ai    ai预测每个action的mcts模拟次数
         n_playout_mcts  纯mcts随机走子时每个action的mcts模拟步数
         n_games         策略评估胜率时的模拟对局次数
     """
     logging.info("__policy_evaluate__")
     # AI player (uses the policy-value network to guide tree search and evaluate leaf nodes)
     ai_player = AIPlayer(self.policy_value_net.policy_value_fn,
                          n_playout=n_playout_ai)
     # pure MCTS player
     mcts_player = MCTSPlayer(n_playout=n_playout_mcts)
     win_cnt = {'ai': 0, 'mcts': 0, 'tie': 0}
     for i in range(n_games):  # play the matches, alternating who moves first
         if i % 2 == 0:  # ai first
             logging.info("policy evaluate start: {}, ai use W".format(i +
                                                                       1))
             winner = self.game.start_play(ai_player, mcts_player)
             if winner == 0:
                 win_cnt['ai'] += 1
             elif winner == 1:
                 win_cnt['mcts'] += 1
             else:
                 win_cnt['tie'] += 1
         else:  # mcts first
             logging.info("policy evaluate start: {}, ai use B".format(i +
                                                                       1))
             winner = self.game.start_play(mcts_player, ai_player)
             if winner == 0:
                 win_cnt['mcts'] += 1
             elif winner == 1:
                 win_cnt['ai'] += 1
             else:
                 win_cnt['tie'] += 1
         # win_cnt[winner] += 1
         logging.info("policy evaluate res: {},{}".format(i + 1, win_cnt))
     # win rate
     win_ratio = 1.0 * (win_cnt['ai'] + 0.5 * win_cnt['tie']) / n_games
     logging.info(
         "evaluate n_playout_mcts:{}, win: {}, lose: {}, tie:{}".format(
             n_playout_mcts, win_cnt['ai'], win_cnt['mcts'],
             win_cnt['tie']))
     return win_ratio
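
A worked example of the win-rate formula used above, with ties counted as half a win (the counts are illustrative):

win_cnt = {'ai': 6, 'mcts': 3, 'tie': 1}
n_games = 10
win_ratio = 1.0 * (win_cnt['ai'] + 0.5 * win_cnt['tie']) / n_games  # 0.65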
Example 8
    # # print(g.get_parents_to_root(">10"))

    # keras.backend.clear_session()

    # m = MCTSPlayer(numplayouts = 20, movetime = 10, ep = 1.4142135623730950488)

    # m = MinimaxPlayer(ev=None, depth=9)
    # t = TicTacToe(r, m, verbose = True)
    # print(t.play())
    m = HumanPlayer()

    dic = {}


    # 1.414 ≈ sqrt(2); used here only as the key of the results dict
    unit = 1.414
    r = MCTSPlayer()

    # play 10 games between the MCTS player and the human, record the result
    match = Match(r, m, True)
    res = match.play(10)
    dic[unit] = res

    print(dic)

    # m.startGame()
    # b = Board()
    # b.pushMove(0)
    # b.pushMove(2)
    # b.pushMove(3)
    # b.pushMove(4)
    # b.pushMove(6)
    # print(m.board_already_in_gt(b))
Example 9
class Blackjack():
    def __init__(self, game_state, policy_value_net):
        self.state = game_state
        self.figures = self.init_figures()
        self.buffer_value = []
        self.policy_value_net = policy_value_net
        self.p1 = MCTSPlayer(self.state,
                             'p1',
                             self.policy_value_net.policy_value_fn,
                             n_playout=100,
                             is_selfplay=1)  # self-play mode, used for training
        self.p2 = MCTSPlayer(self.state,
                             'p2',
                             self.policy_value_net.policy_value_fn,
                             n_playout=1000,
                             is_selfplay=0)  # used for real (non-self-play) games
        self.human = Human_Player('human')
        self.random_player = Player('random')
        self.pure_tree_playre = Pure_MCTS_Player(
            self.state,
            'pure_tree',
            self.policy_value_net.policy_value_fn,
            n_playout=1000,
            is_selfplay=0)

    # initialize the number pool and the availability mask
    def init_figures(self):
        figures = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        for _ in range(2):
            figures.append(random.randint(1, 10))
        self.figures = np.array(figures)
        self.availabel_figures = np.ones(12)
        self.state.update_current_state(self.figures,
                                        self.availabel_figures,
                                        p1_num=False,
                                        p2_num=False,
                                        p1_choi=-1,
                                        p2_choi=-1)
        return self.figures

    # initialize each player's starting number
    def init_player_figures(self):
        a = random.randint(15, 21)
        b = random.randint(19, 27)
        # the two numbers must not sum to more than 21*2
        if a + b >= 21 * 2:
            a = a - (a + b - 21 * 2) / 2
            b = b - (a + b - 21 * 2) / 2
            a = int(a) - 1
            b = int(b) - 1
        # the sum of the two numbers must be odd
        if (a + b) % 2 == 0:
            b = b - 1
        # randomly assign the numbers to P1 and P2
        if random.random() >= .5:
            self.p1_num = a
            self.p2_num = b
        else:
            self.p1_num = b
            self.p2_num = a
        self.state.current_state[2] = self.p1_num
        self.state.current_state[3] = self.p2_num
        return self.p1_num, self.p2_num

    def who_first(self):
        if self.p1.num >= self.p2.num:
            return self.p1, self.p2
        else:
            return self.p2, self.p1

    def get_winner(self):
        if count_one(self.state.current_state[1], 1) <= 2:
            if self.state.current_state[2][0] <= 21 and self.state.current_state[3][0] <= 21:
                if self.state.current_state[2][0] >= self.state.current_state[3][0]:
                    winner = 0
                else:
                    winner = 1
            elif self.state.current_state[3][0] > 21:
                winner = 0
            else:
                winner = 1
        return winner

    # start a game (against the pure-tree player)
    def start_game(self):
        print('=========START GAME==========')
        self.init_figures()  # initialize the number pool
        self.init_player_figures()  # initialize both players' numbers
        self.state.save_current_state()

        #print(self.state.current_state)
        for i in range(5):
            # ===== print the state =====
            print('********ROUND  %i*********' % (i + 1))
            print(self.state.current_state[0])
            print(self.state.current_state[1])
            print(self.state.current_state[2])
            print(self.state.current_state[3])

            # if row 3 is larger than row 4, the pure-tree player moves first
            if self.state.current_state[2][0] > self.state.current_state[3][0]:
                act, num = self.pure_tree_playre.get_action(
                    self.state.current_state)  # pure-tree player's choice
                #act, num = self.random_player.get_action(self.state.current_state)     # random player's choice
                self.state.do_move(act)
                print('PTreePlayer Select No.%i  fig: %i ' %
                      (act, self.state.current_state[0][act - 1]))

                act_2nd, num_2nd = self.p2.get_action(
                    self.state.current_state)  # MCTS player's choice
                self.state.do_move(act_2nd)
                print('MCTSPlayer  Select No.%i  fig: %i ' %
                      (act_2nd, self.state.current_state[0][act_2nd - 1]))
            else:
                act_2nd, num_2nd = self.p2.get_action(
                    self.state.current_state)  # MCTS player's choice
                self.state.do_move(act_2nd)
                print('MCTSPlayer  Select No.%i  fig: %i ' %
                      (act_2nd, self.state.current_state[0][act_2nd - 1]))
                act, num = self.pure_tree_playre.get_action(
                    self.state.current_state)  # pure-tree player's choice
                #act, num = self.random_player.get_action(self.state.current_state)
                self.state.do_move(act)
                print('PTreePlayer Select No.%i  fig: %i ' %
                      (act, self.state.current_state[0][act - 1]))

        if count_one(self.state.current_state[1], 1) <= 2:  # check whether the game is over
            # if both totals are at most 21, the larger one wins
            if self.state.current_state[2][0] <= 21 and self.state.current_state[3][0] <= 21:
                if self.state.current_state[2][0] >= self.state.current_state[3][0]:
                    winner = 0  # pure tree
                else:
                    winner = 1  # MCTS
            elif self.state.current_state[3][0] > 21:
                winner = 0  # pure tree
            else:
                winner = 1  # MCTS

        return winner

    # start a game (against a human player)
    def start_game_human(self):
        print('=========START GAME==========')
        self.init_figures()
        self.init_player_figures()
        self.state.save_current_state()

        #print(self.state.current_state)
        for i in range(5):
            print('********ROUND  %i*********' % (i + 1))
            num_list = []
            for i in range(12):
                if self.state.current_state[1][i] == 1:
                    num_list.append(int(self.state.current_state[0][i]))
                else:
                    num_list.append(0)
            print('number list: ', num_list)
            print('action list: ', list(range(1, 13)))
            print('your number: ', self.state.current_state[2][0])
            print('opp. number: ', self.state.current_state[3][0])
            # p1 moves first; p1 is the random player or the human
            if self.state.current_state[2][0] > self.state.current_state[3][0]:
                act, num = self.human.get_action(self.state.current_state)
                self.state.do_move(act)
                print('your choice: [%i]  number: [%i] ' %
                      (act + 1, self.state.current_state[0][act]))

                act_2nd, num_2nd = self.p2.get_action(self.state.current_state)
                self.state.do_move(act_2nd)
                print('opponent choice: [%i]  number: [%i] ' %
                      (act_2nd + 1, self.state.current_state[0][act_2nd]))
            else:
                act_2nd, num_2nd = self.p2.get_action(self.state.current_state)
                self.state.do_move(act_2nd)
                print('opponent choice: [%i]  number: [%i] ' %
                      (act_2nd + 1, self.state.current_state[0][act_2nd]))
                act, num = self.human.get_action(self.state.current_state)
                self.state.do_move(act)
                print('your choice: [%i]  number: [%i] ' %
                      (act + 1, self.state.current_state[0][act]))

        if count_one(self.state.current_state[1], 1) <= 2:
            if self.state.current_state[2][0] <= 21 and self.state.current_state[3][0] <= 21:
                if self.state.current_state[2][0] >= self.state.current_state[3][0]:
                    winner = 0
                else:
                    winner = 1
            elif self.state.current_state[3][0] > 21:
                winner = 0
            else:
                winner = 1

        return winner

    def start_self_play(self):

        states, mcts_probs, current_players, buffer_value = [], [], [], []
        run_down_list = []

        self.init_figures()  # initialize the shared number pool
        self.init_player_figures()  # initialize each player's own number
        self.state.save_current_state()  # save into current_state

        #=====start a selfplay game=======
        for _ in range(5):  # play 5 rounds
            # use the state to decide who moves first
            if self.state.current_state[2][0] > self.state.current_state[3][0]:
                # run_down_list records which player moved first
                run_down_list.append(0)
                run_down_list.append(1)
            else:
                run_down_list.append(1)
                run_down_list.append(0)

            # NOTE: select one action; the choice is driven by the MCTS simulations
            act1, act1_porbs = self.p1.get_action(self.state.current_state)
            self.state.do_move(act1)  # 执行动作,并进入下一个state
            states.append((copy.copy(self.state.current_state)).reshape(
                -1, 6, 12, 1).astype('float32'))  # store the state to train the network later
            mcts_probs.append(np.array(act1_porbs).astype(
                'float32'))  # store act1_porbs to train the network later

            #print('======change player========')
            act2, act2_porbs = self.p1.get_action(self.state.current_state)
            self.state.do_move(act2)
            states.append((copy.copy(self.state.current_state)).reshape(
                -1, 6, 12, 1).astype('float32'))
            mcts_probs.append(np.array(act2_porbs).astype('float32'))

        # after 5 rounds, determine the winner
        winner = self.get_winner()
        if winner == 0:
            print('winner: p1')
        else:
            print('winner: p2')

        # append the value targets according to the result
        for p in run_down_list:
            if p != winner:
                #if p == winner:
                buffer_value.append(np.ones(12).astype('float32'))
            else:
                buffer_value.append((np.ones(12) * (-1)).astype('float32'))

        self.p1.reset_player()
        self.p2.reset_player()

        # return the states, action probabilities and value targets
        return zip(states, mcts_probs, buffer_value)
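
A hedged driver sketch for the class above: it assumes compatible game_state and policy_value_net objects (defined elsewhere in the original project) and collects one self-play game as training data.

game = Blackjack(game_state, policy_value_net)
play_data = list(game.start_self_play())  # [(state, mcts_probs, value), ...]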
Example 10
class MCTS:
    def __init__(self, T: int, n: int, tau: float, rollout: Rollout,
                 eps: float):
        # initialize game config
        self.game_config = GameConfig(StartingSplit.StartingRandomSplit,
                                      prosperity=False,
                                      num_players=1,
                                      sandbox=True)
        self.supply = Supply(self.game_config)

        self.game = None
        # max number of turns in a game
        self.T = T
        self.expanded = False
        self.rollout_model = rollout
        self.data = MCTSData()
        self.player = None
        self.iter = 0
        self.iters = n

        if self.rollout_model == Rollout.Random:
            self.rollout = RandomRollout()
        elif rollout == Rollout.HistoryHeuristic:
            self.rollout_cards = []
            self.rollout = HistoryHeuristicRollout(tau=tau, train=True)
        elif rollout == Rollout.LinearRegression:
            self.rollout = LinearRegressionRollout(self.iters,
                                                   self.supply,
                                                   tau=tau,
                                                   train=True,
                                                   eps=eps)
        self.player = MCTSPlayer(rollout=self.rollout, train=True)

    def run(self):
        s = self.game.state
        d: DecisionState = s.decision
        tree_score = 0
        # run the game up to game end or turn limit reached
        while d.type != DecisionType.DecisionGameOver and s.player_states[0]._turns < self.T:
            if d.text:
                logging.info(d.text)
            response = DecisionResponse([])
            player = self.game.players[d.controlling_player]
            next_node = player.controller.makeDecision(s, response)

            if s.phase == Phase.BuyPhase:
                # apply selection until leaf node is reached
                if next_node:
                    assert next_node == self.player.node
                    self.player.node.n += 1
                elif not self.expanded:
                    # expand one node
                    cards = list(
                        filter(lambda x: not isinstance(x, Curse),
                               d.card_choices + [None]))
                    self.player.node.add_unique_children(cards)
                    self.expanded = True
                    self.player.node = self.player.node.get_child_node(
                        response.single_card)
                    self.player.node.n += 1
                    # track the score accumulated during the in-tree (UCT) phase
                    tree_score = self.game.get_player_scores()[0]
                    self.data.update_split_scores(tree_score, False, self.iter)
                elif self.rollout_model == Rollout.HistoryHeuristic:
                    self.rollout_cards.append(response.single_card)

            s.process_decision(response)
            s.advance_next_decision()

        score = self.game.get_player_scores()[0]
        # update data
        self.data.update_split_scores(score - tree_score, True, self.iter)

        # backpropagate
        delta = score
        self.player.node.v += delta
        self.player.node = self.player.node.parent
        while self.player.node != self.player.root:
            self.player.node.update_v(lambda x: sum(x) / len(x))
            self.player.node = self.player.node.parent

        # update history heuristic
        if self.rollout_model == Rollout.HistoryHeuristic:
            self.rollout.update(cards=self.rollout_cards, score=score)
        elif self.rollout_model == Rollout.LinearRegression:
            counts = self.game.state.get_card_counts(0)
            self.rollout.update(counts=counts, score=score, i=self.iter)

        return self.game.get_player_scores()[0]

    def reset(self, i: int):
        self.expanded = False
        self.rollout_cards = []
        self.iter = i
        self.game_config = GameConfig(StartingSplit.StartingRandomSplit,
                                      prosperity=False,
                                      num_players=1,
                                      sandbox=True)
        self.game = Game(self.game_config, [self.player])
        self.game.new_game()
        self.game.state.advance_next_decision()

        self.player.reset(self.game.state.player_states[0])

    def train(self,
              n: int,
              output_iters: int,
              save_model=False,
              model_dir=model_dir,
              model_name='mcts',
              save_data=False,
              data_dir=data_dir,
              data_name='data'):

        avg = 0
        for i in tqdm(range(n)):
            # initialize new game
            self.reset(i)
            self.run()
            self.data.update(self.game, self.player, i)

            avg = sum(self.data.scores) / (i + 1)

            if i > 0 and i % output_iters == 0:
                print(
                    f'Last {output_iters} avg: {sum(self.data.scores[i-output_iters:i]) / output_iters}'
                )
                print(f'Total {i} avg: {avg}')

        if save_model:
            save(os.path.join(model_dir, model_name), self.player.root)
            save(os.path.join(model_dir, f'{model_name}_rollout'),
                 self.rollout)
        if save_data:
            self.data.update_dataframes()
            self.data.augment_avg_scores(100)
            save(os.path.join(data_dir, data_name), self.data)
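
A minimal training-driver sketch for the MCTS class above; the constructor values are illustrative, and Rollout comes from the surrounding project.

mcts = MCTS(T=30, n=1000, tau=0.5, rollout=Rollout.Random, eps=0.1)
mcts.train(n=1000, output_iters=100, save_model=False, save_data=False)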
Example 11
    def start_infer(self,
                    vs_type='human-vs-ai',
                    n_playout=400,
                    best_model=None):
        """
        启动对战
        Params:
            vs_type         对战类型
            n_playout       ai预测每个action的mcts模拟次数
            best_model      AIPlayer使用的模型
        """
        logging.info("__start_vsplay__")

        # 1. initialize the board
        self.board.init_board()
        # 2. initialize the players
        # initialize the AI player
        #from net.policy_value_net_keras import PolicyValueNet  # Keras
        from net.policy_value_net_tensorflow import PolicyValueNet  # Tensorflow
        best_policy = PolicyValueNet(self.board.action_ids_size,
                                     model_file=best_model)
        ai_player = AIPlayer(best_policy.policy_value_fn, n_playout=n_playout)
        # initialize the MCTS player
        mcts_player = MCTSPlayer(n_playout=n_playout)
        # initialize the human player; move commands use the format: Nf3
        human_player = HumanPlayer()
        # initialize the MiniMax player
        minimax_player = MiniMaxPlayer(depth=4)
        # initialize the Stockfish player
        stockfish_player = StockfishPlayer()

        # 3. start the game
        logging.info("vsplay start: {}".format(vs_type))
        if vs_type == 'human-vs-ai':
            self.start_play(human_player, ai_player, vsprint=True)
        elif vs_type == 'human-vs-mcts':
            self.start_play(human_player, mcts_player, vsprint=True)
        elif vs_type == 'human-vs-minimax':
            self.start_play(human_player, minimax_player, vsprint=True)
        elif vs_type == 'human-vs-stockfish':
            self.start_play(human_player, stockfish_player, vsprint=True)
        elif vs_type == 'ai-vs-human':
            self.start_play(ai_player,
                            human_player,
                            vsprint=True,
                            angle_player=Board.BLACK)
        elif vs_type == 'mcts-vs-human':
            self.start_play(mcts_player,
                            human_player,
                            vsprint=True,
                            angle_player=Board.BLACK)
        elif vs_type == 'minimax-vs-human':
            self.start_play(minimax_player,
                            human_player,
                            vsprint=True,
                            angle_player=Board.BLACK)
        elif vs_type == 'stockfish-vs-human':
            self.start_play(stockfish_player,
                            human_player,
                            vsprint=True,
                            angle_player=Board.BLACK)
        else:
            exit("undefind vs-type: ".format(vs_type))