Example #1
    def MCTS_search(self,
                    state,
                    history=None,
                    is_root_node=False,
                    real_hist=None) -> float:
        """
        Monte Carlo Tree Search: walk the tree from `state`, expand and
        evaluate unseen nodes, and schedule value back-ups on the executor.
        """
        if history is None:  # avoid a shared mutable default argument
            history = []
        while True:
            # logger.debug(f"start MCTS, state = {state}, history = {history}")
            game_over, v, _ = senv.done(state)
            if game_over:
                self.executor.submit(self.update_tree, None, v, history)
                break

            with self.node_lock[state]:
                if state not in self.tree:
                    # Expand and Evaluate
                    self.tree[state].sum_n = 1
                    self.tree[state].legal_moves = senv.get_legal_moves(state)
                    self.tree[state].waiting = True
                    # logger.debug(f"expand_and_evaluate {state}, sum_n = {self.tree[state].sum_n}, history = {history}")
                    if is_root_node and real_hist:
                        self.expand_and_evaluate(state, history, real_hist)
                    else:
                        self.expand_and_evaluate(state, history)
                    break

                if state in history[:-1]:  # repetition loop: back up a value of 0
                    # logger.debug(f"loop -> loss, state = {state}, history = {history[:-1]}")
                    self.executor.submit(self.update_tree, None, 0, history)
                    break

                # Select
                node = self.tree[state]
                if node.waiting:
                    node.visit.append(history)
                    # logger.debug(f"wait for prediction state = {state}")
                    break

                sel_action = self.select_action_q_and_u(state, is_root_node)

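                # Apply a virtual loss so concurrent simulations are discouraged from picking the same branch.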
                virtual_loss = self.config.play.virtual_loss
                self.tree[state].sum_n += 1
                # logger.debug(f"node = {state}, sum_n = {node.sum_n}")

                action_state = self.tree[state].a[sel_action]
                action_state.n += virtual_loss
                action_state.w -= virtual_loss
                action_state.q = action_state.w / action_state.n

                # logger.debug(f"apply virtual_loss = {virtual_loss}, as.n = {action_state.n}, w = {action_state.w}, q = {action_state.q}")

                # if action_state.next is None:
                history.append(sel_action)
                state = senv.step(state, sel_action)
                history.append(state)
    def start_game(self, idx, search_tree):
        pipes = self.cur_pipes.pop()

        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        enable_resign = random() > self.config.play.enable_resign_rate

        self.player = CChessPlayer(self.config,
                                   search_tree=search_tree,
                                   pipes=pipes,
                                   enable_resign=enable_resign,
                                   debugging=False)

        state = senv.INIT_STATE
        history = [state]
        value = 0
        turns = 0  # even == red; odd == black
        game_over = False
        is_alpha_red = (idx % 2 == 0)
        final_move = None
        check = False

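        # Game loop: the AlphaZero player moves on its own color's turns; a UCCI engine plays the other side.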
        while not game_over:
            if (is_alpha_red and turns % 2 == 0) or (not is_alpha_red
                                                     and turns % 2 == 1):
                no_act = None
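                # If this position occurred earlier, forbid the moves previously played from it to break the repetition.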
                if not check and state in history[:-1]:
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            no_act.append(history[i + 1])
                action, _ = self.player.action(state, turns, no_act)
                if action is None:
                    logger.debug(
                        f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
            else:
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    logger.debug(
                        f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
                if turns % 2 == 1:
                    action = flip_move(action)
            history.append(action)
            state = senv.step(state, action)
            turns += 1
            history.append(state)

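            # Stop and score a draw once the game reaches the configured maximum length.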
            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(
                    state, need_check=True)

        if final_move:
            history.append(final_move)
            state = senv.step(state, final_move)
            history.append(state)
            turns += 1
            value = -value

        self.player.close()
        del search_tree
        del self.player
        gc.collect()
        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        if turns <= 10:
            store = random() > 0.7
        else:
            store = True

        if store:
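            # Record the initial state, then one [move, value] pair per ply, flipping the value's sign each ply.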
            data = [history[0]]
            for i in range(turns):
                k = i * 2
                data.append([history[k + 1], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, store
def self_play_buffer(config, pipes_bt, pipes_ng, idx,
                     res_data) -> (tuple, list):
    sleep(random())
    playouts = randint(8, 12) * 100
    config.play.simulation_num_per_move = playouts
    logger.info(f"Set playouts = {config.play.simulation_num_per_move}")

    pipe1 = pipes_bt.pop()  # borrow
    pipe2 = pipes_ng.pop()

    player1 = CChessPlayer(config,
                           search_tree=defaultdict(VisitState),
                           pipes=pipe1,
                           enable_resign=False,
                           debugging=False)
    player2 = CChessPlayer(config,
                           search_tree=defaultdict(VisitState),
                           pipes=pipe2,
                           enable_resign=False,
                           debugging=False)

    # even: bst = red, ng = black; odd: bst = black, ng = red
    if idx % 2 == 0:
        red = player1
        black = player2
        print("Base model plays red, model under evaluation plays black")
    else:
        red = player2
        black = player1
        print("Model under evaluation plays red, base model plays black")

    state = senv.INIT_STATE
    history = [state]
    # policys = []
    value = 0
    turns = 0
    game_over = False
    final_move = None
    no_eat_count = 0
    check = False

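    # Evaluation game loop: handle position repetition, resignation, the no-capture rule and the length cap.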
    while not game_over:
        no_act = None
        if not check and state in history[:-1]:
            no_act = []
            free_move = defaultdict(int)
            for i in range(len(history) - 1):
                if history[i] == state:
                    # If the move previously played from here gives check or chases a piece, forbid it
                    if senv.will_check_or_catch(state, history[i + 1]):
                        no_act.append(history[i + 1])
                    # Otherwise count it as an idle move
                    else:
                        free_move[state] += 1
                        if free_move[state] >= 2:
                            # Adjudicate a draw
                            game_over = True
                            value = 0
                            logger.info("Idle-move loop repeated three times; adjudicating a draw")
                            break
        if game_over:
            break
        start_time = time()
        if turns % 2 == 0:
            action, _ = red.action(state, turns, no_act=no_act)
        else:
            action, _ = black.action(state, turns, no_act=no_act)
        end_time = time()
        if action is None:
            print(f"{turns % 2} (0 = red; 1 = black) has resigned!")
            value = -1
            break
        print(
            f"Playing: round {turns // 2 + 1}, {'red to move' if turns % 2 == 0 else 'black to move'}, move: {action}, time: {(end_time - start_time):.1f}s"
        )
        # policys.append(policy)
        history.append(action)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            logger.error(f"{e}, no_act = {no_act}")
            value = 0
            break
        turns += 1
        if no_eat:
            no_eat_count += 1
        else:
            no_eat_count = 0
        history.append(state)

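        # Draw after 120 consecutive plies without a capture, or when the game exceeds the maximum length.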
        if no_eat_count >= 120 or turns / 2 >= config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(state,
                                                            need_check=True)
            if not game_over:
                if not senv.has_attack_chessman(state):
                    logger.info(f"Neither side has attacking pieces, adjudicating a draw. state = {state}")
                    game_over = True
                    value = 0

    if final_move:
        history.append(final_move)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)

    data = []
    if idx % 2 == 0:
        data = [res_data['base']['digest'], res_data['unchecked']['digest']]
    else:
        data = [res_data['unchecked']['digest'], res_data['base']['digest']]
    player1.close()
    player2.close()
    del player1, player2
    gc.collect()

    if turns % 2 == 1:  # black turn
        value = -value

    v = value
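    # After the two model digests, append the initial state and one [move, value] pair per ply, sign alternating.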
    data.append(history[0])
    for i in range(turns):
        k = i * 2
        data.append([history[k + 1], value])
        value = -value

    pipes_bt.append(pipe1)
    pipes_ng.append(pipe2)
    return (turns, v, idx), data
Example #4
    def start_game(self, idx, search_tree):
        pipes = self.cur_pipes.pop()

        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        enable_resign = random() > self.config.play.enable_resign_rate

        self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes, enable_resign=enable_resign, debugging=False)

        state = senv.INIT_STATE
        history = [state]
        # policys = [] 
        value = 0
        turns = 0       # even == red; odd == black
        game_over = False
        final_move = None
        no_eat_count = 0
        check = False

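        # Self-play loop: query the MCTS player each turn and apply the repetition, idle-move and no-capture rules.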
        while not game_over:
            no_act = None
            if not check and state in history[:-1]:
                no_act = []
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        # If the move previously played from here gives check or chases a piece, forbid it
                        if senv.will_check_or_catch(state, history[i+1]):
                            no_act.append(history[i + 1])
                        # Otherwise count it as an idle move
                        else:
                            free_move[state] += 1
                            if free_move[state] >= 2:
                                # Adjudicate a draw
                                game_over = True
                                value = 0
                                logger.info("Idle-move loop repeated three times; adjudicating a draw")
                                break
            if game_over:
                break
            start_time = time()
            action, policy = self.player.action(state, turns, no_act)
            end_time = time()
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            if self.config.opts.log_move:
                logger.info(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            # logger.info(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            # for move, action_state in self.player.search_results.items():
            #     if action_state[0] >= 20:
            #         logger.info(f"move: {move}, prob: {action_state[0]}, Q_value: {action_state[1]:.2f}, Prior: {action_state[2]:.3f}")
            # self.player.search_results = {}
            history.append(action)
            # policys.append(policy)
            try:
                state, no_eat = senv.new_step(state, action)
            except Exception as e:
                logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
                game_over = True
                value = 0
                break
            turns += 1
            if no_eat:
                no_eat_count += 1
            else:
                no_eat_count = 0
            history.append(state)

            if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(state, need_check=True)
                if not game_over:
                    if not senv.has_attack_chessman(state):
                        logger.info(f"Neither side has attacking pieces, adjudicating a draw. state = {state}")
                        game_over = True
                        value = 0

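        # If senv.done reported a finishing move, play it out so the stored game ends in the terminal position.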
        if final_move:
            # policy = self.build_policy(final_move, False)
            history.append(final_move)
            # policys.append(policy)
            state = senv.step(state, final_move)
            turns += 1
            value = -value
            history.append(state)

        self.player.close()
        del search_tree
        del self.player
        gc.collect()
        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        if turns < 10:
            store = random() > 0.9
        else:
            store = True

        if store:
            data = [history[0]]
            for i in range(turns):
                k = i * 2
                data.append([history[k + 1], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, store
Example #5
    def start_game(self, idx):
        sleep(random())
        playouts = randint(8, 12) * 100
        self.config.play.simulation_num_per_move = playouts
        logger.info(
            f"Set playouts = {self.config.play.simulation_num_per_move}")

        pipe1 = self.pipes_bt.pop()
        pipe2 = self.pipes_ng.pop()
        search_tree1 = defaultdict(VisitState)
        search_tree2 = defaultdict(VisitState)

        self.player1 = CChessPlayer(self.config,
                                    search_tree=search_tree1,
                                    pipes=pipe1,
                                    debugging=False,
                                    enable_resign=False,
                                    use_history=self.hist_base)
        self.player2 = CChessPlayer(self.config,
                                    search_tree=search_tree2,
                                    pipes=pipe2,
                                    debugging=False,
                                    enable_resign=False,
                                    use_history=self.hist_ng)

        # even: bst = red, ng = black; odd: bst = black, ng = red
        if idx % 2 == 0:
            red = self.player1
            black = self.player2
            logger.info(f"Process id = {self.pid}: base model plays red, model under evaluation plays black")
        else:
            red = self.player2
            black = self.player1
            logger.info(f"Process id = {self.pid}: model under evaluation plays red, base model plays black")

        state = senv.INIT_STATE
        history = [state]
        value = 0  # best model's value
        turns = 0  # even == red; odd == black
        game_over = False
        final_move = None
        no_eat_count = 0
        check = False

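        # Evaluation game loop between the base model (player1) and the candidate model (player2).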
        while not game_over:
            start_time = time()
            no_act = None
            if not check and state in history[:-1]:
                no_act = []
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        # If the move previously played from here gives check or chases a piece, forbid it
                        if senv.will_check_or_catch(state, history[i + 1]):
                            no_act.append(history[i + 1])
                        # Otherwise count it as an idle move
                        else:
                            free_move[state] += 1
                            if free_move[state] >= 2:
                                # Adjudicate a draw
                                game_over = True
                                value = 0
                                logger.info("Idle-move loop repeated three times; adjudicating a draw")
                                break
            if game_over:
                break
            if turns % 2 == 0:
                action, _ = red.action(state, turns, no_act=no_act)
            else:
                action, _ = black.action(state, turns, no_act=no_act)
            end_time = time()
            if self.config.opts.log_move:
                logger.debug(
                    f"进程id = {self.pid}, action = {action}, turns = {turns}, time = {(end_time-start_time):.1f}"
                )
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            history.append(action)
            state, no_eat = senv.new_step(state, action)
            turns += 1
            if no_eat:
                no_eat_count += 1
            else:
                no_eat_count = 0
            history.append(state)

            if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(
                    state, need_check=True)
                if not game_over:
                    if not senv.has_attack_chessman(state):
                        logger.info(f"Neither side has attacking pieces, adjudicating a draw. state = {state}")
                        game_over = True
                        value = 0

        if final_move:
            history.append(final_move)
            state = senv.step(state, final_move)
            turns += 1
            value = -value
            history.append(state)

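        # The record starts with the two model digests, ordered to match which model played red.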
        data = []
        if idx % 2 == 0:
            data = [
                self.data['base']['digest'], self.data['unchecked']['digest']
            ]
        else:
            data = [
                self.data['unchecked']['digest'], self.data['base']['digest']
            ]
        self.player1.close()
        self.player2.close()

        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        data.append(history[0])
        for i in range(turns):
            k = i * 2
            data.append([history[k + 1], v])
            v = -v

        self.pipes_bt.append(pipe1)
        self.pipes_ng.append(pipe2)
        return value, turns, data
def self_play_buffer(config, cur) -> (tuple, list):
    pipe = cur.pop()  # borrow

    enable_resign = random() > config.play.enable_resign_rate

    player = CChessPlayer(config,
                          search_tree=defaultdict(VisitState),
                          pipes=pipe,
                          enable_resign=enable_resign,
                          debugging=False)

    state = senv.INIT_STATE
    history = [state]
    # policys = []
    value = 0
    turns = 0
    game_over = False
    final_move = None
    no_eat_count = 0
    check = False

    while not game_over:
        no_act = None
        if not check and state in history[:-1]:
            no_act = []
            free_move = defaultdict(int)
            for i in range(len(history) - 1):
                if history[i] == state:
                    # If the move previously played from here gives check or chases a piece, forbid it
                    if senv.will_check_or_catch(state, history[i + 1]):
                        no_act.append(history[i + 1])
                    # Otherwise count it as an idle move
                    else:
                        free_move[state] += 1
                        if free_move[state] >= 2:
                            # Adjudicate a draw
                            game_over = True
                            value = 0
                            logger.info("Idle-move loop repeated three times; adjudicating a draw")
                            break
        if game_over:
            break
        start_time = time()
        action, policy = player.action(state, turns, no_act)
        end_time = time()
        if action is None:
            print(f"{turns % 2} (0 = red; 1 = black) has resigned!")
            value = -1
            break
        print(
            f"Playing: round {turns // 2 + 1}, {'red to move' if turns % 2 == 0 else 'black to move'}, move: {action}, time: {(end_time - start_time):.1f}s"
        )
        # policys.append(policy)
        history.append(action)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
            game_over = True
            value = 0
            break
        turns += 1
        if no_eat:
            no_eat_count += 1
        else:
            no_eat_count = 0
        history.append(state)

        if no_eat_count >= 120 or turns / 2 >= config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(state,
                                                            need_check=True)
            if not game_over:
                if not senv.has_attack_chessman(state):
                    logger.info(f"Neither side has attacking pieces, adjudicating a draw. state = {state}")
                    game_over = True
                    value = 0

    if final_move:
        # policy = build_policy(final_move, False)
        history.append(final_move)
        # policys.append(policy)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)

    player.close()
    del player
    gc.collect()

    if turns % 2 == 1:  # black turn
        value = -value

    v = value
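    # Training record: the initial state, then one [move, value] pair per ply with the sign alternating each ply.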
    data = [history[0]]
    for i in range(turns):
        k = i * 2
        data.append([history[k + 1], value])
        value = -value

    cur.append(pipe)
    return (turns, v), data
Example #7
    def ai_move(self):
        ai_move_first = not self.human_move_first
        self.history = [self.env.get_state()]
        no_act = None
        while not self.env.done:
            if ai_move_first == self.env.red_to_move:
                self.ai.search_results = {}
                state = self.env.get_state()
                logger.info(f"state = {state}")
                _, _, _, check = senv.done(state, need_check=True)
                if not check and state in self.history[:-1]:
                    no_act = []
                    free_move = defaultdict(int)
                    for i in range(len(self.history) - 1):
                        if self.history[i] == state:
                            # If the move previously played from here gives check or chases a piece, forbid it
                            if senv.will_check_or_catch(
                                    state, self.history[i + 1]):
                                no_act.append(self.history[i + 1])
                            # Otherwise count it as an idle move
                            else:
                                free_move[state] += 1
                                if free_move[state] >= 2:
                                    # Adjudicate a draw
                                    self.env.winner = Winner.draw
                                    self.env.board.winner = Winner.draw
                                    break
                    if no_act:
                        logger.debug(f"no_act = {no_act}")
                action, policy = self.ai.action(state, self.env.num_halfmoves,
                                                no_act)
                if action is None:
                    logger.info("AI has resigned!")
                    return
                self.history.append(action)
                if not self.env.red_to_move:
                    action = flip_move(action)
                key = self.env.get_state()
                p, v = self.ai.debug[key]
                logger.info(f"check = {check}, NN value = {v:.3f}")
                self.nn_value = v
                logger.info("MCTS results:")
                self.mcts_moves = {}
                for move, action_state in self.ai.search_results.items():
                    move_cn = self.env.board.make_single_record(
                        int(move[0]), int(move[1]), int(move[2]), int(move[3]))
                    logger.info(
                        f"move: {move_cn}-{move}, visit count: {action_state[0]}, Q_value: {action_state[1]:.3f}, Prior: {action_state[2]:.3f}"
                    )
                    self.mcts_moves[move_cn] = action_state
                x0, y0, x1, y1 = int(action[0]), int(action[1]), int(
                    action[2]), int(action[3])
                chessman_sprite = self.select_sprite_from_group(
                    self.chessmans, x0, y0)
                sprite_dest = self.select_sprite_from_group(
                    self.chessmans, x1, y1)
                if sprite_dest:
                    self.chessmans.remove(sprite_dest)
                    sprite_dest.kill()
                chessman_sprite.move(x1, y1, self.chessman_w, self.chessman_h)
                self.history.append(self.env.get_state())

    def start_game(self, idx):
        pipe1 = self.pipes_bt.pop()
        pipe2 = self.pipes_ng.pop()
        search_tree1 = defaultdict(VisitState)
        search_tree2 = defaultdict(VisitState)

        self.player1 = CChessPlayer(self.config,
                                    search_tree=search_tree1,
                                    pipes=pipe1,
                                    debugging=False,
                                    enable_resign=True)
        self.player2 = CChessPlayer(self.config,
                                    search_tree=search_tree2,
                                    pipes=pipe2,
                                    debugging=False,
                                    enable_resign=True)

        # even: bst = red, ng = black; odd: bst = black, ng = red
        if idx % 2 == 0:
            red = self.player1
            black = self.player2
            logger.debug(f"best model is red, ng is black")
        else:
            red = self.player2
            black = self.player1
            logger.debug(f"best model is black, ng is red")

        state = senv.INIT_STATE
        value = 0  # best model's value
        turns = 0  # even == red; odd == black
        game_over = False

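        # Simplified evaluation loop: no repetition handling or no-capture rule in this variant.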
        while not game_over:
            start_time = time()
            if turns % 2 == 0:
                action, _ = red.action(state, turns)
            else:
                action, _ = black.action(state, turns)
            end_time = time()
            # logger.debug(f"pid = {self.pid}, idx = {idx}, action = {action}, turns = {turns}, time = {(end_time-start_time):.1f}")
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break

            state = senv.step(state, action)
            turns += 1

            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move = senv.done(state)

        self.player1.close()
        self.player2.close()

        if turns % 2 == 1:  # black turn
            value = -value

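        # Negate once more when the best model played black (odd idx) so the returned value is from the best model's perspective.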
        if idx % 2 == 1:
            value = -value

        self.pipes_bt.append(pipe1)
        self.pipes_ng.append(pipe2)
        return value, turns