Ejemplo n.º 1
0
def test_be_catched():
    """Render a fixed FEN position and print ``senv.be_catched`` for move '4454'.

    This is a manual probe: it prints the rendered board, a blank line and
    the value returned by ``be_catched``; nothing is asserted.
    """
    import cchess_alphazero.environment.static_env as senv

    fen = 'rnbakab1r/9/1c3c2n/p1p5p/7p1/3PR4/P1P3P1P/C7C/9/RNBAKABN1 b'
    move = '4454'

    board = senv.fen_to_state(fen)
    senv.render(board)
    print()
    print(senv.be_catched(board, move))
Ejemplo n.º 2
0
    def start_game(self, idx, search_tree):
        """
        Play one self-play game to the end and (usually) save its record.

        Args:
            idx: Index of the game. Used to decide when the shared search
                tree is reset and forwarded to ``save_play_data``.
            search_tree: Search tree to reuse. It is replaced by a fresh
                ``defaultdict(VisitState)`` when tree sharing is disabled or
                when ``idx`` is a multiple of ``reset_mtcs_info_per_game``.

        Returns:
            ``(v, turns, state, store)``: the game value after the final sign
            normalisation, the number of plies played, the last state, and
            whether the record was passed to ``save_play_data``.
        """
        pipes = self.cur_pipes.pop()
        play_cfg = self.config.play

        if not play_cfg.share_mtcs_info_in_self_play or \
                idx % play_cfg.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        # Resignation is switched on when the draw exceeds the configured rate.
        enable_resign = random() > play_cfg.enable_resign_rate

        self.player = CChessPlayer(
            self.config,
            search_tree=search_tree,
            pipes=pipes,
            enable_resign=enable_resign,
            debugging=False,
            use_history=self.use_history,
        )

        state = senv.INIT_STATE
        # `history` interleaves states and actions: [s0, a0, s1, a1, ...].
        history = [state]
        value = 0
        turns = 0  # even == red; odd == black
        game_over = False
        final_move = None
        no_eat_count = 0
        check = False
        no_act = []
        increase_temp = False

        while not game_over:
            # Timing is kept for the per-move log line, which is currently disabled.
            start_time = time()
            action, policy = self.player.action(
                state, turns, no_act, increase_temp=increase_temp)
            end_time = time()

            if action is None:
                logger.error(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break

            history.append(action)
            try:
                state, no_eat = senv.new_step(state, action)
            except Exception as e:
                logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
                game_over = True
                value = 0
                break

            turns += 1
            no_eat_count = no_eat_count + 1 if no_eat else 0
            history.append(state)

            # Draw: too many plies without a capture, or the game is too long.
            if no_eat_count >= 120 or turns / 2 >= play_cfg.max_game_length:
                game_over = True
                value = 0
                continue

            game_over, value, final_move, check = senv.done(
                state, need_check=True)
            if not game_over and not senv.has_attack_chessman(state):
                logger.error(f"双方无进攻子力,作和。state = {state}")
                game_over = True
                value = 0

            increase_temp = False
            no_act = []
            if game_over or check or state not in history[:-1]:
                continue

            # The position repeats: look at every move previously played from it.
            idle_repeats = 0
            for earlier, reply in zip(history, history[1:]):
                if earlier != state:
                    continue
                if senv.will_check_or_catch(state, reply):
                    # Forbid repeating a checking / chasing move.
                    no_act.append(reply)
                elif not senv.be_catched(state, reply):
                    increase_temp = True
                    idle_repeats += 1
                    if idle_repeats >= 3:
                        # Treat as a draw.
                        game_over = True
                        value = 0
                        logger.error("闲着循环三次,作和棋处理")
                        break

        if final_move:
            # Play out the finishing move reported by `senv.done`.
            history.append(final_move)
            state = senv.step(state, final_move)
            turns += 1
            value = -value
            history.append(state)

        self.player.close()
        del search_tree
        del self.player
        gc.collect()

        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        # Games shorter than 10 plies are kept only when the draw exceeds 0.9.
        store = turns >= 10 or random() > 0.9

        if store:
            data = [history[0]]
            signed = v
            # Actions sit at the odd indices 1, 3, ..., 2 * turns - 1.
            for move in history[1:2 * turns:2]:
                data.append([move, signed])
                signed = -signed
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, store
Ejemplo n.º 3
0
def self_play_buffer(config, cur, use_history=False) -> (tuple, list):
    """
    Play one self-play game and return its result together with the record.

    Args:
        config: Project configuration; ``config.play.enable_resign_rate`` and
            ``config.play.max_game_length`` are read here.
        cur: Pool (list-like) of pipes. One entry is popped for the duration
            of the game and appended back before returning.
        use_history: Forwarded unchanged to ``CChessPlayer``.

    Returns:
        ``((turns, v), data)`` where ``turns`` is the number of plies played,
        ``v`` is the game value after the final sign normalisation, and
        ``data`` is ``[initial_state, [action, value], ...]`` with the value's
        sign alternating from one ply to the next.
    """
    pipe = cur.pop()  # borrow

    # Resignation is switched on when the draw exceeds the configured rate.
    enable_resign = random() > config.play.enable_resign_rate

    player = CChessPlayer(config, search_tree=defaultdict(VisitState), pipes=pipe,
                          enable_resign=enable_resign, debugging=False, use_history=use_history)

    state = senv.INIT_STATE
    # `history` interleaves states and actions: [s0, a0, s1, a1, ...].
    history = [state]
    value = 0
    turns = 0
    game_over = False
    final_move = None
    no_eat_count = 0
    check = False
    no_act = None
    increase_temp = False

    while not game_over:
        start_time = time()
        action, policy = player.action(state, turns, no_act, increase_temp=increase_temp)
        end_time = time()
        if action is None:
            print(f"{turns % 2} (0 = 红; 1 = 黑) 投降了!")
            value = -1
            break
        # Floor division keeps the round number an integer (1, 1, 2, 2, ...)
        # instead of printing 1.0, 1.5, 2.0, ...
        print(f"博弈中: 回合{turns // 2 + 1} {'红方走棋' if turns % 2 == 0 else '黑方走棋'}, 着法: {action}, 用时: {(end_time - start_time):.1f}s")
        history.append(action)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
            game_over = True
            value = 0
            break
        turns += 1
        if no_eat:
            no_eat_count += 1
        else:
            no_eat_count = 0
        history.append(state)

        # Draw: too many plies without a capture, or the game is too long.
        if no_eat_count >= 120 or turns / 2 >= config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(state, need_check=True)
            no_act = []
            increase_temp = False
            if not game_over:
                if not senv.has_attack_chessman(state):
                    logger.info(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0
            if not game_over and not check and state in history[:-1]:
                # The position repeats: inspect every move previously played from it.
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        if senv.will_check_or_catch(state, history[i + 1]):
                            # Forbid repeating a checking / chasing move.
                            no_act.append(history[i + 1])
                        elif not senv.be_catched(state, history[i + 1]):
                            increase_temp = True
                            free_move[state] += 1
                            if free_move[state] >= 3:
                                # Treat as a draw.
                                game_over = True
                                value = 0
                                logger.info("闲着循环三次,作和棋处理")
                                break

    if final_move:
        # Play out the finishing move reported by `senv.done`.
        history.append(final_move)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)

    player.close()
    del player
    gc.collect()

    if turns % 2 == 1:  # black turn
        value = -value

    v = value
    data = [history[0]]
    for i in range(turns):
        # Actions sit at the odd indices of `history`.
        k = i * 2
        data.append([history[k + 1], value])
        value = -value

    cur.append(pipe)
    return (turns, v), data
Ejemplo n.º 4
0
    def start_game(self, idx):
        """
        Play one evaluation game between the base model and the model under test.

        ``player1`` uses a pipe from ``self.pipes_bt`` and ``player2`` one from
        ``self.pipes_ng``. For even ``idx`` player1 plays red and player2
        black; for odd ``idx`` the colours are swapped.

        Args:
            idx: Game index; its parity decides the colour assignment and the
                order of the two digests at the head of the record.

        Returns:
            ``(value, turns, data)`` where ``value`` is the game value after
            the final sign normalisation, ``turns`` the number of plies
            played, and ``data`` is
            ``[digest, digest, initial_state, [action, value], ...]`` with the
            value's sign alternating from one ply to the next.
        """
        sleep(random())
        # Randomise the playout count (800..1200) for this game.
        playouts = randint(8, 12) * 100
        self.config.play.simulation_num_per_move = playouts
        logger.info(f"Set playouts = {self.config.play.simulation_num_per_move}")

        pipe1 = self.pipes_bt.pop()
        pipe2 = self.pipes_ng.pop()
        search_tree1 = defaultdict(VisitState)
        search_tree2 = defaultdict(VisitState)

        self.player1 = CChessPlayer(self.config, search_tree=search_tree1, pipes=pipe1,
                                    debugging=False, enable_resign=False, use_history=self.hist_base)
        self.player2 = CChessPlayer(self.config, search_tree=search_tree2, pipes=pipe2,
                                    debugging=False, enable_resign=False, use_history=self.hist_ng)

        # even: bst = red, ng = black; odd: bst = black, ng = red
        if idx % 2 == 0:
            red = self.player1
            black = self.player2
            logger.info(f"进程id = {self.pid} 基准模型执红,待评测模型执黑")
        else:
            red = self.player2
            black = self.player1
            logger.info(f"进程id = {self.pid} 待评测模型执红,基准模型执黑")

        state = senv.INIT_STATE
        # `history` interleaves states and actions: [s0, a0, s1, a1, ...].
        history = [state]
        value = 0
        turns = 0       # even == red; odd == black
        game_over = False
        # Must be bound before the loop: a resignation on the very first ply
        # breaks out before `senv.done` ever assigns it.
        final_move = None
        no_eat_count = 0
        check = False
        increase_temp = False
        no_act = []

        while not game_over:
            start_time = time()
            if turns % 2 == 0:
                action, _ = red.action(state, turns, no_act=no_act, increase_temp=increase_temp)
            else:
                action, _ = black.action(state, turns, no_act=no_act, increase_temp=increase_temp)
            end_time = time()
            if self.config.opts.log_move:
                logger.debug(f"进程id = {self.pid}, action = {action}, turns = {turns}, time = {(end_time-start_time):.1f}")
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            history.append(action)
            state, no_eat = senv.new_step(state, action)
            turns += 1
            if no_eat:
                no_eat_count += 1
            else:
                no_eat_count = 0
            history.append(state)

            # Draw: too many plies without a capture, or the game is too long.
            if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(state, need_check=True)
                no_act = []
                increase_temp = False
                if not game_over:
                    if not senv.has_attack_chessman(state):
                        logger.info(f"双方无进攻子力,作和。state = {state}")
                        game_over = True
                        value = 0
                if not game_over and not check and state in history[:-1]:
                    # The position repeats: inspect every move previously played from it.
                    free_move = defaultdict(int)
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            if senv.will_check_or_catch(state, history[i + 1]):
                                # Forbid repeating a checking / chasing move.
                                no_act.append(history[i + 1])
                            elif not senv.be_catched(state, history[i + 1]):
                                increase_temp = True
                                free_move[state] += 1
                                if free_move[state] >= 3:
                                    # Treat as a draw.
                                    game_over = True
                                    value = 0
                                    logger.info("闲着循环三次,作和棋处理")
                                    break

        if final_move:
            # Play out the finishing move reported by `senv.done`.
            history.append(final_move)
            state = senv.step(state, final_move)
            turns += 1
            value = - value
            history.append(state)

        # The record starts with the two model digests, ordered by `idx` parity.
        if idx % 2 == 0:
            data = [self.data['base']['digest'], self.data['unchecked']['digest']]
        else:
            data = [self.data['unchecked']['digest'], self.data['base']['digest']]
        self.player1.close()
        self.player2.close()

        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        data.append(history[0])
        for i in range(turns):
            # Actions sit at the odd indices of `history`.
            k = i * 2
            data.append([history[k + 1], v])
            v = -v

        self.pipes_bt.append(pipe1)
        self.pipes_ng.append(pipe2)
        return value, turns, data
Ejemplo n.º 5
0
def self_play_buffer(config, pipes_bt, pipes_ng, idx, res_data, hist_base,
                     hist_ng) -> (tuple, list):
    """
    Play one evaluation game between the base model and the model under test.

    ``player1`` uses a pipe from ``pipes_bt`` and ``player2`` one from
    ``pipes_ng``. For even ``idx`` player1 plays red and player2 black; for
    odd ``idx`` the colours are swapped.

    Args:
        config: Project configuration. ``config.play.simulation_num_per_move``
            is overwritten with a random playout count (800..1200) and
            ``config.play.max_game_length`` is read.
        pipes_bt: Pool of pipes for player1; one is borrowed and returned.
        pipes_ng: Pool of pipes for player2; one is borrowed and returned.
        idx: Game index; its parity decides colours and digest order.
        res_data: Mapping providing ``['base']['digest']`` and
            ``['unchecked']['digest']``.
        hist_base: Forwarded to player1 as ``use_history``.
        hist_ng: Forwarded to player2 as ``use_history``.

    Returns:
        ``((turns, v, idx), data)`` where ``turns`` is the number of plies
        played, ``v`` the game value after the final sign normalisation, and
        ``data`` is ``[digest, digest, initial_state, [action, value], ...]``
        with the value's sign alternating from one ply to the next.
    """
    sleep(random())
    playouts = randint(8, 12) * 100
    config.play.simulation_num_per_move = playouts
    logger.info(f"Set playouts = {config.play.simulation_num_per_move}")

    pipe1 = pipes_bt.pop()  # borrow
    pipe2 = pipes_ng.pop()

    player1 = CChessPlayer(config,
                           search_tree=defaultdict(VisitState),
                           pipes=pipe1,
                           enable_resign=False,
                           debugging=False,
                           use_history=hist_base)
    player2 = CChessPlayer(config,
                           search_tree=defaultdict(VisitState),
                           pipes=pipe2,
                           enable_resign=False,
                           debugging=False,
                           use_history=hist_ng)

    # even: bst = red, ng = black; odd: bst = black, ng = red
    if idx % 2 == 0:
        red = player1
        black = player2
        print(f"基准模型执红,待评测模型执黑")
    else:
        red = player2
        black = player1
        print(f"待评测模型执红,基准模型执黑")

    state = senv.INIT_STATE
    # `history` interleaves states and actions: [s0, a0, s1, a1, ...].
    history = [state]
    value = 0
    turns = 0
    game_over = False
    final_move = None
    no_eat_count = 0
    check = False
    increase_temp = False
    no_act = []

    while not game_over:
        start_time = time()
        if turns % 2 == 0:
            action, _ = red.action(state,
                                   turns,
                                   no_act=no_act,
                                   increase_temp=increase_temp)
        else:
            action, _ = black.action(state,
                                     turns,
                                     no_act=no_act,
                                     increase_temp=increase_temp)
        end_time = time()
        if action is None:
            print(f"{turns % 2} (0 = 红; 1 = 黑) 投降了!")
            value = -1
            break
        # Floor division keeps the round number an integer (1, 1, 2, 2, ...)
        # instead of printing 1.0, 1.5, 2.0, ...
        print(
            f"博弈中: 回合{turns // 2 + 1} {'红方走棋' if turns % 2 == 0 else '黑方走棋'}, 着法: {action}, 用时: {(end_time - start_time):.1f}s"
        )
        history.append(action)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            # The policy returned by the player is discarded above, so log the
            # offending action; referencing an unbound `policy` here would
            # raise NameError and mask the original error.
            logger.error(f"{e}, no_act = {no_act}, action = {action}")
            game_over = True
            value = 0
            break
        turns += 1
        if no_eat:
            no_eat_count += 1
        else:
            no_eat_count = 0
        history.append(state)

        # Draw: too many plies without a capture, or the game is too long.
        if no_eat_count >= 120 or turns / 2 >= config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(state,
                                                            need_check=True)
            no_act = []
            increase_temp = False
            if not game_over:
                if not senv.has_attack_chessman(state):
                    logger.info(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0
            if not game_over and not check and state in history[:-1]:
                # The position repeats: inspect every move previously played from it.
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        if senv.will_check_or_catch(state, history[i + 1]):
                            # Forbid repeating a checking / chasing move.
                            no_act.append(history[i + 1])
                        elif not senv.be_catched(state, history[i + 1]):
                            increase_temp = True
                            free_move[state] += 1
                            if free_move[state] >= 3:
                                # Treat as a draw.
                                game_over = True
                                value = 0
                                logger.info("闲着循环三次,作和棋处理")
                                break

    if final_move:
        # Play out the finishing move reported by `senv.done`.
        history.append(final_move)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)

    # The record starts with the two model digests, ordered by `idx` parity.
    if idx % 2 == 0:
        data = [res_data['base']['digest'], res_data['unchecked']['digest']]
    else:
        data = [res_data['unchecked']['digest'], res_data['base']['digest']]
    player1.close()
    player2.close()
    del player1, player2
    gc.collect()

    if turns % 2 == 1:  # black turn
        value = -value

    v = value
    data.append(history[0])
    for i in range(turns):
        # Actions sit at the odd indices of `history`.
        k = i * 2
        data.append([history[k + 1], value])
        value = -value

    pipes_bt.append(pipe1)
    pipes_ng.append(pipe2)
    return (turns, v, idx), data
Ejemplo n.º 6
0
    def MCTS_search(self,
                    state,
                    history=None,
                    is_root_node=False,
                    real_hist=None) -> None:
        """
        Monte Carlo Tree Search: walk down the tree from ``state`` until a
        stopping condition is hit.

        The walk stops (and the method returns ``None``) when one of these
        happens:

        * ``senv.done`` reports a finished game: ``update_tree`` is submitted
          to ``self.executor`` with twice the reported value.
        * ``state`` is not yet in ``self.tree``: the node is initialised,
          marked ``waiting`` and handed to ``expand_and_evaluate``.
        * ``state`` already occurs earlier in ``history`` (a loop):
          ``update_tree`` is submitted with -1, 1 or 0 depending on
          ``senv.will_check_or_catch`` / ``senv.be_catched`` for the move
          that followed the first earlier occurrence.
        * the node is still ``waiting``: ``history`` is queued on
          ``node.visit``.

        Otherwise an action is selected, virtual loss is applied to it, and
        the walk continues from the resulting state.

        Args:
            state: State to start the walk from.
            history: Interleaved list of states and actions leading to
                ``state``. It is extended in place during the walk. Defaults
                to a new empty list per call.
            is_root_node: Forwarded to ``select_action_q_and_u``; together
                with ``real_hist`` it also selects the three-argument form of
                ``expand_and_evaluate``.
            real_hist: Optional extra history passed to
                ``expand_and_evaluate`` when ``is_root_node`` is true.
        """
        # A fresh list per call: this method appends to `history` and stores
        # it in `node.visit`, so a shared default list would leak state
        # between unrelated searches.
        if history is None:
            history = []

        while True:
            # logger.debug(f"start MCTS, state = {state}, history = {history}")
            game_over, v, _ = senv.done(state)
            if game_over:
                v = v * 2
                self.executor.submit(self.update_tree, None, v, history)
                break

            with self.node_lock[state]:
                if state not in self.tree:
                    # Expand and Evaluate
                    self.tree[state].sum_n = 1
                    self.tree[state].legal_moves = senv.get_legal_moves(state)
                    self.tree[state].waiting = True
                    # logger.debug(f"expand_and_evaluate {state}, sum_n = {self.tree[state].sum_n}, history = {history}")
                    if is_root_node and real_hist:
                        self.expand_and_evaluate(state, history, real_hist)
                    else:
                        self.expand_and_evaluate(state, history)
                    break

                if state in history[:-1]:  # loop
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            if senv.will_check_or_catch(state, history[i + 1]):
                                self.executor.submit(self.update_tree, None,
                                                     -1, history)
                            elif senv.be_catched(state, history[i + 1]):
                                self.executor.submit(self.update_tree, None, 1,
                                                     history)
                            else:
                                # logger.debug(f"loop -> loss, state = {state}, history = {history[:-1]}")
                                self.executor.submit(self.update_tree, None, 0,
                                                     history)
                            # Only the first earlier occurrence is judged.
                            break
                    break

                # Select
                node = self.tree[state]
                if node.waiting:
                    node.visit.append(history)
                    # logger.debug(f"wait for prediction state = {state}")
                    break

                sel_action = self.select_action_q_and_u(state, is_root_node)

                virtual_loss = self.config.play.virtual_loss
                self.tree[state].sum_n += 1
                # logger.debug(f"node = {state}, sum_n = {node.sum_n}")

                # Apply virtual loss to the selected edge: more visits, lower value.
                action_state = self.tree[state].a[sel_action]
                action_state.n += virtual_loss
                action_state.w -= virtual_loss
                action_state.q = action_state.w / action_state.n

                # logger.debug(f"apply virtual_loss = {virtual_loss}, as.n = {action_state.n}, w = {action_state.w}, q = {action_state.q}")

                # if action_state.next is None:
                history.append(sel_action)
                state = senv.step(state, sel_action)
                history.append(state)