예제 #1
0
def test_done():
    """Print a fixed mid-game position row by row and the result of senv.done()."""
    import cchess_alphazero.environment.static_env as senv
    state = '1Rems1e1r/4m4/2c6/p3C1p1p/9/6P2/P3P3P/1c5C1/3p5/2EMSME1R'
    board = senv.state_to_board(state)
    # Print rows top-down (row 9 first) so the board reads naturally.
    for row in reversed(range(10)):
        print(board[row])
    print(senv.done(state))
    def start_game(self, idx):
        """Play one evaluation game between the best model and the new model.

        Args:
            idx: game index. Its parity decides colors: even -> best model
                plays red, odd -> best model plays black.

        Returns:
            (value, turns): game result from the best model's perspective
            (1 win / 0 draw / -1 loss) and the number of half-moves played.
        """
        pipe1 = self.pipes_bt.pop()
        pipe2 = self.pipes_ng.pop()
        search_tree1 = defaultdict(VisitState)
        search_tree2 = defaultdict(VisitState)

        self.player1 = CChessPlayer(self.config, search_tree=search_tree1, pipes=pipe1,
                        debugging=False, enable_resign=True)
        self.player2 = CChessPlayer(self.config, search_tree=search_tree2, pipes=pipe2,
                        debugging=False, enable_resign=True)

        # even: bst = red, ng = black; odd: bst = black, ng = red
        if idx % 2 == 0:
            red = self.player1
            black = self.player2
            logger.debug("best model is red, ng is black")
        else:
            red = self.player2
            black = self.player1
            logger.debug("best model is black, ng is red")

        state = senv.INIT_STATE
        value = 0       # best model's value
        turns = 0       # even == red; odd == black
        game_over = False

        while not game_over:
            start_time = time()
            if turns % 2 == 0:
                action, _ = red.action(state, turns)
            else:
                action, _ = black.action(state, turns)
            end_time = time()
            # logger.debug(f"pid = {self.pid}, idx = {idx}, action = {action}, turns = {turns}, time = {(end_time-start_time):.1f}")
            if action is None:
                # BUG FIX: `turn` was undefined here (NameError on resign);
                # the loop counter is `turns`.
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break

            state = senv.step(state, action)
            turns += 1

            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move = senv.done(state)

        self.player1.close()
        self.player2.close()

        # `value` is from the side-to-move's perspective; flip to red's view.
        if turns % 2 == 1:  # black turn
            value = -value

        # Flip again when the best model played black (odd idx) so the
        # returned value is always from the best model's perspective.
        if idx % 2 == 1:
            value = -value

        self.pipes_bt.append(pipe1)
        self.pipes_ng.append(pipe2)
        return value, turns
예제 #3
0
 def search_action(self, depth, infinite):
     """Search the current position and report the best move found.

     Repetition handling: when the current state already appeared earlier
     in the history (and the side to move is not in check), any follow-up
     move that would check or chase is banned as a foul before searching.
     """
     banned_moves = None
     _, _, _, check = senv.done(self.state, need_check=True)
     logger.debug(f"Check = {check}, state = {self.state}")
     if not check and self.state in self.history[:-1]:
         banned_moves = []
         for idx in range(len(self.history) - 1):
             if self.history[idx] != self.state:
                 continue
             follow_up = self.history[idx + 1]
             if senv.will_check_or_catch(self.state, follow_up):
                 banned_moves.append(follow_up)
                 logger.debug(f"Foul: no act = {banned_moves}")
     action, _ = self.player.action(self.state,
                                    self.turns,
                                    no_act=banned_moves,
                                    depth=depth,
                                    infinite=infinite,
                                    hist=self.history)
     # Stop any pending timer before tearing the player down.
     if self.t:
         self.t.cancel()
     _, value = self.player.debug[self.state]
     depth = self.player.done_tasks // 100
     self.player.close(wait=False)
     self.player = None
     self.model.close_pipes()
     self.info_best_move(action, value, depth)
예제 #4
0
def test_done():
    """Print a fixed endgame position row by row and the result of senv.done()."""
    import cchess_alphazero.environment.static_env as senv
    state = '4s4/9/4e4/p8/2e2R2p/P5E2/8P/9/9/4S1E2'
    board = senv.state_to_board(state)
    # Print rows top-down (row 9 first) so the board reads naturally.
    for row in reversed(range(10)):
        print(board[row])
    print(senv.done(state))
예제 #5
0
def fixbug():
    """Scan saved self-play game files and rewrite those with wrong values.

    Replays each stored game move by move with senv, recomputes the final
    outcome, and compares it against the value stored for the first move.
    Games whose stored value disagrees (and are not draws) are rewritten
    with corrected per-move values; unreadable files are deleted.
    """
    from cchess_alphazero.config import Config
    from cchess_alphazero.lib.data_helper import get_game_data_filenames, read_game_data_from_file, write_game_data_to_file
    import cchess_alphazero.environment.static_env as senv
    c = Config('distribute')
    files = get_game_data_filenames(c.resource)
    cnt = 0       # files examined
    fix = 0       # files rewritten
    draw_cnt = 0  # games skipped because a zero value marks them as draws
    for filename in files:
        try:
            data = read_game_data_from_file(filename)
        except Exception as e:
            # BUG FIX: was a bare `except:` that hid the failure reason (and
            # would also swallow KeyboardInterrupt). Report what went wrong
            # and which file is being removed.
            print(f"error: {e}, file: {filename}")
            os.remove(filename)
            continue
        state = data[0]
        real_data = [state]
        need_fix = True
        draw = False
        action = None
        value = None
        is_red_turn = True
        for item in data[1:]:
            action = item[0]
            # Stored values alternate sign every half-move; negate to get
            # the value from the mover's perspective.
            value = -item[1]
            if value == 0:
                # Zero value means the game was recorded as a draw.
                need_fix = False
                draw = True
                draw_cnt += 1
                break
            state = senv.step(state, action)
            is_red_turn = not is_red_turn
            real_data.append([action, value])
        if not draw:
            game_over, v, final_move = senv.done(state)
            if final_move:
                # A forced mating move flips both the value and the mover.
                v = -v
                is_red_turn = not is_red_turn
            if not is_red_turn:
                v = -v
            if not game_over:
                v = 1
            # print(game_over, v, final_move, state)
            if v == data[1][1]:
                need_fix = False
            else:
                need_fix = True
        if need_fix:
            write_game_data_to_file(filename, real_data)
            # print(filename)
            fix += 1
        cnt += 1
        if cnt % 1000 == 0:
            # Periodic progress report for long scans.
            print(cnt, fix, draw_cnt)
    print(f"all {cnt}, fix {fix}, draw {draw_cnt}")
예제 #6
0
    def MCTS_search(self, state, history=None, is_root_node=False) -> float:
        """
        Monte Carlo Tree Search

        Descends the tree from `state` until reaching a terminal position,
        an unexpanded leaf, a repetition loop, or a node whose evaluation
        is still pending, then dispatches the appropriate tree update.

        :param state: board state string to start the descent from.
        :param history: alternating [state, action, state, ...] path taken
            so far; a fresh list is created when omitted. Mutated in place
            as the search descends.
        :param is_root_node: True at the search root; passed through to
            select_action_q_and_u.
        """
        # BUG FIX: the default was `history=[]` — a mutable default shared
        # between calls, so paths accumulated across independent searches.
        if history is None:
            history = []
        while True:
            game_over, v, _ = senv.done(state)
            if game_over:
                # Terminal position: back up the exact game result.
                self.executor.submit(self.update_tree, None, v, history)
                break

            with self.node_lock[state]:
                if state not in self.tree:
                    # Expand and Evaluate
                    self.tree[state].sum_n = 1
                    self.tree[state].legal_moves = senv.get_legal_moves(state)
                    self.tree[state].waiting = True
                    self.expand_and_evaluate(state, history)
                    break

                if state in history[:-1]:
                    # Repetition within this simulation: back up 0.
                    self.executor.submit(self.update_tree, None, 0, history)
                    break

                # Select
                node = self.tree[state]
                if node.waiting:
                    # Node is awaiting a network evaluation from another
                    # simulation; queue this path for backup on arrival.
                    node.visit.append(history)
                    break

                sel_action = self.select_action_q_and_u(state, is_root_node)

                # Apply virtual loss so concurrent simulations diverge.
                virtual_loss = self.config.play.virtual_loss
                self.tree[state].sum_n += 1

                action_state = self.tree[state].a[sel_action]
                action_state.n += virtual_loss
                action_state.w -= virtual_loss
                action_state.q = action_state.w / action_state.n

                if action_state.next is None:
                    action_state.next = senv.step(state, sel_action)

            history.append(sel_action)
            state = action_state.next
            history.append(state)
예제 #7
0
    def MCTS_search(self, state, history=None, is_root_node=False) -> float:
        """
        Monte Carlo Tree Search

        Descends the tree from `state` until reaching a terminal position,
        an unexpanded leaf, a repetition loop, or a node whose evaluation
        is still pending, then dispatches the appropriate tree update.

        :param state: board state string to start the descent from.
        :param history: alternating [state, action, state, ...] path taken
            so far; a fresh list is created when omitted. Mutated in place
            as the search descends.
        :param is_root_node: True at the search root; passed through to
            select_action_q_and_u.
        """
        # BUG FIX: the default was `history=[]` — a mutable default shared
        # between calls, so paths accumulated across independent searches.
        if history is None:
            history = []
        while True:
            game_over, v, _ = senv.done(state)
            if game_over:
                # Terminal position: back up the exact game result.
                self.executor.submit(self.update_tree, None, v, history)
                break

            with self.node_lock[state]:
                if state not in self.tree:
                    # Expand and Evaluate
                    self.tree[state].sum_n = 1
                    self.tree[state].legal_moves = senv.get_legal_moves(state)
                    self.tree[state].waiting = True
                    self.expand_and_evaluate(state, history)
                    break

                if state in history[:-1]:
                    # Repetition within this simulation: back up 0.
                    self.executor.submit(self.update_tree, None, 0, history)
                    break

                # Select
                node = self.tree[state]
                if node.waiting:
                    # Node is awaiting a network evaluation from another
                    # simulation; queue this path for backup on arrival.
                    node.visit.append(history)
                    break

                sel_action = self.select_action_q_and_u(state, is_root_node)

                # Apply virtual loss so concurrent simulations diverge.
                virtual_loss = self.config.play.virtual_loss
                self.tree[state].sum_n += 1

                action_state = self.tree[state].a[sel_action]
                action_state.n += virtual_loss
                action_state.w -= virtual_loss
                action_state.q = action_state.w / action_state.n

                if action_state.next is None:
                    action_state.next = senv.step(state, sel_action)

            history.append(sel_action)
            state = action_state.next
            history.append(state)
예제 #8
0
def self_play_buffer(config, pipes_bt, pipes_ng, idx, res_data, hist_base,
                     hist_ng) -> (tuple, list):
    """Play one evaluation game between the base and the unchecked model.

    Args:
        config: global configuration; play settings are read and the
            per-move playout count is randomized here.
        pipes_bt / pipes_ng: pipe pools for the base / new model (one pipe
            is borrowed from each and returned at the end).
        idx: game index; parity decides colors (even: base = red).
        res_data: dict with model digests under 'base' and 'unchecked'.
        hist_base / hist_ng: use_history flags for the two players.

    Returns:
        ((turns, v, idx), data): half-move count, final value from red's
        perspective, the game index, and the serialized game record
        [base_digest-or-swapped, ng_digest, initial_state, [action, value]...].
    """
    sleep(random())
    # Randomize playouts per move (800-1200) for variety between games.
    playouts = randint(8, 12) * 100
    config.play.simulation_num_per_move = playouts
    logger.info(f"Set playouts = {config.play.simulation_num_per_move}")

    pipe1 = pipes_bt.pop()  # borrow
    pipe2 = pipes_ng.pop()

    player1 = CChessPlayer(config,
                           search_tree=defaultdict(VisitState),
                           pipes=pipe1,
                           enable_resign=False,
                           debugging=False,
                           use_history=hist_base)
    player2 = CChessPlayer(config,
                           search_tree=defaultdict(VisitState),
                           pipes=pipe2,
                           enable_resign=False,
                           debugging=False,
                           use_history=hist_ng)

    # even: bst = red, ng = black; odd: bst = black, ng = red
    if idx % 2 == 0:
        red = player1
        black = player2
        print(f"基准模型执红,待评测模型执黑")
    else:
        red = player2
        black = player1
        print(f"待评测模型执红,基准模型执黑")

    state = senv.INIT_STATE
    history = [state]  # alternating [state, action, state, ...]
    value = 0
    turns = 0
    game_over = False
    final_move = None  # checkmating move returned by senv.done, if any
    no_eat_count = 0   # consecutive half-moves without a capture
    check = False
    increase_temp = False  # raise exploration temperature on idle loops
    no_act = []        # moves banned for the current position

    while not game_over:
        start_time = time()
        if turns % 2 == 0:
            action, _ = red.action(state,
                                   turns,
                                   no_act=no_act,
                                   increase_temp=increase_temp)
        else:
            action, _ = black.action(state,
                                     turns,
                                     no_act=no_act,
                                     increase_temp=increase_temp)
        end_time = time()
        if action is None:
            print(f"{turns % 2} (0 = 红; 1 = 黑) 投降了!")
            value = -1
            break
        print(
            f"博弈中: 回合{turns / 2 + 1} {'红方走棋' if turns % 2 == 0 else '黑方走棋'}, 着法: {action}, 用时: {(end_time - start_time):.1f}s"
        )
        history.append(action)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            # BUG FIX: this handler referenced an undefined name `policy`
            # (the policy list is commented out in this variant), raising a
            # NameError that masked the original exception.
            logger.error(f"{e}, no_act = {no_act}")
            game_over = True
            value = 0
            break
        turns += 1
        if no_eat:
            no_eat_count += 1
        else:
            no_eat_count = 0
        history.append(state)

        # 120 capture-free half-moves or the max game length is a draw.
        if no_eat_count >= 120 or turns / 2 >= config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(state,
                                                            need_check=True)
            no_act = []
            increase_temp = False
            if not game_over:
                if not senv.has_attack_chessman(state):
                    # Neither side has attacking material left: draw.
                    logger.info(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0
            # Repetition handling: ban follow-ups that would check or
            # chase; three idle repetitions score as a draw.
            if not game_over and not check and state in history[:-1]:
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        if senv.will_check_or_catch(state, history[i + 1]):
                            no_act.append(history[i + 1])
                        elif not senv.be_catched(state, history[i + 1]):
                            increase_temp = True
                            free_move[state] += 1
                            if free_move[state] >= 3:
                                # Treat three idle repetitions as a draw.
                                game_over = True
                                value = 0
                                logger.info("闲着循环三次,作和棋处理")
                                break

    if final_move:
        # Play out the forced mating move so the record is complete.
        history.append(final_move)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)

    # Record model digests in red/black order for this game.
    data = []
    if idx % 2 == 0:
        data = [res_data['base']['digest'], res_data['unchecked']['digest']]
    else:
        data = [res_data['unchecked']['digest'], res_data['base']['digest']]
    player1.close()
    player2.close()
    del player1, player2
    gc.collect()

    # `value` is from the side-to-move's perspective; flip to red's view.
    if turns % 2 == 1:  # black turn
        value = -value

    v = value
    data.append(history[0])
    for i in range(turns):
        k = i * 2
        data.append([history[k + 1], value])
        value = -value

    pipes_bt.append(pipe1)
    pipes_ng.append(pipe2)
    return (turns, v, idx), data
예제 #9
0
    def start_game(self, idx, search_tree):
        """Play one self-play game and (probabilistically) store its data.

        Args:
            idx: game index; controls periodic tree resets and tags the
                saved data file.
            search_tree: shared MCTS tree from previous games (may be reset
                here before play starts).

        Returns:
            (v, turns, state, store): final value from red's perspective,
            number of half-moves, final state string, and whether the game
            data was written to disk.
        """
        pipes = self.cur_pipes.pop()

        # Reset the shared tree when sharing is disabled, or every
        # reset_mtcs_info_per_game games.
        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        # Resignation is enabled only for a random fraction of games so
        # that some clearly-lost positions are still played out.
        if random() > self.config.play.enable_resign_rate:
            enable_resign = True
        else:
            enable_resign = False

        self.player = CChessPlayer(self.config,
                                   search_tree=search_tree,
                                   pipes=pipes,
                                   enable_resign=enable_resign,
                                   debugging=False,
                                   use_history=self.use_history)

        state = senv.INIT_STATE
        history = [state]  # alternating [state, action, state, ...]
        # policys = []
        value = 0
        turns = 0  # even == red; odd == black
        game_over = False
        final_move = None  # checkmating move returned by senv.done, if any
        no_eat_count = 0  # consecutive half-moves without a capture
        check = False  # whether the side to move is in check
        no_act = []  # moves banned for the current position
        increase_temp = False  # raise exploration temperature on idle loops

        while not game_over:
            start_time = time()
            action, policy = self.player.action(state,
                                                turns,
                                                no_act,
                                                increase_temp=increase_temp)
            end_time = time()
            if action is None:
                # The player found no acceptable move and resigned.
                logger.error(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            # if self.config.opts.log_move:
            #     logger.info(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            # logger.info(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            history.append(action)
            # policys.append(policy)
            try:
                state, no_eat = senv.new_step(state, action)
            except Exception as e:
                logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
                game_over = True
                value = 0
                break
            turns += 1
            if no_eat:
                no_eat_count += 1
            else:
                no_eat_count = 0
            history.append(state)

            # 120 capture-free half-moves or the max game length is a draw.
            if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(
                    state, need_check=True)
                if not game_over:
                    if not senv.has_attack_chessman(state):
                        # Neither side has attacking material left: draw.
                        logger.error(f"双方无进攻子力,作和。state = {state}")
                        game_over = True
                        value = 0
                increase_temp = False
                no_act = []
                # Repetition handling: if this position occurred earlier
                # (and not in check), ban follow-ups that would check or
                # chase; idle repetitions raise the temperature instead.
                if not game_over and not check and state in history[:-1]:
                    free_move = defaultdict(int)
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            if senv.will_check_or_catch(state, history[i + 1]):
                                no_act.append(history[i + 1])
                            elif not senv.be_catched(state, history[i + 1]):
                                increase_temp = True
                                free_move[state] += 1
                                if free_move[state] >= 3:
                                    # Treat three idle repetitions as a draw.
                                    game_over = True
                                    value = 0
                                    logger.error("闲着循环三次,作和棋处理")
                                    break

        if final_move:
            # Play out the forced mating move so the stored game is complete.
            # policy = self.build_policy(final_move, False)
            history.append(final_move)
            # policys.append(policy)
            state = senv.step(state, final_move)
            turns += 1
            value = -value
            history.append(state)

        self.player.close()
        del search_tree
        del self.player
        gc.collect()
        # `value` is from the side-to-move's perspective; flip to red's view.
        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        # Very short games are mostly noise; keep only ~10% of them.
        if turns < 10:
            if random() > 0.9:
                store = True
            else:
                store = False
        else:
            store = True

        if store:
            # Serialize as [initial_state, [action, value], ...] with the
            # value sign alternating per half-move.
            data = [history[0]]
            for i in range(turns):
                k = i * 2
                data.append([history[k + 1], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, store
    def start_game(self, idx, search_tree):
        """Play one self-play game (older variant that stores move policies).

        Args:
            idx: game index; controls periodic tree resets and tags the
                saved data file.
            search_tree: shared MCTS tree from previous games (may be reset
                here before play starts).

        Returns:
            (v, turns, state, search_tree, store): final value from red's
            perspective, half-move count, final state, the (possibly reset)
            tree, and whether the game data was written to disk.
        """
        pipes = self.cur_pipes.pop()

        # Reset the shared tree when sharing is disabled, or every
        # reset_mtcs_info_per_game games.
        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        # Resignation is enabled only for a random fraction of games.
        if random() > self.config.play.enable_resign_rate:
            enable_resign = True
        else:
            enable_resign = False

        self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes, enable_resign=enable_resign, debugging=False)

        state = senv.INIT_STATE
        history = [state]  # alternating [state, action, state, ...]
        policys = [] 
        value = 0
        turns = 0       # even == red; odd == black
        game_over = False
        final_move = None  # checkmating move returned by senv.done, if any

        while not game_over:
            # On a repeated position, ban every move previously played from
            # it so the game cannot loop forever.
            no_act = None
            if state in history[:-1]:
                no_act = []
                for i in range(len(history) - 1):
                    if history[i] == state:
                        no_act.append(history[i + 1])
            start_time = time()
            action, policy = self.player.action(state, turns, no_act)
            end_time = time()
            if action is None:
                # The player found no acceptable move and resigned.
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            # logger.debug(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            # for move, action_state in self.player.search_results.items():
            #     if action_state[0] >= 20:
            #         logger.info(f"move: {move}, prob: {action_state[0]}, Q_value: {action_state[1]:.2f}, Prior: {action_state[2]:.3f}")
            # self.player.search_results = {}
            history.append(action)
            policys.append(policy)
            state = senv.step(state, action)
            turns += 1
            history.append(state)

            # At max length, score by static evaluation instead of a draw.
            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = senv.evaluate(state)
            else:
                game_over, value, final_move = senv.done(state)

        if final_move:
            # Play out the forced mating move so the stored game is complete.
            # NOTE(review): `turns` is not incremented here, unlike other
            # variants of this method — verify this is intentional.
            policy = self.build_policy(final_move, False)
            history.append(final_move)
            policys.append(policy)
            state = senv.step(state, final_move)
            history.append(state)

        self.player.close()
        # `value` is from the side-to-move's perspective; flip to red's view.
        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        # Keep only about half of the drawn games to balance the data.
        if v == 0:
            if random() > 0.5:
                store = True
            else:
                store = False
        else:
            store = True

        if store:
            # Serialize as [[state, policy, value], ...] with the value
            # sign alternating per half-move.
            data = []
            for i in range(turns):
                k = i * 2
                data.append([history[k], policys[i], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, search_tree, store
예제 #11
0
    def start_game(self, idx, search_tree):
        """Play one game of the model against a UCCI engine and store data.

        Args:
            idx: game index; parity decides which side the model (alpha)
                plays — even: red, odd: black.
            search_tree: shared MCTS tree (may be reset here).

        Returns:
            (v, turns, state, store): final value from red's perspective,
            half-move count, final state string, and whether data was saved.
        """
        pipes = self.cur_pipes.pop()

        # Reset the shared tree when sharing is disabled, or every
        # reset_mtcs_info_per_game games.
        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        # Resignation is enabled only for a random fraction of games.
        if random() > self.config.play.enable_resign_rate:
            enable_resign = True
        else:
            enable_resign = False

        self.player = CChessPlayer(self.config,
                                   search_tree=search_tree,
                                   pipes=pipes,
                                   enable_resign=enable_resign,
                                   debugging=False)

        state = senv.INIT_STATE
        history = [state]  # alternating [state, action, state, ...]
        value = 0
        turns = 0  # even == red; odd == black
        game_over = False
        is_alpha_red = True if idx % 2 == 0 else False
        final_move = None  # checkmating move returned by senv.done, if any
        check = False  # whether the side to move is in check

        while not game_over:
            # Alpha (the model) moves on its color; the UCCI engine moves
            # on the other.
            if (is_alpha_red and turns % 2 == 0) or (not is_alpha_red
                                                     and turns % 2 == 1):
                # On a repeated position (and not in check), ban every move
                # previously played from it to avoid endless loops.
                no_act = None
                if not check and state in history[:-1]:
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            no_act.append(history[i + 1])
                action, _ = self.player.action(state, turns, no_act)
                if action is None:
                    logger.debug(
                        f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
            else:
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    logger.debug(
                        f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
                # NOTE(review): black's UCCI moves are flipped — presumably
                # to convert board orientation to senv's convention; confirm.
                if turns % 2 == 1:
                    action = flip_move(action)
            history.append(action)
            state = senv.step(state, action)
            turns += 1
            history.append(state)

            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(
                    state, need_check=True)

        if final_move:
            # Play out the forced mating move so the stored game is complete.
            history.append(final_move)
            state = senv.step(state, final_move)
            history.append(state)
            turns += 1
            value = -value

        self.player.close()
        del search_tree
        del self.player
        gc.collect()
        # `value` is from the side-to-move's perspective; flip to red's view.
        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        # Very short games are mostly noise; keep only ~30% of them.
        if turns <= 10:
            if random() > 0.7:
                store = True
            else:
                store = False
        else:
            store = True

        if store:
            # Serialize as [initial_state, [action, value], ...] with the
            # value sign alternating per half-move.
            data = [history[0]]
            for i in range(turns):
                k = i * 2
                data.append([history[k + 1], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, store
    def start_game(self, idx):
        """Play one evaluation game between the best model and the new model.

        Args:
            idx: game index. Its parity decides colors: even -> best model
                plays red, odd -> best model plays black.

        Returns:
            (value, turns): game result from the best model's perspective
            (1 win / 0 draw / -1 loss) and the number of half-moves played.
        """
        pipe1 = self.pipes_bt.pop()
        pipe2 = self.pipes_ng.pop()
        search_tree1 = defaultdict(VisitState)
        search_tree2 = defaultdict(VisitState)

        self.player1 = CChessPlayer(self.config,
                                    search_tree=search_tree1,
                                    pipes=pipe1,
                                    debugging=False,
                                    enable_resign=True)
        self.player2 = CChessPlayer(self.config,
                                    search_tree=search_tree2,
                                    pipes=pipe2,
                                    debugging=False,
                                    enable_resign=True)

        # even: bst = red, ng = black; odd: bst = black, ng = red
        if idx % 2 == 0:
            red = self.player1
            black = self.player2
            logger.debug("best model is red, ng is black")
        else:
            red = self.player2
            black = self.player1
            logger.debug("best model is black, ng is red")

        state = senv.INIT_STATE
        value = 0  # best model's value
        turns = 0  # even == red; odd == black
        game_over = False

        while not game_over:
            start_time = time()
            if turns % 2 == 0:
                action, _ = red.action(state, turns)
            else:
                action, _ = black.action(state, turns)
            end_time = time()
            # logger.debug(f"pid = {self.pid}, idx = {idx}, action = {action}, turns = {turns}, time = {(end_time-start_time):.1f}")
            if action is None:
                # BUG FIX: `turn` was undefined here (NameError on resign);
                # the loop counter is `turns`.
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break

            state = senv.step(state, action)
            turns += 1

            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move = senv.done(state)

        self.player1.close()
        self.player2.close()

        # `value` is from the side-to-move's perspective; flip to red's view.
        if turns % 2 == 1:  # black turn
            value = -value

        # Flip again when the best model played black (odd idx) so the
        # returned value is always from the best model's perspective.
        if idx % 2 == 1:  # return player1' value
            value = -value

        self.pipes_bt.append(pipe1)
        self.pipes_ng.append(pipe2)
        return value, turns
예제 #13
0
# NOTE(review): "(tuple, list)" is a tuple literal, not a valid type hint;
# consider typing.Tuple[tuple, list] once a typing import is available.
def self_play_buffer(config, cur) -> (tuple, list):
    """Play one self-play game and collect its training samples.

    Args:
        config: global configuration; resign rate and max_game_length
            are read from ``config.play``.
        cur: shared pool of model pipes; one pipe is borrowed for the
            game and returned before exit.

    Returns:
        ``((turns, v), data)`` where ``turns`` is the number of
        half-moves played, ``v`` is the final result from red's
        perspective, and ``data`` is a list of
        ``[state, policy, value]`` training samples (value is from the
        side-to-move's perspective for each sample).
    """
    pipe = cur.pop()  # borrow

    # Resigning is only enabled in a random fraction of games so a share
    # of games is always played out to the end.
    if random() > config.play.enable_resign_rate:
        enable_resign = True
    else:
        enable_resign = False

    player = CChessPlayer(config,
                          search_tree=defaultdict(VisitState),
                          pipes=pipe,
                          enable_resign=enable_resign,
                          debugging=False)

    state = senv.INIT_STATE
    # history alternates [state, action, state, action, ...]
    history = [state]
    policys = []
    value = 0
    turns = 0
    game_over = False
    final_move = None

    while not game_over:
        # If this position occurred before, forbid the moves previously
        # played from it to avoid repetition loops.
        no_act = None
        if state in history[:-1]:
            no_act = []
            for i in range(len(history) - 1):
                if history[i] == state:
                    no_act.append(history[i + 1])
        start_time = time()
        action, policy = player.action(state, turns, no_act)
        end_time = time()
        if action is None:
            logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
            value = -1
            break
        # logger.debug(f"Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
        policys.append(policy)
        history.append(action)
        state = senv.step(state, action)
        turns += 1
        history.append(state)

        if turns / 2 >= config.play.max_game_length:
            # Game hit the move limit: score the position heuristically.
            game_over = True
            value = senv.evaluate(state)
        else:
            game_over, value, final_move = senv.done(state)

    if final_move:
        # Record the forced terminal move so the final position is kept.
        policy = build_policy(final_move, False)
        history.append(final_move)
        policys.append(policy)
        state = senv.step(state, final_move)
        history.append(state)

    player.close()

    if turns % 2 == 1:  # black turn: convert value to red's perspective
        value = -value

    v = value
    # Sample values alternate sign so each is from the mover's view.
    data = []
    for i in range(turns):
        k = i * 2
        data.append([history[k], policys[i], value])
        value = -value

    cur.append(pipe)
    return (turns, v), data
    def start_game(self, idx, search_tree):
        """Play one self-play game where the alpha model alternates
        colors against a UCCI engine, and store the resulting samples.

        Args:
            idx: game index; even -> alpha model plays red,
                odd -> alpha model plays black. Also used to decide when
                to reset the shared search tree.
            search_tree: MCTS tree carried over between games when
                sharing is enabled; may be replaced with a fresh tree.

        Returns:
            ``(v, turns, state, search_tree, store)`` — final value from
            red's perspective, half-move count, final state, the (possibly
            reset) search tree, and whether the game was stored.
        """
        pipes = self.cur_pipes.pop()

        # Reset the shared tree periodically (or always when sharing is off).
        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        # Resignation is enabled only for a random fraction of games.
        if random() > self.config.play.enable_resign_rate:
            enable_resign = True
        else:
            enable_resign = False

        self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes, enable_resign=enable_resign, debugging=False)

        state = senv.INIT_STATE
        # history alternates [state, action, state, action, ...]
        history = [state]
        policys = [] 
        value = 0
        turns = 0       # even == red; odd == black
        game_over = False
        is_alpha_red = True if idx % 2 == 0 else False
        final_move = None

        while not game_over:
            if (is_alpha_red and turns % 2 == 0) or (not is_alpha_red and turns % 2 == 1):
                # Alpha model's turn: forbid moves already played from a
                # repeated position to avoid loops.
                no_act = None
                if state in history[:-1]:
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            no_act.append(history[i + 1])
                action, policy = self.player.action(state, turns, no_act)
                if action is None:
                    logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
            else:
                # UCCI engine's turn: convert to FEN, query the engine,
                # and flip the move back when black is to move.
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
                if turns % 2 == 1:
                    action = flip_move(action)
                try:
                    # Build a one-hot policy for the engine's move.
                    policy = self.build_policy(action, False)
                except Exception as e:
                    logger.error(f"Build policy error {e}, action = {action}, state = {state}, fen = {fen}")
                    value = 0
                    break
            history.append(action)
            policys.append(policy)
            state = senv.step(state, action)
            turns += 1
            history.append(state)

            if turns / 2 >= self.config.play.max_game_length:
                # Move limit reached: score heuristically.
                game_over = True
                value = senv.evaluate(state)
            else:
                game_over, value, final_move = senv.done(state)

        if final_move:
            # Record the forced terminal move as an extra sample.
            policy = self.build_policy(final_move, False)
            history.append(final_move)
            policys.append(policy)
            state = senv.step(state, final_move)
            history.append(state)

        self.player.close()
        if turns % 2 == 1:  # black turn: convert value to red's perspective
            value = -value

        v = value
        # Down-sample short/drawn games to reduce low-signal data.
        if v == 0 or turns <= 10:
            if random() > 0.7:
                store = True
            else:
                store = False
        else:
            store = True

        if store:
            data = []
            for i in range(turns):
                k = i * 2
                data.append([history[k], policys[i], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, search_tree, store
예제 #15
0
    def start_game(self, idx):
        """Play one evaluation game between the base model and the model
        under evaluation, with a randomized playout budget.

        Args:
            idx: game index; even games give the base model red,
                odd games give it black.

        Returns:
            ``(value, turns, data)`` — the result from red's perspective,
            the number of half-moves played, and a record list
            ``[red_digest, black_digest, initial_state, [state, v], ...]``
            where ``v`` alternates sign per half-move.
        """
        sleep(random())  # stagger worker start-up
        # Randomize the simulation budget so repeated games differ.
        playouts = randint(8, 12) * 100
        self.config.play.simulation_num_per_move = playouts
        logger.info(f"Set playouts = {self.config.play.simulation_num_per_move}")

        pipe1 = self.pipes_bt.pop()
        pipe2 = self.pipes_ng.pop()
        search_tree1 = defaultdict(VisitState)
        search_tree2 = defaultdict(VisitState)

        self.player1 = CChessPlayer(self.config, search_tree=search_tree1, pipes=pipe1,
                        debugging=False, enable_resign=False, use_history=self.hist_base)
        self.player2 = CChessPlayer(self.config, search_tree=search_tree2, pipes=pipe2,
                        debugging=False, enable_resign=False, use_history=self.hist_ng)

        # even: bst = red, ng = black; odd: bst = black, ng = red
        if idx % 2 == 0:
            red = self.player1
            black = self.player2
            logger.info(f"进程id = {self.pid} 基准模型执红,待评测模型执黑")
        else:
            red = self.player2
            black = self.player1
            logger.info(f"进程id = {self.pid} 待评测模型执红,基准模型执黑")

        state = senv.INIT_STATE
        history = [state]
        value = 0       # best model's value
        turns = 0       # even == red; odd == black
        game_over = False
        no_eat_count = 0
        check = False
        increase_temp = False
        no_act = []
        # BUGFIX: initialize final_move so an early resignation (break
        # before the first senv.done call) cannot raise UnboundLocalError
        # at the `if final_move:` check below.
        final_move = None

        while not game_over:
            start_time = time()
            if turns % 2 == 0:
                action, _ = red.action(state, turns, no_act=no_act, increase_temp=increase_temp)
            else:
                action, _ = black.action(state, turns, no_act=no_act, increase_temp=increase_temp)
            end_time = time()
            if self.config.opts.log_move:
                logger.debug(f"进程id = {self.pid}, action = {action}, turns = {turns}, time = {(end_time-start_time):.1f}")
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            history.append(action)
            state, no_eat = senv.new_step(state, action)
            turns += 1
            if no_eat:
                no_eat_count += 1
            else:
                no_eat_count = 0
            history.append(state)

            # 120 consecutive captureless half-moves or the move limit
            # ends the game as a draw.
            if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(state, need_check=True)
                no_act = []
                increase_temp = False
                if not game_over:
                    if not senv.has_attack_chessman(state):
                        logger.info(f"双方无进攻子力,作和。state = {state}")
                        game_over = True
                        value = 0
                # Repetition handling: forbid checking/chasing repeats,
                # count idle repeats, and draw after three idle loops.
                if not game_over and not check and state in history[:-1]:
                    free_move = defaultdict(int)
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            if senv.will_check_or_catch(state, history[i+1]):
                                no_act.append(history[i + 1])
                            elif not senv.be_catched(state, history[i+1]):
                                increase_temp = True
                                free_move[state] += 1
                                if free_move[state] >= 3:
                                    # Treat three idle repetitions as a draw.
                                    game_over = True
                                    value = 0
                                    logger.info("闲着循环三次,作和棋处理")
                                    break

        if final_move:
            # Apply the forced terminal move and flip value to the new mover.
            history.append(final_move)
            state = senv.step(state, final_move)
            turns += 1
            value = -value
            history.append(state)

        data = []
        if idx % 2 == 0:
            data = [self.data['base']['digest'], self.data['unchecked']['digest']]
        else:
            data = [self.data['unchecked']['digest'], self.data['base']['digest']]
        self.player1.close()
        self.player2.close()

        if turns % 2 == 1:  # black turn: convert value to red's perspective
            value = -value

        v = value
        data.append(history[0])
        for i in range(turns):
            k = i * 2
            data.append([history[k + 1], v])
            v = -v

        self.pipes_bt.append(pipe1)
        self.pipes_ng.append(pipe2)
        return value, turns, data
예제 #16
0
# NOTE(review): "(tuple, list)" is a tuple literal, not a valid type hint;
# consider typing.Tuple[tuple, list] once a typing import is available.
def self_play_buffer(config, cur, use_history=False) -> (tuple, list):
    """Play one interactive self-play game and return its position data.

    Args:
        config: global configuration; resign rate and max_game_length
            are read from ``config.play``.
        cur: shared pool of model pipes; one pipe is borrowed and
            returned before exit.
        use_history: forwarded to CChessPlayer (history-aware inference).

    Returns:
        ``((turns, v), data)`` where ``v`` is the result from red's
        perspective and ``data`` is ``[initial_state, [state, value], ...]``
        with ``value`` alternating sign per half-move.
    """
    pipe = cur.pop() # borrow

    # Resigning is only enabled in a random fraction of games.
    if random() > config.play.enable_resign_rate:
        enable_resign = True
    else:
        enable_resign = False

    player = CChessPlayer(config, search_tree=defaultdict(VisitState), pipes=pipe, 
                            enable_resign=enable_resign, debugging=False, use_history=use_history)

    state = senv.INIT_STATE
    # history alternates [state, action, state, action, ...]
    history = [state]
    # policys = [] 
    value = 0
    turns = 0
    game_over = False
    final_move = None
    no_eat_count = 0
    check = False
    no_act = None
    increase_temp = False

    while not game_over:
        start_time = time()
        action, policy = player.action(state, turns, no_act, increase_temp=increase_temp)
        end_time = time()
        if action is None:
            print(f"{turns % 2} (0 = 红; 1 = 黑) 投降了!")
            value = -1
            break
        print(f"博弈中: 回合{turns / 2 + 1} {'红方走棋' if turns % 2 == 0 else '黑方走棋'}, 着法: {action}, 用时: {(end_time - start_time):.1f}s")
        # policys.append(policy)
        history.append(action)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
            game_over = True
            value = 0
            break
        turns += 1
        if no_eat:
            no_eat_count += 1
        else:
            no_eat_count = 0
        history.append(state)

        # 120 captureless half-moves or the move limit ends the game as a draw.
        if no_eat_count >= 120 or turns / 2 >= config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(state, need_check=True)
            no_act = []
            increase_temp = False
            if not game_over:
                if not senv.has_attack_chessman(state):
                    logger.info(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0
            # Repetition handling: forbid checking/chasing repeats, count
            # idle repeats, and draw after three idle loops.
            if not game_over and not check and state in history[:-1]:
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        if senv.will_check_or_catch(state, history[i+1]):
                            no_act.append(history[i + 1])
                        elif not senv.be_catched(state, history[i+1]):
                            increase_temp = True
                            free_move[state] += 1
                            if free_move[state] >= 3:
                                # Treat three idle repetitions as a draw.
                                game_over = True
                                value = 0
                                logger.info("闲着循环三次,作和棋处理")
                                break

    if final_move:
        # Apply the forced terminal move and flip value to the new mover.
        # policy = build_policy(final_move, False)
        history.append(final_move)
        # policys.append(policy)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)

    player.close()
    del player
    gc.collect()

    if turns % 2 == 1:  # black turn: convert value to red's perspective
        value = -value
    
    v = value
    data = [history[0]]
    for i in range(turns):
        k = i * 2
        data.append([history[k + 1], value])
        value = -value

    cur.append(pipe)
    return (turns, v), data
예제 #17
0
    def start_game(self, idx, search_tree):
        """Play one pure self-play game with the current model and store
        the resulting training samples.

        Args:
            idx: game index; used to decide when to reset the shared
                search tree.
            search_tree: MCTS tree carried over between games when
                sharing is enabled; may be replaced with a fresh tree.

        Returns:
            ``(v, turns, state, search_tree, store)`` — final value from
            red's perspective, half-move count, final state, the (possibly
            reset) search tree, and whether the game was stored.
        """
        pipes = self.cur_pipes.pop()

        # Reset the shared tree periodically (or always when sharing is off).
        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        # Resignation is enabled only for a random fraction of games.
        if random() > self.config.play.enable_resign_rate:
            enable_resign = True
        else:
            enable_resign = False

        self.player = CChessPlayer(self.config,
                                   search_tree=search_tree,
                                   pipes=pipes,
                                   enable_resign=enable_resign,
                                   debugging=False)

        state = senv.INIT_STATE
        # history alternates [state, action, state, action, ...]
        history = [state]
        policys = []
        value = 0
        turns = 0  # even == red; odd == black
        game_over = False
        final_move = None

        while not game_over:
            # If this position occurred before, forbid the moves already
            # played from it to avoid repetition loops.
            no_act = None
            if state in history[:-1]:
                no_act = []
                for i in range(len(history) - 1):
                    if history[i] == state:
                        no_act.append(history[i + 1])
            start_time = time()
            action, policy = self.player.action(state, turns, no_act)
            end_time = time()
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            # logger.debug(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            # for move, action_state in self.player.search_results.items():
            #     if action_state[0] >= 20:
            #         logger.info(f"move: {move}, prob: {action_state[0]}, Q_value: {action_state[1]:.2f}, Prior: {action_state[2]:.3f}")
            # self.player.search_results = {}
            history.append(action)
            policys.append(policy)
            state = senv.step(state, action)
            turns += 1
            history.append(state)

            if turns / 2 >= self.config.play.max_game_length:
                # Move limit reached: score heuristically.
                game_over = True
                value = senv.evaluate(state)
            else:
                game_over, value, final_move = senv.done(state)

        if final_move:
            # Record the forced terminal move as an extra sample.
            policy = self.build_policy(final_move, False)
            history.append(final_move)
            policys.append(policy)
            state = senv.step(state, final_move)
            history.append(state)

        self.player.close()
        if turns % 2 == 1:  # black turn: convert value to red's perspective
            value = -value

        v = value
        # Down-sample drawn games to reduce low-signal data.
        if v == 0:
            if random() > 0.5:
                store = True
            else:
                store = False
        else:
            store = True

        if store:
            data = []
            for i in range(turns):
                k = i * 2
                data.append([history[k], policys[i], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, search_tree, store
예제 #18
0
 def ai_move(self):
     """Drive the AI side of an interactive GUI game: query MCTS for a
     move, log search diagnostics, and animate the chosen move on the
     board sprites. Runs until the environment reports the game is done.
     """
     ai_move_first = not self.human_move_first
     # self.history alternates [state, action, state, action, ...]
     self.history = [self.env.get_state()]
     no_act = None
     while not self.env.done:
         if ai_move_first == self.env.red_to_move:
             labels = ActionLabelsRed
             labels_n = len(ActionLabelsRed)
             self.ai.search_results = {}
             state = self.env.get_state()
             logger.info(f"state = {state}")
             _, _, _, check = senv.done(state, need_check=True)
             # Repetition handling when not in check and the position
             # occurred before.
             if not check and state in self.history[:-1]:
                 no_act = []
                 free_move = defaultdict(int)
                 for i in range(len(self.history) - 1):
                     if self.history[i] == state:
                         # If the follow-up move gives check or chases a
                         # piece: forbid repeating it.
                         if senv.will_check_or_catch(
                                 state, self.history[i + 1]):
                             no_act.append(self.history[i + 1])
                         # Otherwise treat it as an idle repetition.
                         else:
                             free_move[state] += 1
                             if free_move[state] >= 2:
                                 # Declare a draw after repeated idle moves.
                                 self.env.winner = Winner.draw
                                 self.env.board.winner = Winner.draw
                                 break
                 if no_act:
                     logger.debug(f"no_act = {no_act}")
             action, policy = self.ai.action(state, self.env.num_halfmoves,
                                             no_act)
             if action is None:
                 logger.info("AI has resigned!")
                 return
             self.history.append(action)
             # Moves are searched from red's perspective; flip for black.
             if not self.env.red_to_move:
                 action = flip_move(action)
             key = self.env.get_state()
             p, v = self.ai.debug[key]
             logger.info(f"check = {check}, NN value = {v:.3f}")
             self.nn_value = v
             logger.info("MCTS results:")
             self.mcts_moves = {}
             for move, action_state in self.ai.search_results.items():
                 move_cn = self.env.board.make_single_record(
                     int(move[0]), int(move[1]), int(move[2]), int(move[3]))
                 logger.info(
                     f"move: {move_cn}-{move}, visit count: {action_state[0]}, Q_value: {action_state[1]:.3f}, Prior: {action_state[2]:.3f}"
                 )
                 self.mcts_moves[move_cn] = action_state
             # Animate the move: moves encode source/dest coordinates as
             # four digit characters.
             x0, y0, x1, y1 = int(action[0]), int(action[1]), int(
                 action[2]), int(action[3])
             chessman_sprite = select_sprite_from_group(
                 self.chessmans, x0, y0)
             sprite_dest = select_sprite_from_group(self.chessmans, x1, y1)
             if sprite_dest:
                 # Capture: remove the taken piece's sprite.
                 self.chessmans.remove(sprite_dest)
                 sprite_dest.kill()
             chessman_sprite.move(x1, y1, self.chessman_w, self.chessman_h)
             self.history.append(self.env.get_state())
예제 #19
0
    def MCTS_search(self,
                    state,
                    history=[],
                    is_root_node=False,
                    real_hist=None) -> float:
        """
        Monte Carlo Tree Search

        Iteratively walks the tree from ``state``: terminal positions and
        repetition loops are scored and backed up; unexpanded nodes are
        expanded and evaluated; otherwise a child is selected via Q+U
        with virtual loss applied, and the walk continues.

        NOTE(review): ``history=[]`` is a mutable default argument that is
        shared across calls — callers appear to pass history explicitly,
        but this should be confirmed / changed to ``history=None``.
        """
        while True:
            # logger.debug(f"start MCTS, state = {state}, history = {history}")
            game_over, v, _ = senv.done(state)
            if game_over:
                # Terminal position: back up a scaled result immediately.
                v = v * 2
                self.executor.submit(self.update_tree, None, v, history)
                break

            with self.node_lock[state]:
                if state not in self.tree:
                    # Expand and Evaluate
                    self.tree[state].sum_n = 1
                    self.tree[state].legal_moves = senv.get_legal_moves(state)
                    self.tree[state].waiting = True
                    # logger.debug(f"expand_and_evaluate {state}, sum_n = {self.tree[state].sum_n}, history = {history}")
                    if is_root_node and real_hist:
                        self.expand_and_evaluate(state, history, real_hist)
                    else:
                        self.expand_and_evaluate(state, history)
                    break

                if state in history[:-1]:  # loop
                    # Score repetitions by Chinese-chess repetition rules:
                    # repeating a check/chase loses, being chased wins,
                    # idle repetition is a draw.
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            if senv.will_check_or_catch(state, history[i + 1]):
                                self.executor.submit(self.update_tree, None,
                                                     -1, history)
                            elif senv.be_catched(state, history[i + 1]):
                                self.executor.submit(self.update_tree, None, 1,
                                                     history)
                            else:
                                # logger.debug(f"loop -> loss, state = {state}, history = {history[:-1]}")
                                self.executor.submit(self.update_tree, None, 0,
                                                     history)
                            break
                    break

                # Select
                node = self.tree[state]
                if node.waiting:
                    # Node is being evaluated by another worker: queue this
                    # path and stop here.
                    node.visit.append(history)
                    # logger.debug(f"wait for prediction state = {state}")
                    break

                sel_action = self.select_action_q_and_u(state, is_root_node)

                # Apply virtual loss to discourage other workers from
                # picking the same path concurrently.
                virtual_loss = self.config.play.virtual_loss
                self.tree[state].sum_n += 1
                # logger.debug(f"node = {state}, sum_n = {node.sum_n}")

                action_state = self.tree[state].a[sel_action]
                action_state.n += virtual_loss
                action_state.w -= virtual_loss
                action_state.q = action_state.w / action_state.n

                # logger.debug(f"apply virtual_loss = {virtual_loss}, as.n = {action_state.n}, w = {action_state.w}, q = {action_state.q}")

                # if action_state.next is None:
                history.append(sel_action)
                state = senv.step(state, sel_action)
                history.append(state)