예제 #1
0
def self_play_buffer(config, cur) -> (CChessEnv, list):
    pipes = cur.pop()  # borrow
    env = CChessEnv(config).reset()
    search_tree = defaultdict(VisitState)

    red = CChessPlayer(config, search_tree=search_tree, pipes=pipes)
    black = CChessPlayer(config, search_tree=search_tree, pipes=pipes)

    history = []

    cc = 0
    while not env.done:
        start_time = time()
        if env.red_to_move:
            action = red.action(env)
        else:
            action = black.action(env)
        end_time = time()
        logger.debug(
            f"Playing: {env.red_to_move}, action: {action}, time: {end_time - start_time}s"
        )
        env.step(action)
        history.append(action)
        if len(history) > 6 and history[-1] == history[-5]:
            cc = cc + 1
        else:
            cc = 0
        if env.num_halfmoves / 2 >= config.play.max_game_length:
            env.winner = Winner.draw
        if cc >= 4:
            if env.red_to_move:
                env.winner = Winner.black
            else:
                env.winner = Winner.red
    if env.winner == Winner.red:
        black_win = -1
    elif env.winner == Winner.black:
        black_win = 1
    else:
        black_win = 0

    black.finish_game(black_win)
    red.finish_game(-black_win)

    data = []
    for i in range(len(red.moves)):
        data.append(red.moves[i])
        if i < len(black.moves):
            data.append(black.moves[i])

    cur.append(pipes)
    return env, data
예제 #2
0
class ObSelfPlay:
    def __init__(self, config: Config):
        self.config = config
        self.env = CChessEnv()
        self.model = None
        self.pipe = None
        self.ai = None
        self.chessmans = None

    def load_model(self):
        self.model = CChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(self.model):
            self.model.build()

    def start(self):
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config,
                               search_tree=defaultdict(VisitState),
                               pipes=self.pipe,
                               enable_resign=True,
                               debugging=False)

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()
        history = [self.env.get_state()]

        while not self.env.board.is_end():
            no_act = None
            state = self.env.get_state()
            if state in history[:-1]:
                no_act = []
                for i in range(len(history) - 1):
                    if history[i] == state:
                        no_act.append(history[i + 1])
            action, _ = self.ai.action(state, self.env.num_halfmoves, no_act)
            history.append(action)
            if action is None:
                print("AI投降了!")
                break
            move = self.env.board.make_single_record(int(action[0]),
                                                     int(action[1]),
                                                     int(action[2]),
                                                     int(action[3]))
            if not self.env.red_to_move:
                action = flip_move(action)
            self.env.step(action)
            history.append(self.env.get_state())
            print(f"AI选择移动 {move}")
            self.env.board.print_to_cl()
            sleep(1)

        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()
class ObSelfPlay:
    def __init__(self, config: Config):
        self.config = config
        self.env = CChessEnv()
        self.model = None
        self.pipe = None
        self.ai = None
        self.chessmans = None

    def load_model(self):
        self.model = CChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(self.model):
            self.model.build()

    def start(self):
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState), pipes=self.pipe,
                              enable_resign=True, debugging=False)

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()
        history = [self.env.get_state()]

        while not self.env.board.is_end():
            no_act = None
            state = self.env.get_state()
            if state in history[:-1]:
                no_act = []
                for i in range(len(history) - 1):
                    if history[i] == state:
                        no_act.append(history[i + 1])
            action, _ = self.ai.action(state, self.env.num_halfmoves, no_act)
            history.append(action)
            if action is None:
                print("AI投降了!")
                break
            move = self.env.board.make_single_record(int(action[0]), int(action[1]), int(action[2]), int(action[3]))
            if not self.env.red_to_move:
                action = flip_move(action)
            self.env.step(action)
            history.append(self.env.get_state())
            print(f"AI选择移动 {move}")
            self.env.board.print_to_cl()
            sleep(1)

        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()
예제 #4
0
def self_play_buffer(config, cur, use_history=False) -> (tuple, list):
    pipe = cur.pop() # borrow

    if random() > config.play.enable_resign_rate:
        enable_resign = True
    else:
        enable_resign = False

    player = CChessPlayer(config, search_tree=defaultdict(VisitState), pipes=pipe, 
                            enable_resign=enable_resign, debugging=False, use_history=use_history)

    state = senv.INIT_STATE
    history = [state]
    # policys = [] 
    value = 0
    turns = 0
    game_over = False
    final_move = None
    no_eat_count = 0
    check = False
    no_act = None
    increase_temp = False

    while not game_over:
        start_time = time()
        action, policy = player.action(state, turns, no_act, increase_temp=increase_temp)
        end_time = time()
        if action is None:
            print(f"{turns % 2} (0 = 红; 1 = 黑) 投降了!")
            value = -1
            break
        print(f"博弈中: 回合{turns / 2 + 1} {'红方走棋' if turns % 2 == 0 else '黑方走棋'}, 着法: {action}, 用时: {(end_time - start_time):.1f}s")
        # policys.append(policy)
        history.append(action)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
            game_over = True
            value = 0
            break
        turns += 1
        if no_eat:
            no_eat_count += 1
        else:
            no_eat_count = 0
        history.append(state)

        if no_eat_count >= 120 or turns / 2 >= config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(state, need_check=True)
            no_act = []
            increase_temp = False
            if not game_over:
                if not senv.has_attack_chessman(state):
                    logger.info(f"双方无进攻子力,作和。state = {state}")
                    game_over = True
                    value = 0
            if not game_over and not check and state in history[:-1]:
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        if senv.will_check_or_catch(state, history[i+1]):
                            no_act.append(history[i + 1])
                        elif not senv.be_catched(state, history[i+1]):
                            increase_temp = True
                            free_move[state] += 1
                            if free_move[state] >= 3:
                                # 作和棋处理
                                game_over = True
                                value = 0
                                logger.info("闲着循环三次,作和棋处理")
                                break

    if final_move:
        # policy = build_policy(final_move, False)
        history.append(final_move)
        # policys.append(policy)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)

    player.close()
    del player
    gc.collect()

    if turns % 2 == 1:  # balck turn
        value = -value
    
    v = value
    data = [history[0]]
    for i in range(turns):
        k = i * 2
        data.append([history[k + 1], value])
        value = -value

    cur.append(pipe)
    return (turns, v), data
예제 #5
0
class PlayWithHuman:
    def __init__(self, config: Config):
        self.config = config
        self.env = CChessEnv()
        self.model = None
        self.pipe = None
        self.ai = None
        self.chessmans = None
        self.human_move_first = True

    def load_model(self):
        self.model = CChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(self.model):
            self.model.build()

    def start(self, human_first=True):
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config,
                               search_tree=defaultdict(VisitState),
                               pipes=self.pipe,
                               enable_resign=True,
                               debugging=False)
        self.human_move_first = human_first

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()

        while not self.env.board.is_end():
            if human_first == self.env.red_to_move:
                self.env.board.calc_chessmans_moving_list()
                is_correct_chessman = False
                is_correct_position = False
                chessman = None
                while not is_correct_chessman:
                    title = "请输入棋子位置: "
                    input_chessman_pos = input(title)
                    x, y = int(input_chessman_pos[0]), int(
                        input_chessman_pos[1])
                    chessman = self.env.board.chessmans[x][y]
                    if chessman != None and chessman.is_red == self.env.board.is_red_turn:
                        is_correct_chessman = True
                        print(f"当前棋子为{chessman.name_cn},可以落子的位置有:")
                        for point in chessman.moving_list:
                            print(point.x, point.y)
                    else:
                        print("没有找到此名字的棋子或未轮到此方走子")
                while not is_correct_position:
                    title = "请输入落子的位置: "
                    input_chessman_pos = input(title)
                    x, y = int(input_chessman_pos[0]), int(
                        input_chessman_pos[1])
                    is_correct_position = chessman.move(x, y)
                    if is_correct_position:
                        self.env.board.print_to_cl()
                        self.env.board.clear_chessmans_moving_list()
            else:
                action, policy = self.ai.action(self.env.get_state(),
                                                self.env.num_halfmoves)
                if not self.env.red_to_move:
                    action = flip_move(action)
                if action is None:
                    print("AI投降了!")
                    break
                self.env.step(action)
                print(f"AI选择移动 {action}")
                self.env.board.print_to_cl()

        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()
class PlayWithHuman:
    def __init__(self, config: Config):
        self.config = config
        self.env = CChessEnv()
        self.model = None
        self.pipe = None
        self.ai = None
        self.chessmans = None
        self.human_move_first = True

    def load_model(self):
        self.model = CChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(self.model):
            self.model.build()

    def start(self, human_first=True):
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState), pipes=self.pipe,
                              enable_resign=True, debugging=False)
        self.human_move_first = human_first

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()

        while not self.env.board.is_end():
            if human_first == self.env.red_to_move:
                self.env.board.calc_chessmans_moving_list()
                is_correct_chessman = False
                is_correct_position = False
                chessman = None
                while not is_correct_chessman:
                    title = "请输入棋子位置: "
                    input_chessman_pos = input(title)
                    x, y = int(input_chessman_pos[0]), int(input_chessman_pos[1])
                    chessman = self.env.board.chessmans[x][y]
                    if chessman != None and chessman.is_red == self.env.board.is_red_turn:
                        is_correct_chessman = True
                        print(f"当前棋子为{chessman.name_cn},可以落子的位置有:")
                        for point in chessman.moving_list:
                            print(point.x, point.y)
                    else:
                        print("没有找到此名字的棋子或未轮到此方走子")
                while not is_correct_position:
                    title = "请输入落子的位置: "
                    input_chessman_pos = input(title)
                    x, y = int(input_chessman_pos[0]), int(input_chessman_pos[1])
                    is_correct_position = chessman.move(x, y)
                    if is_correct_position:
                        self.env.board.print_to_cl()
                        self.env.board.clear_chessmans_moving_list()
            else:
                action, policy = self.ai.action(self.env.get_state(), self.env.num_halfmoves)
                if not self.env.red_to_move:
                    action = flip_move(action)
                if action is None:
                    print("AI投降了!")
                    break
                self.env.step(action)
                print(f"AI选择移动 {action}")
                self.env.board.print_to_cl()

        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()
예제 #7
0
class PlayWithHuman:
    def __init__(self, config: Config):
        self.config = config
        self.env = CChessEnv()
        self.model = None
        self.pipe = None
        self.ai = None
        self.chessmans = None
        self.human_move_first = True

    def load_model(self):
        self.model = CChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(self.model):
            self.model.build()

    def start(self, human_first=True):
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config,
                               search_tree=defaultdict(VisitState),
                               pipes=self.pipe,
                               enable_resign=True,
                               debugging=False)
        self.human_move_first = human_first

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()

        while not self.env.board.is_end():
            if human_first == self.env.red_to_move:
                self.env.board.calc_chessmans_moving_list()
                is_correct_chessman = False
                is_correct_position = False
                chessman = None
                while not is_correct_chessman:
                    title = "Please enter the chess piece position: "
                    input_chessman_pos = input(title)
                    print(input_chessman_pos)
                    x, y = int(input_chessman_pos[0]), int(
                        input_chessman_pos[1])
                    chessman = self.env.board.chessmans[x][y]
                    if chessman != None and chessman.is_red == self.env.board.is_red_turn:
                        is_correct_chessman = True
                        print(
                            f"The current chess piece is {chessman.name},places where you can play:"
                        )
                        for point in chessman.moving_list:
                            print(point.x, point.y)
                    else:
                        print(
                            "No chess piece with this name was found or it was not his turn to walk"
                        )
                while not is_correct_position:
                    title = "Please enter the location of the child: "
                    input_chessman_pos = input(title)
                    x, y = int(input_chessman_pos[0]), int(
                        input_chessman_pos[1])
                    is_correct_position = chessman.move(x, y)
                    if is_correct_position:
                        self.env.board.print_to_cl()
                        self.env.board.clear_chessmans_moving_list()
            else:
                action, policy = self.ai.action(self.env.get_state(),
                                                self.env.num_halfmoves)
                if not self.env.red_to_move:
                    action = flip_move(action)
                if action is None:
                    print("AI surrendered!")
                    break
                self.env.step(action)
                print(f"AI chooses to move {action}")
                self.env.board.print_to_cl()

        self.ai.close()
        print(f"The winner is is {self.env.board.winner} !!!")
        self.env.board.print_record()
예제 #8
0
class ObSelfPlayUCCI:
    def __init__(self, config: Config, ai_move_first=True):
        self.config = config
        self.env = CChessEnv()
        self.model = None
        self.pipe = None
        self.ai = None
        self.chessmans = None
        self.ai_move_first = ai_move_first

    def load_model(self):
        self.model = CChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(self.model):
            self.model.build()

    def start(self):
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config,
                               search_tree=defaultdict(VisitState),
                               pipes=self.pipe,
                               enable_resign=True,
                               debugging=False)

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()
        history = [self.env.get_state()]
        turns = 0
        game_over = False
        final_move = None

        while not game_over:
            if (self.ai_move_first
                    and turns % 2 == 0) or (not self.ai_move_first
                                            and turns % 2 == 1):
                start_time = time()
                no_act = None
                state = self.env.get_state()
                if state in history[:-1]:
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            act = history[i + 1]
                            if not self.env.red_to_move:
                                act = flip_move(act)
                            no_act.append(act)
                action, _ = self.ai.action(state, self.env.num_halfmoves,
                                           no_act)
                end_time = time()
                if action is None:
                    print("AlphaZero 投降了!")
                    break
                move = self.env.board.make_single_record(
                    int(action[0]), int(action[1]), int(action[2]),
                    int(action[3]))
                print(
                    f"AlphaZero 选择移动 {move}, 消耗时间 {(end_time - start_time):.2f}s"
                )
                if not self.env.red_to_move:
                    action = flip_move(action)
            else:
                state = self.env.get_state()
                print(state)
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    print("Eleeye 投降了!")
                    break
                print(action)
                if not self.env.red_to_move:
                    rec_action = flip_move(action)
                else:
                    rec_action = action
                move = self.env.board.make_single_record(
                    int(rec_action[0]), int(rec_action[1]), int(rec_action[2]),
                    int(rec_action[3]))
                print(f"Eleeye 选择移动 {move}")
            history.append(action)
            self.env.step(action)
            history.append(self.env.get_state())
            self.env.board.print_to_cl()
            turns += 1
            sleep(1)
            game_over, final_move = self.env.board.is_end_final_move()
            print(game_over, final_move)

        if final_move:
            move = self.env.board.make_single_record(int(final_move[0]),
                                                     int(final_move[1]),
                                                     int(final_move[2]),
                                                     int(final_move[3]))
            print(f"Final Move {move}")
            if not self.env.red_to_move:
                final_move = flip_move(final_move)
            self.env.step(final_move)
            self.env.board.print_to_cl()

        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()

    def get_ucci_move(self, fen, time=3):
        p = subprocess.Popen(self.config.resource.eleeye_path,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True)
        setfen = f'position fen {fen}\n'
        setrandom = 'setoption randomness small\n'
        cmd = 'ucci\n' + setrandom + setfen + f'go time {time * 1000}\n'
        try:
            out, err = p.communicate(cmd, timeout=time + 0.5)
        except:
            p.kill()
            try:
                out, err = p.communicate()
            except Exception as e:
                logger.error(f"{e}, cmd = {cmd}")
                return self.get_ucci_move(fen, time + 1)
        print(out)
        lines = out.split('\n')
        if lines[-2] == 'nobestmove':
            return None
        move = lines[-2].split(' ')[1]
        if move == 'depth':
            move = lines[-1].split(' ')[6]
        return senv.parse_ucci_move(move)
예제 #9
0
class UCI:
    def __init__(self, config: Config):
        self.config = config
        self.args = None
        self.state = None
        self.is_red_turn = None
        self.player = None
        self.model = None
        self.pipe = None
        self.is_ready = False
        self.search_tree = defaultdict(VisitState)
        self.remain_time = None
        self.history = None
        self.turns = 0
        self.start_time = None
        self.end_time = None
        self.t = None
        self.use_history = False

    def main(self):
        while True:
            cmd = input()
            logger.debug(f"CMD: {cmd}")
            cmds = cmd.split(' ')
            self.args = cmds[1:]
            method = getattr(self, 'cmd_' + cmds[0], None)
            if method != None:
                method()
            else:
                logger.error(f"Error command: {cmd}")

    def cmd_uci(self):
        print('id name CCZero')
        print('id author https://cczero.org')
        print('id version 2.4')
        print('option name gpu spin default 0 min 0 max 7')
        print('option name Threads spin default 10 min 0 max 1024')
        print('uciok')
        sys.stdout.flush()
        set_session_config(per_process_gpu_memory_fraction=1,
                           allow_growth=True,
                           device_list=self.config.opts.device_list)
        self.use_history = self.load_model()
        self.is_ready = True
        self.turns = 0
        self.remain_time = None
        self.state = senv.INIT_STATE
        self.history = [self.state]
        self.is_red_turn = True

    def cmd_ucinewgame(self):
        self.state = senv.INIT_STATE
        self.history = [self.state]
        self.is_ready = True
        self.is_red_turn = True
        self.search_tree = defaultdict(VisitState)

    def cmd_setoption(self):
        '''
        setoption name <id> [value <x>]
        '''
        if len(self.args) > 3:
            id = self.args[1]
            if id == 'gpu':
                value = int(self.args[3])
                self.config.opts.device_list = value
                set_session_config(per_process_gpu_memory_fraction=1,
                                   allow_growth=True,
                                   device_list=self.config.opts.device_list)
            if id == 'Threads':
                value = int(self.args[3])
                self.config.play.search_threads = value

    def cmd_isready(self):
        if self.is_ready == True:
            print('readyok')
            sys.stdout.flush()
        logger.debug(f"is_ready = {self.is_ready}")

    def cmd_position(self):
        '''
        position {fen <fenstring> | startpos } [moves <move1> .... <moven>]
        '''
        if not self.is_ready:
            return
        move_idx = -1
        if len(self.args) > 0:
            if self.args[0] == 'fen':
                # init with fen string
                fen = self.args[1]
                try:
                    self.state = senv.fen_to_state(fen)
                except Exception as e:
                    logger.error(f"cmd position error! cmd = {self.args}, {e}")
                    return
                self.history = [self.state]
                turn = self.args[2]
                if turn == 'b':
                    self.state = senv.fliped_state(self.state)
                    self.is_red_turn = False
                    self.turns = (int(self.args[6]) - 1) * 2 + 1
                else:
                    self.is_red_turn = True
                    self.turns = (int(self.args[6]) - 1) * 2
                if len(self.args) > 7 and self.args[7] == 'moves':
                    move_idx = 8
            elif self.args[0] == 'startpos':
                self.state = senv.INIT_STATE
                self.is_red_turn = True
                self.history = [self.state]
                self.turns = 0
                if len(self.args) > 1 and self.args[1] == 'moves':
                    move_idx = 2
            elif self.args[0] == 'moves':
                move_idx = 1
        else:
            self.state = senv.INIT_STATE
            self.is_red_turn = True
            self.history = [self.state]
            self.turns = 0
        logger.debug(f"state = {self.state}")
        # senv.render(self.state)
        # execute moves
        if move_idx != -1:
            for i in range(move_idx, len(self.args)):
                action = senv.parse_ucci_move(self.args[i])
                if not self.is_red_turn:
                    action = flip_move(action)
                self.history.append(action)
                self.state = senv.step(self.state, action)
                self.is_red_turn = not self.is_red_turn
                self.turns += 1
                self.history.append(self.state)
            logger.debug(f"state = {self.state}")
            # senv.render(self.state)

    def cmd_fen(self):
        self.args.insert(0, 'fen')
        self.cmd_position()

    def cmd_go(self):
        '''
        go ...
          让引擎根据内置棋盘的设置和设定的搜索方式来思考,有以下搜索方式可供选择(可以多选,直接跟在go后面):
          ❌(1) searchmoves <move1> .... <moven>,只让引擎在这几步中选择一步;
          ✅(2) wtime <x>,白方剩余时间(单位是毫秒);
                btime <x>,黑方剩余时间;
                ❌winc <x>,白方每步增加的时间(适用于Fischer制);
                ❌binc <x>,黑方每步增加的时间;
                ❌movestogo <x>,还有多少回合进入下一时段(适用于时段制);
          这些选项用来设定时钟,它决定了引擎的思考时间;
          ❌(3) ponder,让引擎进行后台思考(即对手在用时,引擎的时钟不起作用);
          ✅(4) depth <x>,指定搜索深度;
          ❌(5) nodes <x>,指定搜索的节点数(即分析的局面数,一般它和时间成正比);
          ❌(6) mate <x>,在指定步数内只搜索杀棋;
          ✅(7) movetime <x>,只花规定的时间搜索;
          ✅(8) infinite,无限制搜索,直到杀棋。
        '''
        if not self.is_ready:
            return
        self.start_time = time()
        self.t = None
        depth = None
        infinite = True
        self.remain_time = None
        self.model.close_pipes()
        self.pipe = self.model.get_pipes(need_reload=False)
        self.search_tree = defaultdict(VisitState)
        self.player = CChessPlayer(self.config,
                                   search_tree=self.search_tree,
                                   pipes=self.pipe,
                                   enable_resign=False,
                                   debugging=True,
                                   uci=True,
                                   use_history=self.use_history,
                                   side=self.turns % 2)
        for i in range(len(self.args)):
            if self.args[i] == 'depth':
                depth = int(self.args[i + 1]) * 100
                infinite = False
            if self.args[i] == 'movetime' or self.args[i] == 'time':
                self.remain_time = int(self.args[i + 1]) / 1000
            if self.args[i] == 'infinite':
                infinite = True
            if self.args[i] == 'wtime':
                if self.is_red_turn:
                    self.remain_time = int(self.args[i + 1]) / 1000
                    depth = 3000
                    infinite = False
            if self.args[i] == 'btime':
                if not self.is_red_turn:
                    self.remain_time = int(self.args[i + 1]) / 1000
                    depth = 3000
                    infinite = False
        logger.debug(
            f"depth = {depth}, infinite = {infinite}, remain_time = {self.remain_time}"
        )
        search_worker = Thread(target=self.search_action,
                               args=(depth, infinite))
        search_worker.daemon = True
        search_worker.start()

        if self.remain_time:
            self.t = Timer(self.remain_time - 0.01, self.cmd_stop)
            self.t.start()

    def cmd_stop(self):
        if not self.is_ready:
            return
        if self.player:
            no_act = None
            if self.state in self.history[:-1]:
                no_act = []
                for i in range(len(self.history) - 1):
                    if self.history[i] == self.state:
                        no_act.append(self.history[i + 1])
            action, value, depth = self.player.close_and_return_action(
                self.state, self.turns, no_act)
            self.player = None
            self.model.close_pipes()
            self.info_best_move(action, value, depth)
        else:
            logger.error(f"bestmove none")

    def cmd_quit(self):
        sys.exit()

    def load_model(self, config_file=None):
        use_history = True
        self.model = CChessModel(self.config)
        weight_path = self.config.resource.model_best_weight_path
        if not config_file:
            config_path = config.resource.model_best_path
            use_history = False
        else:
            config_path = os.path.join(config.resource.model_dir, config_file)
        try:
            if not load_model_weight(self.model, config_path, weight_path):
                self.model.build()
                use_history = True
        except Exception as e:
            logger.info(f"Exception {e}, 重新加载权重")
            return self.load_model(config_file='model_128_l1_config.json')
        logger.info(f"use_history = {use_history}")
        return use_history

    def search_action(self, depth, infinite):
        no_act = None
        _, _, _, check = senv.done(self.state, need_check=True)
        logger.debug(f"Check = {check}, state = {self.state}")
        if not check and self.state in self.history[:-1]:
            no_act = []
            for i in range(len(self.history) - 1):
                if self.history[i] == self.state:
                    if senv.will_check_or_catch(self.state,
                                                self.history[i + 1]):
                        no_act.append(self.history[i + 1])
                        logger.debug(f"Foul: no act = {no_act}")
        action, _ = self.player.action(self.state,
                                       self.turns,
                                       no_act=no_act,
                                       depth=depth,
                                       infinite=infinite,
                                       hist=self.history)
        if self.t:
            self.t.cancel()
        _, value = self.player.debug[self.state]
        depth = self.player.done_tasks // 100
        self.player.close(wait=False)
        self.player = None
        self.model.close_pipes()
        self.info_best_move(action, value, depth)

    def info_best_move(self, action, value, depth):
        self.end_time = time()
        if not self.is_red_turn:
            value = -value
        score = int(value * 1000)
        duration = self.end_time - self.start_time
        nps = int(depth * 100 / duration) * 1000
        print(
            f"info depth {depth} score {score} time {int(duration * 1000)} nps {nps}"
        )
        logger.debug(
            f"info depth {depth} score {score} time {int((self.end_time - self.start_time) * 1000)}"
        )
        sys.stdout.flush()
        # get ponder
        state = senv.step(self.state, action)
        ponder = None
        if state in self.search_tree:
            node = self.search_tree[state]
            cnt = 0
            for mov, action_state in node.a.items():
                if action_state.n > cnt:
                    ponder = mov
                    cnt = action_state.n
        if not self.is_red_turn:
            action = flip_move(action)
        action = senv.to_uci_move(action)
        output = f"bestmove {action}"
        if ponder:
            if self.is_red_turn:
                ponder = flip_move(ponder)
            ponder = senv.to_uci_move(ponder)
            output += f" ponder {ponder}"
        print(output)
        logger.debug(output)
        sys.stdout.flush()
class SelfPlayWorker:
    def __init__(self, config: Config, pipes=None, pid=None):
        self.config = config
        self.player = None
        self.cur_pipes = pipes
        self.id = pid
        self.buffer = []
        self.pid = os.getpid()

    def start(self):
        logger.debug(f"Selfplay#Start Process index = {self.id}, pid = {self.pid}")

        idx = 1
        self.buffer = []
        search_tree = defaultdict(VisitState)

        while True:
            start_time = time()
            value, turns, state, search_tree, store = self.start_game(idx, search_tree)
            end_time = time()
            logger.debug(f"Process {self.pid}-{self.id} play game {idx} time={(end_time - start_time):.1f} sec, "
                         f"turn={turns / 2}, winner = {value:.2f} (1 = red, -1 = black, 0 draw)")
            if turns <= 10:
                senv.render(state)
            if store:
                idx += 1

    def start_game(self, idx, search_tree):
        pipes = self.cur_pipes.pop()

        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        if random() > self.config.play.enable_resign_rate:
            enable_resign = True
        else:
            enable_resign = False

        self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes, enable_resign=enable_resign, debugging=False)

        state = senv.INIT_STATE
        history = [state]
        policys = [] 
        value = 0
        turns = 0       # even == red; odd == black
        game_over = False
        final_move = None

        while not game_over:
            no_act = None
            if state in history[:-1]:
                no_act = []
                for i in range(len(history) - 1):
                    if history[i] == state:
                        no_act.append(history[i + 1])
            start_time = time()
            action, policy = self.player.action(state, turns, no_act)
            end_time = time()
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            # logger.debug(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            # for move, action_state in self.player.search_results.items():
            #     if action_state[0] >= 20:
            #         logger.info(f"move: {move}, prob: {action_state[0]}, Q_value: {action_state[1]:.2f}, Prior: {action_state[2]:.3f}")
            # self.player.search_results = {}
            history.append(action)
            policys.append(policy)
            state = senv.step(state, action)
            turns += 1
            history.append(state)

            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = senv.evaluate(state)
            else:
                game_over, value, final_move = senv.done(state)

        if final_move:
            policy = self.build_policy(final_move, False)
            history.append(final_move)
            policys.append(policy)
            state = senv.step(state, final_move)
            history.append(state)

        self.player.close()
        if turns % 2 == 1:  # balck turn
            value = -value

        v = value
        if v == 0:
            if random() > 0.5:
                store = True
            else:
                store = False
        else:
            store = True

        if store:
            data = []
            for i in range(turns):
                k = i * 2
                data.append([history[k], policys[i], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, search_tree, store

    def save_play_data(self, idx, data):
        self.buffer += data

        if not idx % self.config.play_data.nb_game_in_file == 0:
            return

        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        filename = rc.play_data_filename_tmpl % game_id
        path = os.path.join(rc.play_data_dir, filename)
        logger.info(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        if self.config.internet.distributed:
            upload_worker = Thread(target=self.upload_play_data, args=(path, filename), name="upload_worker")
            upload_worker.daemon = True
            upload_worker.start()
        self.buffer = []

    def upload_play_data(self, path, filename):
        digest = CChessModel.fetch_digest(self.config.resource.model_best_weight_path)
        data = {'digest': digest, 'username': self.config.internet.username}
        response = upload_file(self.config.internet.upload_url, path, filename, data, rm=False)
        if response is not None and response['status'] == 0:
            logger.info(f"Upload play data {filename} finished.")
        else:
            logger.error(f'Upload play data {filename} failed. {response.msg if response is not None else None}')

    def remove_play_data(self):
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        try:
            for i in range(len(files) - self.config.play_data.max_file_num):
                os.remove(files[i])
        except:
            pass

    def build_policy(self, action, flip):
        labels_n = len(ActionLabelsRed)
        move_lookup = {move: i for move, i in zip(ActionLabelsRed, range(labels_n))}
        policy = np.zeros(labels_n)

        policy[move_lookup[action]] = 1

        if flip:
            policy = flip_policy(policy)
        return list(policy)
예제 #11
0
class SelfPlayWorker:
    def __init__(self, config: Config, pipes=None, pid=None):
        self.config = config
        self.player = None
        self.cur_pipes = pipes
        self.id = pid
        self.buffer = []
        self.pid = os.getpid()

    def start(self):
        self.pid = os.getpid()
        logger.debug(
            f"Selfplay#Start Process index = {self.id}, pid = {self.pid}")

        idx = 1
        self.buffer = []

        while True:
            search_tree = defaultdict(VisitState)
            start_time = time()
            value, turns, state, store = self.start_game(idx, search_tree)
            end_time = time()
            if value != 1 and value != -1:
                winner = 'Draw'
            elif idx % 2 == 0 and value == 1 or idx % 2 == 1 and value == -1:
                winner = 'AlphaHe'
            else:
                winner = 'Eleeye'

            logger.debug(
                f"Process {self.pid}-{self.id} play game {idx} time={(end_time - start_time):.1f} sec, "
                f"turn={turns / 2}, value = {value:.2f}, winner is {winner}")
            if turns <= 10 and store:
                senv.render(state)
            if store:
                idx += 1

    def start_game(self, idx, search_tree):
        pipes = self.cur_pipes.pop()

        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        if random() > self.config.play.enable_resign_rate:
            enable_resign = True
        else:
            enable_resign = False

        self.player = CChessPlayer(self.config,
                                   search_tree=search_tree,
                                   pipes=pipes,
                                   enable_resign=enable_resign,
                                   debugging=False)

        state = senv.INIT_STATE
        history = [state]
        value = 0
        turns = 0  # even == red; odd == black
        game_over = False
        is_alpha_red = True if idx % 2 == 0 else False
        final_move = None
        check = False

        while not game_over:
            if (is_alpha_red and turns % 2 == 0) or (not is_alpha_red
                                                     and turns % 2 == 1):
                no_act = None
                if not check and state in history[:-1]:
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            no_act.append(history[i + 1])
                action, _ = self.player.action(state, turns, no_act)
                if action is None:
                    logger.debug(
                        f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
            else:
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    logger.debug(
                        f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
                if turns % 2 == 1:
                    action = flip_move(action)
            history.append(action)
            state = senv.step(state, action)
            turns += 1
            history.append(state)

            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(
                    state, need_check=True)

        if final_move:
            history.append(final_move)
            state = senv.step(state, final_move)
            history.append(state)
            turns += 1
            value = -value

        self.player.close()
        del search_tree
        del self.player
        gc.collect()
        if turns % 2 == 1:  # balck turn
            value = -value

        v = value
        if turns <= 10:
            if random() > 0.7:
                store = True
            else:
                store = False
        else:
            store = True

        if store:
            data = [history[0]]
            for i in range(turns):
                k = i * 2
                data.append([history[k + 1], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, store

    def get_ucci_move(self, fen, time=3):
        p = subprocess.Popen(self.config.resource.eleeye_path,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True)
        setfen = f'position fen {fen}\n'
        setrandom = f'setoption randomness {self.config.opts.random}\n'
        cmd = 'ucci\n' + setrandom + setfen + f'go time {time * 1000}\n'
        try:
            out, err = p.communicate(cmd, timeout=time + 0.5)
        except subprocess.TimeoutExpired:
            p.kill()
            try:
                out, err = p.communicate()
            except Exception as e:
                logger.error(f"{e}, cmd = {cmd}")
                return self.get_ucci_move(fen, time + 1)
        lines = out.split('\n')
        if lines[-2] == 'nobestmove':
            return None
        move = lines[-2].split(' ')[1]
        if move == 'depth':
            move = lines[-1].split(' ')[6]
        return senv.parse_ucci_move(move)

    def save_play_data(self, idx, data):
        self.buffer += data

        if not idx % self.config.play_data.nb_game_in_file == 0:
            return

        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir,
                            rc.play_data_filename_tmpl % game_id)
        logger.info(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        try:
            for i in range(len(files) - self.config.play_data.max_file_num):
                os.remove(files[i])
        except:
            pass

    def build_policy(self, action, flip):
        labels_n = len(ActionLabelsRed)
        move_lookup = {
            move: i
            for move, i in zip(ActionLabelsRed, range(labels_n))
        }
        policy = np.zeros(labels_n)

        policy[move_lookup[action]] = 1

        if flip:
            policy = flip_policy(policy)
        return list(policy)
class ObSelfPlayUCCI:
    def __init__(self, config: Config, ai_move_first=True):
        self.config = config
        self.env = CChessEnv()
        self.model = None
        self.pipe = None
        self.ai = None
        self.chessmans = None
        self.ai_move_first = ai_move_first

    def load_model(self):
        self.model = CChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(self.model):
            self.model.build()

    def start(self):
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config, search_tree=defaultdict(VisitState), pipes=self.pipe,
                              enable_resign=True, debugging=False)

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        self.env.board.print_to_cl()
        history = [self.env.get_state()]
        turns = 0
        game_over = False
        final_move = None

        while not game_over:
            if (self.ai_move_first and turns % 2 == 0) or (not self.ai_move_first and turns % 2 == 1):
                start_time = time()
                no_act = None
                state = self.env.get_state()
                if state in history[:-1]:
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            act = history[i + 1]
                            if not self.env.red_to_move:
                                act = flip_move(act)
                            no_act.append(act)
                action, _ = self.ai.action(state, self.env.num_halfmoves, no_act)
                end_time = time()
                if action is None:
                    print("AlphaZero 投降了!")
                    break
                move = self.env.board.make_single_record(int(action[0]), int(action[1]), int(action[2]), int(action[3]))
                print(f"AlphaZero 选择移动 {move}, 消耗时间 {(end_time - start_time):.2f}s")
                if not self.env.red_to_move:
                    action = flip_move(action)
            else:
                state = self.env.get_state()
                print(state)
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    print("Eleeye 投降了!")
                    break
                print(action)
                if not self.env.red_to_move:
                    rec_action = flip_move(action)
                else:
                    rec_action = action
                move = self.env.board.make_single_record(int(rec_action[0]), int(rec_action[1]), int(rec_action[2]), int(rec_action[3]))
                print(f"Eleeye 选择移动 {move}")
            history.append(action)
            self.env.step(action)
            history.append(self.env.get_state())
            self.env.board.print_to_cl()
            turns += 1
            sleep(1)
            game_over, final_move = self.env.board.is_end_final_move()
            print(game_over, final_move)

        if final_move:
            move = self.env.board.make_single_record(int(final_move[0]), int(final_move[1]), int(final_move[2]), int(final_move[3]))
            print(f"Final Move {move}")
            if not self.env.red_to_move:
                final_move = flip_move(final_move)
            self.env.step(final_move)
            self.env.board.print_to_cl()

        self.ai.close()
        print(f"胜者是 is {self.env.board.winner} !!!")
        self.env.board.print_record()

    def get_ucci_move(self, fen, time=3):
        p = subprocess.Popen(self.config.resource.eleeye_path,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=True)
        setfen = f'position fen {fen}\n'
        setrandom = 'setoption randomness small\n'
        cmd = 'ucci\n' + setrandom + setfen + f'go time {time * 1000}\n'
        try:
            out, err = p.communicate(cmd, timeout=time+0.5)
        except:
            p.kill()
            try:
                out, err = p.communicate()
            except Exception as e:
                logger.error(f"{e}, cmd = {cmd}")
                return self.get_ucci_move(fen, time+1)
        print(out)
        lines = out.split('\n')
        if lines[-2] == 'nobestmove':
            return None
        move = lines[-2].split(' ')[1]
        if move == 'depth':
            move = lines[-1].split(' ')[6]
        return senv.parse_ucci_move(move)
예제 #13
0
def self_play_buffer(config, cur) -> (tuple, list):
    pipe = cur.pop()  # borrow

    if random() > config.play.enable_resign_rate:
        enable_resign = True
    else:
        enable_resign = False

    player = CChessPlayer(config,
                          search_tree=defaultdict(VisitState),
                          pipes=pipe,
                          enable_resign=enable_resign,
                          debugging=False)

    state = senv.INIT_STATE
    history = [state]
    policys = []
    value = 0
    turns = 0
    game_over = False
    final_move = None

    while not game_over:
        no_act = None
        if state in history[:-1]:
            no_act = []
            for i in range(len(history) - 1):
                if history[i] == state:
                    no_act.append(history[i + 1])
        start_time = time()
        action, policy = player.action(state, turns, no_act)
        end_time = time()
        if action is None:
            logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
            value = -1
            break
        # logger.debug(f"Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
        policys.append(policy)
        history.append(action)
        state = senv.step(state, action)
        turns += 1
        history.append(state)

        if turns / 2 >= config.play.max_game_length:
            game_over = True
            value = senv.evaluate(state)
        else:
            game_over, value, final_move = senv.done(state)

    if final_move:
        policy = build_policy(final_move, False)
        history.append(final_move)
        policys.append(policy)
        state = senv.step(state, final_move)
        history.append(state)

    player.close()

    if turns % 2 == 1:  # balck turn
        value = -value

    v = value
    data = []
    for i in range(turns):
        k = i * 2
        data.append([history[k], policys[i], value])
        value = -value

    cur.append(pipe)
    return (turns, v), data
class SelfPlayWorker:
    def __init__(self, config: Config, pipes=None, pid=None):
        self.config = config
        self.red = None
        self.black = None
        self.cur_pipes = pipes
        self.pid = pid
        self.buffer = []

    def start(self):
        logger.debug(
            f"Selfplay#Start Process index = {self.pid}, pid = {os.getpid()}")

        idx = 1
        self.buffer = []
        search_tree = defaultdict(VisitState)

        while True:
            start_time = time()
            env, search_tree = self.start_game(idx, search_tree)
            end_time = time()
            logger.debug(
                f"Process{self.pid} play game {idx} time={end_time - start_time} sec, "
                f"turn={env.num_halfmoves / 2}:{env.winner}")
            if env.num_halfmoves <= 10:
                for i in range(10):
                    logger.debug(f"{env.board.screen[i]}")

            idx += 1

    def start_game(self, idx, search_tree):
        pipes = self.cur_pipes.pop()
        env = CChessEnv(self.config).reset()

        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        self.red = CChessPlayer(self.config,
                                search_tree=search_tree,
                                pipes=pipes)
        self.black = CChessPlayer(self.config,
                                  search_tree=search_tree,
                                  pipes=pipes)

        history = []
        cc = 0

        while not env.done:
            start_time = time()
            if env.red_to_move:
                action = self.red.action(env)
            else:
                action = self.black.action(env)
            end_time = time()
            logger.debug(
                f"Process{self.pid} Playing: {env.red_to_move}, action: {action}, time: {end_time - start_time}s"
            )
            env.step(action)
            history.append(action)
            if len(history) > 6 and history[-1] == history[-5]:
                cc = cc + 1
            else:
                cc = 0
            if env.num_halfmoves / 2 >= self.config.play.max_game_length:
                env.winner = Winner.draw

        if env.winner == Winner.red:
            red_win = 1
        elif env.winner == Winner.black:
            red_win = -1
        else:
            red_win = 0

        if env.num_halfmoves <= 10:
            logger.debug(f"History moves: {history}")

        self.red.finish_game(red_win)
        self.black.finish_game(-red_win)

        self.cur_pipes.append(pipes)
        self.save_record_data(env,
                              write=idx %
                              self.config.play_data.nb_game_save_record == 0)
        self.save_play_data(idx)
        self.remove_play_data()
        return env, search_tree

    def save_play_data(self, idx):
        data = []
        for i in range(len(self.red.moves)):
            data.append(self.red.moves[i])
            if i < len(self.black.moves):
                data.append(self.black.moves[i])

        self.buffer += data

        if not idx % self.config.play_data.nb_game_in_file == 0:
            return

        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir,
                            rc.play_data_filename_tmpl % game_id)
        logger.info(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def save_record_data(self, env, write=False):
        if not write:
            return
        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_record_dir,
                            rc.play_record_filename_tmpl % game_id)
        env.save_records(path)

    def remove_play_data(self):
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        try:
            for i in range(len(files) - self.config.play_data.max_file_num):
                os.remove(files[i])
        except:
            pass
예제 #15
0
class SelfPlayWorker:
    def __init__(self,
                 config: Config,
                 pipes=None,
                 pid=None,
                 use_history=False):
        self.config = config
        self.player = None
        self.cur_pipes = pipes
        self.id = pid
        self.buffer = []
        self.pid = os.getpid()
        self.use_history = use_history

    def start(self):
        # logger.info("pengge pengge pengge222")
        # logger.error("pengge pengge pengge111")
        # logger.debug("pengge pengge pengge")
        self.pid = os.getpid()
        ran = self.config.play.max_processes if self.config.play.max_processes > 5 else self.config.play.max_processes * 2
        sleep((self.pid % ran) * 10)
        logger.error(
            f"Selfplay#Start Process index = {self.id}, pid = {self.pid}")

        idx = 1
        self.buffer = []
        search_tree = defaultdict(VisitState)

        while True:
            start_time = time()
            search_tree = defaultdict(VisitState)
            value, turns, state, store = self.start_game(idx, search_tree)
            end_time = time()
            logger.error(
                f"Process {self.pid}-{self.id} play game {idx} time={(end_time - start_time):.1f} sec, "
                f"turn={turns / 2}, winner = {value:.2f} (1 = red, -1 = black, 0 draw)"
            )
            if turns <= 10:
                senv.render(state)
            if store:
                idx += 1
            sleep(random())

    def start_game(self, idx, search_tree):
        pipes = self.cur_pipes.pop()

        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        if random() > self.config.play.enable_resign_rate:
            enable_resign = True
        else:
            enable_resign = False

        self.player = CChessPlayer(self.config,
                                   search_tree=search_tree,
                                   pipes=pipes,
                                   enable_resign=enable_resign,
                                   debugging=False,
                                   use_history=self.use_history)

        state = senv.INIT_STATE
        history = [state]
        # policys = []
        value = 0
        turns = 0  # even == red; odd == black
        game_over = False
        final_move = None
        no_eat_count = 0
        check = False
        no_act = []
        increase_temp = False

        while not game_over:
            start_time = time()
            action, policy = self.player.action(state,
                                                turns,
                                                no_act,
                                                increase_temp=increase_temp)
            end_time = time()
            if action is None:
                logger.error(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            # if self.config.opts.log_move:
            #     logger.info(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            # logger.info(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            history.append(action)
            # policys.append(policy)
            try:
                state, no_eat = senv.new_step(state, action)
            except Exception as e:
                logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
                game_over = True
                value = 0
                break
            turns += 1
            if no_eat:
                no_eat_count += 1
            else:
                no_eat_count = 0
            history.append(state)

            if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(
                    state, need_check=True)
                if not game_over:
                    if not senv.has_attack_chessman(state):
                        logger.error(f"双方无进攻子力,作和。state = {state}")
                        game_over = True
                        value = 0
                increase_temp = False
                no_act = []
                if not game_over and not check and state in history[:-1]:
                    free_move = defaultdict(int)
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            if senv.will_check_or_catch(state, history[i + 1]):
                                no_act.append(history[i + 1])
                            elif not senv.be_catched(state, history[i + 1]):
                                increase_temp = True
                                free_move[state] += 1
                                if free_move[state] >= 3:
                                    # 作和棋处理
                                    game_over = True
                                    value = 0
                                    logger.error("闲着循环三次,作和棋处理")
                                    break

        if final_move:
            # policy = self.build_policy(final_move, False)
            history.append(final_move)
            # policys.append(policy)
            state = senv.step(state, final_move)
            turns += 1
            value = -value
            history.append(state)

        self.player.close()
        del search_tree
        del self.player
        gc.collect()
        if turns % 2 == 1:  # balck turn
            value = -value

        v = value
        if turns < 10:
            if random() > 0.9:
                store = True
            else:
                store = False
        else:
            store = True

        if store:
            data = [history[0]]
            for i in range(turns):
                k = i * 2
                data.append([history[k + 1], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, store

    def save_play_data(self, idx, data):
        self.buffer += data

        if not idx % self.config.play_data.nb_game_in_file == 0:
            return

        rc = self.config.resource
        utc_dt = datetime.utcnow().replace(tzinfo=timezone.utc)
        bj_dt = utc_dt.astimezone(timezone(timedelta(hours=8)))
        game_id = bj_dt.strftime("%Y%m%d-%H%M%S.%f")
        filename = rc.play_data_filename_tmpl % game_id
        path = os.path.join(rc.play_data_dir, filename)
        logger.error(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        if self.config.internet.distributed:
            upload_worker = Thread(target=self.upload_play_data,
                                   args=(path, filename),
                                   name="upload_worker")
            upload_worker.daemon = True
            upload_worker.start()
        self.buffer = []

    def upload_play_data(self, path, filename):
        digest = CChessModel.fetch_digest(
            self.config.resource.model_best_weight_path)
        data = {
            'digest': digest,
            'username': self.config.internet.username,
            'version': '2.4'
        }
        response = upload_file(self.config.internet.upload_url,
                               path,
                               filename,
                               data,
                               rm=False)
        if response is not None and response['status'] == 0:
            logger.error(f"Upload play data {filename} finished.")
        else:
            logger.error(
                f'Upload play data {filename} failed. {response.msg if response is not None else None}'
            )

    def remove_play_data(self):
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        try:
            for i in range(len(files) - self.config.play_data.max_file_num):
                os.remove(files[i])
        except:
            pass

    def build_policy(self, action, flip):
        labels_n = len(ActionLabelsRed)
        move_lookup = {
            move: i
            for move, i in zip(ActionLabelsRed, range(labels_n))
        }
        policy = np.zeros(labels_n)

        policy[move_lookup[action]] = 1

        if flip:
            policy = flip_policy(policy)
        return list(policy)
예제 #16
0
class SelfPlayWorker:
    def __init__(self, config: Config, pipes=None, pid=None):
        self.config = config
        self.player = None
        self.cur_pipes = pipes
        self.id = pid
        self.buffer = []
        self.pid = os.getpid()

    def start(self):
        logger.debug(
            f"Selfplay#Start Process index = {self.id}, pid = {self.pid}")

        idx = 1
        self.buffer = []
        search_tree = defaultdict(VisitState)

        while True:
            start_time = time()
            value, turns, state, search_tree, store = self.start_game(
                idx, search_tree)
            end_time = time()
            logger.debug(
                f"Process {self.pid}-{self.id} play game {idx} time={(end_time - start_time):.1f} sec, "
                f"turn={turns / 2}, winner = {value:.2f} (1 = red, -1 = black, 0 draw)"
            )
            if turns <= 10:
                senv.render(state)
            if store:
                idx += 1

    def start_game(self, idx, search_tree):
        pipes = self.cur_pipes.pop()

        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        if random() > self.config.play.enable_resign_rate:
            enable_resign = True
        else:
            enable_resign = False

        self.player = CChessPlayer(self.config,
                                   search_tree=search_tree,
                                   pipes=pipes,
                                   enable_resign=enable_resign,
                                   debugging=False)

        state = senv.INIT_STATE
        history = [state]
        policys = []
        value = 0
        turns = 0  # even == red; odd == black
        game_over = False
        final_move = None

        while not game_over:
            no_act = None
            if state in history[:-1]:
                no_act = []
                for i in range(len(history) - 1):
                    if history[i] == state:
                        no_act.append(history[i + 1])
            start_time = time()
            action, policy = self.player.action(state, turns, no_act)
            end_time = time()
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            # logger.debug(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            # for move, action_state in self.player.search_results.items():
            #     if action_state[0] >= 20:
            #         logger.info(f"move: {move}, prob: {action_state[0]}, Q_value: {action_state[1]:.2f}, Prior: {action_state[2]:.3f}")
            # self.player.search_results = {}
            history.append(action)
            policys.append(policy)
            state = senv.step(state, action)
            turns += 1
            history.append(state)

            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = senv.evaluate(state)
            else:
                game_over, value, final_move = senv.done(state)

        if final_move:
            policy = self.build_policy(final_move, False)
            history.append(final_move)
            policys.append(policy)
            state = senv.step(state, final_move)
            history.append(state)

        self.player.close()
        if turns % 2 == 1:  # balck turn
            value = -value

        v = value
        if v == 0:
            if random() > 0.5:
                store = True
            else:
                store = False
        else:
            store = True

        if store:
            data = []
            for i in range(turns):
                k = i * 2
                data.append([history[k], policys[i], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, search_tree, store

    def save_play_data(self, idx, data):
        self.buffer += data

        if not idx % self.config.play_data.nb_game_in_file == 0:
            return

        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        filename = rc.play_data_filename_tmpl % game_id
        path = os.path.join(rc.play_data_dir, filename)
        logger.info(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        if self.config.internet.distributed:
            upload_worker = Thread(target=self.upload_play_data,
                                   args=(path, filename),
                                   name="upload_worker")
            upload_worker.daemon = True
            upload_worker.start()
        self.buffer = []

    def upload_play_data(self, path, filename):
        digest = CChessModel.fetch_digest(
            self.config.resource.model_best_weight_path)
        data = {'digest': digest, 'username': self.config.internet.username}
        response = upload_file(self.config.internet.upload_url,
                               path,
                               filename,
                               data,
                               rm=False)
        if response is not None and response['status'] == 0:
            logger.info(f"Upload play data {filename} finished.")
        else:
            logger.error(
                f'Upload play data {filename} failed. {response.msg if response is not None else None}'
            )

    def remove_play_data(self):
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        try:
            for i in range(len(files) - self.config.play_data.max_file_num):
                os.remove(files[i])
        except:
            pass

    def build_policy(self, action, flip):
        labels_n = len(ActionLabelsRed)
        move_lookup = {
            move: i
            for move, i in zip(ActionLabelsRed, range(labels_n))
        }
        policy = np.zeros(labels_n)

        policy[move_lookup[action]] = 1

        if flip:
            policy = flip_policy(policy)
        return list(policy)
예제 #17
0
class PlayWithHuman:
    def __init__(self, config: Config):
        self.config = config
        self.env = CChessEnv()
        self.model = None
        self.pipe = None
        self.ai = None
        self.winstyle = 0
        self.chessmans = None
        self.human_move_first = True
        self.screen_width = 720
        self.height = 577
        self.width = 521
        self.chessman_w = 57
        self.chessman_h = 57
        self.disp_record_num = 15
        self.rec_labels = [None] * self.disp_record_num
        self.nn_value = 0
        self.mcts_moves = {}
        self.history = []
        if self.config.opts.bg_style == 'WOOD':
            self.chessman_w += 1
            self.chessman_h += 1

    def load_model(self):
        self.model = CChessModel(self.config)
        if self.config.opts.new or not load_best_model_weight(self.model):
            self.model.build()

    def init_screen(self):
        bestdepth = pygame.display.mode_ok([self.screen_width, self.height],
                                           self.winstyle, 32)
        screen = pygame.display.set_mode([self.screen_width, self.height],
                                         self.winstyle, bestdepth)
        pygame.display.set_caption("中国象棋Zero")
        # create the background, tile the bgd image
        bgdtile = load_image(f'{self.config.opts.bg_style}.GIF')
        bgdtile = pygame.transform.scale(bgdtile, (self.width, self.height))
        board_background = pygame.Surface([self.width, self.height])
        board_background.blit(bgdtile, (0, 0))
        widget_background = pygame.Surface(
            [self.screen_width - self.width, self.height])
        white_rect = Rect(0, 0, self.screen_width - self.width, self.height)
        widget_background.fill((255, 255, 255), white_rect)

        #create text label
        font_file = self.config.resource.font_path
        font = pygame.font.Font(font_file, 16)
        font_color = (0, 0, 0)
        font_background = (255, 255, 255)
        t = font.render("着法记录", True, font_color, font_background)
        t_rect = t.get_rect()
        t_rect.x = 10
        t_rect.y = 10
        widget_background.blit(t, t_rect)

        screen.blit(board_background, (0, 0))
        screen.blit(widget_background, (self.width, 0))
        pygame.display.flip()
        self.chessmans = pygame.sprite.Group()
        creat_sprite_group(self.chessmans, self.env.board.chessmans_hash,
                           self.chessman_w, self.chessman_h)
        return screen, board_background, widget_background

    def start(self, human_first=True):
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config,
                               search_tree=defaultdict(VisitState),
                               pipes=self.pipe,
                               enable_resign=True,
                               debugging=True)
        self.human_move_first = human_first

        pygame.init()
        screen, board_background, widget_background = self.init_screen()
        framerate = pygame.time.Clock()

        labels = ActionLabelsRed
        labels_n = len(ActionLabelsRed)

        current_chessman = None
        if human_first:
            self.env.board.calc_chessmans_moving_list()

        ai_worker = Thread(target=self.ai_move, name="ai_worker")
        ai_worker.daemon = True
        ai_worker.start()

        while not self.env.board.is_end():
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    self.env.board.print_record()
                    self.ai.close(wait=False)
                    game_id = datetime.now().strftime("%Y%m%d-%H%M%S")
                    path = os.path.join(
                        self.config.resource.play_record_dir,
                        self.config.resource.play_record_filename_tmpl %
                        game_id)
                    self.env.board.save_record(path)
                    sys.exit()
                elif event.type == VIDEORESIZE:
                    pass
                elif event.type == MOUSEBUTTONDOWN:
                    if human_first == self.env.red_to_move:
                        pressed_array = pygame.mouse.get_pressed()
                        for index in range(len(pressed_array)):
                            if index == 0 and pressed_array[index]:
                                mouse_x, mouse_y = pygame.mouse.get_pos()
                                col_num, row_num = translate_hit_area(
                                    mouse_x, mouse_y, self.chessman_w,
                                    self.chessman_h)
                                chessman_sprite = select_sprite_from_group(
                                    self.chessmans, col_num, row_num)
                                if current_chessman is None and chessman_sprite != None:
                                    if chessman_sprite.chessman.is_red == self.env.red_to_move:
                                        current_chessman = chessman_sprite
                                        chessman_sprite.is_selected = True
                                elif current_chessman != None and chessman_sprite != None:
                                    if chessman_sprite.chessman.is_red == self.env.red_to_move:
                                        current_chessman.is_selected = False
                                        current_chessman = chessman_sprite
                                        chessman_sprite.is_selected = True
                                    else:
                                        move = str(current_chessman.chessman.col_num) + str(current_chessman.chessman.row_num) +\
                                               str(col_num) + str(row_num)
                                        success = current_chessman.move(
                                            col_num, row_num, self.chessman_w,
                                            self.chessman_h)
                                        self.history.append(move)
                                        if success:
                                            self.chessmans.remove(
                                                chessman_sprite)
                                            chessman_sprite.kill()
                                            current_chessman.is_selected = False
                                            current_chessman = None
                                            self.history.append(
                                                self.env.get_state())
                                elif current_chessman != None and chessman_sprite is None:
                                    move = str(current_chessman.chessman.col_num) + str(current_chessman.chessman.row_num) +\
                                           str(col_num) + str(row_num)
                                    success = current_chessman.move(
                                        col_num, row_num, self.chessman_w,
                                        self.chessman_h)
                                    self.history.append(move)
                                    if success:
                                        current_chessman.is_selected = False
                                        current_chessman = None
                                        self.history.append(
                                            self.env.get_state())

            self.draw_widget(screen, widget_background)
            framerate.tick(20)
            # clear/erase the last drawn sprites
            self.chessmans.clear(screen, board_background)

            # update all the sprites
            self.chessmans.update()
            self.chessmans.draw(screen)
            pygame.display.update()

        self.ai.close(wait=False)
        logger.info(f"Winner is {self.env.board.winner} !!!")
        self.env.board.print_record()
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S")
        path = os.path.join(
            self.config.resource.play_record_dir,
            self.config.resource.play_record_filename_tmpl % game_id)
        self.env.board.save_record(path)
        sleep(3)

    def ai_move(self):
        ai_move_first = not self.human_move_first
        self.history = [self.env.get_state()]
        no_act = None
        while not self.env.done:
            if ai_move_first == self.env.red_to_move:
                labels = ActionLabelsRed
                labels_n = len(ActionLabelsRed)
                self.ai.search_results = {}
                state = self.env.get_state()
                logger.info(f"state = {state}")
                _, _, _, check = senv.done(state, need_check=True)
                if not check and state in self.history[:-1]:
                    no_act = []
                    free_move = defaultdict(int)
                    for i in range(len(self.history) - 1):
                        if self.history[i] == state:
                            # 如果走了下一步是将军或捉:禁止走那步
                            if senv.will_check_or_catch(
                                    state, self.history[i + 1]):
                                no_act.append(self.history[i + 1])
                            # 否则当作闲着处理
                            else:
                                free_move[state] += 1
                                if free_move[state] >= 2:
                                    # 作和棋处理
                                    self.env.winner = Winner.draw
                                    self.env.board.winner = Winner.draw
                                    break
                    if no_act:
                        logger.debug(f"no_act = {no_act}")
                action, policy = self.ai.action(state, self.env.num_halfmoves,
                                                no_act)
                if action is None:
                    logger.info("AI has resigned!")
                    return
                self.history.append(action)
                if not self.env.red_to_move:
                    action = flip_move(action)
                key = self.env.get_state()
                p, v = self.ai.debug[key]
                logger.info(f"check = {check}, NN value = {v:.3f}")
                self.nn_value = v
                logger.info("MCTS results:")
                self.mcts_moves = {}
                for move, action_state in self.ai.search_results.items():
                    move_cn = self.env.board.make_single_record(
                        int(move[0]), int(move[1]), int(move[2]), int(move[3]))
                    logger.info(
                        f"move: {move_cn}-{move}, visit count: {action_state[0]}, Q_value: {action_state[1]:.3f}, Prior: {action_state[2]:.3f}"
                    )
                    self.mcts_moves[move_cn] = action_state
                x0, y0, x1, y1 = int(action[0]), int(action[1]), int(
                    action[2]), int(action[3])
                chessman_sprite = select_sprite_from_group(
                    self.chessmans, x0, y0)
                sprite_dest = select_sprite_from_group(self.chessmans, x1, y1)
                if sprite_dest:
                    self.chessmans.remove(sprite_dest)
                    sprite_dest.kill()
                chessman_sprite.move(x1, y1, self.chessman_w, self.chessman_h)
                self.history.append(self.env.get_state())

    def draw_widget(self, screen, widget_background):
        white_rect = Rect(0, 0, self.screen_width - self.width, self.height)
        widget_background.fill((255, 255, 255), white_rect)
        pygame.draw.line(widget_background, (255, 0, 0), (10, 285),
                         (self.screen_width - self.width - 10, 285))
        screen.blit(widget_background, (self.width, 0))
        self.draw_records(screen, widget_background)
        self.draw_evaluation(screen, widget_background)

    def draw_records(self, screen, widget_background):
        text = '着法记录'
        self.draw_label(screen, widget_background, text, 10, 16, 10)
        records = self.env.board.record.split('\n')
        font_file = self.config.resource.font_path
        font = pygame.font.Font(font_file, 12)
        i = 0
        for record in records[-self.disp_record_num:]:
            self.rec_labels[i] = font.render(record, True, (0, 0, 0),
                                             (255, 255, 255))
            t_rect = self.rec_labels[i].get_rect()
            # t_rect.centerx = (self.screen_width - self.width) / 2
            t_rect.y = 35 + i * 15
            t_rect.x = 10
            t_rect.width = self.screen_width - self.width
            widget_background.blit(self.rec_labels[i], t_rect)
            i += 1
        screen.blit(widget_background, (self.width, 0))

    def draw_evaluation(self, screen, widget_background):
        title_label = 'CC-Zero信息'
        self.draw_label(screen, widget_background, title_label, 300, 16, 10)
        info_label = f'MCTS搜索次数:{self.config.play.simulation_num_per_move}'
        self.draw_label(screen, widget_background, info_label, 335, 14, 10)
        eval_label = f"当前局势评估: {self.nn_value:.3f}"
        self.draw_label(screen, widget_background, eval_label, 360, 14, 10)
        label = f"MCTS搜索结果:"
        self.draw_label(screen, widget_background, label, 395, 14, 10)
        label = f"着法 访问计数 动作价值 先验概率"
        self.draw_label(screen, widget_background, label, 415, 12, 10)
        i = 0
        tmp = copy.deepcopy(self.mcts_moves)
        for mov, action_state in tmp.items():
            label = f"{mov}"
            self.draw_label(screen, widget_background, label, 435 + i * 20, 12,
                            10)
            label = f"{action_state[0]}"
            self.draw_label(screen, widget_background, label, 435 + i * 20, 12,
                            70)
            label = f"{action_state[1]:.2f}"
            self.draw_label(screen, widget_background, label, 435 + i * 20, 12,
                            100)
            label = f"{action_state[2]:.3f}"
            self.draw_label(screen, widget_background, label, 435 + i * 20, 12,
                            150)
            i += 1

    def draw_label(self,
                   screen,
                   widget_background,
                   text,
                   y,
                   font_size,
                   x=None):
        font_file = self.config.resource.font_path
        font = pygame.font.Font(font_file, font_size)
        label = font.render(text, True, (0, 0, 0), (255, 255, 255))
        t_rect = label.get_rect()
        t_rect.y = y
        if x != None:
            t_rect.x = x
        else:
            t_rect.centerx = (self.screen_width - self.width) / 2
        widget_background.blit(label, t_rect)
        screen.blit(widget_background, (self.width, 0))
class SelfPlayWorker:
    def __init__(self, config: Config, pipes=None, pid=None):
        self.config = config
        self.player = None
        self.cur_pipes = pipes
        self.id = pid
        self.buffer = []
        self.pid = os.getpid()

    def start(self):
        self.pid = os.getpid()
        logger.debug(f"Selfplay#Start Process index = {self.id}, pid = {self.pid}")

        idx = 1
        self.buffer = []
        search_tree = defaultdict(VisitState)

        while True:
            start_time = time()
            value, turns, state, search_tree, store = self.start_game(idx, search_tree)
            end_time = time()
            if value != 1 and value != -1:
                winner = 'Draw'
            elif idx % 2 == 0 and value == 1 or idx % 2 == 1 and value == -1:
                winner = 'AlphaHe'
            else:
                winner = 'Eleeye'

            logger.debug(f"Process {self.pid}-{self.id} play game {idx} time={(end_time - start_time):.1f} sec, "
                         f"turn={turns / 2}, value = {value:.2f}, winner is {winner}")
            if turns <= 10 and store:
                senv.render(state)
            if store:
                idx += 1

    def start_game(self, idx, search_tree):
        pipes = self.cur_pipes.pop()

        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        if random() > self.config.play.enable_resign_rate:
            enable_resign = True
        else:
            enable_resign = False

        self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes, enable_resign=enable_resign, debugging=False)

        state = senv.INIT_STATE
        history = [state]
        policys = [] 
        value = 0
        turns = 0       # even == red; odd == black
        game_over = False
        is_alpha_red = True if idx % 2 == 0 else False
        final_move = None

        while not game_over:
            if (is_alpha_red and turns % 2 == 0) or (not is_alpha_red and turns % 2 == 1):
                no_act = None
                if state in history[:-1]:
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            no_act.append(history[i + 1])
                action, policy = self.player.action(state, turns, no_act)
                if action is None:
                    logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
            else:
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
                if turns % 2 == 1:
                    action = flip_move(action)
                try:
                    policy = self.build_policy(action, False)
                except Exception as e:
                    logger.error(f"Build policy error {e}, action = {action}, state = {state}, fen = {fen}")
                    value = 0
                    break
            history.append(action)
            policys.append(policy)
            state = senv.step(state, action)
            turns += 1
            history.append(state)

            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = senv.evaluate(state)
            else:
                game_over, value, final_move = senv.done(state)

        if final_move:
            policy = self.build_policy(final_move, False)
            history.append(final_move)
            policys.append(policy)
            state = senv.step(state, final_move)
            history.append(state)

        self.player.close()
        if turns % 2 == 1:  # balck turn
            value = -value

        v = value
        if v == 0 or turns <= 10:
            if random() > 0.7:
                store = True
            else:
                store = False
        else:
            store = True

        if store:
            data = []
            for i in range(turns):
                k = i * 2
                data.append([history[k], policys[i], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, search_tree, store

    def get_ucci_move(self, fen, time=3):
        p = subprocess.Popen(self.config.resource.eleeye_path,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=True)
        setfen = f'position fen {fen}\n'
        setrandom = f'setoption randomness {self.config.opts.random}\n'
        cmd = 'ucci\n' + setrandom + setfen + f'go time {time * 1000}\n'
        try:
            out, err = p.communicate(cmd, timeout=time+0.5)
        except subprocess.TimeoutExpired:
            p.kill()
            try:
                out, err = p.communicate()
            except Exception as e:
                logger.error(f"{e}, cmd = {cmd}")
                return self.get_ucci_move(fen, time+1)
        lines = out.split('\n')
        if lines[-2] == 'nobestmove':
            return None
        move = lines[-2].split(' ')[1]
        if move == 'depth':
            move = lines[-1].split(' ')[6]
        return senv.parse_ucci_move(move)

    def save_play_data(self, idx, data):
        self.buffer += data

        if not idx % self.config.play_data.nb_game_in_file == 0:
            return

        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir, rc.play_data_filename_tmpl % game_id)
        logger.info(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        try:
            for i in range(len(files) - self.config.play_data.max_file_num):
                os.remove(files[i])
        except:
            pass

    def build_policy(self, action, flip):
        labels_n = len(ActionLabelsRed)
        move_lookup = {move: i for move, i in zip(ActionLabelsRed, range(labels_n))}
        policy = np.zeros(labels_n)

        policy[move_lookup[action]] = 1

        if flip:
            policy = flip_policy(policy)
        return list(policy)