Python CChessPlayer.close Examples

Programming Language: Python

Namespace/Package Name: src.agent.mcts

Class/Type: CChessPlayer

Method/Function: close

Examples at hotexamples.com: 7

Python CChessPlayer.close - 7 examples found. These are the top-rated real-world Python examples of src.agent.mcts.CChessPlayer.close, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

CChessPlayer(7)

close(7)

action(4)

Frequently Used Methods

CChessPlayer (7)

close (7)

action (4)

Example #1

Show file

File: compute_elo_windows.py Project: ArronHZG/AlphaZero_ChineseChess

def self_play_buffer(config, pipes_bt, pipes_ng, idx,
                     res_data) -> (tuple, list):
    """Play one game between two ``CChessPlayer`` instances and record it.

    ``player1`` runs on a pipe borrowed from ``pipes_bt`` and ``player2`` on a
    pipe borrowed from ``pipes_ng``; both pipes are appended back to their
    lists before returning.  For an even ``idx`` player1 plays red (moves on
    even turns) and player2 plays black; for an odd ``idx`` the sides are
    swapped.

    Args:
        config: Project configuration.  ``config.play.simulation_num_per_move``
            is overwritten with a random multiple of 100 in ``[800, 1200]``.
        pipes_bt: List of pipes used by player1 (one is popped and returned).
        pipes_ng: List of pipes used by player2 (one is popped and returned).
        idx: Game index; its parity selects the colours and it is echoed in
            the result tuple.
        res_data: Mapping providing ``['base']['digest']`` and
            ``['unchecked']['digest']``, stored at the head of the record.

    Returns:
        ``((turns, v, idx), data)`` where ``v`` is the final game value
        (presumably from red's point of view -- confirm against ``senv.done``)
        and ``data`` is ``[red_digest, black_digest, initial_state,
        [move, value], ...]`` with the value sign alternating per move.
    """
    sleep(random())
    playouts = randint(8, 12) * 100
    config.play.simulation_num_per_move = playouts
    logger.info(f"Set playouts = {config.play.simulation_num_per_move}")

    pipe1 = pipes_bt.pop()  # borrowed; appended back before returning
    pipe2 = pipes_ng.pop()

    player1 = CChessPlayer(config,
                           search_tree=defaultdict(VisitState),
                           pipes=pipe1,
                           enable_resign=False,
                           debugging=False)
    player2 = CChessPlayer(config,
                           search_tree=defaultdict(VisitState),
                           pipes=pipe2,
                           enable_resign=False,
                           debugging=False)

    # even idx: player1 = red, player2 = black; odd idx: the other way round
    if idx % 2 == 0:
        red = player1
        black = player2
        print(f"基准模型执红，待评测模型执黑")
    else:
        red = player2
        black = player1
        print(f"待评测模型执红，基准模型执黑")

    state = senv.INIT_STATE
    # history alternates: state, move, state, move, ...
    history = [state]
    value = 0
    turns = 0
    game_over = False
    final_move = None
    no_eat_count = 0
    check = False

    while not game_over:
        no_act = None
        # Repetition handling: only when not in check and the current state
        # has already occurred earlier in the game.
        if not check and state in history[:-1]:
            no_act = []
            free_move = defaultdict(int)
            for i in range(len(history) - 1):
                if history[i] == state:
                    # The move played from this state before was a check or
                    # a chase: forbid repeating it.
                    if senv.will_check_or_catch(state, history[i + 1]):
                        no_act.append(history[i + 1])
                    # Otherwise count it as an idle move.
                    else:
                        free_move[state] += 1
                        if free_move[state] >= 2:
                            # Repeated idle moves: declare a draw.
                            game_over = True
                            value = 0
                            logger.info("闲着循环三次，作和棋处理")
                            break
        if game_over:
            break
        start_time = time()
        # Keep the policy so the error handler below can report it; the
        # original discarded it and then hit a NameError while logging.
        if turns % 2 == 0:
            action, policy = red.action(state, turns, no_act=no_act)
        else:
            action, policy = black.action(state, turns, no_act=no_act)
        end_time = time()
        if action is None:
            # No action returned: the side to move gives up.
            print(f"{turns % 2} (0 = 红; 1 = 黑) 投降了!")
            value = -1
            break
        print(
            f"博弈中: 回合{turns / 2 + 1} {'红方走棋' if turns % 2 == 0 else '黑方走棋'}, 着法: {action}, 用时: {(end_time - start_time):.1f}s"
        )
        history.append(action)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
            value = 0
            break
        turns += 1
        if no_eat:
            no_eat_count += 1
        else:
            no_eat_count = 0
        history.append(state)

        # Draw after 120 consecutive non-capturing plies or when the
        # configured maximum game length is reached.
        if no_eat_count >= 120 or turns / 2 >= config.play.max_game_length:
            game_over = True
            value = 0
        else:
            game_over, value, final_move, check = senv.done(state,
                                                            need_check=True)
            if not game_over:
                if not senv.has_attack_chessman(state):
                    logger.info(f"双方无进攻子力，作和。state = {state}")
                    game_over = True
                    value = 0

    # senv.done may report a concluding move; play it out so the record is
    # complete, flipping the value because the side to move changes.
    if final_move:
        history.append(final_move)
        state = senv.step(state, final_move)
        turns += 1
        value = -value
        history.append(state)

    # Record header: digest of the red model first, then the black model.
    if idx % 2 == 0:
        data = [res_data['base']['digest'], res_data['unchecked']['digest']]
    else:
        data = [res_data['unchecked']['digest'], res_data['base']['digest']]
    player1.close()
    player2.close()
    del player1, player2
    gc.collect()

    if turns % 2 == 1:  # black's turn
        value = -value

    v = value
    data.append(history[0])
    for i in range(turns):
        k = i * 2
        # history[k + 1] is the i-th move; the value alternates sign per ply.
        data.append([history[k + 1], value])
        value = -value

    pipes_bt.append(pipe1)
    pipes_ng.append(pipe2)
    return (turns, v, idx), data

Example #2

Show file

File: play_with_ucci_engine.py Project: ArronHZG/AlphaZero_ChineseChess

class SelfPlayWorker:
    """Worker that plays the MCTS ``CChessPlayer`` against a UCCI engine.

    The engine executable is taken from ``config.resource.eleeye_path``.
    Finished games are buffered and periodically written to disk.
    """

    def __init__(self, config: Config, pipes=None, pid=None):
        """Store the configuration and the shared pipe pool.

        Args:
            config: Project configuration object.
            pipes: List-like pool of pipes; one is popped per game and
                appended back afterwards.
            pid: Worker index, stored as ``self.id`` (``self.pid`` holds the
                OS process id).
        """
        self.config = config
        self.player = None
        self.cur_pipes = pipes
        self.id = pid
        self.buffer = []
        self.pid = os.getpid()

    def start(self):
        """Play games in an endless loop and log each result."""
        self.pid = os.getpid()
        logger.debug(
            f"Selfplay#Start Process index = {self.id}, pid = {self.pid}")

        idx = 1
        self.buffer = []

        while True:
            search_tree = defaultdict(VisitState)
            start_time = time()
            value, turns, state, store = self.start_game(idx, search_tree)
            end_time = time()
            if value != 1 and value != -1:
                winner = 'Draw'
            elif (idx % 2 == 0 and value == 1) or (idx % 2 == 1
                                                   and value == -1):
                # The MCTS player is red on even idx (see start_game).
                winner = 'AlphaHe'
            else:
                winner = 'Eleeye'

            logger.debug(
                f"Process {self.pid}-{self.id} play game {idx} time={(end_time - start_time):.1f} sec, "
                f"turn={turns / 2}, value = {value:.2f}, winner is {winner}")
            if turns <= 10 and store:
                senv.render(state)
            # The game index only advances when the game was stored.
            if store:
                idx += 1

    def start_game(self, idx, search_tree):
        """Play one game against the engine and optionally store it.

        Args:
            idx: Game index; on even values the MCTS player is red.
            search_tree: Search tree handed to ``CChessPlayer``; replaced by
                a fresh one depending on the ``share_mtcs_info_in_self_play``
                and ``reset_mtcs_info_per_game`` settings.

        Returns:
            ``(v, turns, state, store)``: final value, number of plies, final
            state, and whether the game was added to the play data.
        """
        pipes = self.cur_pipes.pop()

        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        enable_resign = random() > self.config.play.enable_resign_rate

        self.player = CChessPlayer(self.config,
                                   search_tree=search_tree,
                                   pipes=pipes,
                                   enable_resign=enable_resign,
                                   debugging=False)

        state = senv.INIT_STATE
        # history alternates: state, move, state, move, ...
        history = [state]
        value = 0
        turns = 0  # even == red; odd == black
        game_over = False
        is_alpha_red = idx % 2 == 0
        final_move = None
        check = False

        while not game_over:
            if (is_alpha_red and turns % 2 == 0) or (not is_alpha_red
                                                     and turns % 2 == 1):
                # MCTS player's turn.  When the position repeats (and we are
                # not in check) forbid every move previously played from it.
                no_act = None
                if not check and state in history[:-1]:
                    no_act = []
                    for i in range(len(history) - 1):
                        if history[i] == state:
                            no_act.append(history[i + 1])
                action, _ = self.player.action(state, turns, no_act)
                if action is None:
                    logger.debug(
                        f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
            else:
                # Engine's turn.
                fen = senv.state_to_fen(state, turns)
                action = self.get_ucci_move(fen)
                if action is None:
                    logger.debug(
                        f"{turns % 2} (0 = red; 1 = black) has resigned!")
                    value = -1
                    break
                if turns % 2 == 1:
                    action = flip_move(action)
            history.append(action)
            state = senv.step(state, action)
            turns += 1
            history.append(state)

            if turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(
                    state, need_check=True)

        # Play out the concluding move reported by senv.done, if any.
        if final_move:
            history.append(final_move)
            state = senv.step(state, final_move)
            history.append(state)
            turns += 1
            value = -value

        self.player.close()
        del search_tree
        del self.player
        gc.collect()
        if turns % 2 == 1:  # black's turn
            value = -value

        v = value
        # Games of at most 10 plies are kept only about 30% of the time.
        store = turns > 10 or random() > 0.7

        if store:
            data = [history[0]]
            for i in range(turns):
                k = i * 2
                # history[k + 1] is the i-th move; value alternates per ply.
                data.append([history[k + 1], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, store

    def get_ucci_move(self, fen, time=3):
        """Ask the external engine for a move in the position ``fen``.

        A new engine process is started for every call.  Note that the
        ``time`` parameter shadows the module-level ``time`` function inside
        this method.

        Args:
            fen: Position string inserted into the ``position fen`` command.
            time: Thinking time in seconds; the process is given an extra
                0.5 s before it is killed.

        Returns:
            The result of ``senv.parse_ucci_move`` for the engine's move, or
            ``None`` when the engine answers ``nobestmove``.
        """
        p = subprocess.Popen(self.config.resource.eleeye_path,
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             universal_newlines=True)
        setfen = f'position fen {fen}\n'
        setrandom = f'setoption randomness {self.config.opts.random}\n'
        cmd = 'ucci\n' + setrandom + setfen + f'go time {time * 1000}\n'
        try:
            out, err = p.communicate(cmd, timeout=time + 0.5)
        except subprocess.TimeoutExpired:
            p.kill()
            try:
                # Collect whatever the engine printed before it was killed.
                out, err = p.communicate()
            except Exception as e:
                logger.error(f"{e}, cmd = {cmd}")
                # Retry with one more second of thinking time.
                return self.get_ucci_move(fen, time + 1)
        lines = out.split('\n')
        if lines[-2] == 'nobestmove':
            return None
        move = lines[-2].split(' ')[1]
        if move == 'depth':
            # The second-to-last line is an info line rather than the final
            # answer; take the 7th token of the last line instead.
            move = lines[-1].split(' ')[6]
        return senv.parse_ucci_move(move)

    def save_play_data(self, idx, data):
        """Buffer ``data`` and flush the buffer to a file when due.

        The buffer is written only when ``idx`` is a multiple of
        ``config.play_data.nb_game_in_file``; otherwise the data is just
        accumulated in ``self.buffer``.
        """
        self.buffer += data

        if not idx % self.config.play_data.nb_game_in_file == 0:
            return

        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir,
                            rc.play_data_filename_tmpl % game_id)
        logger.info(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        """Delete surplus play-data files beyond ``max_file_num``.

        Removes the first ``len(files) - max_file_num`` entries returned by
        ``get_game_data_filenames`` (presumably the oldest ones -- verify the
        ordering of that helper).  Deletion is best-effort: it stops at the
        first file-system error, e.g. when another worker already removed the
        file.
        """
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        try:
            for i in range(len(files) - self.config.play_data.max_file_num):
                os.remove(files[i])
        except OSError as e:
            # Only file-system errors are tolerated; anything else (including
            # KeyboardInterrupt) must propagate instead of being swallowed.
            logger.debug(
                f"Process {self.pid} failed to remove old play data: {e}")

    def build_policy(self, action, flip):
        """Return a one-hot policy list for ``action``.

        Args:
            action: A move contained in ``ActionLabelsRed``.
            flip: When true the vector is passed through ``flip_policy``.
        """
        labels_n = len(ActionLabelsRed)
        move_lookup = {move: i for i, move in enumerate(ActionLabelsRed)}
        policy = np.zeros(labels_n)

        policy[move_lookup[action]] = 1

        if flip:
            policy = flip_policy(policy)
        return list(policy)

Example #3

Show file

class SelfPlayWorker:
    """Worker that generates self-play games with a single ``CChessPlayer``.

    Finished games are buffered and periodically written to disk.
    """

    def __init__(self, config: Config, pipes=None, pid=None):
        """Store the configuration and the shared pipe pool.

        Args:
            config: Project configuration object.
            pipes: List-like pool of pipes; one is popped per game and
                appended back afterwards.
            pid: Worker index, stored as ``self.id`` (``self.pid`` holds the
                OS process id).
        """
        self.config = config
        self.player = None
        self.cur_pipes = pipes
        self.id = pid
        self.buffer = []
        self.pid = os.getpid()

    def start(self):
        """Play games in an endless loop and log each result."""
        self.pid = os.getpid()
        logger.debug(f"Selfplay#Start Process index = {self.id}, pid = {self.pid}")

        idx = 1
        self.buffer = []

        while True:
            start_time = time()
            search_tree = defaultdict(VisitState)
            value, turns, state, store = self.start_game(idx, search_tree)
            end_time = time()
            logger.debug(f"Process {self.pid}-{self.id} play game {idx} time={(end_time - start_time):.1f} sec, "
                         f"turn={turns / 2}, winner = {value:.2f} (1 = red, -1 = black, 0 draw)")
            if turns <= 10:
                senv.render(state)
            # The game index only advances when the game was stored.
            if store:
                idx += 1
            sleep(random())

    def start_game(self, idx, search_tree):
        """Play one self-play game and optionally store it.

        Args:
            idx: Game index, used for the search-tree reset schedule and for
                deciding when the buffer is flushed.
            search_tree: Search tree handed to ``CChessPlayer``; replaced by
                a fresh one depending on the ``share_mtcs_info_in_self_play``
                and ``reset_mtcs_info_per_game`` settings.

        Returns:
            ``(v, turns, state, store)``: final value, number of plies, final
            state, and whether the game was added to the play data.
        """
        pipes = self.cur_pipes.pop()

        if not self.config.play.share_mtcs_info_in_self_play or \
            idx % self.config.play.reset_mtcs_info_per_game == 0:
            search_tree = defaultdict(VisitState)

        enable_resign = random() > self.config.play.enable_resign_rate

        self.player = CChessPlayer(self.config, search_tree=search_tree, pipes=pipes, enable_resign=enable_resign, debugging=False)

        state = senv.INIT_STATE
        # history alternates: state, move, state, move, ...
        history = [state]
        value = 0
        turns = 0       # even == red; odd == black
        game_over = False
        final_move = None
        no_eat_count = 0
        check = False

        while not game_over:
            no_act = None
            # Repetition handling: only when not in check and the current
            # state has already occurred earlier in the game.
            if not check and state in history[:-1]:
                no_act = []
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        # The move played from this state before was a check
                        # or a chase: forbid repeating it.
                        if senv.will_check_or_catch(state, history[i+1]):
                            no_act.append(history[i + 1])
                        # Otherwise count it as an idle move.
                        else:
                            free_move[state] += 1
                            if free_move[state] >= 2:
                                # Repeated idle moves: declare a draw.
                                game_over = True
                                value = 0
                                logger.info("闲着循环三次，作和棋处理")
                                break
            if game_over:
                break
            start_time = time()
            action, policy = self.player.action(state, turns, no_act)
            end_time = time()
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            if self.config.opts.log_move:
                logger.info(f"Process{self.pid} Playing: {turns % 2}, action: {action}, time: {(end_time - start_time):.1f}s")
            history.append(action)
            try:
                state, no_eat = senv.new_step(state, action)
            except Exception as e:
                logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
                game_over = True
                value = 0
                break
            turns += 1
            if no_eat:
                no_eat_count += 1
            else:
                no_eat_count = 0
            history.append(state)

            # Draw after 120 consecutive non-capturing plies or when the
            # configured maximum game length is reached.
            if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(state, need_check=True)
                if not game_over:
                    if not senv.has_attack_chessman(state):
                        logger.info(f"双方无进攻子力，作和。state = {state}")
                        game_over = True
                        value = 0

        # Play out the concluding move reported by senv.done, if any.
        if final_move:
            history.append(final_move)
            state = senv.step(state, final_move)
            turns += 1
            value = -value
            history.append(state)

        self.player.close()
        del search_tree
        del self.player
        gc.collect()
        if turns % 2 == 1:  # black's turn
            value = -value

        v = value
        # Games shorter than 10 plies are kept only about 10% of the time.
        store = turns >= 10 or random() > 0.9

        if store:
            data = [history[0]]
            for i in range(turns):
                k = i * 2
                # history[k + 1] is the i-th move; value alternates per ply.
                data.append([history[k + 1], value])
                value = -value
            self.save_play_data(idx, data)

        self.cur_pipes.append(pipes)
        self.remove_play_data()
        return v, turns, state, store

    def save_play_data(self, idx, data):
        """Buffer ``data`` and flush the buffer to a file when due.

        The buffer is written only when ``idx`` is a multiple of
        ``config.play_data.nb_game_in_file``; otherwise the data is just
        accumulated in ``self.buffer``.  The file name carries a UTC+8
        timestamp.
        """
        self.buffer += data

        if not idx % self.config.play_data.nb_game_in_file == 0:
            return

        rc = self.config.resource
        # Aware "now" in UTC+8; replaces the deprecated datetime.utcnow().
        bj_dt = datetime.now(timezone(timedelta(hours=8)))
        game_id = bj_dt.strftime("%Y%m%d-%H%M%S.%f")
        filename = rc.play_data_filename_tmpl % game_id
        path = os.path.join(rc.play_data_dir, filename)
        logger.info(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        """Delete surplus play-data files beyond ``max_file_num``.

        Removes the first ``len(files) - max_file_num`` entries returned by
        ``get_game_data_filenames`` (presumably the oldest ones -- verify the
        ordering of that helper).  Deletion is best-effort: it stops at the
        first file-system error, e.g. when another worker already removed the
        file.
        """
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        try:
            for i in range(len(files) - self.config.play_data.max_file_num):
                os.remove(files[i])
        except OSError as e:
            # Only file-system errors are tolerated; anything else (including
            # KeyboardInterrupt) must propagate instead of being swallowed.
            logger.debug(f"Process {self.pid} failed to remove old play data: {e}")

    def build_policy(self, action, flip):
        """Return a one-hot policy list for ``action``.

        Args:
            action: A move contained in ``ActionLabelsRed``.
            flip: When true the vector is passed through ``flip_policy``.
        """
        labels_n = len(ActionLabelsRed)
        move_lookup = {move: i for i, move in enumerate(ActionLabelsRed)}
        policy = np.zeros(labels_n)

        policy[move_lookup[action]] = 1

        if flip:
            policy = flip_policy(policy)
        return list(policy)

Example #4

Show file

class EvaluateWorker:
    """Worker that pits the baseline model against the model under evaluation.

    Each finished game is written to disk and uploaded together with its
    result; the worker keeps playing while the server still reports the same
    pair of model digests.
    """

    def __init__(self,
                 config: Config,
                 pipes1=None,
                 pipes2=None,
                 pid=None,
                 data=None,
                 hist_base=True,
                 hist_ng=True):
        """Store configuration, pipe pools and model metadata.

        Args:
            config: Project configuration object.
            pipes1: Pipe pool for player1 (stored as ``pipes_bt``).
            pipes2: Pipe pool for player2 (stored as ``pipes_ng``).
            pid: Worker index used in log messages.
            data: Mapping providing ``['base']['digest']`` and
                ``['unchecked']['digest']``.
            hist_base: ``use_history`` flag passed to player1.
            hist_ng: ``use_history`` flag passed to player2.
        """
        self.config = config
        self.player_bt = None
        self.player_ng = None
        self.pid = pid
        self.pipes_bt = pipes1
        self.pipes_ng = pipes2
        self.data = data
        self.hist_base = hist_base
        self.hist_ng = hist_ng

    def start(self):
        """Play and upload evaluation games until the model pair changes."""
        logger.debug(
            f"Evaluate#Start Process index = {self.pid}, pid = {os.getpid()}")
        need_evaluate = True
        self.config.opts.evaluate = True

        while need_evaluate:
            # idx picks the colours at random: 0 -> player1 is red.
            idx = 0 if random() > 0.5 else 1
            start_time = time()
            value, turns, data = self.start_game(idx)
            end_time = time()

            if (value == 1 and idx == 0) or (value == -1 and idx == 1):
                result = '基准模型胜'
            elif (value == 1 and idx == 1) or (value == -1 and idx == 0):
                result = '待评测模型胜'
            else:
                result = '双方连续60回合未吃子，和棋'

            if value == -1:  # loss
                score = 0
            elif value == 1:  # win
                score = 1
            else:
                score = 0.5

            # With idx == 0 the score computed above is inverted before it is
            # reported.
            if idx == 0:
                score = 1 - score

            logger.info(
                f"进程{self.pid}评测完毕 用时{(end_time - start_time):.1f}秒, "
                f"{turns / 2}回合, {result}, 得分：{score}, value = {value}, idx = {idx}"
            )

            response = self.save_play_data(idx, data, value, score)
            if response and int(response['status']) == 0:
                logger.info('评测结果上传成功！')
            else:
                logger.info(f"评测结果上传失败，服务器返回{response}")

            # Continue only while the server still lists the same model pair.
            response = http_request(
                self.config.internet.get_evaluate_model_url)
            if int(response['status']) == 0 and response['data']['base']['digest'] == self.data['base']['digest']\
                and response['data']['unchecked']['digest'] == self.data['unchecked']['digest']:
                need_evaluate = True
                logger.info(f"进程{self.pid}继续评测")
            else:
                need_evaluate = False
                logger.info(f"进程{self.pid}终止评测")

    def start_game(self, idx):
        """Play one evaluation game.

        Args:
            idx: For an even value player1 plays red and player2 black; for
                an odd value the sides are swapped.

        Returns:
            ``(value, turns, data)`` where ``data`` is ``[red_digest,
            black_digest, initial_state, [move, value], ...]`` with the value
            sign alternating per move.
        """
        sleep(random())
        playouts = randint(8, 12) * 100
        self.config.play.simulation_num_per_move = playouts
        logger.info(
            f"Set playouts = {self.config.play.simulation_num_per_move}")

        pipe1 = self.pipes_bt.pop()
        pipe2 = self.pipes_ng.pop()
        search_tree1 = defaultdict(VisitState)
        search_tree2 = defaultdict(VisitState)

        self.player1 = CChessPlayer(self.config,
                                    search_tree=search_tree1,
                                    pipes=pipe1,
                                    debugging=False,
                                    enable_resign=False,
                                    use_history=self.hist_base)
        self.player2 = CChessPlayer(self.config,
                                    search_tree=search_tree2,
                                    pipes=pipe2,
                                    debugging=False,
                                    enable_resign=False,
                                    use_history=self.hist_ng)

        # even idx: player1 = red, player2 = black; odd idx: the reverse
        if idx % 2 == 0:
            red = self.player1
            black = self.player2
            logger.info(f"进程id = {self.pid} 基准模型执红，待评测模型执黑")
        else:
            red = self.player2
            black = self.player1
            logger.info(f"进程id = {self.pid} 待评测模型执红，基准模型执黑")

        state = senv.INIT_STATE
        # history alternates: state, move, state, move, ...
        history = [state]
        value = 0  # best model's value
        turns = 0  # even == red; odd == black
        game_over = False
        # Must be initialised: the loop can exit (resignation, draw) before
        # senv.done ever assigns it, which used to raise UnboundLocalError.
        final_move = None
        no_eat_count = 0
        check = False

        while not game_over:
            start_time = time()
            no_act = None
            # Repetition handling: only when not in check and the current
            # state has already occurred earlier in the game.
            if not check and state in history[:-1]:
                no_act = []
                free_move = defaultdict(int)
                for i in range(len(history) - 1):
                    if history[i] == state:
                        # The move played from this state before was a check
                        # or a chase: forbid repeating it.
                        if senv.will_check_or_catch(state, history[i + 1]):
                            no_act.append(history[i + 1])
                        # Otherwise count it as an idle move.
                        else:
                            free_move[state] += 1
                            if free_move[state] >= 2:
                                # Repeated idle moves: declare a draw.
                                game_over = True
                                value = 0
                                logger.info("闲着循环三次，作和棋处理")
                                break
            if game_over:
                break
            if turns % 2 == 0:
                action, _ = red.action(state, turns, no_act=no_act)
            else:
                action, _ = black.action(state, turns, no_act=no_act)
            end_time = time()
            if self.config.opts.log_move:
                logger.debug(
                    f"进程id = {self.pid}, action = {action}, turns = {turns}, time = {(end_time-start_time):.1f}"
                )
            if action is None:
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break
            history.append(action)
            state, no_eat = senv.new_step(state, action)
            turns += 1
            if no_eat:
                no_eat_count += 1
            else:
                no_eat_count = 0
            history.append(state)

            # Draw after 120 consecutive non-capturing plies or when the
            # configured maximum game length is reached.
            if no_eat_count >= 120 or turns / 2 >= self.config.play.max_game_length:
                game_over = True
                value = 0
            else:
                game_over, value, final_move, check = senv.done(
                    state, need_check=True)
                if not game_over:
                    if not senv.has_attack_chessman(state):
                        logger.info(f"双方无进攻子力，作和。state = {state}")
                        game_over = True
                        value = 0

        # Play out the concluding move reported by senv.done, if any.
        if final_move:
            history.append(final_move)
            state = senv.step(state, final_move)
            turns += 1
            value = -value
            history.append(state)

        # Record header: digest of the red model first, then the black model.
        if idx % 2 == 0:
            data = [
                self.data['base']['digest'], self.data['unchecked']['digest']
            ]
        else:
            data = [
                self.data['unchecked']['digest'], self.data['base']['digest']
            ]
        self.player1.close()
        self.player2.close()

        if turns % 2 == 1:  # black turn
            value = -value

        v = value
        data.append(history[0])
        for i in range(turns):
            k = i * 2
            # history[k + 1] is the i-th move; v alternates sign per ply.
            data.append([history[k + 1], v])
            v = -v

        self.pipes_bt.append(pipe1)
        self.pipes_ng.append(pipe2)
        return value, turns, data

    def save_play_data(self, idx, data, value, score):
        """Write ``data`` to a timestamped file and upload it.

        ``data[0]`` and ``data[1]`` are sent as the red and black digests.
        ``idx`` is accepted but not used.

        Returns:
            Whatever ``upload_eval_data`` returns.
        """
        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        filename = rc.play_data_filename_tmpl % game_id
        path = os.path.join(rc.play_data_dir, filename)
        logger.info(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, data)
        logger.info(f"Uploading play data {filename} ...")
        red, black = data[0], data[1]
        return self.upload_eval_data(path, filename, red, black, value, score)

    def upload_eval_data(self, path, filename, red, black, result, score):
        """Upload the game file with its metadata and return the response.

        The form data carries the digest of the model under evaluation, the
        red/black digests, the result, the score and the SHA-256 of the file.
        """
        file_hash = self.fetch_digest(path)
        data = {
            'digest': self.data['unchecked']['digest'],
            'red_digest': red,
            'black_digest': black,
            'result': result,
            'score': score,
            'hash': file_hash
        }
        response = upload_file(self.config.internet.upload_eval_url,
                               path,
                               filename,
                               data,
                               rm=False)
        return response

    def fetch_digest(self, file_path):
        """Return the SHA-256 hex digest of ``file_path``.

        Returns ``None`` when the file does not exist.
        """
        if os.path.exists(file_path):
            m = hashlib.sha256()
            with open(file_path, "rb") as f:
                m.update(f.read())
            return m.hexdigest()
        return None

Example #5

Show file

File: self_play_windows.py Project: ArronHZG/AlphaZero_ChineseChess

def self_play_buffer(config, cur) -> (tuple, list):
    """Run a single self-play game and hand back its outcome and move record.

    A pipe is popped from ``cur`` for the duration of the game and appended
    back before returning.

    Args:
        config: Project configuration; ``config.play.enable_resign_rate`` and
            ``config.play.max_game_length`` are read here.
        cur: List-like pool of pipes.

    Returns:
        ``((turns, value), data)`` where ``data`` is
        ``[initial_state, [move, value], ...]`` with the value sign
        alternating from one move to the next.
    """
    pipe = cur.pop()  # on loan; given back at the end

    resign_allowed = random() > config.play.enable_resign_rate
    player = CChessPlayer(config,
                          search_tree=defaultdict(VisitState),
                          pipes=pipe,
                          enable_resign=resign_allowed,
                          debugging=False)

    state = senv.INIT_STATE
    # Alternating sequence: state, move, state, move, ...
    record = [state]
    outcome = 0
    turns = 0
    finished = False
    closing_move = None
    quiet_plies = 0
    in_check = False

    while not finished:
        no_act = None
        if not in_check and state in record[:-1]:
            # The position has occurred before: collect moves that must not
            # be repeated and count harmless repetitions.
            no_act = []
            idle_repeats = 0
            for pos, earlier in enumerate(record[:-1]):
                if earlier == state:
                    follow_up = record[pos + 1]
                    if senv.will_check_or_catch(state, follow_up):
                        # Repeating a check or a chase is forbidden.
                        no_act.append(follow_up)
                    else:
                        # Idle repetition; the second one ends the game drawn.
                        idle_repeats += 1
                        if idle_repeats >= 2:
                            finished = True
                            outcome = 0
                            logger.info("闲着循环三次，作和棋处理")
                            break
        if finished:
            break

        start_time = time()
        action, policy = player.action(state, turns, no_act)
        end_time = time()
        if action is None:
            # No action returned: the side to move gives up.
            print(f"{turns % 2} (0 = 红; 1 = 黑) 投降了!")
            outcome = -1
            break
        print(
            f"博弈中: 回合{turns / 2 + 1} {'红方走棋' if turns % 2 == 0 else '黑方走棋'}, 着法: {action}, 用时: {(end_time - start_time):.1f}s"
        )
        record.append(action)
        try:
            state, no_eat = senv.new_step(state, action)
        except Exception as e:
            logger.error(f"{e}, no_act = {no_act}, policy = {policy}")
            finished = True
            outcome = 0
            break
        turns += 1
        quiet_plies = quiet_plies + 1 if no_eat else 0
        record.append(state)

        # Draw by 120 plies without a capture or by the length limit.
        if quiet_plies >= 120 or turns / 2 >= config.play.max_game_length:
            finished = True
            outcome = 0
            continue

        finished, outcome, closing_move, in_check = senv.done(
            state, need_check=True)
        if not finished and not senv.has_attack_chessman(state):
            logger.info(f"双方无进攻子力，作和。state = {state}")
            finished = True
            outcome = 0

    if closing_move:
        # Apply the concluding move reported by senv.done; the side to move
        # changes, so the value is negated.
        record.append(closing_move)
        state = senv.step(state, closing_move)
        turns += 1
        outcome = -outcome
        record.append(state)

    player.close()
    del player
    gc.collect()

    if turns % 2 == 1:  # black to move
        outcome = -outcome

    # Moves sit at the odd positions of the record; even plies carry the
    # final value, odd plies its negation.
    data = [record[0]]
    data.extend([record[2 * ply + 1], outcome if ply % 2 == 0 else -outcome]
                for ply in range(turns))

    cur.append(pipe)
    return (turns, outcome), data

Example #6

Show file

class PlayWithHuman:
    def __init__(self, config: Config):
        """Initialise the default state of a human-vs-AI game window.

        The model, pipe, AI player and sprite group are left as ``None`` here
        and are filled in by other methods of this class.

        Args:
            config: project configuration; ``config.opts.bg_style`` is read
                here to choose the piece size.
        """
        self.config = config
        self.env = CChessEnv()

        # Filled in later by load_model() / start() / init_screen().
        self.model = None
        self.pipe = None
        self.ai = None
        self.chessmans = None

        self.winstyle = 0
        self.human_move_first = True

        # Sizes handed to pygame: full window width, and board width/height.
        self.screen_width = 720
        self.height = 577
        self.width = 521

        # Side panel: number of record lines kept and their rendered labels.
        self.disp_record_num = 15
        self.rec_labels = [None] * self.disp_record_num

        # Values shown by draw_evaluation() and updated by ai_move().
        self.nn_value = 0
        self.mcts_moves = {}
        self.history = []

        # Pieces are one unit larger with the 'WOOD' background.
        piece_size = 57
        if self.config.opts.bg_style == 'WOOD':
            piece_size += 1
        self.chessman_w = piece_size
        self.chessman_h = piece_size

    def load_model(self):
        """Create ``self.model`` and build it when no stored weights are used.

        ``build()`` is called when ``config.opts.new`` is truthy, or when
        ``load_best_model_weight`` returns a falsy value for the new model.
        """
        model = CChessModel(self.config)
        self.model = model
        if self.config.opts.new:
            model.build()
        elif not load_best_model_weight(model):
            model.build()

    def init_screen(self):
        """Open the pygame window, paint the static backgrounds and create the piece sprites.

        Returns:
            A ``(screen, board_background, widget_background)`` tuple: the
            display surface, the board surface and the side-panel surface.
        """
        window_size = [self.screen_width, self.height]
        depth = pygame.display.mode_ok(window_size, self.winstyle, 32)
        screen = pygame.display.set_mode(window_size, self.winstyle, depth)
        pygame.display.set_caption("中国象棋-AlphaZero")

        # Board surface: the background image scaled to the board size.
        board_image = self.load_image(f'{self.config.opts.bg_style}.GIF')
        board_image = pygame.transform.scale(board_image,
                                             (self.width, self.height))
        board_background = pygame.Surface([self.width, self.height])
        board_background.blit(board_image, (0, 0))

        # Side panel: the window width not used by the board, filled white.
        panel_width = self.screen_width - self.width
        widget_background = pygame.Surface([panel_width, self.height])
        widget_background.fill((255, 255, 255),
                               Rect(0, 0, panel_width, self.height))

        # Panel heading, black on white, placed at (10, 10).
        heading_font = pygame.font.Font(self.config.resource.font_path, 16)
        heading = heading_font.render("着法记录", True, (0, 0, 0),
                                      (255, 255, 255))
        heading_rect = heading.get_rect()
        heading_rect.x = 10
        heading_rect.y = 10
        widget_background.blit(heading, heading_rect)

        screen.blit(board_background, (0, 0))
        screen.blit(widget_background, (self.width, 0))
        pygame.display.flip()

        # One sprite per piece currently on the board.
        self.chessmans = pygame.sprite.Group()
        self.creat_sprite_group(self.chessmans, self.env.board.chessmans_hash,
                                self.chessman_w, self.chessman_h)
        return screen, board_background, widget_background

    def start(self, human_first=True):
        """Run one human-vs-AI game in a pygame window.

        Resets the environment, loads the model, creates the ``CChessPlayer``
        and starts a daemon thread running :meth:`ai_move`. The calling thread
        then handles pygame events and redraws (``framerate.tick(20)``) until
        ``self.env.board.is_end()`` is true, after which the record is printed
        and saved.

        Closing the window prints and saves the record, then leaves the
        process through ``sys.exit()``.

        Args:
            human_first: the human may move whenever
                ``human_first == self.env.red_to_move``.
        """
        self.env.reset()
        self.load_model()
        self.pipe = self.model.get_pipes()
        self.ai = CChessPlayer(self.config,
                               search_tree=defaultdict(VisitState),
                               pipes=self.pipe,
                               enable_resign=True,
                               debugging=True)
        self.human_move_first = human_first

        pygame.init()
        screen, board_background, widget_background = self.init_screen()
        framerate = pygame.time.Clock()

        current_chessman = None
        if human_first:
            self.env.board.calc_chessmans_moving_list()

        ai_worker = Thread(target=self.ai_move, name="ai_worker")
        ai_worker.daemon = True
        ai_worker.start()

        while not self.env.board.is_end():
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    self.env.board.print_record()
                    self.ai.close(wait=False)
                    self._save_play_record()
                    sys.exit()
                if event.type != MOUSEBUTTONDOWN:
                    continue
                if human_first != self.env.red_to_move:
                    continue  # not the human's turn
                if not pygame.mouse.get_pressed()[0]:
                    continue  # only button 0 (the left button) is handled

                mouse_x, mouse_y = pygame.mouse.get_pos()
                col_num, row_num = self.translate_hit_area(
                    mouse_x, mouse_y, self.chessman_w, self.chessman_h)
                chessman_sprite = self.select_sprite_from_group(
                    self.chessmans, col_num, row_num)

                clicked_own_piece = (
                    chessman_sprite is not None and
                    chessman_sprite.chessman.is_red == self.env.red_to_move)
                if clicked_own_piece:
                    # Select the piece, replacing any previous selection.
                    if current_chessman is not None:
                        current_chessman.is_selected = False
                    current_chessman = chessman_sprite
                    chessman_sprite.is_selected = True
                    continue
                if current_chessman is None:
                    continue  # nothing selected, so nothing to move

                # A piece is selected and the click hit an empty square or an
                # opposing piece: try to move there. The move string must be
                # built before move() is called, as it uses the old position.
                move = "".join(
                    str(part)
                    for part in (current_chessman.chessman.col_num,
                                 current_chessman.chessman.row_num, col_num,
                                 row_num))
                success = current_chessman.move(col_num, row_num,
                                                self.chessman_w,
                                                self.chessman_h)
                if not success:
                    # Rejected move: do not record it. history must stay an
                    # alternating state/move list, because ai_move() reads
                    # history[i + 1] as the move played from history[i].
                    continue
                self.history.append(move)
                if chessman_sprite is not None:
                    # The destination held an opposing piece: it is captured.
                    self.chessmans.remove(chessman_sprite)
                    chessman_sprite.kill()
                current_chessman.is_selected = False
                current_chessman = None
                self.history.append(self.env.get_state())

            self.draw_widget(screen, widget_background)
            framerate.tick(20)
            # clear/erase the last drawn sprites
            self.chessmans.clear(screen, board_background)

            # update all the sprites
            self.chessmans.update()
            self.chessmans.draw(screen)
            pygame.display.update()

        self.ai.close(wait=False)
        logger.info(f"Winner is {self.env.board.winner} !!!")
        self.env.board.print_record()
        self._save_play_record()
        sleep(3)

    def _save_play_record(self):
        """Save the board's record to a file named after the current timestamp."""
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S")
        path = os.path.join(
            self.config.resource.play_record_dir,
            self.config.resource.play_record_filename_tmpl % game_id)
        self.env.board.save_record(path)

    def ai_move(self):
        """Worker loop that plays the AI side until the environment is done.

        Started by :meth:`start` as the target of a daemon thread.
        ``self.history`` is kept as an alternating list
        ``[state, move, state, move, ...]``. When the current position has
        been seen before and the side to move is not in check, moves that were
        previously played from it and that check or chase
        (``senv.will_check_or_catch``) are passed to the player as forbidden
        moves; two earlier non-attacking replies declare a draw instead.

        The loop returns early when the AI resigns (``action is None``) or
        when it declares a draw.
        """
        ai_move_first = not self.human_move_first
        self.history = [self.env.get_state()]
        while not self.env.done:
            if ai_move_first != self.env.red_to_move:
                continue  # human's turn; keep polling

            self.ai.search_results = {}
            state = self.env.get_state()
            logger.info(f"state = {state}")
            _, _, _, check = senv.done(state, need_check=True)

            # Recomputed every turn: a ban list built for an earlier repeated
            # position must not leak into later, unrelated positions.
            no_act = None
            if not check and state in self.history[:-1]:
                no_act = []
                free_moves = 0
                for i, past in enumerate(self.history[:-1]):
                    if past != state:
                        continue
                    next_move = self.history[i + 1]
                    if senv.will_check_or_catch(state, next_move):
                        # The move played from here before checks or chases:
                        # forbid repeating it.
                        no_act.append(next_move)
                    else:
                        # Otherwise count it as an idle move.
                        free_moves += 1
                        if free_moves >= 2:
                            # Treat the game as a draw and stop without
                            # searching or moving again.
                            # NOTE(review): relies on the main loop ending
                            # once board.winner is set - confirm is_end().
                            self.env.winner = Winner.draw
                            self.env.board.winner = Winner.draw
                            return
                if no_act:
                    logger.debug(f"no_act = {no_act}")

            action, _ = self.ai.action(state, self.env.num_halfmoves, no_act)
            if action is None:
                logger.info("AI has resigned!")
                return
            self.history.append(action)
            if not self.env.red_to_move:
                action = flip_move(action)

            key = self.env.get_state()
            _, v = self.ai.debug[key]
            logger.info(f"check = {check}, NN value = {v:.3f}")
            self.nn_value = v

            logger.info("MCTS results:")
            # Build the table locally and publish it in one assignment, so
            # draw_evaluation() never deep-copies a dict that is still growing.
            mcts_moves = {}
            for move, action_state in self.ai.search_results.items():
                move_cn = self.env.board.make_single_record(
                    int(move[0]), int(move[1]), int(move[2]), int(move[3]))
                logger.info(
                    f"move: {move_cn}-{move}, visit count: {action_state[0]}, Q_value: {action_state[1]:.3f}, Prior: {action_state[2]:.3f}"
                )
                mcts_moves[move_cn] = action_state
            self.mcts_moves = mcts_moves

            # Apply the move on screen: remove a captured sprite, then move.
            x0, y0, x1, y1 = (int(action[0]), int(action[1]), int(action[2]),
                              int(action[3]))
            chessman_sprite = self.select_sprite_from_group(
                self.chessmans, x0, y0)
            sprite_dest = self.select_sprite_from_group(
                self.chessmans, x1, y1)
            if sprite_dest:
                self.chessmans.remove(sprite_dest)
                sprite_dest.kill()
            chessman_sprite.move(x1, y1, self.chessman_w, self.chessman_h)
            self.history.append(self.env.get_state())

    def draw_widget(self, screen, widget_background):
        """Repaint the side panel: white fill, red divider, records and evaluation.

        Args:
            screen: display surface the panel is blitted onto.
            widget_background: side-panel surface that is redrawn.
        """
        panel_width = self.screen_width - self.width
        widget_background.fill((255, 255, 255),
                               Rect(0, 0, panel_width, self.height))
        # Horizontal red line at y=285, inset 10 from both panel edges.
        pygame.draw.line(widget_background, (255, 0, 0), (10, 285),
                         (panel_width - 10, 285))
        screen.blit(widget_background, (self.width, 0))
        self.draw_records(screen, widget_background)
        self.draw_evaluation(screen, widget_background)

    def draw_records(self, screen, widget_background):
        """Draw the heading and the last ``disp_record_num`` record lines.

        The rendered labels are also stored in ``self.rec_labels``.

        Args:
            screen: display surface the panel is blitted onto.
            widget_background: side-panel surface the lines are drawn on.
        """
        self.draw_label(screen, widget_background, '着法记录', 10, 16, 10)
        recent = self.env.board.record.split('\n')[-self.disp_record_num:]
        font = pygame.font.Font(self.config.resource.font_path, 12)
        for row, record in enumerate(recent):
            label = font.render(record, True, (0, 0, 0), (255, 255, 255))
            self.rec_labels[row] = label
            rect = label.get_rect()
            # Lines start at y=35 and are spaced 15 apart.
            rect.y = 35 + row * 15
            rect.x = 10
            rect.width = self.screen_width - self.width
            widget_background.blit(label, rect)
        screen.blit(widget_background, (self.width, 0))

    def draw_evaluation(self, screen, widget_background):
        """Draw the AI information block and the table of MCTS results.

        Shows the configured simulation count, ``self.nn_value`` and one row
        per entry of ``self.mcts_moves`` with the entry's first three values.

        Args:
            screen: display surface the panel is blitted onto.
            widget_background: side-panel surface the labels are drawn on.
        """
        self.draw_label(screen, widget_background, 'AlphaZero信息', 300, 16, 10)
        self.draw_label(
            screen, widget_background,
            f'MCTS搜索次数：{self.config.play.simulation_num_per_move}', 335, 14,
            10)
        self.draw_label(screen, widget_background,
                        f"当前局势评估: {self.nn_value:.3f}", 360, 14, 10)
        self.draw_label(screen, widget_background, "MCTS搜索结果:", 395, 14, 10)
        self.draw_label(screen, widget_background, "着法 访问计数 动作价值 先验概率",
                        415, 12, 10)

        # Work on a deep copy: ai_move() updates self.mcts_moves from another
        # thread.
        snapshot = copy.deepcopy(self.mcts_moves)
        for row, (mov, action_state) in enumerate(snapshot.items()):
            y = 435 + row * 20
            self.draw_label(screen, widget_background, f"{mov}", y, 12, 10)
            self.draw_label(screen, widget_background, f"{action_state[0]}", y,
                            12, 70)
            self.draw_label(screen, widget_background,
                            f"{action_state[1]:.2f}", y, 12, 100)
            self.draw_label(screen, widget_background,
                            f"{action_state[2]:.3f}", y, 12, 150)

    def draw_label(self,
                   screen,
                   widget_background,
                   text,
                   y,
                   font_size,
                   x=None):
        """Render ``text`` in black on white onto the side panel and blit the panel.

        Args:
            screen: display surface the panel is blitted onto.
            widget_background: side-panel surface the label is drawn on.
            text: string to render.
            y: vertical position of the label inside the panel.
            font_size: size passed to ``pygame.font.Font``.
            x: horizontal position inside the panel; when ``None`` the label
                is centred horizontally in the panel.
        """
        font = pygame.font.Font(self.config.resource.font_path, font_size)
        rendered = font.render(text, True, (0, 0, 0), (255, 255, 255))
        rect = rendered.get_rect()
        rect.y = y
        if x is None:
            rect.centerx = (self.screen_width - self.width) / 2
        else:
            rect.x = x
        widget_background.blit(rendered, rect)
        screen.blit(widget_background, (self.width, 0))

    def load_image(self, file, sub_dir=None):
        """Load an image from the configured image directory and convert it.

        Args:
            file: image file name.
            sub_dir: optional sub-directory of ``config.resource.image_path``.

        Returns:
            The loaded surface after ``convert()``.

        Raises:
            SystemExit: if pygame cannot load the file.
        """
        location = [self.config.resource.image_path]
        if sub_dir:
            location.append(sub_dir)
        file = os.path.join(*location, file)
        try:
            surface = pygame.image.load(file)
        except pygame.error:
            raise SystemExit('Could not load image "%s" %s' %
                             (file, pygame.get_error()))
        return surface.convert()

    def load_images(self, *files):
        """Load several images from the ``PIECE_STYLE`` sub-directory.

        Args:
            *files: image file names.

        Returns:
            A list of loaded surfaces in the same order as ``files``.
        """
        return [self.load_image(name, PIECE_STYLE) for name in files]

    def creat_sprite_group(self, sprite_group, chessmans_hash, w, h):
        """Add one ``Chessman_Sprite`` per piece in ``chessmans_hash`` to ``sprite_group``.

        Each piece gets a pair of images chosen by its colour and type; any
        piece that matches none of the listed types uses the "P" images.

        Args:
            sprite_group: group the new sprites are added to.
            chessmans_hash: mapping whose values are the pieces.
            w: width passed to ``Chessman_Sprite``.
            h: height passed to ``Chessman_Sprite``.
        """
        # Checked in order; the first matching type wins.
        red_images = (
            (Rook, ("RR.GIF", "RRS.GIF")),
            (Cannon, ("RC.GIF", "RCS.GIF")),
            (Knight, ("RN.GIF", "RNS.GIF")),
            (King, ("RK.GIF", "RKS.GIF")),
            (Elephant, ("RB.GIF", "RBS.GIF")),
            (Mandarin, ("RA.GIF", "RAS.GIF")),
        )
        black_images = (
            (Rook, ("BR.GIF", "BRS.GIF")),
            (Cannon, ("BC.GIF", "BCS.GIF")),
            (Knight, ("BN.GIF", "BNS.GIF")),
            (King, ("BK.GIF", "BKS.GIF")),
            (Elephant, ("BB.GIF", "BBS.GIF")),
            (Mandarin, ("BA.GIF", "BAS.GIF")),
        )
        for chess in chessmans_hash.values():
            if chess.is_red:
                table, file_names = red_images, ("RP.GIF", "RPS.GIF")
            else:
                table, file_names = black_images, ("BP.GIF", "BPS.GIF")
            for piece_type, names in table:
                if isinstance(chess, piece_type):
                    file_names = names
                    break
            images = self.load_images(*file_names)
            sprite_group.add(Chessman_Sprite(images, chess, w, h))

    def select_sprite_from_group(self, sprite_group, col_num, row_num):
        """Return the first sprite whose piece is at ``(col_num, row_num)``.

        Args:
            sprite_group: iterable of sprites exposing ``.chessman``.
            col_num: column to look for.
            row_num: row to look for.

        Returns:
            The first matching sprite, or ``None`` when no sprite matches.
        """
        matches = (
            candidate for candidate in sprite_group
            if candidate.chessman.col_num == col_num
            and candidate.chessman.row_num == row_num)
        return next(matches, None)

    def translate_hit_area(self, screen_x, screen_y, w=80, h=80):
        """Convert a pixel position into ``(col_num, row_num)``.

        Columns count from the left in steps of ``w``. Rows are counted in
        steps of ``h`` and flipped, so the top band of the screen is row 9.

        Args:
            screen_x: horizontal pixel position.
            screen_y: vertical pixel position.
            w: width of one cell.
            h: height of one cell.

        Returns:
            A ``(col_num, row_num)`` tuple.
        """
        col_num = screen_x // w
        rows_from_top = screen_y // h
        return col_num, 9 - rows_from_top

Example #7

Show file

File: evaluator.py Project: ArronHZG/AlphaZero_ChineseChess

class EvaluateWorker:
    def __init__(self, config: Config, pipes1=None, pipes2=None, pid=None):
        self.config = config
        self.player_bt = None
        self.player_ng = None
        self.pid = pid
        self.pipes_bt = pipes1
        self.pipes_ng = pipes2

    def start(self):
        """Play ``config.eval.game_num`` games and tally both models' scores.

        A positive game score counts as a win for the best model, a negative
        one as a win for the next-generation model, and zero gives each side
        half a point.

        Returns:
            The next-generation model's total score.
        """
        logger.debug(
            f"Evaluate#Start Process index = {self.pid}, pid = {os.getpid()}")
        score1 = score2 = 0

        for idx in range(self.config.eval.game_num):
            start_time = time()
            score, turns = self.start_game(idx)
            end_time = time()

            if score > 0:
                score1 += 1
            elif score < 0:
                score2 += 1
            else:
                score1 += 0.5
                score2 += 0.5

            logger.debug(
                f"Process{self.pid} play game {idx} time={(end_time - start_time):.1f} sec, "
                f"turn={turns / 2}, best model {score1} - {score2} next generation model"
            )
        return score2  # return next generation model's score

    def start_game(self, idx):
        """Play one evaluation game between the two models.

        One pipe is popped from each pool (``pipes_bt`` / ``pipes_ng``) for the
        game and appended back before returning. Colours alternate with
        ``idx``: on even games player1 (the best model) is red, on odd games
        player2 (the next-generation model) is red.

        Args:
            idx: index of the game; its parity decides the colours.

        Returns:
            A ``(value, turns)`` tuple. ``value`` is the result after the two
            sign flips at the end of the method; the caller counts a positive
            value as a best-model win. ``turns`` is the number of half-moves
            played.
        """
        pipe1 = self.pipes_bt.pop()
        pipe2 = self.pipes_ng.pop()

        self.player1 = CChessPlayer(self.config,
                                    search_tree=defaultdict(VisitState),
                                    pipes=pipe1,
                                    debugging=False,
                                    enable_resign=True)
        self.player2 = CChessPlayer(self.config,
                                    search_tree=defaultdict(VisitState),
                                    pipes=pipe2,
                                    debugging=False,
                                    enable_resign=True)

        # even: bst = red, ng = black; odd: bst = black, ng = red
        if idx % 2 == 0:
            red, black = self.player1, self.player2
            logger.debug("best model is red, ng is black")
        else:
            red, black = self.player2, self.player1
            logger.debug("best model is black, ng is red")

        state = senv.INIT_STATE
        value = 0  # best model's value
        turns = 0  # even == red; odd == black
        game_over = False

        while not game_over:
            mover = red if turns % 2 == 0 else black
            action, _ = mover.action(state, turns)
            if action is None:
                # The side to move resigned. The original message referenced
                # an undefined name `turn`, so this path raised NameError.
                logger.debug(f"{turns % 2} (0 = red; 1 = black) has resigned!")
                value = -1
                break

            state = senv.step(state, action)
            turns += 1

            if turns / 2 >= self.config.play.max_game_length:
                # Move limit reached: score the game as a draw.
                game_over = True
                value = 0
            else:
                game_over, value, _ = senv.done(state)

        self.player1.close()
        self.player2.close()

        if turns % 2 == 1:  # black turn
            value = -value

        if idx % 2 == 1:
            # Odd games: the best model played black, so flip the sign again.
            value = -value

        self.pipes_bt.append(pipe1)
        self.pipes_ng.append(pipe2)
        return value, turns