コード例 #1
0
def self_play_buffer(config, cur) -> (CChessEnv, list):
    """Play one complete self-play game and return its environment and moves.

    A pipes object is borrowed from ``cur`` for the duration of the game and
    is always handed back to ``cur``, even when the game aborts with an
    exception, so a failed game cannot drain the pool.

    Args:
        config: Configuration object. ``config.play.max_game_length`` caps the
            game length (compared against ``env.num_halfmoves / 2``); reaching
            it marks the game as a draw.
        cur: Mutable pool supporting ``pop()`` and ``append()``. One item is
            popped, passed as ``pipes`` to both players, and appended back.

    Returns:
        A ``(env, data)`` pair: ``env`` is the ``CChessEnv`` the game was
        played on, and ``data`` interleaves ``red.moves`` and ``black.moves``
        (red first) index by index.
    """
    pipes = cur.pop()  # borrow
    try:
        env = CChessEnv(config).reset()
        # Both players are built on the same search tree and the same pipes.
        search_tree = defaultdict(VisitState)

        red = CChessPlayer(config, search_tree=search_tree, pipes=pipes)
        black = CChessPlayer(config, search_tree=search_tree, pipes=pipes)

        history = []

        # Number of consecutive plies whose action equals the action played
        # four plies earlier (i.e. the same mover's action two turns back).
        cc = 0
        while not env.done:
            start_time = time()
            if env.red_to_move:
                action = red.action(env)
            else:
                action = black.action(env)
            end_time = time()
            logger.debug(
                f"Playing: {env.red_to_move}, action: {action}, time: {end_time - start_time}s"
            )
            env.step(action)
            history.append(action)
            if len(history) > 6 and history[-1] == history[-5]:
                cc = cc + 1
            else:
                cc = 0
            # NOTE(review): presumably setting env.winner makes env.done true
            # and ends the loop -- confirm against CChessEnv.
            if env.num_halfmoves / 2 >= config.play.max_game_length:
                env.winner = Winner.draw
            if cc >= 4:
                # Sustained repetition: the side that is to move now is
                # recorded as the loser.
                if env.red_to_move:
                    env.winner = Winner.black
                else:
                    env.winner = Winner.red

        # Outcome from black's point of view: 1 win, -1 loss, 0 otherwise.
        if env.winner == Winner.red:
            black_win = -1
        elif env.winner == Winner.black:
            black_win = 1
        else:
            black_win = 0

        black.finish_game(black_win)
        red.finish_game(-black_win)

        # Interleave the two move lists in play order, red first. Any black
        # entries beyond the length of red.moves are not included.
        data = []
        for i, red_move in enumerate(red.moves):
            data.append(red_move)
            if i < len(black.moves):
                data.append(black.moves[i])

        return env, data
    finally:
        # Hand the borrowed pipes back whether or not the game succeeded.
        cur.append(pipes)
コード例 #2
0
class SelfPlayWorker:
    """Worker that plays self-play games in a loop and persists the results.

    Each game borrows a pipes object from ``cur_pipes``, pits two
    ``CChessPlayer`` instances against each other, accumulates their move
    data in ``buffer`` and periodically writes that buffer, and optionally
    the game record, to disk.
    """

    def __init__(self, config: Config, pipes=None, pid=None):
        """Store the configuration and the per-process resources.

        Args:
            config: Configuration object; its ``play``, ``play_data`` and
                ``resource`` sections are read by the other methods.
            pipes: Pool of pipes objects supporting ``pop()``/``append()``.
                ``start_game`` pops from it, so it must be provided before
                any game is played.
            pid: Index of this worker, used only in log messages.
        """
        self.config = config
        self.red = None
        self.black = None
        self.cur_pipes = pipes
        self.pid = pid
        self.buffer = []

    def start(self):
        """Play games forever, logging the duration and result of each one.

        This method contains an unconditional ``while True`` loop and does
        not return normally.
        """
        logger.debug(
            f"Selfplay#Start Process index = {self.pid}, pid = {os.getpid()}")

        idx = 1
        self.buffer = []
        search_tree = defaultdict(VisitState)

        while True:
            start_time = time()
            # start_game returns the (possibly replaced) search tree so it
            # can be carried over into the next game.
            env, search_tree = self.start_game(idx, search_tree)
            end_time = time()
            logger.debug(
                f"Process{self.pid} play game {idx} time={end_time - start_time} sec, "
                f"turn={env.num_halfmoves / 2}:{env.winner}")
            # Very short games are dumped row by row for inspection.
            if env.num_halfmoves <= 10:
                for i in range(10):
                    logger.debug(f"{env.board.screen[i]}")

            idx += 1

    def start_game(self, idx, search_tree):
        """Play a single game and persist its data.

        Args:
            idx: 1-based game counter; drives the periodic search-tree reset
                and the save cadence.
            search_tree: Search tree carried over from the previous game. It
                is replaced by a fresh one when sharing is disabled or when
                ``idx`` is a multiple of
                ``config.play.reset_mtcs_info_per_game``.

        Returns:
            ``(env, search_tree)``: the environment the game was played on
            and the search tree that was actually used.
        """
        pipes = self.cur_pipes.pop()
        try:
            env = CChessEnv(self.config).reset()

            play_cfg = self.config.play
            if not play_cfg.share_mtcs_info_in_self_play or \
                    idx % play_cfg.reset_mtcs_info_per_game == 0:
                search_tree = defaultdict(VisitState)

            # Both players share one search tree and one pipes object.
            self.red = CChessPlayer(self.config,
                                    search_tree=search_tree,
                                    pipes=pipes)
            self.black = CChessPlayer(self.config,
                                      search_tree=search_tree,
                                      pipes=pipes)

            history = []

            # NOTE(review): the only adjudication in this loop is the
            # max-length draw; no repetition rule is applied -- confirm
            # that this is intended.
            while not env.done:
                start_time = time()
                if env.red_to_move:
                    action = self.red.action(env)
                else:
                    action = self.black.action(env)
                end_time = time()
                logger.debug(
                    f"Process{self.pid} Playing: {env.red_to_move}, action: {action}, time: {end_time - start_time}s"
                )
                env.step(action)
                history.append(action)
                if env.num_halfmoves / 2 >= play_cfg.max_game_length:
                    env.winner = Winner.draw

            # Outcome from red's point of view: 1 win, -1 loss, 0 otherwise.
            if env.winner == Winner.red:
                red_win = 1
            elif env.winner == Winner.black:
                red_win = -1
            else:
                red_win = 0

            if env.num_halfmoves <= 10:
                logger.debug(f"History moves: {history}")

            self.red.finish_game(red_win)
            self.black.finish_game(-red_win)
        finally:
            # Always return the borrowed pipes, even if the game failed,
            # so the pool is not drained by exceptions.
            self.cur_pipes.append(pipes)

        self.save_record_data(env,
                              write=idx %
                              self.config.play_data.nb_game_save_record == 0)
        self.save_play_data(idx)
        self.remove_play_data()
        return env, search_tree

    def save_play_data(self, idx):
        """Buffer the last game's moves and flush the buffer periodically.

        The moves of ``self.red`` and ``self.black`` are interleaved (red
        first) and appended to ``self.buffer``. When ``idx`` is a multiple of
        ``config.play_data.nb_game_in_file`` the buffer is written to a
        timestamped file in ``config.resource.play_data_dir`` and cleared.

        Args:
            idx: 1-based game counter.
        """
        data = []
        for i, red_move in enumerate(self.red.moves):
            data.append(red_move)
            if i < len(self.black.moves):
                data.append(self.black.moves[i])

        self.buffer += data

        if idx % self.config.play_data.nb_game_in_file != 0:
            return

        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir,
                            rc.play_data_filename_tmpl % game_id)
        logger.info(f"Process {self.pid} save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def save_record_data(self, env, write=False):
        """Save the game record of ``env`` to a timestamped file.

        Args:
            env: Environment whose ``save_records`` method is called.
            write: When false (the default) nothing is written.
        """
        if not write:
            return
        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_record_dir,
                            rc.play_record_filename_tmpl % game_id)
        env.save_records(path)

    def remove_play_data(self):
        """Delete surplus play-data files, keeping at most ``max_file_num``.

        Files are removed from the front of the list returned by
        ``get_game_data_filenames``. Removal is best-effort: a file that
        cannot be deleted is logged and skipped, and the remaining surplus
        files are still attempted.
        """
        files = get_game_data_filenames(self.config.resource)
        excess = len(files) - self.config.play_data.max_file_num
        if excess <= 0:
            return
        for path in files[:excess]:
            try:
                os.remove(path)
            except OSError as err:
                # Only filesystem errors are tolerated here, so interrupts
                # and programming errors are no longer silently swallowed.
                logger.debug(
                    f"Process {self.pid} could not remove {path}: {err}")