Ejemplo n.º 1
0
def self_play_buffer(config, cur) -> (CChessEnv, list):
    """Play one self-play game and return the final env and the move records.

    A pipes object is borrowed from ``cur`` for the duration of the game and
    is always appended back, including when the game raises, so a failed
    game no longer shrinks the pool.

    Red and black are two ``CChessPlayer`` instances that share the same
    search tree (a fresh ``defaultdict(VisitState)``) and the same pipes.

    Besides whatever makes ``env.done`` true inside the environment, the
    loop sets the winner itself in two cases:

    * draw, once ``env.num_halfmoves / 2`` reaches
      ``config.play.max_game_length``;
    * repetition, once the latest action has matched the action played four
      plies earlier on four consecutive plies (only counted when more than
      six actions have been played). The side that is *not* to move after
      the step is declared the winner. This check runs after the draw check
      and therefore overrides it when both apply.

    Args:
        config: Configuration object; passed to ``CChessEnv`` and
            ``CChessPlayer``; ``config.play.max_game_length`` is read here.
        cur: Mutable pool supporting ``pop()`` and ``append()`` from which
            the pipes are borrowed.

    Returns:
        ``(env, data)`` where ``data`` interleaves ``red.moves`` and
        ``black.moves`` (red first). Black entries are only taken at indices
        that also exist in ``red.moves``.
    """
    pipes = cur.pop()  # borrow
    try:
        env = CChessEnv(config).reset()
        search_tree = defaultdict(VisitState)

        red = CChessPlayer(config, search_tree=search_tree, pipes=pipes)
        black = CChessPlayer(config, search_tree=search_tree, pipes=pipes)

        history = []
        repeat_count = 0

        while not env.done:
            start_time = time()
            mover = red if env.red_to_move else black
            action = mover.action(env)
            end_time = time()
            logger.debug(
                f"Playing: {env.red_to_move}, action: {action}, time: {end_time - start_time}s"
            )
            env.step(action)
            history.append(action)

            # history[-5] is the action played four plies before the latest.
            if len(history) > 6 and history[-1] == history[-5]:
                repeat_count += 1
            else:
                repeat_count = 0

            if env.num_halfmoves / 2 >= config.play.max_game_length:
                env.winner = Winner.draw
            if repeat_count >= 4:
                # env.red_to_move is read after env.step(), i.e. it names
                # the side that would move next.
                env.winner = Winner.black if env.red_to_move else Winner.red

        # Score from black's point of view; red gets the negation.
        if env.winner == Winner.red:
            black_win = -1
        elif env.winner == Winner.black:
            black_win = 1
        else:
            black_win = 0

        black.finish_game(black_win)
        red.finish_game(-black_win)

        # Interleave the per-player records in playing order, red first.
        data = []
        black_moves = black.moves
        for i, red_move in enumerate(red.moves):
            data.append(red_move)
            if i < len(black_moves):
                data.append(black_moves[i])
    finally:
        # Give the pipes back even if the game blew up.
        cur.append(pipes)
    return env, data
    def start_game(self, idx, search_tree):
        """Play one self-play game, persist its data, and return the result.

        A pipes object is borrowed from ``self.cur_pipes`` for the duration
        of the game and is always appended back, including when the game
        raises, so a failed game no longer shrinks the pool.

        The incoming ``search_tree`` is replaced by a fresh
        ``defaultdict(VisitState)`` when sharing is disabled
        (``config.play.share_mtcs_info_in_self_play`` is falsy) or when
        ``idx`` is a multiple of ``config.play.reset_mtcs_info_per_game``.
        Otherwise the tree passed in is reused by both players.

        The players are stored on ``self.red`` / ``self.black``. The game
        runs until ``env.done``; the winner is set to a draw once
        ``env.num_halfmoves / 2`` reaches ``config.play.max_game_length``.

        After the game the pipes are returned, then ``save_record_data``,
        ``save_play_data`` and ``remove_play_data`` are called in that order.

        Args:
            idx: Game index; used for the search-tree reset check, for the
                ``write`` flag of ``save_record_data`` and passed to
                ``save_play_data``.
            search_tree: Search tree to reuse unless it is reset (see above).

        Returns:
            ``(env, search_tree)``: the finished environment and the search
            tree that was actually used for this game.
        """
        pipes = self.cur_pipes.pop()  # borrow
        try:
            env = CChessEnv(self.config).reset()

            play_cfg = self.config.play
            if (not play_cfg.share_mtcs_info_in_self_play
                    or idx % play_cfg.reset_mtcs_info_per_game == 0):
                search_tree = defaultdict(VisitState)

            self.red = CChessPlayer(self.config,
                                    search_tree=search_tree,
                                    pipes=pipes)
            self.black = CChessPlayer(self.config,
                                      search_tree=search_tree,
                                      pipes=pipes)

            # NOTE(review): a repetition counter used to be updated here on
            # every ply but its value was never read, so it was removed.
            # Confirm whether a repetition rule was meant to end the game.
            history = []

            while not env.done:
                start_time = time()
                mover = self.red if env.red_to_move else self.black
                action = mover.action(env)
                end_time = time()
                logger.debug(
                    f"Process{self.pid} Playing: {env.red_to_move}, action: {action}, time: {end_time - start_time}s"
                )
                env.step(action)
                history.append(action)
                if env.num_halfmoves / 2 >= play_cfg.max_game_length:
                    env.winner = Winner.draw

            # Score from red's point of view; black gets the negation.
            if env.winner == Winner.red:
                red_win = 1
            elif env.winner == Winner.black:
                red_win = -1
            else:
                red_win = 0

            # Very short games are logged in full.
            if env.num_halfmoves <= 10:
                logger.debug(f"History moves: {history}")

            self.red.finish_game(red_win)
            self.black.finish_game(-red_win)
        finally:
            # Give the pipes back even if the game blew up.
            self.cur_pipes.append(pipes)

        should_write = idx % self.config.play_data.nb_game_save_record == 0
        self.save_record_data(env, write=should_write)
        self.save_play_data(idx)
        self.remove_play_data()
        return env, search_tree