예제 #1
0
    def start(self):
        """
        Do self play and write the data to the appropriate file.

        Twice as many games as there are worker processes are submitted to a
        process pool up front. Results are then consumed in submission order,
        and each consumed result is replaced by a newly submitted game. Every
        ``config.play_data.nb_game_in_file`` games the buffer is handed to
        ``self.flush_buffer()`` and ``reload_best_model_weight_if_changed`` is
        called with the current model.

        The main loop has no exit condition, so this method only ends when an
        exception (for example ``KeyboardInterrupt``) propagates out of it.
        """
        self.buffer = []

        futures = deque()
        with ProcessPoolExecutor(
                max_workers=self.config.play.max_processes) as executor:
            # Prime the pool; the loop index itself is not needed.
            for _ in range(self.config.play.max_processes * 2):
                futures.append(
                    executor.submit(self_play_buffer,
                                    self.config,
                                    cur=self.cur_pipes))
            game_idx = 0
            while True:
                game_idx += 1
                # The timer starts when we begin waiting on the oldest future,
                # so the printed value is the wait for this result.
                start_time = time()
                env, data = futures.popleft().result()
                print(f"game {game_idx:3} time={time() - start_time:5.1f}s "
                      f"halfmoves={env.num_halfmoves:3} {env.winner:12} "
                      f"{'by resign ' if env.resigned else '          '}")

                pretty_print(env, ("current_model", "current_model"))
                self.buffer += data
                if (game_idx % self.config.play_data.nb_game_in_file) == 0:
                    self.flush_buffer()
                    reload_best_model_weight_if_changed(self.current_model)
                futures.append(
                    executor.submit(self_play_buffer,
                                    self.config,
                                    cur=self.cur_pipes))  # Keep it going

        # Only reachable if the loop above is ever given an exit condition.
        # Check the buffer itself rather than the last game's `data`, which is
        # already merged into the buffer and may already have been flushed.
        # NOTE(review): assumes flush_buffer() empties self.buffer -- confirm.
        if self.buffer:
            self.flush_buffer()
예제 #2
0
    def evaluate_model(self, ng_model):
        """
        Given a model, evaluates it by playing a bunch of games against the current model.

        ``config.eval.game_num`` games are submitted to a process pool, with
        the current model playing white in every even-numbered game. Results
        are consumed as they complete, and the evaluation stops as soon as
        either threshold derived from ``config.eval.replace_rate`` is reached.
        Games that have not started by then are cancelled so the pool does not
        play them out after the verdict is known.

        :param ChessModel ng_model: model to evaluate
        :return: true iff this model is better than the current_model
            (False when no game was played at all)
        """
        ng_pipes = self.m.list([
            ng_model.get_pipes(self.play_config.search_threads)
            for _ in range(self.play_config.max_processes)
        ])

        game_num = self.config.eval.game_num
        replace_rate = self.config.eval.replace_rate

        futures = []
        with ProcessPoolExecutor(
                max_workers=self.play_config.max_processes) as executor:
            for game_idx in range(game_num):
                fut = executor.submit(play_game,
                                      self.config,
                                      cur=self.cur_pipes,
                                      ng=ng_pipes,
                                      current_white=(game_idx % 2 == 0))
                futures.append(fut)

            results = []
            for fut in as_completed(futures):
                # ng_score := if ng_model win -> 1, lose -> 0, draw -> 0.5
                ng_score, env, current_white = fut.result()
                results.append(ng_score)
                total_score = sum(results)
                win_rate = total_score / len(results)
                game_idx = len(results)
                logger.debug(
                    f"game {game_idx:3}: ng_score={ng_score:.1f} as {'black' if current_white else 'white'} "
                    f"{'by resign ' if env.resigned else '          '}"
                    f"win_rate={win_rate*100:5.1f}% "
                    f"{env.board.fen().split(' ')[0]}")

                colors = ("current_model", "ng_model")
                if not current_white:
                    # Keep this a tuple: reversed() would hand pretty_print a
                    # one-shot iterator instead of a sequence.
                    colors = colors[::-1]
                pretty_print(env, colors)

                # The give-up check runs before the win check, as before.
                if len(results) - total_score >= game_num * (1 - replace_rate):
                    logger.debug(
                        f"lose count reach {results.count(0)} so give up challenge"
                    )
                    verdict = False
                elif total_score >= game_num * replace_rate:
                    logger.debug(
                        f"win count reach {results.count(1)} so change best model"
                    )
                    verdict = True
                else:
                    continue

                # Leaving the `with` block waits for every outstanding future,
                # so drop the games that have not started yet. cancel() does
                # nothing for futures that are running or already finished.
                for pending in futures:
                    pending.cancel()
                return verdict

        if not results:
            # No games were played (game_num == 0), so there is no evidence
            # that the new model is better; also avoids dividing by zero.
            return False

        win_rate = sum(results) / len(results)
        logger.debug(f"winning rate {win_rate*100:.1f}%")
        return win_rate >= replace_rate
예제 #3
0
    def evaluate_model(self, ng_model):
        """
        Given a model, evaluates it by playing a bunch of games against the current model.

        ``config.eval.game_num`` games are submitted to a process pool, with the current model
        playing white in every even-numbered game. Results are consumed as they complete, and the
        evaluation stops as soon as either threshold derived from ``config.eval.replace_rate`` is
        reached. Games that have not started by then are cancelled so the pool does not play them
        out after the verdict is known.

        :param ChessModel ng_model: model to evaluate
        :return: true iff this model is better than the current_model (False when no game was played at all)
        """
        ng_pipes = self.m.list([ng_model.get_pipes(self.play_config.search_threads) for _ in range(self.play_config.max_processes)])

        game_num = self.config.eval.game_num
        replace_rate = self.config.eval.replace_rate

        futures = []
        with ProcessPoolExecutor(max_workers=self.play_config.max_processes) as executor:
            for game_idx in range(game_num):
                fut = executor.submit(play_game, self.config, cur=self.cur_pipes, ng=ng_pipes, current_white=(game_idx % 2 == 0))
                futures.append(fut)

            results = []
            for fut in as_completed(futures):
                # ng_score := if ng_model win -> 1, lose -> 0, draw -> 0.5
                ng_score, env, current_white = fut.result()
                results.append(ng_score)
                total_score = sum(results)
                win_rate = total_score / len(results)
                game_idx = len(results)
                logger.debug(f"game {game_idx:3}: ng_score={ng_score:.1f} as {'black' if current_white else 'white'} "
                             f"{'by resign ' if env.resigned else '          '}"
                             f"win_rate={win_rate*100:5.1f}% "
                             f"{env.board.fen().split(' ')[0]}")

                colors = ("current_model", "ng_model")
                if not current_white:
                    # Keep this a tuple: reversed() would hand pretty_print a one-shot iterator.
                    colors = colors[::-1]
                pretty_print(env, colors)

                # The give-up check runs before the win check, as before.
                if len(results)-total_score >= game_num * (1-replace_rate):
                    logger.debug(f"lose count reach {results.count(0)} so give up challenge")
                    verdict = False
                elif total_score >= game_num * replace_rate:
                    logger.debug(f"win count reach {results.count(1)} so change best model")
                    verdict = True
                else:
                    continue

                # Leaving the `with` block waits for every outstanding future, so drop the games
                # that have not started yet. cancel() does nothing for running or finished futures.
                for pending in futures:
                    pending.cancel()
                return verdict

        if not results:
            # No games were played (game_num == 0): no evidence for the new model, and no division by zero.
            return False

        win_rate = sum(results) / len(results)
        logger.debug(f"winning rate {win_rate*100:.1f}%")
        return win_rate >= replace_rate