def start(self):
    """
    Run self-play indefinitely and write the generated game data to file.

    A pool of ``config.play.max_processes`` workers is kept saturated with a
    queue of twice as many pending games. Each finished game's move data is
    appended to ``self.buffer``, which is flushed to disk every
    ``config.play_data.nb_game_in_file`` games; after each flush the current
    model's weights are reloaded if a newer "best" model has appeared.

    NOTE(review): the loop has no break condition, so this only returns via
    an exception / external interruption.
    """
    self.buffer = []
    futures = deque()
    with ProcessPoolExecutor(
            max_workers=self.config.play.max_processes) as executor:
        # Prime the queue with two games per worker so the pool never starves
        # while we consume results one at a time below.
        for game_idx in range(self.config.play.max_processes * 2):
            futures.append(
                executor.submit(self_play_buffer, self.config,
                                cur=self.cur_pipes))
        game_idx = 0
        while True:
            game_idx += 1
            start_time = time()
            # Consume the oldest pending game; blocks until it finishes, so
            # the elapsed time below is the wait for that result.
            env, data = futures.popleft().result()
            print(f"game {game_idx:3} time={time() - start_time:5.1f}s "
                  f"halfmoves={env.num_halfmoves:3} {env.winner:12} "
                  f"{'by resign ' if env.resigned else '           '}")
            pretty_print(env, ("current_model", "current_model"))
            self.buffer += data
            if (game_idx % self.config.play_data.nb_game_in_file) == 0:
                self.flush_buffer()
                reload_best_model_weight_if_changed(self.current_model)
            # Keep it going: replace the consumed game with a fresh submission.
            futures.append(
                executor.submit(self_play_buffer, self.config,
                                cur=self.cur_pipes))

    # BUG FIX: the original tested len(data) — only the *last* game's moves,
    # and unbound if the loop body never ran — instead of the accumulated
    # buffer. (Currently unreachable behind `while True`, but correct the
    # moment any exit path is added.)
    if self.buffer:
        self.flush_buffer()
def evaluate_model(self, ng_model):
    """
    Given a model, evaluates it by playing a bunch of games against the
    current model.

    Games are distributed over a process pool; colors alternate per game so
    each model plays both sides equally. The loop stops early once the
    outcome is mathematically decided either way.

    :param ChessModel ng_model: model to evaluate
    :return: true iff this model is better than the current_model
    """
    # One pipe pool per worker process for the challenger's predictions.
    # NOTE(review): self.m looks like a multiprocessing.Manager, making the
    # list shareable across the worker processes — confirm against __init__.
    ng_pipes = self.m.list([
        ng_model.get_pipes(self.play_config.search_threads)
        for _ in range(self.play_config.max_processes)
    ])
    futures = []
    with ProcessPoolExecutor(
            max_workers=self.play_config.max_processes) as executor:
        # Alternate colors: on even games the current model plays white,
        # so the challenger (ng) plays black.
        for game_idx in range(self.config.eval.game_num):
            fut = executor.submit(play_game,
                                  self.config,
                                  cur=self.cur_pipes,
                                  ng=ng_pipes,
                                  current_white=(game_idx % 2 == 0))
            futures.append(fut)
        results = []
        # Process games in completion order, not submission order.
        for fut in as_completed(futures):
            # ng_score := if ng_model win -> 1, lose -> 0, draw -> 0.5
            ng_score, env, current_white = fut.result()
            results.append(ng_score)
            win_rate = sum(results) / len(results)
            game_idx = len(results)
            logger.debug(
                f"game {game_idx:3}: ng_score={ng_score:.1f} as {'black' if current_white else 'white'} "
                f"{'by resign ' if env.resigned else '           '}"
                f"win_rate={win_rate*100:5.1f}% "
                f"{env.board.fen().split(' ')[0]}")
            # Order labels as (white player, black player) for the printout.
            colors = ("current_model", "ng_model")
            if not current_white:
                colors = reversed(colors)
            pretty_print(env, colors)
            # Early give-up: accumulated loss weight (draws count 0.5) means
            # the challenger can no longer reach the replace threshold.
            # NOTE(review): the logged results.count(0) counts only outright
            # losses, not the draw-weighted total used in the condition.
            if len(results) - sum(results) >= self.config.eval.game_num * (
                    1 - self.config.eval.replace_rate):
                logger.debug(
                    f"lose count reach {results.count(0)} so give up challenge"
                )
                return False
            # Early promote: the challenger has already secured the threshold.
            if sum(
                    results
            ) >= self.config.eval.game_num * self.config.eval.replace_rate:
                logger.debug(
                    f"win count reach {results.count(1)} so change best model"
                )
                return True
    # All games finished without an early decision: decide on the final rate.
    win_rate = sum(results) / len(results)
    logger.debug(f"winning rate {win_rate*100:.1f}%")
    return win_rate >= self.config.eval.replace_rate
def evaluate_model(self, ng_model):
    """
    Given a model, evaluates it by playing a bunch of games against the
    current model.

    Games are distributed over a process pool; colors alternate per game so
    each model plays both sides equally. The loop stops early once the
    outcome is mathematically decided either way, cancelling any games that
    have not started yet.

    :param ChessModel ng_model: model to evaluate
    :return: true iff this model is better than the current_model
    """
    # One pipe pool per worker process for the challenger's predictions.
    # NOTE(review): self.m looks like a multiprocessing.Manager, making the
    # list shareable across worker processes — confirm against __init__.
    ng_pipes = self.m.list([
        ng_model.get_pipes(self.play_config.search_threads)
        for _ in range(self.play_config.max_processes)
    ])

    game_num = self.config.eval.game_num
    replace_rate = self.config.eval.replace_rate
    futures = []
    with ProcessPoolExecutor(
            max_workers=self.play_config.max_processes) as executor:
        # Alternate colors: on even games the current model plays white,
        # so the challenger (ng) plays black.
        for game_idx in range(game_num):
            futures.append(
                executor.submit(play_game,
                                self.config,
                                cur=self.cur_pipes,
                                ng=ng_pipes,
                                current_white=(game_idx % 2 == 0)))

        results = []
        # Process games in completion order, not submission order.
        for fut in as_completed(futures):
            # ng_score := if ng_model win -> 1, lose -> 0, draw -> 0.5
            ng_score, env, current_white = fut.result()
            results.append(ng_score)
            win_rate = sum(results) / len(results)
            game_idx = len(results)
            logger.debug(
                f"game {game_idx:3}: ng_score={ng_score:.1f} as {'black' if current_white else 'white'} "
                f"{'by resign ' if env.resigned else '           '}"
                f"win_rate={win_rate*100:5.1f}% "
                f"{env.board.fen().split(' ')[0]}")
            # Order labels as (white player, black player) for the printout;
            # materialize reversed() so pretty_print gets a real sequence
            # rather than a one-shot iterator.
            colors = ("current_model", "ng_model")
            if not current_white:
                colors = tuple(reversed(colors))
            pretty_print(env, colors)

            # Early give-up: accumulated loss weight (draws count 0.5) means
            # the challenger can no longer reach the replace threshold.
            if len(results) - sum(results) >= game_num * (1 - replace_rate):
                logger.debug(
                    f"lose count reach {results.count(0)} so give up challenge"
                )
                # BUG FIX: without cancelling, the executor's implicit
                # shutdown(wait=True) on leaving the `with` block still waits
                # for every pending game, defeating the early exit. cancel()
                # is a harmless no-op on futures already running/finished.
                for pending in futures:
                    pending.cancel()
                return False
            # Early promote: the challenger has already secured the threshold.
            if sum(results) >= game_num * replace_rate:
                logger.debug(
                    f"win count reach {results.count(1)} so change best model"
                )
                for pending in futures:
                    pending.cancel()
                return True

    # BUG FIX: guard against game_num == 0, which previously raised
    # ZeroDivisionError below; with no evidence, keep the current model.
    if not results:
        logger.debug("no evaluation games played; keeping current model")
        return False
    # All games finished without an early decision: decide on the final rate.
    win_rate = sum(results) / len(results)
    logger.debug(f"winning rate {win_rate*100:.1f}%")
    return win_rate >= self.config.eval.replace_rate