def start(self):
    """Run the self-play loop: play games forever, reloading model weights
    between games and invalidating the shared MCTS cache when they change."""
    if self.model is None:
        self.model = self.load_model()

    self.buffer = []
    game_idx = 1
    mtcs_info = None

    while True:
        t0 = time()
        # Lazily (re)create the shared MCTS table when sharing is enabled.
        if mtcs_info is None and self.config.play.share_mtcs_info_in_self_play:
            mtcs_info = ReversiPlayer.create_mtcs_info()

        env = self.start_game(game_idx, mtcs_info)

        elapsed = time() - t0
        logger.debug(
            f"play game {game_idx} time={elapsed} sec, "
            f"turn={env.turn}:{env.board.number_of_black_and_white}:{env.winner}"
        )

        # Pick the reload strategy once, then apply it; both helpers return
        # a truthy value when the weights on disk changed.
        reload_weights = (
            reload_newest_next_generation_model_if_changed
            if self.config.play.use_newest_next_generation_model
            else reload_best_model_weight_if_changed
        )
        if reload_weights(self.model, clear_session=True):
            # New weights: cached MCTS statistics are stale, drop them.
            mtcs_info = None

        game_idx += 1
def start(self):
    """Run the self-play loop, resuming the game counter from disk and
    persisting it after every game.

    Compared with the basic loop: the game index is restored from
    ``self_play_game_idx_file`` (defaulting to 1), model reloads are
    retried until they succeed, and the shared MCTS info is reset every
    ``reset_mtcs_info_per_game`` games.
    """
    if self.model is None:
        self.model = self.load_model()

    self.buffer = []
    # Resume the global game counter from the previous run, if any.
    idx = self.read_as_int(self.config.resource.self_play_game_idx_file) or 1
    mtcs_info = None

    while True:
        start_time = time()
        if mtcs_info is None and self.config.play.share_mtcs_info_in_self_play:
            mtcs_info = ReversiPlayer.create_mtcs_info()

        env = self.start_game(idx, mtcs_info)

        end_time = time()
        logger.debug(f"play game {idx} time={end_time - start_time} sec, "
                     f"turn={env.turn}:{env.board.number_of_black_and_white}:{env.winner}")

        # Retry the reload until it succeeds: a transient failure (e.g. the
        # trainer is mid-write of the weight file) must not kill the worker.
        # NOTE(review): this retries with no backoff — it can spin hot if the
        # failure is permanent; consider a short sleep between attempts.
        while True:
            try:
                if self.config.play.use_newest_next_generation_model:
                    reload_newest_next_generation_model_if_changed(self.model, clear_session=True)
                else:
                    reload_best_model_weight_if_changed(self.model, clear_session=True)
                break
            except Exception as e:
                logger.error(e)

        # BUG FIX: guard against reset_mtcs_info_per_game being 0/None, which
        # previously raised ZeroDivisionError here. The worker variant
        # (_start) already guards the modulo the same way.
        if self.config.play.reset_mtcs_info_per_game and idx % self.config.play.reset_mtcs_info_per_game == 0:
            logger.debug("reset MTCS info")
            mtcs_info = None

        idx += 1
        # Persist the counter so the next run resumes where this one stopped.
        with open(self.config.resource.self_play_game_idx_file, "wt") as f:
            f.write(str(idx))
def _start(self):
    """Worker entry point: play self-play games forever, sharing a global
    game counter via ``self.shared_var`` and logging per-game stats to
    TensorBoard."""
    logger.debug("SelfPlayWorker#start()")
    np.random.seed(None)

    worker_name = f"worker{self.worker_index:03d}"
    log_dir = os.path.join(self.config.resource.self_play_log_dir, worker_name)
    self.tensor_board = TensorBoardLogger(log_dir)

    self.buffer = []
    mtcs_info = None
    games_played = 0  # per-worker counter, distinct from the shared game_idx

    while True:
        # Re-seed each iteration so forked workers do not share RNG streams.
        np.random.seed(None)
        games_played += 1
        game_idx = self.shared_var.game_idx
        began = time()

        if mtcs_info is None and self.config.play.share_mtcs_info_in_self_play:
            mtcs_info = ReversiPlayer.create_mtcs_info()

        # play game
        env = self.start_game(games_played, game_idx, mtcs_info)
        game_idx = self.shared_var.incr_game_idx()

        # just log
        elapsed = time() - began
        logger.debug(
            f"play game {game_idx} time={elapsed} sec, "
            f"turn={env.turn}:{env.board.number_of_black_and_white}:{env.winner}"
        )

        # log play info to tensor board
        scalars = {"self/time": elapsed, "self/turn": env.turn}
        if mtcs_info:
            scalars["self/mcts_buffer_size"] = len(mtcs_info.var_p)
        self.tensor_board.log_scaler(scalars, game_idx)

        # reset MCTS info per X games
        period = self.config.play.reset_mtcs_info_per_game
        if period and games_played % period == 0:
            logger.debug("reset MCTS info")
            mtcs_info = None

        with open(self.config.resource.self_play_game_idx_file, "wt") as f:
            f.write(str(game_idx))