def start_game(self, idx, mtcs_info):
    self.env.reset()
    enable_resign = self.config.play.disable_resignation_rate <= random()
    self.config.play.simulation_num_per_move = self.decide_simulation_num_per_move(idx)
    logger.debug(f"simulation_num_per_move = {self.config.play.simulation_num_per_move}")
    self.black = ReversiPlayer(self.config, self.model, enable_resign=enable_resign, mtcs_info=mtcs_info)
    self.white = ReversiPlayer(self.config, self.model, enable_resign=enable_resign, mtcs_info=mtcs_info)
    if not enable_resign:
        logger.debug("Resignation is disabled in the next game.")
    observation = self.env.observation  # type: Board
    while not self.env.done:
        # logger.debug(f"turn={self.env.turn}")
        if self.env.next_player == Player.black:
            action = self.black.action(observation.black, observation.white)
        else:
            action = self.white.action(observation.white, observation.black)
        observation, info = self.env.step(action)
    self.finish_game(resign_enabled=enable_resign)
    self.save_play_data(write=idx % self.config.play_data.nb_game_in_file == 0)
    self.remove_play_data()
    return self.env
def play_game(self, best_model, ng_model):
    env = ReversiEnv().reset()
    best_player = ReversiPlayer(self.config, best_model, play_config=self.config.eval.play_config)
    ng_player = ReversiPlayer(self.config, ng_model, play_config=self.config.eval.play_config)
    best_is_black = random() < 0.5
    if best_is_black:
        black, white = best_player, ng_player
    else:
        black, white = ng_player, best_player

    observation = env.observation
    while not env.done:
        if env.next_player == Player.black:
            action = black.action(observation.black, observation.white)
        else:
            action = white.action(observation.white, observation.black)
        observation, info = env.step(action)

    # ng_win: 1 = next-generation model won, 0 = best model won, None = draw
    ng_win = None
    if env.winner == Winner.black:
        ng_win = 0 if best_is_black else 1
    elif env.winner == Winner.white:
        ng_win = 1 if best_is_black else 0
    return ng_win, best_is_black, observation.number_of_black_and_white
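# A minimal sketch of how play_game's return values might drive an evaluation
# loop. `evaluate_ng_model` and `game_num` are hypothetical names, not from the
# source; the real evaluation worker may differ.
def evaluate_ng_model(worker, best_model, ng_model, game_num=50):
    wins = []
    for _ in range(game_num):
        ng_win, best_is_black, piece_counts = worker.play_game(best_model, ng_model)
        if ng_win is not None:  # draws (ng_win is None) are excluded
            wins.append(ng_win)
    return sum(wins) / len(wins) if wins else 0  # challenger win rate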
def start_game(self, idx):
    self.env.reset()
    self.black = ReversiPlayer(self.config, self.model)
    self.white = ReversiPlayer(self.config, self.model)
    observation = self.env.observation  # type: Board
    while not self.env.done:
        # logger.debug(f"turn={self.env.turn}")
        if self.env.next_player == Player.black:
            action = self.black.action(observation.black, observation.white)
        else:
            action = self.white.action(observation.white, observation.black)
        observation, info = self.env.step(action)
    self.finish_game()
    self.save_play_data(write=idx % self.config.play_data.nb_game_in_file == 0)
    self.remove_play_data()
def start(self):
    if self.model is None:
        self.model = self.load_model()
    self.buffer = []
    idx = 1
    mtcs_info = None

    while True:
        start_time = time()
        if mtcs_info is None and self.config.play.share_mtcs_info_in_self_play:
            mtcs_info = ReversiPlayer.create_mtcs_info()
        env = self.start_game(idx, mtcs_info)
        end_time = time()
        logger.debug(f"play game {idx} time={end_time - start_time} sec, "
                     f"turn={env.turn}:{env.board.number_of_black_and_white}:{env.winner}")
        if self.config.play.use_newest_next_generation_model:
            model_changed = reload_newest_next_generation_model_if_changed(self.model, clear_session=True)
        else:
            model_changed = reload_best_model_weight_if_changed(self.model, clear_session=True)
        if model_changed:
            mtcs_info = None  # shared MCTS statistics are stale after a weight reload
        idx += 1
def start(self):
    if self.model is None:
        self.model = self.load_model()
    self.buffer = []
    idx = self.read_as_int(self.config.resource.self_play_game_idx_file) or 1
    mtcs_info = None

    while True:
        start_time = time()
        if mtcs_info is None and self.config.play.share_mtcs_info_in_self_play:
            mtcs_info = ReversiPlayer.create_mtcs_info()
        env = self.start_game(idx, mtcs_info)
        end_time = time()
        logger.debug(f"play game {idx} time={end_time - start_time} sec, "
                     f"turn={env.turn}:{env.board.number_of_black_and_white}:{env.winner}")

        # retry the model reload until it succeeds
        while True:
            try:
                if self.config.play.use_newest_next_generation_model:
                    reload_newest_next_generation_model_if_changed(self.model, clear_session=True)
                else:
                    reload_best_model_weight_if_changed(self.model, clear_session=True)
                break
            except Exception as e:
                logger.error(e)

        if idx % self.config.play.reset_mtcs_info_per_game == 0:
            logger.debug("reset MTCS info")
            mtcs_info = None
        idx += 1
        with open(self.config.resource.self_play_game_idx_file, "wt") as f:
            f.write(str(idx))
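# Illustration of the game-index persistence above (file contents hypothetical):
# read_as_int returns None for a missing or non-integer file, so `or 1` falls
# back to a fresh start.
#   self_play_game_idx_file contains "42"  ->  idx resumes at 42
#   file missing or unreadable             ->  idx starts at 1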
def _start(self):
    logger.debug("SelfPlayWorker#start()")
    np.random.seed(None)
    worker_name = f"worker{self.worker_index:03d}"
    self.tensor_board = TensorBoardLogger(os.path.join(self.config.resource.self_play_log_dir, worker_name))
    self.buffer = []
    mtcs_info = None
    local_idx = 0

    while True:
        np.random.seed(None)
        local_idx += 1
        game_idx = self.shared_var.game_idx
        start_time = time()
        if mtcs_info is None and self.config.play.share_mtcs_info_in_self_play:
            mtcs_info = ReversiPlayer.create_mtcs_info()

        # play game
        env = self.start_game(local_idx, game_idx, mtcs_info)
        game_idx = self.shared_var.incr_game_idx()

        # just log
        end_time = time()
        time_spent = end_time - start_time
        logger.debug(f"play game {game_idx} time={time_spent} sec, "
                     f"turn={env.turn}:{env.board.number_of_black_and_white}:{env.winner}")

        # log play info to tensor board
        prefix = "self"
        log_info = {f"{prefix}/time": time_spent, f"{prefix}/turn": env.turn}
        if mtcs_info:
            log_info[f"{prefix}/mcts_buffer_size"] = len(mtcs_info.var_p)
        self.tensor_board.log_scaler(log_info, game_idx)

        # reset MCTS info per X games
        if self.config.play.reset_mtcs_info_per_game and local_idx % self.config.play.reset_mtcs_info_per_game == 0:
            logger.debug("reset MCTS info")
            mtcs_info = None

        with open(self.config.resource.self_play_game_idx_file, "wt") as f:
            f.write(str(game_idx))
def create_player(self):
    logger.debug("create new ReversiPlayer()")
    return ReversiPlayer(self.config, self.model, self.play_config, enable_resign=False)
def create_reversi_player(self, enable_resign=None, mtcs_info=None):
    # no local model; the player queries self.api for predictions
    return ReversiPlayer(self.config, None, enable_resign=enable_resign, mtcs_info=mtcs_info, api=self.api)
class SelfPlayWorker:
    def __init__(self, config: Config, env=None, model=None):
        """
        :param config:
        :param ReversiEnv|None env:
        :param reversi_zero.agent.model.ReversiModel|None model:
        """
        self.config = config
        self.model = model
        self.env = env
        self.black = None  # type: ReversiPlayer
        self.white = None  # type: ReversiPlayer
        self.buffer = []
        self.false_positive_count_of_resign = 0
        self.resign_test_game_count = 0

    def start(self):
        if self.model is None:
            self.model = self.load_model()
        self.buffer = []
        idx = self.read_as_int(self.config.resource.self_play_game_idx_file) or 1
        mtcs_info = None

        while True:
            start_time = time()
            if mtcs_info is None and self.config.play.share_mtcs_info_in_self_play:
                mtcs_info = ReversiPlayer.create_mtcs_info()
            env = self.start_game(idx, mtcs_info)
            end_time = time()
            logger.debug(f"play game {idx} time={end_time - start_time} sec, "
                         f"turn={env.turn}:{env.board.number_of_black_and_white}:{env.winner}")
            try:
                if self.config.play.use_newest_next_generation_model:
                    reload_newest_next_generation_model_if_changed(self.model, clear_session=True)
                else:
                    reload_best_model_weight_if_changed(self.model, clear_session=True)
            except Exception as e:
                logger.error(e)
            if idx % self.config.play.reset_mtcs_info_per_game == 0:
                logger.debug("reset MTCS info")
                mtcs_info = None
            idx += 1
            with open(self.config.resource.self_play_game_idx_file, "wt") as f:
                f.write(str(idx))

    def start_game(self, idx, mtcs_info):
        self.env.reset()
        enable_resign = self.config.play.disable_resignation_rate <= random()
        self.config.play.simulation_num_per_move = self.decide_simulation_num_per_move(idx)
        logger.debug(f"simulation_num_per_move = {self.config.play.simulation_num_per_move}")
        self.black = ReversiPlayer(self.config, self.model, enable_resign=enable_resign, mtcs_info=mtcs_info)
        self.white = ReversiPlayer(self.config, self.model, enable_resign=enable_resign, mtcs_info=mtcs_info)
        if not enable_resign:
            logger.debug("Resignation is disabled in the next game.")
        observation = self.env.observation  # type: Board
        while not self.env.done:
            # logger.debug(f"turn={self.env.turn}")
            if self.env.next_player == Player.black:
                action = self.black.action(observation.black, observation.white)
            else:
                action = self.white.action(observation.white, observation.black)
            observation, info = self.env.step(action)
        self.finish_game(resign_enabled=enable_resign)
        self.save_play_data(write=idx % self.config.play_data.nb_game_in_file == 0)
        self.remove_play_data()
        return self.env

    def save_play_data(self, write=True):
        data = self.black.moves + self.white.moves
        self.buffer += data
        if not write:
            return
        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir, rc.play_data_filename_tmpl % game_id)
        logger.info(f"save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        for i in range(len(files) - self.config.play_data.max_file_num):
            os.remove(files[i])

    def finish_game(self, resign_enabled=True):
        if self.env.winner == Winner.black:
            black_win = 1
            false_positive_of_resign = self.black.resigned
        elif self.env.winner == Winner.white:
            black_win = -1
            false_positive_of_resign = self.white.resigned
        else:
            black_win = 0
            false_positive_of_resign = self.black.resigned or self.white.resigned
        self.black.finish_game(black_win)
        self.white.finish_game(-black_win)

        if not resign_enabled:
            self.resign_test_game_count += 1
            if false_positive_of_resign:
                self.false_positive_count_of_resign += 1
                logger.debug("false positive of resignation happened")
            self.check_and_update_resignation_threshold()

    def load_model(self):
        from reversi_zero.agent.model import ReversiModel
        model = ReversiModel(self.config)
        loaded = False
        if not self.config.opts.new:
            if self.config.play.use_newest_next_generation_model:
                loaded = reload_newest_next_generation_model_if_changed(model) or load_best_model_weight(model)
            else:
                loaded = load_best_model_weight(model) or reload_newest_next_generation_model_if_changed(model)
        if not loaded:
            model.build()
            save_as_best_model(model)
        return model

    def reset_false_positive_count(self):
        self.false_positive_count_of_resign = 0
        self.resign_test_game_count = 0

    @property
    def false_positive_rate(self):
        if self.resign_test_game_count == 0:
            return 0
        return self.false_positive_count_of_resign / self.resign_test_game_count

    def check_and_update_resignation_threshold(self):
        if self.resign_test_game_count < 100 or self.config.play.resign_threshold is None:
            return
        old_threshold = self.config.play.resign_threshold
        if self.false_positive_rate >= self.config.play.false_positive_threshold:
            self.config.play.resign_threshold -= self.config.play.resign_threshold_delta
        else:
            self.config.play.resign_threshold += self.config.play.resign_threshold_delta
        logger.debug(f"update resign_threshold: {old_threshold} -> {self.config.play.resign_threshold}")
        self.reset_false_positive_count()

    def decide_simulation_num_per_move(self, idx):
        ret = self.read_as_int(self.config.resource.force_simulation_num_file)
        if ret:
            logger.debug(f"loaded simulation num from file: {ret}")
            return ret
        for min_idx, num in self.config.play.schedule_of_simulation_num_per_move:
            if idx >= min_idx:
                ret = num
        return ret

    def read_as_int(self, filename):
        if os.path.exists(filename):
            try:
                with open(filename, "rt") as f:
                    ret = int(str(f.read()).strip())
                    if ret:
                        return ret
            except ValueError:
                pass
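# A minimal launch sketch, assuming the names used above (Config, ReversiEnv)
# are importable. `run_self_play` is a hypothetical helper, not from the source.
def run_self_play(config):
    env = ReversiEnv().reset()
    worker = SelfPlayWorker(config, env=env)
    worker.start()  # loads (or creates) a model, then self-plays indefinitely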
def start_game(self, human_is_black):
    self.human_color = Player.black if human_is_black else Player.white
    self.env = ReversiEnv().reset()
    self.ai = ReversiPlayer(self.config, self.model)
class PlayWithHuman:
    def __init__(self, config: Config):
        self.config = config
        self.human_color = None
        self.observers = []
        self.env = ReversiEnv().reset()
        self.model = self._load_model()
        self.ai = None  # type: ReversiPlayer
        self.last_evaluation = None
        self.last_history = None  # type: HistoryItem

    def add_observer(self, observer_func):
        self.observers.append(observer_func)

    def notify_all(self, event):
        for ob_func in self.observers:
            ob_func(event)

    def start_game(self, human_is_black):
        self.human_color = Player.black if human_is_black else Player.white
        self.env = ReversiEnv().reset()
        self.ai = ReversiPlayer(self.config, self.model)

    def play_next_turn(self):
        self.notify_all(GameEvent.update)
        if self.over:
            self.notify_all(GameEvent.over)
            return
        if self.next_player != self.human_color:
            self.notify_all(GameEvent.ai_move)

    @property
    def over(self):
        return self.env.done

    @property
    def next_player(self):
        return self.env.next_player

    def stone(self, px, py):
        """left top=(0, 0), right bottom=(14, 14)"""
        pos = int(py * 15 + px)
        assert 0 <= pos < 225
        bit = 1 << pos
        if self.env.board.black & bit:
            return Player.black
        elif self.env.board.white & bit:
            return Player.white
        return None

    @property
    def number_of_black_and_white(self):
        return self.env.observation.number_of_black_and_white

    def available(self, px, py):
        pos = int(py * 15 + px)
        if pos < 0 or 225 <= pos:
            return False
        own, enemy = self.env.board.black, self.env.board.white
        if self.human_color == Player.white:
            own, enemy = enemy, own
        legal_moves = find_correct_moves(own, enemy)
        return legal_moves & (1 << pos)

    def move(self, px, py):
        pos = int(py * 15 + px)
        assert 0 <= pos < 225
        if self.next_player != self.human_color:
            return False
        self.env.step(pos)

    def _load_model(self):
        return load_model(self.config)

    def move_by_ai(self):
        if self.next_player == self.human_color:
            return False
        own, enemy = self.get_state_of_next_player()
        action = self.ai.action(own, enemy)
        self.env.step(action)
        self.last_history = self.ai.ask_thought_about(own, enemy)
        self.last_evaluation = self.last_history.values[self.last_history.action]
        logger.debug(f"evaluation by ai={self.last_evaluation}")

    def get_state_of_next_player(self):
        if self.next_player == Player.black:
            own, enemy = self.env.board.black, self.env.board.white
        else:
            own, enemy = self.env.board.white, self.env.board.black
        return own, enemy
class SelfPlayWorker:
    def __init__(self, config: Config, env=None, model=None):
        """
        :param config:
        :param ReversiEnv|None env:
        :param reversi_zero.agent.model.ReversiModel|None model:
        """
        self.config = config
        self.model = model
        self.env = env
        self.black = None  # type: ReversiPlayer
        self.white = None  # type: ReversiPlayer
        self.buffer = []
        self.false_positive_count_of_resign = 0
        self.resign_test_game_count = 0

    def start(self):
        if self.model is None:
            self.model = self.load_model()
        self.buffer = []
        idx = 1

        while True:
            start_time = time()
            env = self.start_game(idx)
            end_time = time()
            logger.debug(f"play game {idx} time={end_time - start_time} sec, "
                         f"turn={env.turn}:{env.board.number_of_black_and_white}")
            # NOTE: `True or ...` makes this condition always true,
            # so the model is reloaded after every game.
            if True or (idx % self.config.play_data.nb_game_in_file) == 0:
                if self.config.play.use_newest_next_generation_model:
                    reload_newest_next_generation_model_if_changed(self.model)
                else:
                    if reload_best_model_weight_if_changed(self.model):
                        self.reset_false_positive_count()
            idx += 1

    def start_game(self, idx):
        self.env.reset()
        enable_resign = self.config.play.disable_resignation_rate <= random()
        self.black = ReversiPlayer(self.config, self.model, enable_resign=enable_resign)
        self.white = ReversiPlayer(self.config, self.model, enable_resign=enable_resign)
        if not enable_resign:
            logger.debug("Resignation is disabled in the next game.")
        observation = self.env.observation  # type: Board
        while not self.env.done:
            # logger.debug(f"turn={self.env.turn}")
            if self.env.next_player == Player.black:
                action = self.black.action(observation.black, observation.white)
            else:
                action = self.white.action(observation.white, observation.black)
            observation, info = self.env.step(action)
        self.finish_game(resign_enabled=enable_resign)
        self.save_play_data(write=idx % self.config.play_data.nb_game_in_file == 0)
        self.remove_play_data()
        return self.env

    def save_play_data(self, write=True):
        data = self.black.moves + self.white.moves
        self.buffer += data
        if not write:
            return
        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir, rc.play_data_filename_tmpl % game_id)
        logger.info(f"save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        for i in range(len(files) - self.config.play_data.max_file_num):
            os.remove(files[i])

    def finish_game(self, resign_enabled=True):
        if self.env.winner == Winner.black:
            black_win = 1
            false_positive_of_resign = self.black.resigned
        elif self.env.winner == Winner.white:
            black_win = -1
            false_positive_of_resign = self.white.resigned
        else:
            black_win = 0
            false_positive_of_resign = self.black.resigned or self.white.resigned
        self.black.finish_game(black_win)
        self.white.finish_game(-black_win)

        if not resign_enabled:
            self.resign_test_game_count += 1
            if false_positive_of_resign:
                self.false_positive_count_of_resign += 1
                logger.debug("false positive of resignation happened")
            self.check_and_update_resignation_threshold()

    def load_model(self):
        from reversi_zero.agent.model import ReversiModel
        model = ReversiModel(self.config)
        loaded = False
        if not self.config.opts.new:
            if self.config.play.use_newest_next_generation_model:
                loaded = reload_newest_next_generation_model_if_changed(model) or load_best_model_weight(model)
            else:
                loaded = load_best_model_weight(model) or reload_newest_next_generation_model_if_changed(model)
        if not loaded:
            model.build()
            save_as_best_model(model)
        return model

    def reset_false_positive_count(self):
        self.false_positive_count_of_resign = 0
        self.resign_test_game_count = 0

    @property
    def false_positive_rate(self):
        if self.resign_test_game_count == 0:
            return 0
        return self.false_positive_count_of_resign / self.resign_test_game_count

    def check_and_update_resignation_threshold(self):
        if self.resign_test_game_count < 100 or self.config.play.resign_threshold is None:
            return
        old_threshold = self.config.play.resign_threshold
        if self.false_positive_rate >= self.config.play.false_positive_threshold:
            self.config.play.resign_threshold -= self.config.play.resign_threshold_delta
        else:
            self.config.play.resign_threshold += self.config.play.resign_threshold_delta
        logger.debug(f"update resign_threshold: {old_threshold} -> {self.config.play.resign_threshold}")
        self.reset_false_positive_count()
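# Worked example of check_and_update_resignation_threshold (values hypothetical:
# resign_threshold=-0.9, resign_threshold_delta=0.01, false_positive_threshold=0.05).
# After 100 resignation-disabled test games:
#   7 false positives -> rate 0.07 >= 0.05 -> threshold -0.9 - 0.01 = -0.91
#     (more negative: players resign less eagerly)
#   3 false positives -> rate 0.03 <  0.05 -> threshold -0.9 + 0.01 = -0.89
#     (less negative: players resign more eagerly)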
class SelfPlayWorker:
    def __init__(self, config: Config, env=None, model=None):
        """
        :param config:
        :param ReversiEnv|None env:
        :param reversi_zero.agent.model.ReversiModel|None model:
        """
        self.config = config
        self.model = model
        self.env = env
        self.black = None  # type: ReversiPlayer
        self.white = None  # type: ReversiPlayer
        self.buffer = []

    def start(self):
        if self.model is None:
            self.model = self.load_model()
        self.buffer = []
        idx = 1

        while True:
            start_time = time()
            self.start_game(idx)
            end_time = time()
            logger.debug(f"play game {idx} time={end_time - start_time} sec")
            if (idx % self.config.play_data.nb_game_in_file) == 0:
                reload_best_model_weight_if_changed(self.model)
            idx += 1

    def start_game(self, idx):
        self.env.reset()
        self.black = ReversiPlayer(self.config, self.model)
        self.white = ReversiPlayer(self.config, self.model)
        observation = self.env.observation  # type: Board
        while not self.env.done:
            # logger.debug(f"turn={self.env.turn}")
            if self.env.next_player == Player.black:
                action = self.black.action(observation.black, observation.white)
            else:
                action = self.white.action(observation.white, observation.black)
            observation, info = self.env.step(action)
        self.finish_game()
        self.save_play_data(write=idx % self.config.play_data.nb_game_in_file == 0)
        self.remove_play_data()

    def save_play_data(self, write=True):
        data = self.black.moves + self.white.moves
        self.buffer += data
        if not write:
            return
        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir, rc.play_data_filename_tmpl % game_id)
        logger.info(f"save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        for i in range(len(files) - self.config.play_data.max_file_num):
            os.remove(files[i])

    def finish_game(self):
        if self.env.winner == Winner.black:
            black_win = 1
        elif self.env.winner == Winner.white:
            black_win = -1
        else:
            black_win = 0
        self.black.finish_game(black_win)
        self.white.finish_game(-black_win)
        # black_num, white_num = self.env.board.number_of_black_and_white
        # self.env.message(f"black={black_num} white={white_num} winner={self.env.winner}")
        # self.env.render()

    def load_model(self):
        from reversi_zero.agent.model import ReversiModel
        model = ReversiModel(self.config)
        if self.config.opts.new or not load_best_model_weight(model):
            model.build()
            save_as_best_model(model)
        return model
class PlayWithHuman:
    def __init__(self, config: Config):
        self.config = config
        self.human_color = None
        self.observers = []
        self.env = ReversiEnv().reset()
        self.model = self._load_model()
        self.ai = None  # type: ReversiPlayer
        self.last_evaluation = None
        self.last_history = None  # type: HistoryItem

    def add_observer(self, observer_func):
        self.observers.append(observer_func)

    def notify_all(self, event):
        for ob_func in self.observers:
            ob_func(event)

    def start_game(self, human_is_black):
        self.human_color = Player.black if human_is_black else Player.white
        self.env = ReversiEnv().reset()
        self.ai = ReversiPlayer(self.config, self.model)

    def play_next_turn(self):
        self.notify_all(GameEvent.update)
        if self.over:
            self.notify_all(GameEvent.over)
            return
        if self.next_player != self.human_color:
            self.notify_all(GameEvent.ai_move)

    @property
    def over(self):
        return self.env.done

    @property
    def next_player(self):
        return self.env.next_player

    def stone(self, px, py):
        """left top=(0, 0), right bottom=(7, 7)"""
        pos = int(py * 8 + px)
        assert 0 <= pos < 64
        bit = 1 << pos
        if self.env.board.black & bit:
            return Player.black
        elif self.env.board.white & bit:
            return Player.white
        return None

    @property
    def number_of_black_and_white(self):
        return self.env.observation.number_of_black_and_white

    def available(self, px, py):
        pos = int(py * 8 + px)
        if pos < 0 or 64 <= pos:
            return False
        own, enemy = self.env.board.black, self.env.board.white
        if self.human_color == Player.white:
            own, enemy = enemy, own
        legal_moves = find_correct_moves(own, enemy)
        return legal_moves & (1 << pos)

    def move(self, px, py):
        pos = int(py * 8 + px)
        assert 0 <= pos < 64
        if self.next_player != self.human_color:
            return False
        self.env.step(pos)

    def _load_model(self):
        from reversi_zero.agent.model import ReversiModel
        model = ReversiModel(self.config)
        if self.config.play.use_newest_next_generation_model:
            loaded = reload_newest_next_generation_model_if_changed(model) or load_best_model_weight(model)
        else:
            loaded = load_best_model_weight(model) or reload_newest_next_generation_model_if_changed(model)
        if not loaded:
            raise RuntimeError("No models found!")
        return model

    def move_by_ai(self):
        if self.next_player == self.human_color:
            return False
        own, enemy = self.get_state_of_next_player()
        action = self.ai.action(own, enemy)
        self.env.step(action)
        self.last_history = self.ai.ask_thought_about(own, enemy)
        self.last_evaluation = self.last_history.values[self.last_history.action]
        logger.debug(f"evaluation by ai={self.last_evaluation}")

    def get_state_of_next_player(self):
        if self.next_player == Player.black:
            own, enemy = self.env.board.black, self.env.board.white
        else:
            own, enemy = self.env.board.white, self.env.board.black
        return own, enemy
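# Worked example of the bitboard mapping used by stone()/available() on the
# 8x8 board: square (px, py) = (2, 3) -> pos = 3 * 8 + 2 = 26, bit = 1 << 26.
# (board.black & bit) != 0 means a black stone occupies that square, and
# find_correct_moves(own, enemy) & bit != 0 means the square is a legal move.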