class SelfPlayWorker:
    def __init__(self, config: Config, env=None, model=None):
        """

        :param config:
        :param ReversiEnv|None env:
        :param reversi_zero.agent.model.ReversiModel|None model:
        """
        self.config = config
        self.model = model
        self.env = env
        self.black = None  # type: ReversiPlayer
        self.white = None  # type: ReversiPlayer
        self.buffer = []
        self.false_positive_count_of_resign = 0
        self.resign_test_game_count = 0

    def start(self):
        if self.model is None:
            self.model = self.load_model()

        self.buffer = []
        idx = 1

        while True:
            start_time = time()
            env = self.start_game(idx)
            end_time = time()
            logger.debug(
                f"play game {idx} time={end_time - start_time} sec, "
                f"turn={env.turn}:{env.board.number_of_black_and_white}")
            if True or (idx % self.config.play_data.nb_game_in_file) == 0:
                if self.config.play.use_newest_next_generation_model:
                    reload_newest_next_generation_model_if_changed(self.model)
                else:
                    if reload_best_model_weight_if_changed(self.model):
                        self.reset_false_positive_count()

            idx += 1

    def start_game(self, idx):
        self.env.reset()
        enable_resign = self.config.play.disable_resignation_rate <= random()
        self.black = ReversiPlayer(self.config,
                                   self.model,
                                   enable_resign=enable_resign)
        self.white = ReversiPlayer(self.config,
                                   self.model,
                                   enable_resign=enable_resign)
        if not enable_resign:
            logger.debug("Resignation is disabled in the next game.")
        observation = self.env.observation  # type: Board
        while not self.env.done:
            # logger.debug(f"turn={self.env.turn}")
            if self.env.next_player == Player.black:
                action = self.black.action(observation.black,
                                           observation.white)
            else:
                action = self.white.action(observation.white,
                                           observation.black)
            observation, info = self.env.step(action)
        self.finish_game(resign_enabled=enable_resign)
        self.save_play_data(write=idx %
                            self.config.play_data.nb_game_in_file == 0)
        self.remove_play_data()
        return self.env

    def save_play_data(self, write=True):
        data = self.black.moves + self.white.moves
        self.buffer += data

        if not write:
            return

        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir,
                            rc.play_data_filename_tmpl % game_id)
        logger.info(f"save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        for i in range(len(files) - self.config.play_data.max_file_num):
            os.remove(files[i])

    def finish_game(self, resign_enabled=True):
        if self.env.winner == Winner.black:
            black_win = 1
            false_positive_of_resign = self.black.resigned
        elif self.env.winner == Winner.white:
            black_win = -1
            false_positive_of_resign = self.white.resigned
        else:
            black_win = 0
            false_positive_of_resign = self.black.resigned or self.white.resigned

        self.black.finish_game(black_win)
        self.white.finish_game(-black_win)

        if not resign_enabled:
            self.resign_test_game_count += 1
            if false_positive_of_resign:
                self.false_positive_count_of_resign += 1
                logger.debug("false positive of resignation happened")
            self.check_and_update_resignation_threshold()

    def load_model(self):
        from reversi_zero.agent.model import ReversiModel
        model = ReversiModel(self.config)
        loaded = False
        if not self.config.opts.new:
            if self.config.play.use_newest_next_generation_model:
                loaded = reload_newest_next_generation_model_if_changed(
                    model) or load_best_model_weight(model)
            else:
                loaded = load_best_model_weight(
                    model) or reload_newest_next_generation_model_if_changed(
                        model)

        if not loaded:
            model.build()
            save_as_best_model(model)
        return model

    def reset_false_positive_count(self):
        self.false_positive_count_of_resign = 0
        self.resign_test_game_count = 0

    @property
    def false_positive_rate(self):
        if self.resign_test_game_count == 0:
            return 0
        return self.false_positive_count_of_resign / self.resign_test_game_count

    def check_and_update_resignation_threshold(self):
        if self.resign_test_game_count < 100 or self.config.play.resign_threshold is None:
            return

        old_threshold = self.config.play.resign_threshold
        if self.false_positive_rate >= self.config.play.false_positive_threshold:
            self.config.play.resign_threshold -= self.config.play.resign_threshold_delta
        else:
            self.config.play.resign_threshold += self.config.play.resign_threshold_delta
        logger.debug(
            f"update resign_threshold: {old_threshold} -> {self.config.play.resign_threshold}"
        )
        self.reset_false_positive_count()
Beispiel #2
0
class SelfPlayWorker:
    def __init__(self, config: Config, env=None, model=None):
        """

        :param config:
        :param ReversiEnv|None env:
        :param reversi_zero.agent.model.ReversiModel|None model:
        """
        self.config = config
        self.model = model
        self.env = env
        self.black = None  # type: ReversiPlayer
        self.white = None  # type: ReversiPlayer
        self.buffer = []
        self.false_positive_count_of_resign = 0
        self.resign_test_game_count = 0

    def start(self):
        if self.model is None:
            self.model = self.load_model()

        self.buffer = []
        idx = self.read_as_int(
            self.config.resource.self_play_game_idx_file) or 1
        mtcs_info = None

        while True:
            start_time = time()
            if mtcs_info is None and self.config.play.share_mtcs_info_in_self_play:
                mtcs_info = ReversiPlayer.create_mtcs_info()
            env = self.start_game(idx, mtcs_info)
            end_time = time()
            logger.debug(
                f"play game {idx} time={end_time - start_time} sec, "
                f"turn={env.turn}:{env.board.number_of_black_and_white}:{env.winner}"
            )

            try:
                if self.config.play.use_newest_next_generation_model:
                    reload_newest_next_generation_model_if_changed(
                        self.model, clear_session=True)
                else:
                    reload_best_model_weight_if_changed(self.model,
                                                        clear_session=True)

            except Exception as e:
                logger.error(e)

            if idx % self.config.play.reset_mtcs_info_per_game == 0:
                logger.debug("reset MTCS info")
                mtcs_info = None

            idx += 1
            with open(self.config.resource.self_play_game_idx_file, "wt") as f:
                f.write(str(idx))

    def start_game(self, idx, mtcs_info):
        self.env.reset()
        enable_resign = self.config.play.disable_resignation_rate <= random()
        self.config.play.simulation_num_per_move = self.decide_simulation_num_per_move(
            idx)
        logger.debug(
            f"simulation_num_per_move = {self.config.play.simulation_num_per_move}"
        )
        self.black = ReversiPlayer(self.config,
                                   self.model,
                                   enable_resign=enable_resign,
                                   mtcs_info=mtcs_info)
        self.white = ReversiPlayer(self.config,
                                   self.model,
                                   enable_resign=enable_resign,
                                   mtcs_info=mtcs_info)
        if not enable_resign:
            logger.debug("Resignation is disabled in the next game.")
        observation = self.env.observation  # type: Board
        while not self.env.done:
            # logger.debug(f"turn={self.env.turn}")
            if self.env.next_player == Player.black:
                action = self.black.action(observation.black,
                                           observation.white)
            else:
                action = self.white.action(observation.white,
                                           observation.black)
            observation, info = self.env.step(action)
        self.finish_game(resign_enabled=enable_resign)
        self.save_play_data(write=idx %
                            self.config.play_data.nb_game_in_file == 0)
        self.remove_play_data()
        return self.env

    def save_play_data(self, write=True):
        data = self.black.moves + self.white.moves
        self.buffer += data

        if not write:
            return

        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir,
                            rc.play_data_filename_tmpl % game_id)
        logger.info(f"save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        for i in range(len(files) - self.config.play_data.max_file_num):
            os.remove(files[i])

    def finish_game(self, resign_enabled=True):
        if self.env.winner == Winner.black:
            black_win = 1
            false_positive_of_resign = self.black.resigned
        elif self.env.winner == Winner.white:
            black_win = -1
            false_positive_of_resign = self.white.resigned
        else:
            black_win = 0
            false_positive_of_resign = self.black.resigned or self.white.resigned

        self.black.finish_game(black_win)
        self.white.finish_game(-black_win)

        if not resign_enabled:
            self.resign_test_game_count += 1
            if false_positive_of_resign:
                self.false_positive_count_of_resign += 1
                logger.debug("false positive of resignation happened")
            self.check_and_update_resignation_threshold()

    def load_model(self):
        from reversi_zero.agent.model import ReversiModel
        model = ReversiModel(self.config)
        loaded = False
        if not self.config.opts.new:
            if self.config.play.use_newest_next_generation_model:
                loaded = reload_newest_next_generation_model_if_changed(
                    model) or load_best_model_weight(model)
            else:
                loaded = load_best_model_weight(
                    model) or reload_newest_next_generation_model_if_changed(
                        model)

        if not loaded:
            model.build()
            save_as_best_model(model)
        return model

    def reset_false_positive_count(self):
        self.false_positive_count_of_resign = 0
        self.resign_test_game_count = 0

    @property
    def false_positive_rate(self):
        if self.resign_test_game_count == 0:
            return 0
        return self.false_positive_count_of_resign / self.resign_test_game_count

    def check_and_update_resignation_threshold(self):
        if self.resign_test_game_count < 100 or self.config.play.resign_threshold is None:
            return

        old_threshold = self.config.play.resign_threshold
        if self.false_positive_rate >= self.config.play.false_positive_threshold:
            self.config.play.resign_threshold -= self.config.play.resign_threshold_delta
        else:
            self.config.play.resign_threshold += self.config.play.resign_threshold_delta
        logger.debug(
            f"update resign_threshold: {old_threshold} -> {self.config.play.resign_threshold}"
        )
        self.reset_false_positive_count()

    def decide_simulation_num_per_move(self, idx):
        ret = self.read_as_int(self.config.resource.force_simulation_num_file)

        if ret:
            logger.debug(f"loaded simulation num from file: {ret}")
            return ret

        for min_idx, num in self.config.play.schedule_of_simulation_num_per_move:
            if idx >= min_idx:
                ret = num
        return ret

    def read_as_int(self, filename):
        if os.path.exists(filename):
            try:
                with open(filename, "rt") as f:
                    ret = int(str(f.read()).strip())
                    if ret:
                        return ret
            except ValueError:
                pass
class SelfPlayWorker:
    def __init__(self, config: Config, env=None, model=None):
        """

        :param config:
        :param ReversiEnv|None env:
        :param reversi_zero.agent.model.ReversiModel|None model:
        """
        self.config = config
        self.model = model
        self.env = env
        self.black = None  # type: ReversiPlayer
        self.white = None  # type: ReversiPlayer
        self.buffer = []

    def start(self):
        if self.model is None:
            self.model = self.load_model()

        self.buffer = []
        idx = 1

        while True:
            start_time = time()
            self.start_game(idx)
            end_time = time()
            logger.debug(f"play game {idx} time={end_time - start_time} sec")
            if (idx % self.config.play_data.nb_game_in_file) == 0:
                reload_best_model_weight_if_changed(self.model)
            idx += 1

    def start_game(self, idx):
        self.env.reset()
        self.black = ReversiPlayer(self.config, self.model)
        self.white = ReversiPlayer(self.config, self.model)
        observation = self.env.observation  # type: Board
        while not self.env.done:
            # logger.debug(f"turn={self.env.turn}")
            if self.env.next_player == Player.black:
                action = self.black.action(observation.black, observation.white)
            else:
                action = self.white.action(observation.white, observation.black)
            observation, info = self.env.step(action)
        self.finish_game()
        self.save_play_data(write=idx % self.config.play_data.nb_game_in_file == 0)
        self.remove_play_data()

    def save_play_data(self, write=True):
        data = self.black.moves + self.white.moves
        self.buffer += data

        if not write:
            return

        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir, rc.play_data_filename_tmpl % game_id)
        logger.info(f"save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        files = get_game_data_filenames(self.config.resource)
        if len(files) < self.config.play_data.max_file_num:
            return
        for i in range(len(files) - self.config.play_data.max_file_num):
            os.remove(files[i])

    def finish_game(self):
        if self.env.winner == Winner.black:
            black_win = 1
        elif self.env.winner == Winner.white:
            black_win = -1
        else:
            black_win = 0

        self.black.finish_game(black_win)
        self.white.finish_game(-black_win)

        # black_num, white_num = self.env.board.number_of_black_and_white
        # self.env.message(f"black={black_num} white={white_num} winner={self.env.winner}")
        # self.env.render()

    def load_model(self):
        from reversi_zero.agent.model import ReversiModel
        model = ReversiModel(self.config)
        if self.config.opts.new or not load_best_model_weight(model):
            model.build()
            save_as_best_model(model)
        return model