예제 #1
0
 def start_game(self, idx, mtcs_info):
     """
     Play one self-play game from reset to finish and return the environment.

     :param idx: game index; selects the simulation count and decides whether
         the buffered play data is written out after this game
     :param mtcs_info: search info object handed to both players (may be None)
     :return: ``self.env`` after the game has ended
     """
     self.env.reset()
     play_conf = self.config.play
     # Resignation is switched off for this game when the random draw exceeds
     # the configured disable rate.
     enable_resign = play_conf.disable_resignation_rate <= random()
     play_conf.simulation_num_per_move = self.decide_simulation_num_per_move(idx)
     logger.debug(
         f"simulation_num_per_move = {play_conf.simulation_num_per_move}")

     # Both colours are driven by the same model and the same search info.
     player_options = dict(enable_resign=enable_resign, mtcs_info=mtcs_info)
     self.black = ReversiPlayer(self.config, self.model, **player_options)
     self.white = ReversiPlayer(self.config, self.model, **player_options)
     if not enable_resign:
         logger.debug("Resignation is disabled in the next game.")

     board = self.env.observation  # type: Board
     while not self.env.done:
         # Each player receives the position as (own stones, enemy stones).
         if self.env.next_player == Player.black:
             mover, own, enemy = self.black, board.black, board.white
         else:
             mover, own, enemy = self.white, board.white, board.black
         board, _ = self.env.step(mover.action(own, enemy))

     self.finish_game(resign_enabled=enable_resign)
     flush_to_file = idx % self.config.play_data.nb_game_in_file == 0
     self.save_play_data(write=flush_to_file)
     self.remove_play_data()
     return self.env
예제 #2
0
    def play_game(self, best_model, ng_model):
        """
        Play one evaluation game between the best model and the next-generation model.

        Colours are assigned at random.

        :param best_model: model currently regarded as best
        :param ng_model: next-generation (challenger) model
        :return: tuple ``(ng_win, best_is_black, stone_counts)`` where ``ng_win``
            is 1 if the challenger won, 0 if the best model won and None
            otherwise, and ``stone_counts`` is the final
            ``number_of_black_and_white`` of the board
        """
        env = ReversiEnv().reset()
        eval_play_config = self.config.eval.play_config

        best_player = ReversiPlayer(self.config, best_model,
                                    play_config=eval_play_config)
        ng_player = ReversiPlayer(self.config, ng_model,
                                  play_config=eval_play_config)

        best_is_black = random() < 0.5
        black, white = ((best_player, ng_player) if best_is_black
                        else (ng_player, best_player))

        board = env.observation
        while not env.done:
            # Each player receives the position as (own stones, enemy stones).
            if env.next_player == Player.black:
                mover, own, enemy = black, board.black, board.white
            else:
                mover, own, enemy = white, board.white, board.black
            board, _ = env.step(mover.action(own, enemy))

        # Translate the winning colour into "did the challenger win?".
        if env.winner == Winner.black:
            ng_win = 0 if best_is_black else 1
        elif env.winner == Winner.white:
            ng_win = 1 if best_is_black else 0
        else:
            ng_win = None
        return ng_win, best_is_black, board.number_of_black_and_white
예제 #3
0
 def start_game(self, idx):
     """
     Play one self-play game from reset to finish, then store its play data.

     :param idx: game index; the buffered play data is written to a file when
         it is a multiple of ``config.play_data.nb_game_in_file``
     """
     self.env.reset()
     self.black = ReversiPlayer(self.config, self.model)
     self.white = ReversiPlayer(self.config, self.model)

     board = self.env.observation  # type: Board
     while not self.env.done:
         # Each player receives the position as (own stones, enemy stones).
         if self.env.next_player == Player.black:
             mover, own, enemy = self.black, board.black, board.white
         else:
             mover, own, enemy = self.white, board.white, board.black
         board, _ = self.env.step(mover.action(own, enemy))

     self.finish_game()
     flush_to_file = idx % self.config.play_data.nb_game_in_file == 0
     self.save_play_data(write=flush_to_file)
     self.remove_play_data()
예제 #4
0
    def start(self):
        """
        Run self-play games in an endless loop; this method never returns.

        After every game the model weights are reloaded if they changed, and
        the shared search info is dropped whenever a reload happened.
        """
        if self.model is None:
            self.model = self.load_model()

        self.buffer = []
        mtcs_info = None
        idx = 1

        while True:
            began = time()
            # Lazily (re)create the shared search info when sharing is enabled.
            if mtcs_info is None and self.config.play.share_mtcs_info_in_self_play:
                mtcs_info = ReversiPlayer.create_mtcs_info()
            env = self.start_game(idx, mtcs_info)
            elapsed = time() - began
            logger.debug(
                f"play game {idx} time={elapsed} sec, "
                f"turn={env.turn}:{env.board.number_of_black_and_white}:{env.winner}"
            )

            reload_if_changed = (
                reload_newest_next_generation_model_if_changed
                if self.config.play.use_newest_next_generation_model
                else reload_best_model_weight_if_changed)
            if reload_if_changed(self.model, clear_session=True):
                # Search info gathered with the previous weights is discarded.
                mtcs_info = None

            idx += 1
예제 #5
0
    def start(self):
        """
        Run self-play games in an endless loop; this method never returns.

        The game index is resumed from ``config.resource.self_play_game_idx_file``
        (falling back to 1) and written back to that file after every game.
        """
        if self.model is None:
            self.model = self.load_model()

        self.buffer = []
        idx = self.read_as_int(self.config.resource.self_play_game_idx_file) or 1
        mtcs_info = None

        while True:
            start_time = time()
            # Lazily (re)create the shared search info when sharing is enabled.
            if mtcs_info is None and self.config.play.share_mtcs_info_in_self_play:
                mtcs_info = ReversiPlayer.create_mtcs_info()
            env = self.start_game(idx, mtcs_info)
            end_time = time()
            logger.debug(f"play game {idx} time={end_time - start_time} sec, "
                         f"turn={env.turn}:{env.board.number_of_black_and_white}:{env.winner}")

            # Keep retrying the reload until it succeeds; failures are only logged.
            # NOTE(review): there is no delay or retry limit, so a persistent
            # failure spins here forever — confirm this is intended.
            while True:
                try:
                    if self.config.play.use_newest_next_generation_model:
                        reload_newest_next_generation_model_if_changed(self.model, clear_session=True)
                    else:
                        reload_best_model_weight_if_changed(self.model, clear_session=True)
                    break
                except Exception as e:
                    logger.error(e)

            # A zero/unset interval disables the periodic reset instead of
            # crashing the loop with ZeroDivisionError/TypeError.
            reset_interval = self.config.play.reset_mtcs_info_per_game
            if reset_interval and idx % reset_interval == 0:
                logger.debug("reset MTCS info")
                mtcs_info = None

            idx += 1
            with open(self.config.resource.self_play_game_idx_file, "wt") as f:
                f.write(str(idx))
예제 #6
0
    def _start(self):
        """
        Worker loop: play self-play games forever and log statistics per game.

        Each iteration plays one game, increments the shared game counter,
        logs time/turn (and search-buffer size when search info is present)
        to TensorBoard, and writes the game index to
        ``config.resource.self_play_game_idx_file``. Never returns.
        """
        logger.debug("SelfPlayWorker#start()")
        np.random.seed(None)
        log_dir = os.path.join(self.config.resource.self_play_log_dir,
                               f"worker{self.worker_index:03d}")
        self.tensor_board = TensorBoardLogger(log_dir)

        self.buffer = []
        mtcs_info = None
        local_idx = 0

        while True:
            np.random.seed(None)
            local_idx += 1
            game_idx = self.shared_var.game_idx

            began = time()
            # Lazily (re)create the shared search info when sharing is enabled.
            if mtcs_info is None and self.config.play.share_mtcs_info_in_self_play:
                mtcs_info = ReversiPlayer.create_mtcs_info()

            env = self.start_game(local_idx, game_idx, mtcs_info)

            # From here on game_idx is the value returned by the shared counter.
            game_idx = self.shared_var.incr_game_idx()
            time_spent = time() - began
            logger.debug(
                f"play game {game_idx} time={time_spent} sec, "
                f"turn={env.turn}:{env.board.number_of_black_and_white}:{env.winner}"
            )

            # Scalars sent to TensorBoard for this game.
            log_info = {"self/time": time_spent, "self/turn": env.turn}
            if mtcs_info:
                log_info["self/mcts_buffer_size"] = len(mtcs_info.var_p)
            self.tensor_board.log_scaler(log_info, game_idx)

            # Drop the search info every N locally played games (disabled when N is falsy).
            reset_every = self.config.play.reset_mtcs_info_per_game
            if reset_every and local_idx % reset_every == 0:
                logger.debug("reset MCTS info")
                mtcs_info = None

            with open(self.config.resource.self_play_game_idx_file, "wt") as f:
                f.write(str(game_idx))
예제 #7
0
 def create_player(self):
     """
     Build a new ReversiPlayer from this object's config, model and play config.

     :return: the new player, created with resignation disabled
     """
     logger.debug("create new ReversiPlayer()")
     player = ReversiPlayer(self.config, self.model, self.play_config,
                            enable_resign=False)
     return player
예제 #8
0
 def create_reversi_player(self, enable_resign=None, mtcs_info=None):
     """
     Build a ReversiPlayer that is given ``self.api`` and no model (``None``).

     :param enable_resign: forwarded unchanged to the player
     :param mtcs_info: forwarded unchanged to the player
     :return: the new player
     """
     player_options = dict(enable_resign=enable_resign,
                           mtcs_info=mtcs_info,
                           api=self.api)
     return ReversiPlayer(self.config, None, **player_options)
예제 #9
0
class SelfPlayWorker:
    """
    Plays self-play games in a loop, buffers the moves and writes them to files.

    Also tracks how often a resignation would have been wrong (measured in games
    where resignation is disabled) and adjusts ``config.play.resign_threshold``.
    """

    def __init__(self, config: "Config", env=None, model=None):
        """
        Store the collaborators and reset the resignation statistics.

        :param config: project configuration
        :param ReversiEnv|None env: environment the games are played on
        :param reversi_zero.agent.model.ReversiModel|None model: model used by
            both players; loaded in :meth:`start` when ``None``
        """
        self.config = config
        self.model = model
        self.env = env
        self.black = None  # type: ReversiPlayer
        self.white = None  # type: ReversiPlayer
        self.buffer = []
        self.false_positive_count_of_resign = 0
        self.resign_test_game_count = 0

    def start(self):
        """
        Run self-play games in an endless loop; this method never returns.

        The game index is resumed from ``config.resource.self_play_game_idx_file``
        (falling back to 1) and written back to that file after every game.
        """
        if self.model is None:
            self.model = self.load_model()

        self.buffer = []
        idx = self.read_as_int(
            self.config.resource.self_play_game_idx_file) or 1
        mtcs_info = None

        while True:
            start_time = time()
            # Lazily (re)create the shared search info when sharing is enabled.
            if mtcs_info is None and self.config.play.share_mtcs_info_in_self_play:
                mtcs_info = ReversiPlayer.create_mtcs_info()
            env = self.start_game(idx, mtcs_info)
            end_time = time()
            logger.debug(
                f"play game {idx} time={end_time - start_time} sec, "
                f"turn={env.turn}:{env.board.number_of_black_and_white}:{env.winner}"
            )

            # Best effort: a failed reload is logged and the loop keeps
            # playing with the weights it already has.
            try:
                if self.config.play.use_newest_next_generation_model:
                    reload_newest_next_generation_model_if_changed(
                        self.model, clear_session=True)
                else:
                    reload_best_model_weight_if_changed(self.model,
                                                        clear_session=True)
            except Exception as e:
                logger.error(e)

            if self._should_reset_mtcs_info(idx):
                logger.debug("reset MTCS info")
                mtcs_info = None

            idx += 1
            with open(self.config.resource.self_play_game_idx_file, "wt") as f:
                f.write(str(idx))

    def _should_reset_mtcs_info(self, idx):
        """
        Tell whether the shared search info must be dropped after game ``idx``.

        A zero or unset ``config.play.reset_mtcs_info_per_game`` disables the
        periodic reset instead of raising ZeroDivisionError/TypeError.

        :param int idx: index of the game that just finished
        :return: True when ``idx`` is a multiple of the configured interval
        """
        interval = self.config.play.reset_mtcs_info_per_game
        return bool(interval) and idx % interval == 0

    def start_game(self, idx, mtcs_info):
        """
        Play one self-play game from reset to finish and return the environment.

        :param idx: game index; selects the simulation count and decides whether
            the buffered play data is written out after this game
        :param mtcs_info: search info object handed to both players (may be None)
        :return: ``self.env`` after the game has ended
        """
        self.env.reset()
        # Resignation is switched off for this game when the random draw
        # exceeds the configured disable rate.
        enable_resign = self.config.play.disable_resignation_rate <= random()
        self.config.play.simulation_num_per_move = self.decide_simulation_num_per_move(
            idx)
        logger.debug(
            f"simulation_num_per_move = {self.config.play.simulation_num_per_move}"
        )
        self.black = ReversiPlayer(self.config,
                                   self.model,
                                   enable_resign=enable_resign,
                                   mtcs_info=mtcs_info)
        self.white = ReversiPlayer(self.config,
                                   self.model,
                                   enable_resign=enable_resign,
                                   mtcs_info=mtcs_info)
        if not enable_resign:
            logger.debug("Resignation is disabled in the next game.")
        observation = self.env.observation  # type: Board
        while not self.env.done:
            # Each player receives the position as (own stones, enemy stones).
            if self.env.next_player == Player.black:
                action = self.black.action(observation.black,
                                           observation.white)
            else:
                action = self.white.action(observation.white,
                                           observation.black)
            observation, _ = self.env.step(action)
        self.finish_game(resign_enabled=enable_resign)
        self.save_play_data(write=idx %
                            self.config.play_data.nb_game_in_file == 0)
        self.remove_play_data()
        return self.env

    def save_play_data(self, write=True):
        """
        Append both players' moves to the buffer and optionally flush it to a file.

        :param bool write: when true, the whole buffer is written to a new
            timestamp-named file in ``play_data_dir`` and then emptied
        """
        data = self.black.moves + self.white.moves
        self.buffer += data

        if not write:
            return

        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir,
                            rc.play_data_filename_tmpl % game_id)
        logger.info(f"save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        """
        Delete play-data files beyond ``config.play_data.max_file_num``.

        The files at the front of the list returned by
        ``get_game_data_filenames`` are removed first (presumably the oldest
        ones — verify the ordering of that helper).
        """
        files = get_game_data_filenames(self.config.resource)
        excess = len(files) - self.config.play_data.max_file_num
        if excess <= 0:
            return
        for path in files[:excess]:
            os.remove(path)

    def finish_game(self, resign_enabled=True):
        """
        Report the result to both players and update the resignation statistics.

        :param bool resign_enabled: whether resignation was enabled for the
            game; only games with resignation disabled are counted as
            resignation test games
        """
        if self.env.winner == Winner.black:
            black_win = 1
            # The eventual winner having flagged a resignation is a false positive.
            false_positive_of_resign = self.black.resigned
        elif self.env.winner == Winner.white:
            black_win = -1
            false_positive_of_resign = self.white.resigned
        else:
            black_win = 0
            false_positive_of_resign = self.black.resigned or self.white.resigned

        self.black.finish_game(black_win)
        self.white.finish_game(-black_win)

        if not resign_enabled:
            self.resign_test_game_count += 1
            if false_positive_of_resign:
                self.false_positive_count_of_resign += 1
                logger.debug("false positive of resignation happened")
            self.check_and_update_resignation_threshold()

    def load_model(self):
        """
        Create a ReversiModel and load existing weights unless ``opts.new`` is set.

        When nothing could be loaded, a fresh model is built and saved as the
        best model.

        :return: the ready-to-use model
        """
        from reversi_zero.agent.model import ReversiModel
        model = ReversiModel(self.config)
        loaded = False
        if not self.config.opts.new:
            # The preferred source is tried first, the other one is the fallback.
            if self.config.play.use_newest_next_generation_model:
                loaded = reload_newest_next_generation_model_if_changed(
                    model) or load_best_model_weight(model)
            else:
                loaded = load_best_model_weight(
                    model) or reload_newest_next_generation_model_if_changed(
                        model)

        if not loaded:
            model.build()
            save_as_best_model(model)
        return model

    def reset_false_positive_count(self):
        """Reset both resignation statistics counters to zero."""
        self.false_positive_count_of_resign = 0
        self.resign_test_game_count = 0

    @property
    def false_positive_rate(self):
        """Share of resignation test games with a false positive (0 when none were played)."""
        if self.resign_test_game_count == 0:
            return 0
        return self.false_positive_count_of_resign / self.resign_test_game_count

    def check_and_update_resignation_threshold(self):
        """
        Adjust ``config.play.resign_threshold`` once 100 test games were collected.

        The threshold is lowered by ``resign_threshold_delta`` when the false
        positive rate reaches ``false_positive_threshold`` and raised by the
        same amount otherwise; the statistics are reset afterwards. Nothing
        happens when the threshold is ``None``.
        """
        if self.resign_test_game_count < 100 or self.config.play.resign_threshold is None:
            return

        old_threshold = self.config.play.resign_threshold
        if self.false_positive_rate >= self.config.play.false_positive_threshold:
            self.config.play.resign_threshold -= self.config.play.resign_threshold_delta
        else:
            self.config.play.resign_threshold += self.config.play.resign_threshold_delta
        logger.debug(
            f"update resign_threshold: {old_threshold} -> {self.config.play.resign_threshold}"
        )
        self.reset_false_positive_count()

    def decide_simulation_num_per_move(self, idx):
        """
        Choose the number of simulations per move for game ``idx``.

        A non-zero integer in ``config.resource.force_simulation_num_file``
        takes precedence. Otherwise the value of the last schedule entry whose
        ``min_idx`` is not greater than ``idx`` is used.

        :param int idx: game index
        :return: the simulation count, or ``None`` when no schedule entry matches
        """
        ret = self.read_as_int(self.config.resource.force_simulation_num_file)

        if ret:
            logger.debug(f"loaded simulation num from file: {ret}")
            return ret

        # Later matching entries override earlier ones.
        for min_idx, num in self.config.play.schedule_of_simulation_num_per_move:
            if idx >= min_idx:
                ret = num
        return ret

    def read_as_int(self, filename):
        """
        Read a non-zero integer from a text file.

        :param filename: path of the file to read
        :return: the integer, or ``None`` when the file does not exist, does
            not contain an integer, or contains 0
        """
        if os.path.exists(filename):
            try:
                with open(filename, "rt") as f:
                    ret = int(str(f.read()).strip())
                    if ret:
                        return ret
            except ValueError:
                pass
        return None
예제 #10
0
 def start_game(self, human_is_black):
     """
     Begin a new game against the AI.

     :param human_is_black: truthy when the human plays black, otherwise white
     """
     if human_is_black:
         self.human_color = Player.black
     else:
         self.human_color = Player.white
     # A fresh environment and a fresh AI player are created for every game.
     self.env = ReversiEnv().reset()
     self.ai = ReversiPlayer(self.config, self.model)
예제 #11
0
class PlayWithHuman:
    """
    Game session between a human and the AI on a 15x15 board.

    Cells are addressed as ``(px, py)`` with the top-left cell at ``(0, 0)``;
    the board is stored as bitboards where cell ``(px, py)`` is bit
    ``py * 15 + px``. Registered observers are notified of game events.
    """

    # Number of cells along one side of the board.
    _BOARD_SIZE = 15

    def __init__(self, config: "Config"):
        """
        Create the session, a fresh environment and load the model.

        :param config: project configuration
        """
        self.config = config
        self.human_color = None
        self.observers = []
        self.env = ReversiEnv().reset()
        self.model = self._load_model()
        self.ai = None  # type: ReversiPlayer
        self.last_evaluation = None
        self.last_history = None  # type: HistoryItem

    def add_observer(self, observer_func):
        """Register a callable that is invoked with every game event."""
        self.observers.append(observer_func)

    def notify_all(self, event):
        """Call every registered observer with ``event``, in registration order."""
        for ob_func in self.observers:
            ob_func(event)

    def start_game(self, human_is_black):
        """
        Begin a new game with a fresh environment and a fresh AI player.

        :param human_is_black: truthy when the human plays black, otherwise white
        """
        self.human_color = Player.black if human_is_black else Player.white
        self.env = ReversiEnv().reset()
        self.ai = ReversiPlayer(self.config, self.model)

    def play_next_turn(self):
        """
        Notify observers about the current state of the game.

        Always emits ``GameEvent.update``; then ``GameEvent.over`` when the game
        has ended, or ``GameEvent.ai_move`` when it is not the human's turn.
        """
        self.notify_all(GameEvent.update)

        if self.over:
            self.notify_all(GameEvent.over)
            return

        if self.next_player != self.human_color:
            self.notify_all(GameEvent.ai_move)

    @property
    def over(self):
        """Whether the environment reports the game as finished."""
        return self.env.done

    @property
    def next_player(self):
        """The player whose turn it is, as reported by the environment."""
        return self.env.next_player

    def _to_pos(self, px, py):
        """
        Convert cell coordinates to a bit index.

        Each axis is checked separately so that off-board coordinates such as
        ``(15, 0)`` or ``(-1, 1)`` are rejected instead of silently wrapping
        onto a cell of a neighbouring row.

        :return: the bit index, or ``None`` when the cell is off the board
        """
        size = self._BOARD_SIZE
        if not (0 <= px < size and 0 <= py < size):
            return None
        return int(py * size + px)

    def stone(self, px, py):
        """
        Return the colour of the stone at a cell.

        left top=(0, 0), right bottom=(14,14)

        :return: ``Player.black``, ``Player.white`` or ``None`` for an empty cell
        :raises AssertionError: when the cell is off the board
        """
        pos = self._to_pos(px, py)
        assert pos is not None
        bit = 1 << pos
        if self.env.board.black & bit:
            return Player.black
        elif self.env.board.white & bit:
            return Player.white
        return None

    @property
    def number_of_black_and_white(self):
        """Stone counts as reported by the current observation."""
        return self.env.observation.number_of_black_and_white

    def available(self, px, py):
        """
        Tell whether the human may place a stone at a cell.

        Legal moves are computed from the human's perspective regardless of
        whose turn it is.

        :return: ``False`` for an off-board cell; otherwise the legal-move
            bitmask ANDed with the cell's bit (non-zero means legal)
        """
        pos = self._to_pos(px, py)
        if pos is None:
            return False
        own, enemy = self.env.board.black, self.env.board.white
        if self.human_color == Player.white:
            own, enemy = enemy, own
        legal_moves = find_correct_moves(own, enemy)
        return legal_moves & (1 << pos)

    def move(self, px, py):
        """
        Play the human's move at a cell.

        :return: ``False`` when it is not the human's turn, otherwise ``None``
        :raises AssertionError: when the cell is off the board
        """
        pos = self._to_pos(px, py)
        assert pos is not None

        if self.next_player != self.human_color:
            return False

        self.env.step(pos)

    def _load_model(self):
        """Load the model through the ``load_model`` helper."""
        return load_model(self.config)

    def move_by_ai(self):
        """
        Let the AI choose and play its move, then record its evaluation.

        :return: ``False`` when it is the human's turn, otherwise ``None``
        """
        if self.next_player == self.human_color:
            return False

        own, enemy = self.get_state_of_next_player()
        action = self.ai.action(own, enemy)
        self.env.step(action)

        # Query the AI about the position it just moved from (pre-move stones).
        self.last_history = self.ai.ask_thought_about(own, enemy)
        self.last_evaluation = self.last_history.values[self.last_history.action]
        logger.debug(f"evaluation by ai={self.last_evaluation}")

    def get_state_of_next_player(self):
        """
        Return the board from the perspective of the player to move.

        :return: tuple ``(own, enemy)`` of bitboards
        """
        if self.next_player == Player.black:
            own, enemy = self.env.board.black, self.env.board.white
        else:
            own, enemy = self.env.board.white, self.env.board.black
        return own, enemy
예제 #12
0
class SelfPlayWorker:
    """
    Plays self-play games in a loop, buffers the moves and writes them to files.

    Also tracks how often a resignation would have been wrong (measured in games
    where resignation is disabled) and adjusts ``config.play.resign_threshold``.
    """

    def __init__(self, config: "Config", env=None, model=None):
        """
        Store the collaborators and reset the resignation statistics.

        :param config: project configuration
        :param ReversiEnv|None env: environment the games are played on
        :param reversi_zero.agent.model.ReversiModel|None model: model used by
            both players; loaded in :meth:`start` when ``None``
        """
        self.config = config
        self.model = model
        self.env = env
        self.black = None  # type: ReversiPlayer
        self.white = None  # type: ReversiPlayer
        self.buffer = []
        self.false_positive_count_of_resign = 0
        self.resign_test_game_count = 0

    def start(self):
        """
        Run self-play games in an endless loop; this method never returns.

        After every game the model weights are reloaded if they changed. When
        the best-model weights changed, the resignation statistics are reset.
        """
        if self.model is None:
            self.model = self.load_model()

        self.buffer = []
        idx = 1

        while True:
            start_time = time()
            env = self.start_game(idx)
            end_time = time()
            logger.debug(
                f"play game {idx} time={end_time - start_time} sec, "
                f"turn={env.turn}:{env.board.number_of_black_and_white}")
            # The weights are checked after every game (the former
            # `if True or ...` guard was always true, so it is dropped).
            if self.config.play.use_newest_next_generation_model:
                reload_newest_next_generation_model_if_changed(self.model)
            elif reload_best_model_weight_if_changed(self.model):
                self.reset_false_positive_count()

            idx += 1

    def start_game(self, idx):
        """
        Play one self-play game from reset to finish and return the environment.

        :param idx: game index; the buffered play data is written to a file
            when it is a multiple of ``config.play_data.nb_game_in_file``
        :return: ``self.env`` after the game has ended
        """
        self.env.reset()
        # Resignation is switched off for this game when the random draw
        # exceeds the configured disable rate.
        enable_resign = self.config.play.disable_resignation_rate <= random()
        self.black = ReversiPlayer(self.config,
                                   self.model,
                                   enable_resign=enable_resign)
        self.white = ReversiPlayer(self.config,
                                   self.model,
                                   enable_resign=enable_resign)
        if not enable_resign:
            logger.debug("Resignation is disabled in the next game.")
        observation = self.env.observation  # type: Board
        while not self.env.done:
            # Each player receives the position as (own stones, enemy stones).
            if self.env.next_player == Player.black:
                action = self.black.action(observation.black,
                                           observation.white)
            else:
                action = self.white.action(observation.white,
                                           observation.black)
            observation, _ = self.env.step(action)
        self.finish_game(resign_enabled=enable_resign)
        self.save_play_data(write=idx %
                            self.config.play_data.nb_game_in_file == 0)
        self.remove_play_data()
        return self.env

    def save_play_data(self, write=True):
        """
        Append both players' moves to the buffer and optionally flush it to a file.

        :param bool write: when true, the whole buffer is written to a new
            timestamp-named file in ``play_data_dir`` and then emptied
        """
        data = self.black.moves + self.white.moves
        self.buffer += data

        if not write:
            return

        rc = self.config.resource
        game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        path = os.path.join(rc.play_data_dir,
                            rc.play_data_filename_tmpl % game_id)
        logger.info(f"save play data to {path}")
        write_game_data_to_file(path, self.buffer)
        self.buffer = []

    def remove_play_data(self):
        """
        Delete play-data files beyond ``config.play_data.max_file_num``.

        The files at the front of the list returned by
        ``get_game_data_filenames`` are removed first (presumably the oldest
        ones — verify the ordering of that helper).
        """
        files = get_game_data_filenames(self.config.resource)
        excess = len(files) - self.config.play_data.max_file_num
        if excess <= 0:
            return
        for path in files[:excess]:
            os.remove(path)

    def finish_game(self, resign_enabled=True):
        """
        Report the result to both players and update the resignation statistics.

        :param bool resign_enabled: whether resignation was enabled for the
            game; only games with resignation disabled are counted as
            resignation test games
        """
        if self.env.winner == Winner.black:
            black_win = 1
            # The eventual winner having flagged a resignation is a false positive.
            false_positive_of_resign = self.black.resigned
        elif self.env.winner == Winner.white:
            black_win = -1
            false_positive_of_resign = self.white.resigned
        else:
            black_win = 0
            false_positive_of_resign = self.black.resigned or self.white.resigned

        self.black.finish_game(black_win)
        self.white.finish_game(-black_win)

        if not resign_enabled:
            self.resign_test_game_count += 1
            if false_positive_of_resign:
                self.false_positive_count_of_resign += 1
                logger.debug("false positive of resignation happened")
            self.check_and_update_resignation_threshold()

    def load_model(self):
        """
        Create a ReversiModel and load existing weights unless ``opts.new`` is set.

        When nothing could be loaded, a fresh model is built and saved as the
        best model.

        :return: the ready-to-use model
        """
        from reversi_zero.agent.model import ReversiModel
        model = ReversiModel(self.config)
        loaded = False
        if not self.config.opts.new:
            # The preferred source is tried first, the other one is the fallback.
            if self.config.play.use_newest_next_generation_model:
                loaded = reload_newest_next_generation_model_if_changed(
                    model) or load_best_model_weight(model)
            else:
                loaded = load_best_model_weight(
                    model) or reload_newest_next_generation_model_if_changed(
                        model)

        if not loaded:
            model.build()
            save_as_best_model(model)
        return model

    def reset_false_positive_count(self):
        """Reset both resignation statistics counters to zero."""
        self.false_positive_count_of_resign = 0
        self.resign_test_game_count = 0

    @property
    def false_positive_rate(self):
        """Share of resignation test games with a false positive (0 when none were played)."""
        if self.resign_test_game_count == 0:
            return 0
        return self.false_positive_count_of_resign / self.resign_test_game_count

    def check_and_update_resignation_threshold(self):
        """
        Adjust ``config.play.resign_threshold`` once 100 test games were collected.

        The threshold is lowered by ``resign_threshold_delta`` when the false
        positive rate reaches ``false_positive_threshold`` and raised by the
        same amount otherwise; the statistics are reset afterwards. Nothing
        happens when the threshold is ``None``.
        """
        if self.resign_test_game_count < 100 or self.config.play.resign_threshold is None:
            return

        old_threshold = self.config.play.resign_threshold
        if self.false_positive_rate >= self.config.play.false_positive_threshold:
            self.config.play.resign_threshold -= self.config.play.resign_threshold_delta
        else:
            self.config.play.resign_threshold += self.config.play.resign_threshold_delta
        logger.debug(
            f"update resign_threshold: {old_threshold} -> {self.config.play.resign_threshold}"
        )
        self.reset_false_positive_count()
예제 #13
0
class SelfPlayWorker:
    """Plays self-play games in a loop, buffers the moves and writes them to files."""

    def __init__(self, config: Config, env=None, model=None):
        """
        Store the collaborators and start with an empty move buffer.

        :param config: project configuration
        :param ReversiEnv|None env: environment the games are played on
        :param reversi_zero.agent.model.ReversiModel|None model: model used by
            both players; loaded in :meth:`start` when ``None``
        """
        self.config, self.model, self.env = config, model, env
        self.black = None  # type: ReversiPlayer
        self.white = None  # type: ReversiPlayer
        self.buffer = []

    def start(self):
        """
        Run self-play games in an endless loop; this method never returns.

        The best-model weights are re-checked each time the game index is a
        multiple of ``config.play_data.nb_game_in_file``.
        """
        if self.model is None:
            self.model = self.load_model()

        self.buffer = []
        idx = 1

        while True:
            began = time()
            self.start_game(idx)
            elapsed = time() - began
            logger.debug(f"play game {idx} time={elapsed} sec")
            file_completed = (idx % self.config.play_data.nb_game_in_file) == 0
            if file_completed:
                reload_best_model_weight_if_changed(self.model)
            idx += 1

    def start_game(self, idx):
        """
        Play one self-play game from reset to finish, then store its play data.

        :param idx: game index; the buffered play data is written to a file when
            it is a multiple of ``config.play_data.nb_game_in_file``
        """
        self.env.reset()
        self.black = ReversiPlayer(self.config, self.model)
        self.white = ReversiPlayer(self.config, self.model)

        board = self.env.observation  # type: Board
        while not self.env.done:
            # Each player receives the position as (own stones, enemy stones).
            if self.env.next_player == Player.black:
                mover, own, enemy = self.black, board.black, board.white
            else:
                mover, own, enemy = self.white, board.white, board.black
            board, _ = self.env.step(mover.action(own, enemy))

        self.finish_game()
        flush_to_file = idx % self.config.play_data.nb_game_in_file == 0
        self.save_play_data(write=flush_to_file)
        self.remove_play_data()

    def save_play_data(self, write=True):
        """
        Append both players' moves to the buffer and optionally flush it to a file.

        :param bool write: when true, the whole buffer is written to a new
            timestamp-named file in ``play_data_dir`` and then emptied
        """
        moves_of_game = self.black.moves + self.white.moves
        self.buffer += moves_of_game

        if write:
            resource = self.config.resource
            stamp = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
            filename = resource.play_data_filename_tmpl % stamp
            path = os.path.join(resource.play_data_dir, filename)
            logger.info(f"save play data to {path}")
            write_game_data_to_file(path, self.buffer)
            self.buffer = []

    def remove_play_data(self):
        """Delete the leading play-data files beyond ``config.play_data.max_file_num``."""
        files = get_game_data_filenames(self.config.resource)
        surplus = len(files) - self.config.play_data.max_file_num
        if surplus > 0:
            for stale in files[:surplus]:
                os.remove(stale)

    def finish_game(self):
        """Report the result to both players (1 win, -1 loss, 0 otherwise)."""
        winner = self.env.winner
        if winner == Winner.black:
            black_win = 1
        elif winner == Winner.white:
            black_win = -1
        else:
            black_win = 0

        # White sees the mirrored result.
        self.black.finish_game(black_win)
        self.white.finish_game(-black_win)

    def load_model(self):
        """
        Create a ReversiModel and load the best weights.

        A fresh model is built and saved as best when ``opts.new`` is set or
        no best weights could be loaded.

        :return: the ready-to-use model
        """
        from reversi_zero.agent.model import ReversiModel
        model = ReversiModel(self.config)
        needs_build = self.config.opts.new or not load_best_model_weight(model)
        if needs_build:
            model.build()
            save_as_best_model(model)
        return model
예제 #14
0
class PlayWithHuman:
    """
    Game session between a human and the AI on an 8x8 board.

    Cells are addressed as ``(px, py)`` with the top-left cell at ``(0, 0)``;
    the board is stored as bitboards where cell ``(px, py)`` is bit
    ``py * 8 + px``. Registered observers are notified of game events.
    """

    # Number of cells along one side of the board.
    _BOARD_SIZE = 8

    def __init__(self, config: "Config"):
        """
        Create the session, a fresh environment and load the model.

        :param config: project configuration
        :raises RuntimeError: when no model weights can be loaded
        """
        self.config = config
        self.human_color = None
        self.observers = []
        self.env = ReversiEnv().reset()
        self.model = self._load_model()
        self.ai = None  # type: ReversiPlayer
        self.last_evaluation = None
        self.last_history = None  # type: HistoryItem

    def add_observer(self, observer_func):
        """Register a callable that is invoked with every game event."""
        self.observers.append(observer_func)

    def notify_all(self, event):
        """Call every registered observer with ``event``, in registration order."""
        for ob_func in self.observers:
            ob_func(event)

    def start_game(self, human_is_black):
        """
        Begin a new game with a fresh environment and a fresh AI player.

        :param human_is_black: truthy when the human plays black, otherwise white
        """
        self.human_color = Player.black if human_is_black else Player.white
        self.env = ReversiEnv().reset()
        self.ai = ReversiPlayer(self.config, self.model)

    def play_next_turn(self):
        """
        Notify observers about the current state of the game.

        Always emits ``GameEvent.update``; then ``GameEvent.over`` when the game
        has ended, or ``GameEvent.ai_move`` when it is not the human's turn.
        """
        self.notify_all(GameEvent.update)

        if self.over:
            self.notify_all(GameEvent.over)
            return

        if self.next_player != self.human_color:
            self.notify_all(GameEvent.ai_move)

    @property
    def over(self):
        """Whether the environment reports the game as finished."""
        return self.env.done

    @property
    def next_player(self):
        """The player whose turn it is, as reported by the environment."""
        return self.env.next_player

    def _to_pos(self, px, py):
        """
        Convert cell coordinates to a bit index.

        Each axis is checked separately so that off-board coordinates such as
        ``(8, 0)`` or ``(-1, 1)`` are rejected instead of silently wrapping
        onto a cell of a neighbouring row.

        :return: the bit index, or ``None`` when the cell is off the board
        """
        size = self._BOARD_SIZE
        if not (0 <= px < size and 0 <= py < size):
            return None
        return int(py * size + px)

    def stone(self, px, py):
        """
        Return the colour of the stone at a cell.

        left top=(0, 0), right bottom=(7,7)

        :return: ``Player.black``, ``Player.white`` or ``None`` for an empty cell
        :raises AssertionError: when the cell is off the board
        """
        pos = self._to_pos(px, py)
        assert pos is not None
        bit = 1 << pos
        if self.env.board.black & bit:
            return Player.black
        elif self.env.board.white & bit:
            return Player.white
        return None

    @property
    def number_of_black_and_white(self):
        """Stone counts as reported by the current observation."""
        return self.env.observation.number_of_black_and_white

    def available(self, px, py):
        """
        Tell whether the human may place a stone at a cell.

        Legal moves are computed from the human's perspective regardless of
        whose turn it is.

        :return: ``False`` for an off-board cell; otherwise the legal-move
            bitmask ANDed with the cell's bit (non-zero means legal)
        """
        pos = self._to_pos(px, py)
        if pos is None:
            return False
        own, enemy = self.env.board.black, self.env.board.white
        if self.human_color == Player.white:
            own, enemy = enemy, own
        legal_moves = find_correct_moves(own, enemy)
        return legal_moves & (1 << pos)

    def move(self, px, py):
        """
        Play the human's move at a cell.

        :return: ``False`` when it is not the human's turn, otherwise ``None``
        :raises AssertionError: when the cell is off the board
        """
        pos = self._to_pos(px, py)
        assert pos is not None

        if self.next_player != self.human_color:
            return False

        self.env.step(pos)

    def _load_model(self):
        """
        Create a ReversiModel and load existing weights into it.

        The preferred source (newest next-generation or best model, depending
        on the configuration) is tried first, the other one is the fallback.

        :return: the loaded model
        :raises RuntimeError: when neither source provides weights
        """
        from reversi_zero.agent.model import ReversiModel
        model = ReversiModel(self.config)
        if self.config.play.use_newest_next_generation_model:
            loaded = reload_newest_next_generation_model_if_changed(
                model) or load_best_model_weight(model)
        else:
            loaded = load_best_model_weight(
                model) or reload_newest_next_generation_model_if_changed(model)
        if not loaded:
            raise RuntimeError("No models found!")
        return model

    def move_by_ai(self):
        """
        Let the AI choose and play its move, then record its evaluation.

        :return: ``False`` when it is the human's turn, otherwise ``None``
        """
        if self.next_player == self.human_color:
            return False

        own, enemy = self.get_state_of_next_player()
        action = self.ai.action(own, enemy)
        self.env.step(action)

        # Query the AI about the position it just moved from (pre-move stones).
        self.last_history = self.ai.ask_thought_about(own, enemy)
        self.last_evaluation = self.last_history.values[
            self.last_history.action]
        logger.debug(f"evaluation by ai={self.last_evaluation}")

    def get_state_of_next_player(self):
        """
        Return the board from the perspective of the player to move.

        :return: tuple ``(own, enemy)`` of bitboards
        """
        if self.next_player == Player.black:
            own, enemy = self.env.board.black, self.env.board.white
        else:
            own, enemy = self.env.board.white, self.env.board.black
        return own, enemy