def play_game(self, best_model, ng_model):
        env = ReversiEnv().reset()

        best_player = ReversiPlayer(self.config,
                                    best_model,
                                    play_config=self.config.eval.play_config)
        ng_player = ReversiPlayer(self.config,
                                  ng_model,
                                  play_config=self.config.eval.play_config)
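        # assign colors at random so neither model always plays black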
        best_is_black = random() < 0.5
        if best_is_black:
            black, white = best_player, ng_player
        else:
            black, white = ng_player, best_player

        observation = env.observation
        while not env.done:
            if env.next_player == Player.black:
                action = black.action(observation.black, observation.white)
            else:
                action = white.action(observation.white, observation.black)
            observation, info = env.step(action)

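        # ng_win: 1 if the next-generation model won, 0 if it lost, None for a draw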
        ng_win = None
        if env.winner == Winner.black:
            if best_is_black:
                ng_win = 0
            else:
                ng_win = 1
        elif env.winner == Winner.white:
            if best_is_black:
                ng_win = 1
            else:
                ng_win = 0
        return ng_win, best_is_black, observation.number_of_black_and_white
 def __init__(self, config: Config, model_dir):
     self.config = config
     self.human_color = None
     self.observers = []
     self.env = ReversiEnv().reset()
     self.model = self._load_model(model_dir)
     self.ai = None  # type: EvaluatePlayer
     self.ai_confidence = None
Example #3
 def __init__(self, config: Config):
     self.config = config
     self.human_color = None
     self.observers = []
     self.env = ReversiEnv().reset()
     self.model = self._load_model()
     self.ai = None  # type: ReversiPlayer
     self.last_evaluation = None
     self.last_history = None  # type: HistoryItem
Example #4
 async def start_search_my_move(self, own, enemy):
     self.running_simulation_num += 1
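     # key of the root position, used to report interim Q/N statistics to the MCTS callback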
     root_key = self.counter_key(ReversiEnv().update(own, enemy, Player.black))
     with await self.sem:  # reduce parallel search number
         env = ReversiEnv().update(own, enemy, Player.black)
         leaf_v = await self.search_my_move(env, is_root_node=True)
         self.running_simulation_num -= 1
         if self.callback_in_mtcs and self.callback_in_mtcs.per_sim > 0 and \
                 self.running_simulation_num % self.callback_in_mtcs.per_sim == 0:
             self.callback_in_mtcs.callback(list(self.var_q[root_key]), list(self.var_n[root_key]))
         return leaf_v
    def start_game(self, human_is_black):
        self.human_color = Player.black if human_is_black else Player.white
        self.env = ReversiEnv().reset()

        def make_sim_env_fn():
            return self.env.copy()

        self.ai = EvaluatePlayer(make_sim_env_fn=make_sim_env_fn,
                                 config=self.config,
                                 model=self.model)
        self.ai.prepare(self.env, dir_noise=False)
        self.ai_confidence = None
Example #6
 def __init__(self, config: Config):
     self.config = config
     self.reader = NonBlockingStreamReader(sys.stdin)
     self.handler = NBoardProtocolVersion2(config, self)
     self.running = False
     self.nc = self.config.nboard  # shortcut
     #
     self.env = ReversiEnv().reset()
     self.model = load_model(self.config)
     self.play_config = self.config.play
     self.player = self.create_player()
     self.turn_of_nboard = None
    def action(self, own, enemy):
        """

        :param own: BitBoard
        :param enemy:  BitBoard
        :return: action: move pos=0 ~ 63 (0=top left, 7=top right, 63=bottom right)
        """
        env = ReversiEnv().update(own, enemy, Player.black)
        key = self.counter_key(env)

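        # keep thinking until the sampled move matches the best move by value; in the early high-temperature turns the first sample is accepted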
        for tl in range(self.play_config.thinking_loop):
            if tl > 0 and self.play_config.logging_thinking:
                logger.debug(
                    f"continue thinking: policy move=({action % 8}, {action // 8}), "
                    f"value move=({action_by_value % 8}, {action_by_value // 8})"
                )
            self.search_moves(own, enemy)
            policy = self.calc_policy(own, enemy)
            action = int(np.random.choice(range(64), p=policy))
            action_by_value = int(
                np.argmax(self.var_q[key] + (self.var_n[key] > 0) * 100))
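            # adding 100 to visited moves keeps the argmax among moves searched at least once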
            if action == action_by_value or env.turn < self.play_config.change_tau_turn:
                break

        self.moves.append([(own, enemy), list(policy)])
        # this is for play_gui, not necessary when training.
        self.thinking_history[(own, enemy)] = HistoryItem(
            action, policy, list(self.var_q[key]), list(self.var_n[key]))

        return action
 async def start_search_my_move(self, own, enemy):
     self.running_simulation_num += 1
     with await self.sem:  # reduce parallel search number
         env = ReversiEnv().update(own, enemy, Player.black)
         leaf_v = await self.search_my_move(env, is_root_node=True)
         self.running_simulation_num -= 1
         return leaf_v
Example #9
    def action(self, own, enemy):
        """

        :param own: BitBoard
        :param enemy:  BitBoard
        :return: action: move pos=0 ~ 63 (0=top left, 7=top right, 63=bottom right)
        """
        env = ReversiEnv().update(own, enemy, Player.black)
        key = self.counter_key(env)

        for tl in range(self.play_config.thinking_loop):
            if tl > 0 and self.play_config.logging_thinking:
                logger.debug(f"continue thinking: policy move=({action % 8}, {action // 8}), "
                             f"value move=({action_by_value % 8}, {action_by_value // 8})")
            self.search_moves(own, enemy)
            policy = self.calc_policy(own, enemy)
            action = int(np.random.choice(range(64), p=policy))
            action_by_value = int(np.argmax(self.var_q[key] + (self.var_n[key] > 0)*100))
            if action == action_by_value or env.turn < self.play_config.change_tau_turn or env.turn <= 1:
                break

        # this is for play_gui, not necessary when training.
        next_key = self.get_next_key(own, enemy, action)
        self.thinking_history[(own, enemy)] = HistoryItem(action, policy, list(self.var_q[key]), list(self.var_n[key]),
                                                          list(self.var_q[next_key]), list(self.var_n[next_key]))

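        # resign if even the best visited move's Q is at or below the threshold; subtracting 10 keeps unvisited moves out of the max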
        if self.play_config.resign_threshold is not None and \
                        np.max(self.var_q[key] - (self.var_n[key] == 0)*10) <= self.play_config.resign_threshold:
            self.resigned = True
            if self.enable_resign:
                return None  # means resign

        saved_policy = self.calc_policy_by_tau_1(key) if self.config.play_data.save_policy_of_tau_1 else policy
        self.moves.append([(own, enemy), list(saved_policy)])
        return action
Example #10
    def action_with_evaluation(self, own, enemy, callback_in_mtcs=None):
        """

        :param own: BitBoard
        :param enemy:  BitBoard
        :param CallbackInMCTS callback_in_mtcs:
        :rtype: ActionWithEvaluation
        :return ActionWithEvaluation(
                    action=move pos=0 ~ 224 (0=top left, 14=top right, 224=bottom right),
                    n=N of the action,
                    q=W/N of the action,
                )
        """
        env = ReversiEnv().update(own, enemy, Player.black)
        key = self.counter_key(env)
        self.callback_in_mtcs = callback_in_mtcs
        pc = self.play_config

        if pc.use_solver_turn and env.turn >= pc.use_solver_turn:
            ret = self.action_by_searching(key)
            if ret:  # the solver's move is not saved as play data
                return ret

        for tl in range(self.play_config.thinking_loop):
            if env.turn > 0:
                self.search_moves(own, enemy)
            else:
                self.bypass_first_move(key)

            policy = self.calc_policy(own, enemy)
            action = int(np.random.choice(range(225), p=policy))
            action_by_value = int(np.argmax(self.var_q(key) + (self.var_n[key] > 0)*100))
            value_diff = self.var_q(key)[action] - self.var_q(key)[action_by_value]

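            # stop rethinking when requested, in early turns, or once the chosen move has enough visits and is within 0.01 of the best move's value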
            if env.turn <= pc.start_rethinking_turn or self.requested_stop_thinking or \
                    (value_diff > -0.01 and self.var_n[key][action] >= pc.required_visit_to_decide_action):
                break

        # this is for play_gui, not necessary when training.
        self.update_thinking_history(own, enemy, action, policy)

        if self.play_config.resign_threshold is not None and\
                        np.max(self.var_q(key) - (self.var_n[key] == 0)*10) <= self.play_config.resign_threshold:
            self.resigned = True
            if self.enable_resign:
                if env.turn >= self.config.play.allowed_resign_turn:
                    return ActionWithEvaluation(None, 0, 0)  # means resign
                else:
                    logger.debug(f"Want to resign but disallowed turn {env.turn} < {self.config.play.allowed_resign_turn}")

        saved_policy = self.calc_policy_by_tau_1(key) if self.config.play_data.save_policy_of_tau_1 else policy
        self.add_data_to_move_buffer_with_8_symmetries(own, enemy, saved_policy)
        return ActionWithEvaluation(action=action, n=self.var_n[key][action], q=self.var_q(key)[action])
    def play_game(self, model_1, model_2):
        env = ReversiEnv().reset()

        def make_sim_env_fn():
            return env.copy()

        p1 = EvaluatePlayer(make_sim_env_fn=make_sim_env_fn, config=self.config,
                            model=model_1, play_config=self.config.eval.play_config)
        p1.prepare(env, dir_noise=False)

        p2 = EvaluatePlayer(make_sim_env_fn=make_sim_env_fn, config=self.config,
                            model=model_2, play_config=self.config.eval.play_config)
        p2.prepare(env, dir_noise=False)

        p1_is_black = random() < 0.5
        if p1_is_black:
            black, white = p1, p2
        else:
            black, white = p2, p1

        while not env.done:
            if env.next_player == Player.black:
                action, _, _ = black.think()
            else:
                action, _, _ = white.think()

            env.step(action)

            black.play(action, env)
            white.play(action, env)

        if env.black_wins:
            p1_win = p1_is_black
        elif env.black_loses:
            p1_win = not p1_is_black
        else:
            p1_win = None

        return p1_win
Example #12
    async def search_my_move(self, env: ReversiEnv, is_root_node=False):
        """

        Q and V are values for this player (always black).
        P is the value for next_player (black or white).
        :param env:
        :param is_root_node:
        :return:
        """
        if env.done:
            if env.winner == Winner.black:
                return 1
            elif env.winner == Winner.white:
                return -1
            else:
                return 0

        key = self.counter_key(env)
        another_side_key = self.another_side_counter_key(env)

        while key in self.now_expanding:
            await asyncio.sleep(self.config.play.wait_for_expanding_sleep_sec)

        # is leaf?
        if key not in self.expanded:  # reach leaf node
            leaf_v = await self.expand_and_evaluate(env)
            if env.next_player == Player.black:
                return leaf_v  # Value for black
            else:
                return -leaf_v  # Value for white == -Value for black

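        # virtual loss: temporarily count this edge as a visited loss so parallel simulations explore other branches until the result is backed up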
        virtual_loss = self.config.play.virtual_loss
        virtual_loss_for_w = virtual_loss if env.next_player == Player.black else -virtual_loss

        action_t = self.select_action_q_and_u(env, is_root_node)
        _, _ = env.step(action_t)

        self.var_n[key][action_t] += virtual_loss
        self.var_w[key][action_t] -= virtual_loss_for_w
        leaf_v = await self.search_my_move(env)  # next move

        # on returning search path
        # update: N, W
        self.var_n[key][action_t] += - virtual_loss + 1
        self.var_w[key][action_t] += virtual_loss_for_w + leaf_v
        # update the same position seen from the other side (color and player flipped)
        self.var_n[another_side_key][action_t] += 1
        self.var_w[another_side_key][action_t] -= leaf_v  # must flip the sign.
        return leaf_v
 def solve(self, black, white, next_player, timeout=30, exactly=False):
     self.timeout = timeout
     self.start_time = time()
     if not self.last_is_exactly and exactly:
         self.cache = {}
     self.last_is_exactly = exactly
     
     try:
         # logger.debug("start resolving")
         move, score = self.find_winning_move_and_score(ReversiEnv().update(black, white, next_player),
                                                        exactly=exactly)
         if next_player == Player.white:
             score = -score
         # logger.debug(f"solve answer=({move},{score})({time()-self.start_time:.3f} seconds)")
         return move, score
     except Timeout:
         return None, None
    async def search_my_move(self, env: ReversiEnv, is_root_node=False):
        """

        Q and V are values for this player (always black).
        P is the value for next_player (black or white).
        :param env:
        :param is_root_node:
        :return:
        """
        if env.done:
            if env.winner == Winner.black:
                return 1
            elif env.winner == Winner.white:
                return -1
            else:
                return 0

        key = self.counter_key(env)

        while key in self.now_expanding:
            await asyncio.sleep(self.config.play.wait_for_expanding_sleep_sec)

        # is leaf?
        if key not in self.expanded:  # reach leaf node
            leaf_v = await self.expand_and_evaluate(env)
            if env.next_player == Player.black:
                return leaf_v  # Value for black
            else:
                return -leaf_v  # Value for white == -Value for black

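        # pick the move with the best Q + U score and descend one ply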
        action_t = self.select_action_q_and_u(env, is_root_node)
        _, _ = env.step(action_t)

        virtual_loss = self.config.play.virtual_loss
        self.var_n[key][action_t] += virtual_loss
        self.var_w[key][action_t] -= virtual_loss
        leaf_v = await self.search_my_move(env)  # next move

        # on returning search path
        # update: N, W, Q, U
        n = self.var_n[key][action_t] = self.var_n[key][action_t] - virtual_loss + 1
        w = self.var_w[key][action_t] = self.var_w[key][action_t] + virtual_loss + leaf_v
        self.var_q[key][action_t] = w / n
        return leaf_v
Example #15
    def calc_policy(self, own, enemy):
        """calc π(a|s0)

        :param own:
        :param enemy:
        :return:
        """
        pc = self.play_config
        env = ReversiEnv().update(own, enemy, Player.black)
        key = self.counter_key(env)
        if env.turn < pc.change_tau_turn:
            return self.calc_policy_by_tau_1(key)
        else:
            action = np.argmax(self.var_n[key])  # tau = 0
            ret = np.zeros(64)
            ret[action] = 1
            return ret
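    # calc_policy_by_tau_1 is not shown in this listing. A minimal sketch, assuming it
    # simply normalizes the visit counts (temperature tau = 1) and that the root has
    # been visited at least once, might look like:
    def calc_policy_by_tau_1(self, key):
        visits = self.var_n[key]
        return visits / np.sum(visits)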
    def action(self, own, enemy):
        """

        :param own: BitBoard
        :param enemy:  BitBoard
        :return: action: move pos=0 ~ 63 (0=top left, 7=top right, 63=bottom right)
        """
        self.search_moves(own, enemy)
        policy = self.calc_policy(own, enemy)
        self.moves.append([(own, enemy), list(policy)])
        action = int(np.random.choice(range(64), p=policy))

        # this is for play_gui, not necessary when training.
        env = ReversiEnv().update(own, enemy, Player.black)
        key = self.counter_key(env)
        self.thinking_history[(own, enemy)] = HistoryItem(action, policy, list(self.var_q[key]), list(self.var_n[key]))

        return action
    def find_winning_move_and_score(self, env: ReversiEnv, exactly=True):
        if env.done:
            b, w = env.board.number_of_black_and_white
            return None, b - w
        if time() - self.start_time > self.timeout:
            logger.debug("timeout!")
            raise Timeout()

        turn = env.turn
        key = black, white, next_player = env.board.black, env.board.white, env.next_player
        if key in self.cache:
            return self.cache[key]

        if next_player == Player.black:
            legal_moves = find_correct_moves(black, white)
        else:
            legal_moves = find_correct_moves(white, black)

        action_list = [idx for idx in range(225) if legal_moves & (1 << idx)]
        score_list = np.zeros(len(action_list), dtype=int)
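        # step() mutates env, so the position is restored field-by-field before trying each candidate move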
        for i, action in enumerate(action_list):
            # env.update(black, white, next_player)
            env.board.black = black
            env.board.white = white
            env.next_player = next_player
            env.turn = turn
            env.done = False
            env.winner = None
            #
            env.step(action)
            _, score = self.find_winning_move_and_score(env, exactly=exactly)
            score_list[i] = score

            if not exactly:
                # an exact score is not required, so stop at the first winning move
                if next_player == Player.black and score > 0:
                    break
                elif next_player == Player.white and score < 0:
                    break

        # print(list(zip(action_list, score_list)))

        if next_player == Player.black:
            best_action = action_list[int(np.argmax(score_list))]
            best_score = np.max(score_list)
        else:
            best_action = action_list[int(np.argmin(score_list))]
            best_score = np.min(score_list)

        self.cache[key] = (best_action, best_score)
        return best_action, best_score
def start(config: Config):
    tf_util.set_session_config(per_process_gpu_memory_fraction=0.3)
    api_server = MultiProcessReversiModelAPIServer(config)
    process_num = config.play_data.multi_process_num
    api_server.start_serve()

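    # the self-play game counter is shared across worker processes and resumed from the last saved index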
    with Manager() as manager:
        shared_var = SharedVar(
            manager,
            game_idx=read_as_int(config.resource.self_play_game_idx_file) or 0)
        with ProcessPoolExecutor(max_workers=process_num) as executor:
            futures = []
            for i in range(process_num):
                play_worker = SelfPlayWorker(config,
                                             env=ReversiEnv(),
                                             api=api_server.get_api_client(),
                                             shared_var=shared_var,
                                             worker_index=i)
                futures.append(executor.submit(play_worker.start))
Example #19
def start(config: Config):
    tf_util.set_session_config(allow_growth=True)
    # api_server_list = [MultiProcessReversiModelAPIServer(config) for i in range(config.model.num_gpus)]
    api_server = MultiProcessReversiModelAPIServer(config)
    api_server.start_serve()
    process_num = config.play_data.multi_process_num
    # for i in range(config.model.num_gpus):
    #     api_server_list[i].start_serve(i)
    #     print(f'Create server on GPU#{i}')

    with Manager() as manager:
        shared_var = SharedVar(
            manager,
            game_idx=read_as_int(config.resource.self_play_game_idx_file) or 0)
        with ProcessPoolExecutor(max_workers=process_num) as executor:
            futures = []
            for i in range(process_num):
                play_worker = SelfPlayWorker(config,
                                             env=ReversiEnv(),
                                             api=api_server.get_api_client(),
                                             shared_var=shared_var,
                                             worker_index=i)
                futures.append(executor.submit(play_worker.start))
Example #20
    def generate_train_data(self, batch_size):
        env = ReversiEnv()
        # The AlphaZero paper does not use symmetric observation augmentation, but it is nice to use when available.
        symmetric_n = env.rotate_flip_op_count

        while True:
            orig_data_size = self.dataset.size
            data_size = orig_data_size * symmetric_n if symmetric_n > 1 else orig_data_size
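            # with augmentation each stored position is counted symmetric_n times; the sampled index encodes both the sample and which rotation/flip op to apply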

            x, lm, y1, y2 = [], [], [], []
            for _ in range(batch_size):
                n = randint(0, data_size - 1)
                orig_n = n // symmetric_n if symmetric_n > 1 else n

                file_name, offset = self.dataset.locate(orig_n)

                state, policy, legal_moves, z = self.loaded_data[file_name][offset]
                state = env.decompress_ob(state)

                if symmetric_n > 1:
                    op = n % symmetric_n
                    state = env.rotate_flip_ob(state, op)
                    policy = env.rotate_flip_pi(policy, op)
                    legal_moves = env.rotate_flip_pi(legal_moves, op)

                state = np.transpose(state, [1, 2, 0])
                x.append(state)
                lm.append(legal_moves)
                y1.append(policy)
                y2.append([z])

            x = np.asarray(x)
            lm = np.asarray(lm)
            y1 = np.asarray(y1)
            y2 = np.asarray(y2)
            yield x, lm, y1, y2
Example #21
class PlayWithHuman:
    def __init__(self, config: Config):
        self.config = config
        self.human_color = None
        self.observers = []
        self.env = ReversiEnv().reset()
        self.model = self._load_model()
        self.ai = None  # type: ReversiPlayer
        self.last_evaluation = None
        self.last_history = None  # type: HistoryItem

    def add_observer(self, observer_func):
        self.observers.append(observer_func)

    def notify_all(self, event):
        for ob_func in self.observers:
            ob_func(event)

    def start_game(self, human_is_black):
        self.human_color = Player.black if human_is_black else Player.white
        self.env = ReversiEnv().reset()
        self.ai = ReversiPlayer(self.config, self.model)

    def play_next_turn(self):
        self.notify_all(GameEvent.update)

        if self.over:
            self.notify_all(GameEvent.over)
            return

        if self.next_player != self.human_color:
            self.notify_all(GameEvent.ai_move)

    @property
    def over(self):
        return self.env.done

    @property
    def next_player(self):
        return self.env.next_player

    def stone(self, px, py):
        """left top=(0, 0), right bottom=(7,7)"""
        pos = int(py * 8 + px)
        assert 0 <= pos < 64
        bit = 1 << pos
        if self.env.board.black & bit:
            return Player.black
        elif self.env.board.white & bit:
            return Player.white
        return None

    @property
    def number_of_black_and_white(self):
        return self.env.observation.number_of_black_and_white

    def available(self, px, py):
        pos = int(py * 8 + px)
        if pos < 0 or 64 <= pos:
            return False
        own, enemy = self.env.board.black, self.env.board.white
        if self.human_color == Player.white:
            own, enemy = enemy, own
        legal_moves = find_correct_moves(own, enemy)
        return legal_moves & (1 << pos)

    def move(self, px, py):
        pos = int(py * 8 + px)
        assert 0 <= pos < 64

        if self.next_player != self.human_color:
            return False

        self.env.step(pos)

    def _load_model(self):
        from reversi_zero.agent.model import ReversiModel
        model = ReversiModel(self.config)
        if self.config.play.use_newest_next_generation_model:
            loaded = reload_newest_next_generation_model_if_changed(model) or load_best_model_weight(model)
        else:
            loaded = load_best_model_weight(model) or reload_newest_next_generation_model_if_changed(model)
        if not loaded:
            raise RuntimeError("No models found!")
        return model

    def move_by_ai(self):
        if self.next_player == self.human_color:
            return False

        own, enemy = self.get_state_of_next_player()
        action = self.ai.action(own, enemy)
        self.env.step(action)

        self.last_history = self.ai.ask_thought_about(own, enemy)
        self.last_evaluation = self.last_history.values[self.last_history.action]
        logger.debug(f"evaluation by ai={self.last_evaluation}")

    def get_state_of_next_player(self):
        if self.next_player == Player.black:
            own, enemy = self.env.board.black, self.env.board.white
        else:
            own, enemy = self.env.board.white, self.env.board.black
        return own, enemy
Example #22
def start(config: Config):
    tf_util.set_session_config(per_process_gpu_memory_fraction=None, allow_growth=True)
    return SelfPlayWorker(config, env=ReversiEnv()).start()
Example #23
 def get_next_key(self, own, enemy, action):
     env = ReversiEnv().update(own, enemy, Player.black)
     env.step(action)
     return self.counter_key(env)
def start(config: Config, gpu_mem_frac=None):
    if gpu_mem_frac is not None:
        config.model.gpu_mem_frac = gpu_mem_frac

    return SelfPlayWorker(config, env=ReversiEnv()).start()
Example #25
class NBoardEngine:
    def __init__(self, config: Config):
        self.config = config
        self.reader = NonBlockingStreamReader(sys.stdin)
        self.handler = NBoardProtocolVersion2(config, self)
        self.running = False
        self.nc = self.config.nboard  # shortcut
        #
        self.env = ReversiEnv().reset()
        self.model = load_model(self.config)
        self.play_config = self.config.play
        self.player = self.create_player()
        self.turn_of_nboard = None

    def create_player(self):
        logger.debug("create new ReversiPlayer()")
        return ReversiPlayer(self.config,
                             self.model,
                             self.play_config,
                             enable_resign=False)

    def start(self):
        self.running = True
        self.reader.start(push_callback=self.push_callback)
        while self.running:
            message = self.reader.readline(self.nc.read_stdin_timeout)
            if message is None:
                continue
            message = message.strip()
            logger.debug(f"> {message}")
            self.handler.handle_message(message)

    def push_callback(self, message: str):
        # note: called in another thread
        if message.startswith("ping"):  # interupt
            self.stop_thinkng()

    def stop(self):
        self.running = False

    def reply(self, message):
        logger.debug(f"< {message}")
        sys.stdout.write(message + "\n")
        sys.stdout.flush()

    def stop_thinkng(self):
        self.player.stop_thinking()

    def set_depth(self, n):
        try:
            n = int(n)
            self.play_config.simulation_num_per_move = n * self.nc.simulation_num_per_depth_about
            logger.info(
                f"set simulation_num_per_move to {self.play_config.simulation_num_per_move}"
            )
        except ValueError:
            pass

    def reset_state(self):
        self.player = self.create_player()

    def set_game(self, game_state: GameState):
        self.env.reset()
        self.env.update(game_state.black, game_state.white, game_state.player)
        self.turn_of_nboard = game_state.player
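        # replay the recorded moves; a None action is a pass, which still flips the turn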
        for action in game_state.actions:
            self._change_turn()
            if action is not None:
                self.env.step(action)

    def _change_turn(self):
        if self.turn_of_nboard:
            self.turn_of_nboard = Player.black if self.turn_of_nboard == Player.white else Player.white

    def move(self, action):
        self._change_turn()
        if action is not None:
            self.env.step(action)

    def go(self) -> GoResponse:
        if self.env.next_player != self.turn_of_nboard:
            return GoResponse(None, 0, 0)

        board = self.env.board
        if self.env.next_player == Player.black:
            states = (board.black, board.white)
        else:
            states = (board.white, board.black)
        start_time = time()
        action = self.player.action(*states)
        item = self.player.ask_thought_about(*states)
        evaluation = item.values[action]
        time_took = time() - start_time
        return GoResponse(action, evaluation, time_took)

    def hint(self, n_hint):
        """

        :param n_hint:
        """
        board = self.env.board
        if self.env.next_player == Player.black:
            states = (board.black, board.white)
        else:
            states = (board.white, board.black)

        def hint_report_callback(values, visits):
            hint_list = []
            for action, visit in list(
                    sorted(enumerate(visits), key=lambda x: -x[1]))[:n_hint]:
                if visit > 0:
                    hint_list.append(
                        HintResponse(action, values[action], visit))
            self.handler.report_hint(hint_list)

        callback_info = CallbackInMCTS(
            self.config.nboard.hint_callback_per_sim, hint_report_callback)
        self.player.action(*states, callback_in_mtcs=callback_info)
        item = self.player.ask_thought_about(*states)
        hint_report_callback(item.values, item.visit)
Example #26
class PlayWithHuman:
    def __init__(self, config: Config):
        self.config = config
        self.human_color = None
        self.observers = []
        self.env = ReversiEnv().reset()
        self.model = self._load_model()
        self.ai = None  # type: ReversiPlayer
        self.last_evaluation = None
        self.last_history = None  # type: HistoryItem

    def add_observer(self, observer_func):
        self.observers.append(observer_func)

    def notify_all(self, event):
        for ob_func in self.observers:
            ob_func(event)

    def start_game(self, human_is_black):
        self.human_color = Player.black if human_is_black else Player.white
        self.env = ReversiEnv().reset()
        self.ai = ReversiPlayer(self.config, self.model)

    def play_next_turn(self):
        self.notify_all(GameEvent.update)

        if self.over:
            self.notify_all(GameEvent.over)
            return

        if self.next_player != self.human_color:
            self.notify_all(GameEvent.ai_move)

    @property
    def over(self):
        return self.env.done

    @property
    def next_player(self):
        return self.env.next_player

    def stone(self, px, py):
        """left top=(0, 0), right bottom=(14,14)"""
        pos = int(py * 15 + px)
        assert 0 <= pos < 225
        bit = 1 << pos
        if self.env.board.black & bit:
            return Player.black
        elif self.env.board.white & bit:
            return Player.white
        return None

    @property
    def number_of_black_and_white(self):
        return self.env.observation.number_of_black_and_white

    def available(self, px, py):
        pos = int(py * 15 + px)
        if pos < 0 or 225 <= pos:
            return False
        own, enemy = self.env.board.black, self.env.board.white
        if self.human_color == Player.white:
            own, enemy = enemy, own
        legal_moves = find_correct_moves(own, enemy)
        return legal_moves & (1 << pos)

    def move(self, px, py):
        pos = int(py * 15 + px)
        assert 0 <= pos < 225

        if self.next_player != self.human_color:
            return False

        self.env.step(pos)

    def _load_model(self):
        return load_model(self.config)

    def move_by_ai(self):
        if self.next_player == self.human_color:
            return False

        own, enemy = self.get_state_of_next_player()
        action = self.ai.action(own, enemy)
        self.env.step(action)

        self.last_history = self.ai.ask_thought_about(own, enemy)
        self.last_evaluation = self.last_history.values[self.last_history.action]
        logger.debug(f"evaluation by ai={self.last_evaluation}")

    def get_state_of_next_player(self):
        if self.next_player == Player.black:
            own, enemy = self.env.board.black, self.env.board.white
        else:
            own, enemy = self.env.board.white, self.env.board.black
        return own, enemy
Example #27
 def start_game(self, human_is_black):
     self.human_color = Player.black if human_is_black else Player.white
     self.env = ReversiEnv().reset()
     self.ai = ReversiPlayer(self.config, self.model)
class PlayWithHuman:
    def __init__(self, config: Config, model_dir):
        self.config = config
        self.human_color = None
        self.observers = []
        self.env = ReversiEnv().reset()
        self.model = self._load_model(model_dir)
        self.ai = None  # type: EvaluatePlayer
        self.ai_confidence = None

    def add_observer(self, observer_func):
        self.observers.append(observer_func)

    def notify_all(self, event):
        for ob_func in self.observers:
            ob_func(event)

    def start_game(self, human_is_black):
        self.human_color = Player.black if human_is_black else Player.white
        self.env = ReversiEnv().reset()

        def make_sim_env_fn():
            return self.env.copy()

        self.ai = EvaluatePlayer(make_sim_env_fn=make_sim_env_fn,
                                 config=self.config,
                                 model=self.model)
        self.ai.prepare(self.env, dir_noise=False)
        self.ai_confidence = None

    def play_next_turn(self):
        self.notify_all(GameEvent.update)

        if self.over:
            self.notify_all(GameEvent.over)
            return

        if self.next_player != self.human_color:
            self.notify_all(GameEvent.ai_move)
        elif np.amax(self.env.legal_moves) == 0:
            # the human has no legal move, so play the pass action (index 64)
            print('pass move')
            pos = 64
            self.env.step(pos)
            self.ai.play(pos, self.env)

    @property
    def over(self):
        return self.env.done

    @property
    def next_player(self):
        return self.env.next_player

    def stone(self, px, py):
        """left top=(0, 0), right bottom=(7,7)"""

        pos = int(py * 8 + px)
        assert 0 <= pos < 64
        bit = 1 << pos
        if self.env.board.black & bit:
            return Player.black
        elif self.env.board.white & bit:
            return Player.white
        return None

    @property
    def number_of_black_and_white(self):
        return self.env.board.number_of_black_and_white

    def available(self, px, py):
        pos = int(py * 8 + px)
        if pos < 0 or 64 <= pos:
            return False
        own, enemy = self.env.board.black, self.env.board.white
        if self.human_color == Player.white:
            own, enemy = enemy, own
        legal_moves = find_correct_moves(own, enemy)
        return legal_moves & (1 << pos)

    def move(self, px, py):
        pos = int(py * 8 + px)
        assert 0 <= pos < 64

        if self.next_player != self.human_color:
            raise Exception('not human\'s turn!')

        self.env.step(pos)

        self.ai.play(pos, self.env)

    def _load_model(self, model_dir):
        from reversi_zero.agent.model import ReversiModel
        model = ReversiModel(self.config)
        model.create_session()
        model.load(model_dir)

        return model

    def move_by_ai(self):
        if self.next_player == self.human_color:
            raise Exception('not AI\'s turn!')

        logger.info('start thinking...')
        action, _, vs = self.ai.think()
        self.ai_confidence = vs
        logger.info('end thinking...')
        self.env.step(action)
        self.ai.play(action, self.env)

    def get_state_of_next_player(self):
        if self.next_player == Player.black:
            own, enemy = self.env.board.black, self.env.board.white
        else:
            own, enemy = self.env.board.white, self.env.board.black
        return own, enemy
def start(config: Config):
    tf_util.set_session_config(per_process_gpu_memory_fraction=0.3)
    return SelfPlayWorker(config, env=ReversiEnv()).start()