Esempio n. 1
0
    async def search_my_move(self, env: GomokuEnv, is_root_node=False):
        """
        Run one tree-search simulation starting from ``env``.

        The search steps ``env`` in place with the action picked by
        ``select_action_q_and_u`` and recurses until the game is over or a
        position whose key is not in ``self.expanded`` is reached. On the way
        back up, the N / W / Q tables of every traversed (key, action) pair
        are updated with the obtained value.

        All values handled here (the return value, W and Q) are expressed
        from white's point of view, regardless of which side is to move.

        :param env: position to search from; it is mutated by ``env.step``
        :param is_root_node: passed to ``select_action_q_and_u`` for this
            call only; recursive calls use the default ``False``
        :return: for a finished game 1 if white won, -1 if black won and 0
            otherwise; else the value obtained deeper in the tree
        """
        if env.done:
            if env.winner == Winner.white:
                return 1
            if env.winner == Winner.black:
                return -1
            return 0

        node = self.counter_key(env)

        # Poll while this key is listed in now_expanding (presumably another
        # coroutine is busy expanding the same position).
        while node in self.now_expanding:
            await asyncio.sleep(self.config.play.wait_for_expanding_sleep_sec)

        if node not in self.expanded:
            # Leaf node: evaluate it, then negate the result when it is not
            # white's turn so that the caller always receives white's value.
            evaluation = await self.expand_and_evaluate(env)
            if env.player_turn() == Player.white:
                return evaluation
            return -evaluation

        move = self.select_action_q_and_u(env, is_root_node)
        _, _ = env.step(move)

        # Apply the virtual loss before awaiting the recursive search: the
        # visit count goes up and the total value goes down for this edge
        # while the simulation is in flight.
        vloss = self.config.play.virtual_loss
        self.var_n[node][move] += vloss
        self.var_w[node][move] -= vloss
        value = await self.search_my_move(env)  # continue from the next position

        # Back-up phase: undo the virtual loss and record the real result.
        if self.mem is not None:
            self.mem.update(node, move, value)
        visits = self.var_n[node][move] - vloss + 1
        self.var_n[node][move] = visits
        total = self.var_w[node][move] + vloss + value
        self.var_w[node][move] = total

        if self.mem is None:
            mean_value = total / visits
        else:
            # Blend the plain mean with the AMAF estimate kept by self.mem,
            # weighted by self.beta.
            amaf_q = self.mem.get_amaf_q(node, move)
            mean_value = (1.0 - self.beta) * total / visits + (self.beta) * amaf_q
        self.var_q[node][move] = mean_value
        return value
Esempio n. 2
0
 async def start_search_my_move(self, board, turn):
     """Run one root-level search simulation, throttled by ``self.sem``.

     ``self.running_simulation_num`` is incremented on entry and is always
     decremented again on exit, including when the search raises or the
     task is cancelled, so the counter cannot drift.

     :param board: board passed to ``GomokuEnv().update``
     :param turn: turn passed to ``GomokuEnv().update``
     :return: the value returned by ``search_my_move`` for the root position
     """
     self.running_simulation_num += 1
     try:
         # ``with await lock`` was deprecated in Python 3.7 and removed in
         # 3.9; ``async with`` is the supported form. Assumes self.sem is an
         # asyncio synchronization primitive (e.g. asyncio.Semaphore).
         async with self.sem:  # reduce parallel search number
             env = GomokuEnv().update(board, turn)
             return await self.search_my_move(env, is_root_node=True)
     finally:
         self.running_simulation_num -= 1
Esempio n. 3
0
    def action(self, board, turn):
        """
        Pick a move for the position described by ``board`` and ``turn``.

        Up to ``play_config.thinking_loop`` rounds of ``search_moves`` are
        run. After each round a move is sampled from ``calc_policy`` and
        compared with the visited move that has the highest Q value. The
        loop stops as soon as both agree, or immediately when the game is
        still before ``play_config.change_tau_turn``.

        The decision is stored in ``self.thinking_history`` and the
        (observation, policy) pair is appended to ``self.moves``.

        NOTE(review): assumes ``thinking_loop`` is at least 1; with 0 rounds
        ``action`` and ``policy`` are never bound.

        :param board: board passed to ``GomokuEnv().update``
        :param turn: turn passed to ``GomokuEnv().update``
        :return: the sampled action index as an ``int``
        """
        env = GomokuEnv().update(board, turn)
        key = self.counter_key(env)
        cfg = self.play_config

        for round_no in range(cfg.thinking_loop):
            if round_no > 0 and cfg.logging_thinking:
                # Report the disagreement left over from the previous round.
                logger.debug(
                    f"continue thinking: policy move=({action % 8}, {action // 8}), "
                    f"value move=({action_by_value % 8}, {action_by_value // 8})"
                )
            self.search_moves(board, turn)
            policy = self.calc_policy(board, turn)
            action = int(np.random.choice(range(self.labels_n), p=policy))

            # Adding 100 to every visited action keeps unvisited actions
            # from winning the argmax over Q.
            q_values = self.var_q[key]
            visited_bonus = (self.var_n[key] > 0) * 100
            action_by_value = int(np.argmax(q_values + visited_bonus))

            if action == action_by_value:
                break
            if env.turn < cfg.change_tau_turn:
                break

        # this is for play_gui, not necessary when training.
        history_entry = HistoryItem(
            action, policy, list(self.var_q[key]), list(self.var_n[key]))
        self.thinking_history[env.observation] = history_entry

        self.moves.append([env.observation, list(policy)])
        return action
Esempio n. 4
0
def start(config: Config):
    """Play console games between a human and the AI, one after another.

    For every game the human's colour is drawn at random. Moves are then
    requested alternately from ``move_by_human`` and ``move_by_ai``, the
    board is rendered after each move and the result is printed once the
    game is over. The outer loop has no exit, so this function does not
    return.

    :param config: configuration; ``config.play`` is first adjusted by
        ``PlayWithHumanConfig().update_play_config``
    """
    PlayWithHumanConfig().update_play_config(config.play)
    gomoku_model = PlayWithHuman(config)

    while True:
        env = GomokuEnv().reset()
        human_is_black = random() < 0.5
        gomoku_model.start_game(human_is_black)

        while not env.done:
            # The human moves when the side to move matches their colour.
            if env.player_turn() == Player.black:
                human_to_move = human_is_black
            else:
                human_to_move = not human_is_black

            if human_to_move:
                action = gomoku_model.move_by_human(env)
                print("You move to: " + str(action))
            else:
                action = gomoku_model.move_by_ai(env)
                print("IA moves to: " + str(action))
            env.step(action)
            env.render()

        print("\nEnd of the game.")
        print("Game result:")
        if env.winner == Winner.white:
            verdict = "X wins"
        elif env.winner == Winner.black:
            verdict = "O wins"
        else:
            verdict = "Game was a draw"
        print(verdict)
Esempio n. 5
0
    def convert_to_training_data(data):
        """
        Turn saved self-play records into arrays suitable for training.

        Each record's state is reshaped to an 8 x 5 board, loaded into a
        fresh ``GomokuEnv`` and split into black / white planes. The planes
        are ordered with black first when ``env.player_turn()`` is black,
        otherwise white first.

        NOTE(review): the env is always updated with turn 0, so
        ``player_turn()`` presumably reports the same side for every
        record; confirm this is intended.

        :param data: iterable of (state, policy, z) triples; the format is
            SelfPlayWorker.buffer
        :return: tuple of three numpy arrays (states, policies, z values)
        """
        states = []
        policies = []
        outcomes = []
        for raw_state, pi, outcome in data:
            grid = np.reshape(list(raw_state), (8, 5))
            env = GomokuEnv().update(grid, 0)

            black_plane, white_plane = env.black_and_white_plane()
            if env.player_turn() == Player.black:
                planes = [black_plane, white_plane]
            else:
                planes = [white_plane, black_plane]

            states.append(planes)
            policies.append(pi)
            outcomes.append(outcome)

        return np.array(states), np.array(policies), np.array(outcomes)
Esempio n. 6
0
 def calc_policy(self, board, turn):
     """Compute the move distribution pi(a|s0) from the visit counts.

     Before ``play_config.change_tau_turn`` the visit counts are normalised
     to sum to one (tau = 1). From that turn on, the result is a one-hot
     vector on the most visited action (tau = 0).

     :param board: board passed to ``GomokuEnv().update``
     :param turn: turn passed to ``GomokuEnv().update``
     :return: array of ``self.labels_n`` action probabilities
     """
     pc = self.play_config
     env = GomokuEnv().update(board, turn)
     visit_counts = self.var_n[self.counter_key(env)]

     if env.turn < pc.change_tau_turn:
         # tau = 1: probabilities proportional to the visit counts
         return visit_counts / np.sum(visit_counts)

     # tau = 0: all probability mass on the most visited action
     one_hot = np.zeros(self.labels_n)
     one_hot[np.argmax(visit_counts)] = 1
     return one_hot
Esempio n. 7
0
    def play_game(self, best_model, ng_model):
        """Play a single game between the best model and the candidate model.

        Colours are assigned at random. Both players are built with
        ``self.config.eval.play_config``.

        :param best_model: model wrapped by the "best" player
        :param ng_model: candidate model wrapped by the "ng" player
        :return: ``(ng_win, best_is_white)`` where ``ng_win`` is 1 if the
            candidate won, 0 if the best model won and ``None`` when the
            winner is neither white nor black
        """
        env = GomokuEnv().reset()

        eval_play_config = self.config.eval.play_config
        best_player = GomokuPlayer(
            self.config, best_model, play_config=eval_play_config)
        ng_player = GomokuPlayer(
            self.config, ng_model, play_config=eval_play_config)

        best_is_white = random() < 0.5
        if best_is_white:
            black, white = ng_player, best_player
        else:
            black, white = best_player, ng_player

        env.reset()
        while not env.done:
            mover = black if env.player_turn() == Player.black else white
            action = mover.action(env.board, env.turn)
            env.step(action)

        # Translate the winning colour into "did the candidate win?".
        if env.winner == Winner.white:
            ng_win = 0 if best_is_white else 1
        elif env.winner == Winner.black:
            ng_win = 1 if best_is_white else 0
        else:
            ng_win = None
        return ng_win, best_is_white
Esempio n. 8
0
def start(config: Config):
    """Configure the TensorFlow session and run a self-play worker.

    The session is configured with a per-process GPU memory fraction of
    0.2 before the worker is created.

    :param config: configuration handed to ``SelfPlayWorker``
    :return: whatever ``SelfPlayWorker.start()`` returns
    """
    tf_util.set_session_config(per_process_gpu_memory_fraction=0.2)
    worker = SelfPlayWorker(config, env=GomokuEnv())
    return worker.start()