コード例 #1
0
def start(config: Config):
    """Run an endless series of human-vs-AI Gomoku games on the console.

    The play configuration is first adjusted through ``PlayWithHumanConfig``.
    For every game a fresh environment is created, the human's colour is
    drawn at random, and moves alternate until ``env.done`` is set. The
    board is rendered after each move and the result is printed when the
    game ends. The outer loop never terminates on its own.

    :param config: project configuration; only ``config.play`` is read
        directly here, the whole object is handed to ``PlayWithHuman``.
    """
    PlayWithHumanConfig().update_play_config(config.play)
    gomoku_model = PlayWithHuman(config)

    while True:
        env = GomokuEnv().reset()
        human_is_black = random() < 0.5
        gomoku_model.start_game(human_is_black)

        while not env.done:
            # The human moves when the colour to move matches the human's colour.
            if env.player_turn() == Player.black:
                human_to_move = human_is_black
            else:
                human_to_move = not human_is_black

            if human_to_move:
                action = gomoku_model.move_by_human(env)
                print("You move to: " + str(action))
            else:
                action = gomoku_model.move_by_ai(env)
                print("IA moves to: " + str(action))

            env.step(action)
            env.render()

        print("\nEnd of the game.")
        print("Game result:")
        if env.winner == Winner.white:
            outcome = "X wins"
        elif env.winner == Winner.black:
            outcome = "O wins"
        else:
            outcome = "Game was a draw"
        print(outcome)
コード例 #2
0
    async def search_my_move(self, env: GomokuEnv, is_root_node=False):
        """
        Run one simulation down the search tree and return its value.

        The returned value is always expressed from white's point of view:
        a finished game yields 1 (white won), -1 (black won) or 0 (otherwise).
        Q and V are values for white; P is the policy for whichever player
        is next to move (black or white).

        If the position has not been expanded yet it is expanded and
        evaluated, and the evaluation is returned (sign-adjusted to white's
        view). Otherwise an action is selected, played on ``env``, and the
        search recurses; on the way back the visit count, total value and
        Q value stored for (position, action) are updated.

        NOTE(review): ``env.step`` appears to advance ``env`` in place, so
        callers presumably pass a copy they can afford to lose -- confirm.

        :param env: position to search from
        :param is_root_node: forwarded to ``select_action_q_and_u``; the
            recursive call leaves it at the default ``False``
        :return: value of the simulated line from white's point of view
        """
        # Terminal position: report the outcome from white's point of view.
        if env.done:
            if env.winner == Winner.white:
                return 1
            elif env.winner == Winner.black:
                return -1
            else:
                return 0

        key = self.counter_key(env)

        # Another coroutine is currently expanding this position: poll until
        # it is no longer listed in now_expanding.
        while key in self.now_expanding:
            await asyncio.sleep(self.config.play.wait_for_expanding_sleep_sec)

        # is leaf?
        if key not in self.expanded:  # reach leaf node
            leaf_v = await self.expand_and_evaluate(env)
            # The sign flip below treats leaf_v as the value for the side
            # to move, converting it to white's point of view.
            if env.player_turn() == Player.white:
                return leaf_v  # Value for white
            else:
                return -leaf_v  # Value for white == -(value for black)

        action_t = self.select_action_q_and_u(env, is_root_node)
        _, _ = env.step(action_t)

        # Apply the virtual loss to this edge before descending: N goes up
        # and W goes down while the recursive search is pending.
        # (presumably to steer concurrent simulations away from this edge)
        virtual_loss = self.config.play.virtual_loss
        self.var_n[key][action_t] += virtual_loss
        self.var_w[key][action_t] -= virtual_loss
        leaf_v = await self.search_my_move(env)  # next move

        # On the returning search path: revert the virtual loss and record
        # the real visit, then update N, W and Q for this edge.
        if self.mem is not None:
            self.mem.update(key, action_t, leaf_v)
        n = self.var_n[key][
            action_t] = self.var_n[key][action_t] - virtual_loss + 1
        w = self.var_w[key][
            action_t] = self.var_w[key][action_t] + virtual_loss + leaf_v
        q = w / n
        # With a memory attached, Q is a blend of the tree average (w / n)
        # and the memory's AMAF estimate, weighted by self.beta.
        if self.mem is not None:
            q = (1.0 - self.beta) * w / n + (self.beta) * self.mem.get_amaf_q(
                key, action_t)
        self.var_q[key][action_t] = q
        return leaf_v
コード例 #3
0
ファイル: optimize.py プロジェクト: harvzhang/gomoku-zero
    def convert_to_training_data(data):
        """
        Turn saved self-play records into arrays ready for training.

        Each record's state is reshaped into a board, loaded into a fresh
        ``GomokuEnv``, and encoded as two planes ordered so that the plane
        of the player to move comes first.

        :param data: iterable of ``(state, policy, z)`` records, in the
            format of SelfPlayWorker.buffer
        :return: tuple ``(states, policies, zs)`` of numpy arrays
        """
        encoded_states, policies, outcomes = [], [], []

        for raw_state, policy, z in data:
            # NOTE(review): the board shape is hard-coded to (8, 5) --
            # confirm it matches the environment's board size.
            grid = np.reshape(list(raw_state), (8, 5))
            env = GomokuEnv().update(grid, 0)

            black_ary, white_ary = env.black_and_white_plane()
            if env.player_turn() == Player.black:
                planes = [black_ary, white_ary]
            else:
                planes = [white_ary, black_ary]

            encoded_states.append(planes)
            policies.append(policy)
            outcomes.append(z)

        return np.array(encoded_states), np.array(policies), np.array(outcomes)
コード例 #4
0
    def play_game(self, best_model, ng_model):
        """Play a single game between the best model and the candidate model.

        Colours are assigned at random. Both players are built with the
        evaluation play config.

        :param best_model: model currently considered best
        :param ng_model: candidate ("next generation") model
        :return: tuple ``(ng_win, best_is_white)`` where ``ng_win`` is 1 if
            the candidate won, 0 if the best model won, and ``None`` when
            the winner is neither white nor black
        """
        env = GomokuEnv().reset()

        best_player, ng_player = [
            GomokuPlayer(self.config,
                         model,
                         play_config=self.config.eval.play_config)
            for model in (best_model, ng_model)
        ]

        best_is_white = random() < 0.5
        if best_is_white:
            black, white = ng_player, best_player
        else:
            black, white = best_player, ng_player

        env.reset()
        while not env.done:
            mover = black if env.player_turn() == Player.black else white
            env.step(mover.action(env.board, env.turn))

        # Record the result from the candidate's perspective.
        if env.winner == Winner.white:
            ng_win = 0 if best_is_white else 1
        elif env.winner == Winner.black:
            ng_win = 1 if best_is_white else 0
        else:
            ng_win = None
        return ng_win, best_is_white