コード例 #1
0
def mcts(payload, computation_budget=1000):
    """Pick a move for the acting player with Monte Carlo tree search.

    Args:
        payload: Mapping providing ``role_id`` (seat of the acting player),
            ``hand_card`` (hands, indexable by seat) and ``last_taken``
            (forwarded to ``get_last_move``). The payload is not modified.
        computation_budget: Number of tree_policy / default_policy / backup
            iterations to run from the root. Defaults to 1000.

    Returns:
        The ``action`` stored on the state of the child selected by
        ``get_bestchild_``.
    """
    role_id = payload['role_id']
    next_id = (role_id + 1) % 3
    next_next_id = (role_id + 2) % 3
    # The search uses seat ids shifted by two relative to the payload; the
    # same shift is applied to the last player further down.
    # NOTE(review): presumably this converts between two seat numberings -
    # confirm against whatever produces the payload.
    my_id = (role_id + 2) % 3

    def to_card_dict(cards):
        # sorted() works on a copy, so the caller's hand lists keep their
        # original order (list.sort() used to reorder them in place).
        ordered = sorted(cards)
        return card_list_to_dict(
            card_to_list(change_card_form_reversal(ordered)))

    hands = payload['hand_card']
    my_card = to_card_dict(hands[role_id])
    next_card = to_card_dict(hands[next_id])
    next_next_card = to_card_dict(hands[next_next_id])

    last_move_, last_p_ = get_last_move(
        role_id, next_id, next_next_id, payload['last_taken'])
    last_move = change_card_form_reversal(last_move_)
    last_p = (last_p_ + 2) % 3

    moves_num = len(get_moves(my_card, last_move))
    root = Node(None, None)
    root.set_state(State(my_id, my_card, next_card, next_next_card,
                         last_move, -1, moves_num, None, last_p))

    for _ in range(computation_budget):
        expand_node = tree_policy(root, my_id)
        reward = default_policy(expand_node, my_id)
        backup(expand_node, reward)

    return get_bestchild_(root).get_state().action
コード例 #2
0
ファイル: tree.py プロジェクト: yangmeiyi/doudizhu-rl
    def get_next_state_with_random_choice(self, untried_move):
        """Play one move for the current player and return the successor state.

        ``untried_move`` is played when it is given; otherwise a randomly
        drawn legal move is used. While the current player was also the last
        one to play, a move that ``is_buchu`` reports as a pass is replaced by
        a fresh random draw.

        In the returned state the seats rotate: the next player becomes the
        player to move, the player after next becomes "next", and the current
        player becomes "next-next".
        """
        legal_moves = get_moves(self.my_card, self.last_move)
        legal_count = len(legal_moves)
        # A random index is drawn even when untried_move is supplied.
        pick = np.random.choice(legal_count)
        chosen = legal_moves[pick].copy()
        if untried_move is not None:
            chosen = untried_move
        while self.is_buchu(chosen) and self.last_pid == self.my_id:
            pick = np.random.choice(legal_count)
            chosen = legal_moves[pick].copy()

        # Flatten the per-key counts of the chosen move into a list of card
        # values and take those cards out of the current player's hand.
        card_keys = [str(rank) for rank in range(3, 14)] + ['1', '2', '14', '15']
        remaining = self.my_card.copy()
        played = []
        for key in card_keys:
            count = chosen.get(key, 0)
            played.extend([int(key)] * count)
            remaining[key] -= count

        mover = (self.my_id + 1) % 3
        mover_hand = self.next_card.copy()
        following_hand = self.next_next_card.copy()

        # The current player wins once no cards are left in their hand.
        hand_is_empty = all(count == 0 for count in remaining.values())
        winner = self.my_id if hand_is_empty else -1

        if played:
            last_p = self.my_id
            last_move = played.copy()
        else:
            # On a pass the next player still answers the previous move ...
            last_p = self.last_pid
            last_move = self.last_move.copy()
            # ... unless that move was their own, in which case they lead.
            if self.last_pid == mover:
                last_move = []

        follow_up_count = len(get_moves(mover_hand, last_move))
        return State(mover, mover_hand, following_hand, remaining,
                     last_move, winner, follow_up_count, played, last_p)
コード例 #3
0
    def expand(self):
        """Create, attach and return a child node for one untried move.

        On first use the state's untried-action list is filled with every
        move returned by ``get_moves``. One eligible untried move is then
        drawn at random, applied through
        ``get_next_state_with_random_choice`` and removed from the list.

        While the player to move was also the last one to play, moves that
        ``is_buchu`` reports as a pass are not eligible.

        Returns:
            The newly created child ``Node``.

        Raises:
            ValueError: If there is no eligible untried move.
        """
        state = self.state
        if state.try_flag == 0:
            for move in get_moves(state.my_card, state.last_move):
                state.init_untried_actions(move)
            state.try_flag = 1

        untried = state.untried_actions
        # Draw from the eligible indices directly. Redrawing until a draw
        # fits never terminates once only passes remain untried.
        if state.last_pid == state.my_id:
            eligible = [idx for idx, move in enumerate(untried)
                        if not state.is_buchu(move)]
        else:
            eligible = list(range(len(untried)))
        if not eligible:
            raise ValueError("no eligible untried action left to expand")

        pick = eligible[np.random.choice(len(eligible))]
        untried_move = untried[pick].copy()

        new_state = self.get_state().get_next_state_with_random_choice(untried_move)
        del untried[pick]
        sub_node = Node(self, new_state)
        self.add_child(sub_node)
        return sub_node
コード例 #4
0
    def choose(self, state):
        """Choose the cards to play for this player.

        While every player still holds more than seven cards, the move comes
        from a rule-based scoring of the hand decompositions returned by
        ``Decomposer``. Afterwards it comes from Monte Carlo tree search,
        which reuses the previous search tree when the two most recent plays
        can be matched to children of the current node.

        Args:
            state: Object whose ``last_move`` is indexable per card kind and
                holds the counts of the move to respond to. It is only read
                on the rule-based path.

        Returns:
            A ``(move, None)`` tuple where ``move`` is the chosen play: a
            per-card count list on the rule-based path, the result of
            ``self.card_to_list`` on the search path.
        """
        cards_left = [sum(p.get_hand_card()) for p in self.game.players]
        if min(cards_left) > 7:
            hand_card = self.get_hand_card()
            # The decomposer and the engine use different encodings
            # (1 -> A, B -> *, R -> $), hence the translation via card_list.
            trans_hand_card = [
                card_list[i] for i in range(15) for _ in range(hand_card[i])
            ]
            # Move to respond to, in the decomposer's encoding.
            last_move = [
                card_list[i] for i in range(15)
                for _ in range(state.last_move[i])
            ]
            D = Decomposer()
            combs, fine_mask = D.get_combinations(trans_hand_card, last_move)

            # The penalty for each extra hand still to be played depends on
            # the smallest number of cards an opponent has left.
            if self.player_id == 0:
                min_oppo_cards = min(cards_left[1], cards_left[2])
            else:
                min_oppo_cards = cards_left[0]
            round_penalty = 15 - 12 * min_oppo_cards / 20

            best_move = None
            max_value = -np.inf
            for i, comb in enumerate(combs):
                # Total value of the hands in this decomposition.
                total_value = sum(cards_value[x] for x in comb)
                # Count the non-pass hands that do not start with "2", "R"
                # or "B". The lookup must go through the action id; indexing
                # action_space by the position inside the decomposition
                # inspected unrelated actions.
                small_num = sum(
                    1 for move_id in comb
                    if move_id > 0
                    and action_space[move_id][0] not in ("2", "R", "B"))
                total_value -= small_num * round_penalty

                for j, move_id in enumerate(comb):
                    if move_id == 0 and min_oppo_cards > 4:
                        # Score of passing.
                        if total_value > max_value:
                            max_value = total_value
                            best_move = 0
                    elif move_id > 0 and (fine_mask is None
                                          or fine_mask[i, j] == True):  # noqa: E712
                        # Special case: the decomposition is a single hand
                        # (optionally preceded by a pass), so play it.
                        if len(comb) == 1 or (len(comb) == 2
                                              and comb[0] == 0):
                            max_value = np.inf
                            best_move = comb[-1]
                        # Score of playing this hand now.
                        move_value = (total_value - cards_value[move_id]
                                      + round_penalty)
                        if move_value > max_value:
                            max_value = move_value
                            best_move = move_id

            # Fall back to a pass when nothing was selected. This runs after
            # the loop so that an empty list of decompositions is covered.
            if best_move is None:
                best_move = 0

            best_cards = action_space[best_move]
            move = [best_cards.count(x) for x in card_list]
            # Log: player id, hand, then the chosen move.
            print("Player {}".format(self.player_id),
                  ' ',
                  Card.visual_card(hand_card),
                  end=' // ')
            print(Card.visual_card(move), "From RuleBasedModel")
            return move, None

        # Try to descend the existing tree along the two most recent plays.
        cards_out = self.game.cards_out
        length = len(cards_out)
        tree_reused = False
        if self.new_game is False:
            out1 = self.list_to_card(cards_out[length - 2][1])
            out2 = self.list_to_card(cards_out[length - 1][1])
            first = next(
                (child for child in self.current_node.get_children()
                 if self.compare(child.state.action, out1)), None)
            if first is not None:
                self.current_node = first
                second = next(
                    (child for child in first.get_children()
                     if self.compare(child.state.action, out2)), None)
                if second is not None:
                    self.current_node = second
                    tree_reused = True

        my_id = self.player_id
        if not tree_reused:
            # Start a fresh tree rooted at the current game position.
            self.new_game = False
            self.current_node = Node(None, None)

            next_id = (my_id + 1) % 3
            next_next_id = (my_id + 2) % 3
            my_card = self.card_list_to_dict(self.get_hand_card())
            next_card = self.card_list_to_dict(
                self.game.players[next_id].get_hand_card())
            next_next_card = self.card_list_to_dict(
                self.game.players[next_next_id].get_hand_card())
            last_move = self.trans_card(Card.visual_card(self.game.last_move))
            last_p = self.game.last_pid
            moves_num = len(get_moves(my_card, last_move))
            root_state = State(my_id, my_card, next_card, next_next_card,
                               last_move, -1, moves_num, None, last_p)
            self.current_node.set_state(root_state)

        # Search.
        computation_budget = 2000
        for _ in range(computation_budget):
            expand_node = tree_policy(self.current_node, my_id)
            reward = default_policy(expand_node, my_id)
            backup(expand_node, reward)
        best_next_node = get_bestchild(self.current_node, my_id)
        move = best_next_node.get_state().action
        self.current_node = best_next_node
        new_move = self.card_to_list(move)

        # Log: player id, hand (by card name), then the chosen move.
        counts = self.get_hand_card()
        hand_card = []
        for i, n in enumerate(Card.all_card_name):
            hand_card.extend([n] * counts[i])
        print("Player {}".format(self.player_id), ' ', hand_card, end=' // ')
        print(Card.visual_card(new_move), "From MctsModel")
        return new_move, None
コード例 #5
0
    def choose(self, state):
        """Choose the cards to play using Monte Carlo tree search.

        The previous search tree is reused when the two most recent plays can
        be matched to children of the current node; otherwise a new tree is
        rooted at the current game position.

        Args:
            state: Not used by this implementation.

        Returns:
            A ``(move, None)`` tuple where ``move`` is the chosen play as
            returned by ``self.card_to_list``.
        """
        # Try to descend the existing tree along the two most recent plays.
        cards_out = self.game.cards_out
        length = len(cards_out)
        tree_reused = False
        if length > 2:
            out1 = self.list_to_card(cards_out[length - 2][1])
            out2 = self.list_to_card(cards_out[length - 1][1])
            first = next(
                (child for child in self.current_node.get_children()
                 if self.compare(child.state.action, out1)), None)
            if first is not None:
                self.current_node = first
                second = next(
                    (child for child in first.get_children()
                     if self.compare(child.state.action, out2)), None)
                if second is not None:
                    self.current_node = second
                    tree_reused = True

        my_id = self.player_id
        if not tree_reused:
            # Start a fresh tree rooted at the current game position.
            self.current_node = Node(None, None)

            next_id = (my_id + 1) % 3
            next_next_id = (my_id + 2) % 3
            my_card = self.card_list_to_dict(self.get_hand_card())
            next_card = self.card_list_to_dict(
                self.game.players[next_id].get_hand_card())
            next_next_card = self.card_list_to_dict(
                self.game.players[next_next_id].get_hand_card())
            last_move = self.trans_card(Card.visual_card(self.game.last_move))
            last_p = self.game.last_pid
            moves_num = len(get_moves(my_card, last_move))
            state_ = State(my_id, my_card, next_card, next_next_card,
                           last_move, -1, moves_num, None, last_p)
            self.current_node.set_state(state_)

        # Search.
        computation_budget = 2000
        for _ in range(computation_budget):
            expand_node = tree_policy(self.current_node, my_id)
            reward = default_policy(expand_node, my_id)
            backup(expand_node, reward)
        best_next_node = get_bestchild_(self.current_node)
        move = best_next_node.get_state().action
        self.current_node = best_next_node
        new_move = self.card_to_list(move)

        # Log: player id, hand (by card name), then the chosen move.
        counts = self.get_hand_card()
        hand_card = []
        for i, n in enumerate(Card.all_card_name):
            hand_card.extend([n] * counts[i])
        print("Player {}".format(self.player_id), ' ', hand_card, end=' // ')
        print(Card.visual_card(new_move))
        return new_move, None