Beispiel #1
0
 def choose(self, state):
     """Pick this turn's move with a hand-decomposition heuristic.

     The hand is split into candidate combinations by ``Decomposer``. Each
     combination is scored with ``cards_value`` minus a penalty per extra
     hand, and the playable action leaving the best score is selected.

     Args:
         state: Game state; only ``state.last_move`` (one count per card
             index) is read.

     Returns:
         A ``(move, None)`` tuple where ``move`` holds one count per entry
         of ``card_list``.
     """
     # Hand as one count per card index.
     hand_card = self.get_hand_card()
     # The decomposer and the engine use different encodings
     # (1 -> A, B -> *, R -> $), so expand the counts into card symbols.
     trans_hand_card = [card_list[i] for i in range(15) for _ in range(hand_card[i])]
     # Move to beat, expanded the same way.
     last_move = [card_list[i] for i in range(15) for _ in range(state.last_move[i])]
     # Decompose the hand into candidate combinations.
     combs, fine_mask = Decomposer().get_combinations(trans_hand_card, last_move)
     # The penalty for each additional hand depends on the fewest cards an
     # opponent still holds.
     cards_left = [sum(p.get_hand_card()) for p in self.game.players]
     if self.player_id == 0:
         min_oppo_cards = min(cards_left[1], cards_left[2])
     else:
         min_oppo_cards = cards_left[0]
     round_penalty = 15 - 12 * min_oppo_cards / 20

     # Search for the best action.
     best_move = None
     max_value = -np.inf
     for i, comb in enumerate(combs):
         # Total score of the combination.
         total_value = sum(cards_value[x] for x in comb)
         # NOTE(review): this indexes action_space by the position j rather
         # than by the action id comb[j]; kept as in the original, but it
         # looks unintended -- confirm.
         small_num = sum(
             1 for j in range(1, len(comb))
             if action_space[j][0] not in ["2", "R", "B"])
         total_value -= small_num * round_penalty
         # True when the combination is a single hand (optionally with pass).
         only_one_hand = len(comb) == 1 or (len(comb) == 2 and comb[0] == 0)
         for j, action in enumerate(comb):
             if action == 0 and min_oppo_cards > 4:
                 # Score of passing: the whole combination is kept.
                 if total_value > max_value:
                     max_value = total_value
                     best_move = 0
             elif action > 0 and (fine_mask is None or fine_mask[i, j] == True):
                 # Special case: only one hand left, play it unconditionally.
                 if only_one_hand:
                     max_value = np.inf
                     best_move = comb[-1]
                 # Score of playing this action: what remains afterwards,
                 # with one hand's penalty refunded.
                 move_value = total_value - cards_value[action] + round_penalty
                 if move_value > max_value:
                     max_value = move_value
                     best_move = action
     # Fall back to passing. Checked after the loop so that an empty
     # decomposition also yields a valid action index.
     if best_move is None:
         best_move = 0

     # Convert the chosen action into per-card counts.
     best_cards = action_space[best_move]
     move = [best_cards.count(x) for x in card_list]
     # Log: player i [hand] // [move]
     print("Player {}".format(self.player_id), ' ', Card.visual_card(hand_card), end=' // ')
     print(Card.visual_card(move))
     return move, None
Beispiel #2
0
    def choose(self, state):
        """Choose a move from the policy network's top-10 predictions.

        Only the landlord (``player_id == 0``) has a model. The first of
        the ten highest-scoring labels that matches exactly one legal move
        is played; if none matches, the player passes. Players 1 and 2
        always pass because their models are not loaded yet.

        Args:
            state: Ignored; the network input is rebuilt from the player's
                own hand, the public cards and the play history.

        Returns:
            A ``(move, None)`` tuple; ``move`` is ``[]`` for a pass.

        Raises:
            ValueError: If ``player_id`` is not 0, 1 or 2.
        """
        current_handcards = self.get_hand_card()
        series_cards_out = self.game.cards_out
        player_id = self.player_id
        public_np = self.get_public_card()
        # Network input built from hand, public cards and history.
        state = generate_game_state(current_handcards, public_np,
                                    series_cards_out, player_id)

        if self.player_id not in (0, 1, 2):
            raise ValueError(
                'player_id should be among 0, 1, 2. Got: {}'.format(
                    self.player_id))
        if self.player_id != 0:
            # Models for Landlord_down (1) and Landlord_up (2) would be
            # loaded later. Pass, using the same (move, None) shape as every
            # other return path.
            return [], None

        # Landlord: use the model trained on Landlord.
        valid_moves = self.get_moves()
        net = resnet.resnetpokernet(num_classes=13707).to(device)
        net.load_state_dict(torch.load(args.model_path)["state_dict"])
        # Inference only: freeze batch-norm/dropout behaviour and skip
        # gradient bookkeeping.
        net.eval()
        batch_state = np.expand_dims(state, axis=0)
        batch_state_ = torch.from_numpy(batch_state).float().to(device)
        with torch.no_grad():
            outputs = net(batch_state_)
        _, pred = torch.topk(outputs, 10)

        if not isinstance(valid_moves, np.ndarray):
            valid_moves = np.array(valid_moves)

        for rank in range(10):
            _, list_idx = label_int2str(pred[0][rank].item())
            find_res = findByRow(valid_moves, np.array(list_idx))
            if len(find_res) == 1:
                move = valid_moves[find_res]
                hand_counts = self.get_hand_card()
                hand_card = []
                for j, n in enumerate(Card.all_card_name):
                    hand_card.extend([n] * hand_counts[j])
                # Log: player i [hand] // [move]
                print("Player {}".format(self.player_id), ' ', hand_card,
                      ' // ', Card.visual_card(move))
                return move, None

        # None of the top-10 predictions is a legal move: pass.
        return [], None
 def choose(self, state):
     """Play a randomly selected legal move.

     Args:
         state: Unused by this implementation.

     Returns:
         A ``(move, None)`` tuple, ``move`` being one entry of
         ``self.get_moves()``.
     """
     candidates = self.get_moves()
     # Expand the hand counts into a flat list of card names for logging.
     hand_card = [
         name
         for idx, name in enumerate(Card.all_card_name)
         for _ in range(self.get_hand_card()[idx])
     ]
     pick = np.random.choice(len(candidates))
     move = candidates[pick]
     # Log: player i [hand] // [move]
     print("Player {}".format(self.player_id), ' ', hand_card, end=' // ')
     print(Card.visual_card(move))
     return move, None
Beispiel #4
0
    def choose(self, state):
        """Let the wrapped model pick an action among ``self.move_list``.

        The hand and the chosen move are printed only when a ``test``
        directory exists two levels above this file.

        Args:
            state: Game state, converted with ``state_to_tensor`` before it
                is handed to the model.

        Returns:
            A ``(res, None)`` tuple, ``res`` being the model's choice.
        """
        candidates = self.move_list
        res = self.model.choose_action(state_to_tensor(state), candidates)

        grandparent_dir = os.path.dirname(os.path.dirname(__file__))
        if not os.path.exists(os.path.join(grandparent_dir, 'test')):
            return res, None

        # Log: player i [hand] // [move]
        hand_card = [
            name
            for idx, name in enumerate(Card.all_card_name)
            for _ in range(self.get_hand_card()[idx])
        ]
        print("DQN Player {}".format(self.player_id), ' ', hand_card,
              ' // ', Card.visual_card(res))
        return res, None
Beispiel #5
0
def test_is_quads():
    """Check ``is_quads`` on a hand with four rank-14 cards and one without.

    The first hand is expected to yield ``(14, 10)``, the second ``False``.
    """
    with_quads = [
        Card(rank, suit)
        for rank, suit in (
            (14, "s"),
            (14, "h"),
            (14, "d"),
            (14, "c"),
            (10, "h"),
            (4, "h"),
            (10, "h"),
        )
    ]
    without_quads = [
        Card(rank, suit)
        for rank, suit in (
            (14, "s"),
            (13, "h"),
            (14, "d"),
            (14, "c"),
            (10, "h"),
            (4, "h"),
            (2, "h"),
        )
    ]
    expectations = [(with_quads, (14, 10)), (without_quads, False)]

    # Evaluate every case before asserting, so all hands are always run.
    results = []
    for hand, expected in expectations:
        results.append(is_quads(hand) == expected)
    assert all(results)
Beispiel #6
0
def test_is_straight_flush():
    """Check ``is_straight_flush`` on a mixed-suit run and a single-suit run.

    Ranks 14..10 with the 14 in another suit must give ``False``; the same
    ranks all in suit "h" must give ``(1, 14)``.
    """
    mixed_suit_run = [
        Card(rank, suit)
        for rank, suit in (
            (14, "s"),
            (13, "h"),
            (12, "h"),
            (11, "h"),
            (10, "h"),
            (4, "h"),
            (2, "h"),
        )
    ]
    single_suit_run = [
        Card(rank, suit)
        for rank, suit in (
            (14, "h"),
            (13, "h"),
            (12, "h"),
            (11, "h"),
            (10, "h"),
            (4, "s"),
            (2, "s"),
        )
    ]
    expectations = [(mixed_suit_run, False), (single_suit_run, (1, 14))]

    # Evaluate every case before asserting, so all hands are always run.
    results = []
    for hand, expected in expectations:
        results.append(is_straight_flush(hand) == expected)
    assert all(results)
Beispiel #7
0
def test_is_flush_various():
    """Check ``is_flush`` with five and with only four cards of suit "h".

    Five "h" cards topped by rank 10 must give ``10``; four must give
    ``False``.
    """
    five_of_suit = [
        Card(rank, suit)
        for rank, suit in (
            (10, "h"),
            (10, "h"),
            (9, "h"),
            (5, "h"),
            (9, "h"),
            (4, "s"),
            (2, "s"),
        )
    ]
    four_of_suit = [
        Card(rank, suit)
        for rank, suit in (
            (10, "h"),
            (10, "h"),
            (9, "h"),
            (5, "h"),
            (9, "s"),
            (4, "s"),
            (2, "s"),
        )
    ]
    expectations = [(five_of_suit, 10), (four_of_suit, False)]

    # Evaluate every case before asserting, so all hands are always run.
    results = []
    for hand, expected in expectations:
        results.append(is_flush(hand) == expected)
    assert all(results)
Beispiel #8
0
 def ranks_to_hand(ranks):
     """Build a hand with one ``Card`` of suit "h" per rank in *ranks*."""
     hand = []
     for rank in ranks:
         hand.append(Card(rank, "h"))
     return hand
Beispiel #9
0
def test_is_straight_not():
    """A hand without five consecutive ranks is not a straight."""
    ranks = (14, 12, 10, 9, 8, 6, 2)
    hand = []
    for rank in ranks:
        hand.append(Card(rank, "h"))
    assert not is_straight(hand)
Beispiel #10
0
def test_is_straight():
    """Seven consecutive ranks (14 down to 8) are detected as a straight."""
    ranks = (14, 13, 12, 11, 10, 9, 8)
    hand = []
    for rank in ranks:
        hand.append(Card(rank, "h"))
    assert is_straight(hand)
    def choose(self, state):
        """Select a move by tree search starting from the tracked node.

        The tracked node is first advanced along the last two recorded
        moves. If either is missing from the tree, a fresh root is built
        from the current hands. The search then runs a fixed number of
        iterations and the best child's action is played.

        Args:
            state: Unused by this implementation.

        Returns:
            A ``(new_move, None)`` tuple, ``new_move`` being the chosen
            action converted by ``self.card_to_list``.
        """
        start = time.time()
        # Try to locate the current node in the existing tree.
        history = self.game.cards_out
        played = len(history)
        # Number of the last two recorded moves found in the tree (0, 1, 2).
        matched = 0
        if played > 2:
            # The two most recently recorded moves.
            recent = [
                self.list_to_card(history[played - 2][1]),
                self.list_to_card(history[played - 1][1]),
            ]
            for depth, action in enumerate(recent, start=1):
                for child in self.current_node.get_children():
                    if self.compare(child.state.action, action):
                        self.current_node = child
                        matched = depth
                        break
                else:
                    # This move is not in the tree; stop descending.
                    break

        my_id = self.player_id
        if matched != 2:
            # Could not follow both moves: restart from a new root.
            self.current_node = Node(None, None)

            # Id of the next player.
            next_id = (my_id + 1) % 3
            # Id of the player after that.
            next_next_id = (my_id + 2) % 3
            my_card = self.card_list_to_dict(self.get_hand_card())
            # Next player's cards.
            next_card = self.card_list_to_dict(
                self.game.players[next_id].get_hand_card())
            # Cards of the player after that.
            next_next_card = self.card_list_to_dict(
                self.game.players[next_next_id].get_hand_card())
            last_move = self.trans_card(Card.visual_card(self.game.last_move))
            last_p = self.game.last_pid
            moves_num = len(get_moves(my_card, last_move))
            root_state = State(my_id, my_card, next_card, next_next_card,
                               last_move, -1, moves_num, None, last_p)
            self.current_node.set_state(root_state)

        # Search.
        computation_budget = 2000
        for _ in range(computation_budget):
            expand_node = tree_policy(self.current_node, my_id)
            reward = default_policy(expand_node, my_id)
            backup(expand_node, reward)
        best_next_node = get_bestchild_(self.current_node)
        move = best_next_node.get_state().action
        self.current_node = best_next_node
        new_move = self.card_to_list(move)

        # Log: player i [hand] // [move]
        hand_card = [
            name
            for idx, name in enumerate(Card.all_card_name)
            for _ in range(self.get_hand_card()[idx])
        ]
        print("Player {}".format(self.player_id), ' ', hand_card, end=' // ')
        print(Card.visual_card(new_move))
        # Elapsed time, kept for ad-hoc profiling.
        elapsed = time.time() - start
        #  print('cost: {}'.format(elapsed))
        return new_move, None
Beispiel #12
0
    def choose(self, state):
        """Choose a move: decomposition rules early, tree search later.

        While the shortest hand at the table still holds more than 7 cards
        the rule-based heuristic decides; after that the search tree does.

        Args:
            state: Game state; only ``state.last_move`` is read, and only
                by the rule-based path.

        Returns:
            A ``(move, None)`` tuple.
        """
        min_cards = min(sum(p.get_hand_card()) for p in self.game.players)
        if min_cards > 7:
            return self._choose_by_rules(state)
        return self._choose_by_search()

    def _choose_by_rules(self, state):
        """Score the decomposer's combinations and play the best action."""
        # Hand as one count per card index.
        hand_card = self.get_hand_card()
        # The decomposer and the engine use different encodings
        # (1 -> A, B -> *, R -> $), so expand the counts into card symbols.
        trans_hand_card = [
            card_list[i] for i in range(15) for _ in range(hand_card[i])
        ]
        # Move to beat, expanded the same way.
        last_move = [
            card_list[i] for i in range(15)
            for _ in range(state.last_move[i])
        ]
        # Decompose the hand into candidate combinations.
        combs, fine_mask = Decomposer().get_combinations(
            trans_hand_card, last_move)
        # The penalty for each additional hand depends on the fewest cards
        # an opponent still holds.
        cards_left = [sum(p.get_hand_card()) for p in self.game.players]
        if self.player_id == 0:
            min_oppo_cards = min(cards_left[1], cards_left[2])
        else:
            min_oppo_cards = cards_left[0]
        round_penalty = 15 - 12 * min_oppo_cards / 20

        # Search for the best action.
        best_move = None
        max_value = -np.inf
        for i, comb in enumerate(combs):
            # Total score of the combination.
            total_value = sum(cards_value[x] for x in comb)
            # NOTE(review): this indexes action_space by the position j
            # rather than by the action id comb[j]; kept as in the original,
            # but it looks unintended -- confirm.
            small_num = sum(
                1 for j in range(1, len(comb))
                if action_space[j][0] not in ["2", "R", "B"])
            total_value -= small_num * round_penalty
            # True when the combination is a single hand (optionally with
            # pass).
            only_one_hand = len(comb) == 1 or (len(comb) == 2
                                               and comb[0] == 0)
            for j, action in enumerate(comb):
                if action == 0 and min_oppo_cards > 4:
                    # Score of passing: the whole combination is kept.
                    if total_value > max_value:
                        max_value = total_value
                        best_move = 0
                elif action > 0 and (fine_mask is None
                                     or fine_mask[i, j] == True):
                    # Special case: only one hand left, play it.
                    if only_one_hand:
                        max_value = np.inf
                        best_move = comb[-1]
                    # Score of playing this action: what remains afterwards,
                    # with one hand's penalty refunded.
                    move_value = (total_value - cards_value[action] +
                                  round_penalty)
                    if move_value > max_value:
                        max_value = move_value
                        best_move = action
        # Fall back to passing. Checked after the loop so that an empty
        # decomposition also yields a valid action index.
        if best_move is None:
            best_move = 0

        # Convert the chosen action into per-card counts.
        best_cards = action_space[best_move]
        move = [best_cards.count(x) for x in card_list]
        # Log: player i [hand] // [move]
        print("Player {}".format(self.player_id),
              ' ',
              Card.visual_card(hand_card),
              end=' // ')
        print(Card.visual_card(move), "From RuleBasedModel")
        return move, None

    def _choose_by_search(self):
        """Advance or rebuild the search tree, run the search, play its pick."""
        # Try to locate the current node in the existing tree.
        cards_out = self.game.cards_out
        length = len(cards_out)
        # Number of the last two recorded moves found in the tree (0, 1, 2).
        flag = 0
        if self.new_game is False:
            # The two most recently recorded moves.
            out1 = self.list_to_card(cards_out[length - 2][1])
            out2 = self.list_to_card(cards_out[length - 1][1])
            for child in self.current_node.get_children():
                if self.compare(child.state.action, out1):
                    self.current_node = child
                    flag = 1
                    break
            if flag == 1:
                for child in self.current_node.get_children():
                    if self.compare(child.state.action, out2):
                        self.current_node = child
                        flag = 2
                        break

        my_id = self.player_id
        if flag != 2:
            # Could not follow both moves: restart from a new root.
            self.new_game = False
            self.current_node = Node(None, None)

            # Id of the next player.
            next_id = (my_id + 1) % 3
            # Id of the player after that.
            next_next_id = (my_id + 2) % 3
            my_card = self.card_list_to_dict(self.get_hand_card())
            # Next player's cards.
            next_card = self.card_list_to_dict(
                self.game.players[next_id].get_hand_card())
            # Cards of the player after that.
            next_next_card = self.card_list_to_dict(
                self.game.players[next_next_id].get_hand_card())
            last_move = self.trans_card(Card.visual_card(self.game.last_move))
            last_p = self.game.last_pid
            moves_num = len(get_moves(my_card, last_move))
            root_state = State(my_id, my_card, next_card, next_next_card,
                               last_move, -1, moves_num, None, last_p)
            self.current_node.set_state(root_state)

        # Search.
        computation_budget = 2000
        for _ in range(computation_budget):
            expand_node = tree_policy(self.current_node, my_id)
            reward = default_policy(expand_node, my_id)
            backup(expand_node, reward)
        best_next_node = get_bestchild(self.current_node, my_id)
        move = best_next_node.get_state().action
        self.current_node = best_next_node
        new_move = self.card_to_list(move)

        # Log: player i [hand] // [move]
        hand_counts = self.get_hand_card()
        hand_card = []
        for i, n in enumerate(Card.all_card_name):
            hand_card.extend([n] * hand_counts[i])
        print("Player {}".format(self.player_id), ' ', hand_card, end=' // ')
        print(Card.visual_card(new_move), "From MctsModel")
        return new_move, None
    def choose(self, state):
        """Pick a move with the decomposition heuristic plus special rules.

        Each combination from ``Decomposer`` is scored with ``cards_value``
        minus a penalty per hand. Several hand-written rules can
        short-circuit the scoring: playing a rocket when one other hand
        remains, feeding a teammate who holds a single card, and covering
        or not covering a teammate's big play.

        Args:
            state: Game state; ``state.last_move``, ``state.last_pid`` and
                ``state.other_hand`` are read.

        Returns:
            A ``(move, None)`` tuple where ``move`` holds one count per
            card index.
        """
        # Hand as one count per card index.
        hand_card = self.get_hand_card()
        # The decomposer and the engine use different encodings
        # (1 -> A, B -> *, R -> $), so expand the counts into card symbols.
        trans_hand_card = [
            card_list[i] for i in range(15) for _ in range(hand_card[i])
        ]
        # Move to beat, expanded the same way.
        last_move = [
            card_list[i] for i in range(15) for _ in range(state.last_move[i])
        ]
        # Decompose the hand into candidate combinations.
        combs, fine_mask = Decomposer().get_combinations(
            trans_hand_card, last_move)
        # Cards left per player, indexed like self.game.players.
        left_cards = [sum(p.get_hand_card()) for p in self.game.players]
        if self.player_id == 0:
            min_oppo_cards = min(left_cards[1], left_cards[2])
        else:
            min_oppo_cards = left_cards[0]
        # The penalty decreases linearly with the opponents' smallest hand.
        round_penalty = 17 - 12 * min_oppo_cards / 20

        if not last_move:
            if self.player_id == 0:  # landlord
                round_penalty += 7
            elif self.player_id == 1:  # player after the landlord
                round_penalty += 5
            else:  # player before the landlord
                round_penalty += 3

        # NOTE(review): the original comments describe these as "teammate
        # did not beat the landlord" / "landlord did not beat the teammate",
        # but the code only tests that there is no move to beat -- confirm.
        if self.player_id == 2 and not last_move:
            round_penalty += 5
        if self.player_id == 1 and not last_move:
            round_penalty -= 8

        # Sum of the counts at the last three card indices; subtracted from
        # each combination's length to get the number of hands that are
        # charged the penalty.
        big_card_count = hand_card[-1] + hand_card[-2] + hand_card[-3]

        # Search for the best action.
        best_move = None
        max_value = -np.inf
        for i, comb in enumerate(combs):
            # Total score of the combination.
            total_value = sum(cards_value[x] for x in comb)
            small_num = len(comb) - big_card_count
            total_value -= small_num * round_penalty

            # Shape flags, with pass (action 0) possibly in front.
            only_one_hand = len(comb) == 1 or (len(comb) == 2
                                               and comb[0] == 0)
            two_hands_left = (len(comb) == 3
                              and comb[0] == 0) or len(comb) == 2

            # A rocket plus one other hand: play the rocket right away.
            if two_hands_left:
                if (cards_value[comb[-1]] == 12
                        or cards_value[comb[-2]] == 12):
                    print('*****rule  火箭直接走')
                    return [0] * 13 + [1, 1], None

            # The other farmer holds a single card and nothing has to be
            # beaten: lead the lowest-index card held to let them go out.
            if (self.player_id == 1 and left_cards[2] == 1
                    and not last_move):
                # Separate names here so the outer index ``i`` is not
                # overwritten.
                for card_idx, count in enumerate(hand_card):
                    if count != 0:
                        tem = [0] * 15
                        tem[card_idx] = 1
                        print('******rule  下家农民手里只有一张牌,送队友走')
                        return tem, None

            # Two hands left and one of them is a playable big hand: play it.
            if self.player_id == 2 and len(comb) == 3 and comb[0] == 0:
                if action_space[comb[1]] in dapai and (
                        fine_mask is None or fine_mask[i, 1] == True):
                    print('******rule  队友出大牌能走就压')
                    best_move = comb[1]
                    break
                elif action_space[comb[2]] in dapai and (
                        fine_mask is None or fine_mask[i, 2] == True):
                    print('******rule  队友出大牌能走就压')
                    best_move = comb[2]
                    break

            # The teammate played a big hand and we cannot go out: pass.
            if self.player_id == 2 and state.last_pid == 1 and sorted(
                    last_move) in dapai:
                print('******rule  队友出大牌走不了就不压')
                best_move = 0
                break

            for j, action in enumerate(comb):
                if action == 0 and min_oppo_cards > 8:
                    # Score of passing: the whole combination is kept.
                    if total_value > max_value:
                        max_value = total_value
                        best_move = 0
                elif action > 0 and (fine_mask is None
                                     or fine_mask[i, j] == True):
                    # Playable, non-pass action.
                    # Special case: only one hand left, play it.
                    if only_one_hand:
                        max_value = np.inf
                        best_move = comb[-1]
                        break

                    move_value = (total_value - cards_value[action] +
                                  round_penalty)

                    # Two hands left and this action outranks the others'
                    # highest card: strongly prefer it.
                    # NOTE(review): ``<= 15`` presumably limits this to
                    # single-card actions -- confirm.
                    if two_hands_left:
                        if action > maxcard(
                                state.other_hand) and action <= 15:
                            move_value += 100

                    # The landlord has one card left: avoid actions that do
                    # not exceed the others' highest card.
                    if self.player_id != 0 and left_cards[0] == 1:
                        if action <= maxcard(state.other_hand):
                            move_value -= 100

                    # A farmer has one card left: same rule for the landlord.
                    if self.player_id == 0 and (left_cards[1] == 1
                                                or left_cards[2] == 1):
                        if action <= maxcard(state.other_hand):
                            move_value -= 100

                    if move_value > max_value:
                        max_value = move_value
                        best_move = action
        # Fall back to passing. Checked after the loop so that an empty
        # decomposition also yields a valid action index.
        if best_move is None:
            best_move = 0

        # Convert the chosen action into per-card counts.
        best_cards = action_space[best_move]
        move = [best_cards.count(x) for x in card_list]
        # Log: player i [hand] // [move]
        print("Player {}".format(self.player_id),
              ' ',
              Card.visual_card(hand_card),
              end=' // ')
        print(Card.visual_card(move))
        return move, None