def obtain_reward(hand_card_status, surround, *args):
    """Play one randomly chosen single card and return its score.

    Args:
        hand_card_status: Mutable sequence of per-card counts indexed by
            card id. The count of the played card is decremented in place.
        surround: Object exposing ``has_put_card_status``, a mutable
            sequence indexed by card id; the played card's count is
            incremented in place.
        *args: Optionally exactly three values
            ``(last_player_role, last_card_type_struct, last_action)``.
            Any other number of extra arguments is treated as "no previous
            play". Only ``last_card_type_struct.primary_item`` is consulted:
            when it is not ``None``, the played card id must be strictly
            greater than it.

    Returns:
        A ``(score, info)`` tuple. On success ``score`` is the value returned
        by ``HandCardUtils.value_map`` for the played single card and
        ``info`` holds ``'put_card'`` (a one-element list) and
        ``'primary_item'``. When no card can be played, nothing is mutated,
        ``score`` is ``0`` and ``info`` is ``{'error': True}``; callers
        should check ``info`` rather than rely on the score.
    """
    info = dict()
    # Default score so the "no playable card" path can return cleanly
    # (previously `score` was unbound there and the return raised).
    score = 0
    put_card_status = surround.has_put_card_status

    # Validate the arity *before* indexing into args; indexing first raised
    # IndexError whenever fewer than three extra arguments were supplied.
    last_card_type_struct = args[1] if len(args) == 3 else None

    primary_item = None
    if last_card_type_struct is not None:
        primary_item = last_card_type_struct.primary_item

    # Card ids of which at least one copy is still in hand.
    exist_card = [
        card for card, count in enumerate(hand_card_status) if count >= 1
    ]
    if primary_item is not None:
        # Only cards with an id above the previous play's primary item qualify.
        exist_card = [card for card in exist_card if card > primary_item]

    if not exist_card:
        info['error'] = True
        return score, info

    put_card = random.choice(exist_card)
    score = HandCardUtils.value_map(put_card, CardTypeEnum.CT_ONE, 1)
    hand_card_status[put_card] -= 1
    put_card_status[put_card] += 1
    info['put_card'] = [put_card]
    info['primary_item'] = put_card

    return score, info
Beispiel #2
0
 def _calc_put_card_seq_score(self, hand_card):
     """Return the summed score of every hand in ``hand_card``.

     Each element of ``hand_card`` is converted to a hand-card status via
     ``HandCardUtils.obtain_hand_card_status``, classified with
     ``CardTypeJudge.judge_card_type``, and scored with
     ``HandCardUtils.value_map``. An empty ``hand_card`` yields ``0``.
     """
     judge = CardTypeJudge()

     def _score_one_hand(cards):
         # Classify a single hand, then map its card type to a score.
         status = HandCardUtils.obtain_hand_card_status(cards)
         struct = judge.judge_card_type(status)
         return HandCardUtils.value_map(
             struct.primary_item, struct.card_type, struct.card_count)

     return sum(map(_score_one_hand, hand_card))
Beispiel #3
0
    def step(self, action):
        score = 0
        ERR_CARD_SCORE = -1
        done = False
        if ENV_DEBUG:
            print('Action: {}'.format(action))
        # 单牌
        if action == ActionTypeEnum.ACTION_PUT_ONE.value:
            exist_card = list(
                map(
                    lambda x: x[0],
                    filter(lambda x: x[1] >= 1,
                           enumerate(self.hand_card_status))))
            if isinstance(exist_card, int):
                exist_card = [exist_card]
            rnd = random.randint(0, len(exist_card) - 1)
            put_card = exist_card[rnd]
            score = HandCardUtils.value_map(put_card, CardTypeEnum.CT_ONE, 1)
            self.observation[put_card] -= 1
            self.observation[18 + put_card] += 1
            self.hand_card_status[put_card] -= 1
            if ENV_DEBUG:
                print('Put card %s' % put_card)
        # 对子
        elif action == ActionTypeEnum.ACTION_PUT_DOU.value:
            exist_card = list(
                map(
                    lambda x: x[0],
                    filter(lambda x: x[1] == 2,
                           enumerate(self.hand_card_status))))
            if isinstance(exist_card, int):
                exist_card = [exist_card]
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                rnd = random.randint(0, len(exist_card) - 1)
                put_card = exist_card[rnd]
                score = HandCardUtils.value_map(put_card, CardTypeEnum.CT_DOU,
                                                2)
                self.observation[put_card] -= 2
                self.observation[18 + put_card] += 2
                self.hand_card_status[put_card] -= 2
                if ENV_DEBUG:
                    print('Put card %s,%s' % (put_card, put_card))
        # 三不带
        elif action == ActionTypeEnum.ACTION_PUT_THREE.value:
            exist_card = list(
                map(
                    lambda x: x[0],
                    filter(lambda x: x[1] == 3,
                           enumerate(self.hand_card_status))))
            if isinstance(exist_card, int):
                exist_card = [exist_card]
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                rnd = random.randint(0, len(exist_card) - 1)
                put_card = exist_card[rnd]
                score = HandCardUtils.value_map(put_card,
                                                CardTypeEnum.CT_THREE, 3)
                self.observation[put_card] -= 3
                self.observation[18 + put_card] += 3
                self.hand_card_status[put_card] -= 3
                if ENV_DEBUG:
                    print('Put card %s,%s,%s' % (put_card, put_card, put_card))
        # 三带一
        elif action == ActionTypeEnum.ACTION_PUT_THREE_ONE.value:
            exist_card = list(
                map(
                    lambda x: x[0],
                    filter(lambda x: x[1] == 3,
                           enumerate(self.hand_card_status))))
            if isinstance(exist_card, int):
                exist_card = [exist_card]
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                rnd = random.randint(0, len(exist_card) - 1)
                put_card = exist_card[rnd]
                other_exist_card = list(
                    map(
                        lambda x: x[0],
                        filter(lambda x: x[1] >= 1 and x[0] != put_card,
                               enumerate(self.hand_card_status))))
                if isinstance(other_exist_card, int):
                    other_exist_card = [other_exist_card]
                if len(other_exist_card) == 0:
                    score = ERR_CARD_SCORE
                    done = True
                    if ENV_DEBUG:
                        print('Can not accept the card')
                else:
                    rnd = random.randint(0, len(other_exist_card) - 1)
                    other_put_card = other_exist_card[rnd]
                    score = HandCardUtils.value_map(put_card,
                                                    CardTypeEnum.CT_THREE_ONE,
                                                    4)
                    self.observation[put_card] -= 3
                    self.observation[18 + put_card] += 3
                    self.hand_card_status[put_card] -= 3
                    self.observation[other_put_card] -= 1
                    self.observation[18 + other_put_card] += 1
                    self.hand_card_status[other_put_card] -= 1
                    if ENV_DEBUG:
                        print('Put card %s,%s,%s,%s' %
                              (put_card, put_card, put_card, other_put_card))
        # 三带一对
        elif action == ActionTypeEnum.ACTION_PUT_THREE_DOU.value:
            exist_card = list(
                map(
                    lambda x: x[0],
                    filter(lambda x: x[1] == 3,
                           enumerate(self.hand_card_status))))
            if isinstance(exist_card, int):
                exist_card = [exist_card]
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                rnd = random.randint(0, len(exist_card) - 1)
                put_card = exist_card[rnd]
                other_exist_card = list(
                    map(
                        lambda x: x[0],
                        filter(lambda x: x[1] >= 2 and x[0] != put_card,
                               enumerate(self.hand_card_status))))
                if isinstance(other_exist_card, int):
                    other_exist_card = [other_exist_card]
                if len(other_exist_card) == 0:
                    score = ERR_CARD_SCORE
                    done = True
                    if ENV_DEBUG:
                        print('Can not accept the card')
                else:
                    rnd = random.randint(0, len(other_exist_card) - 1)
                    other_put_card = other_exist_card[rnd]
                    score = HandCardUtils.value_map(put_card,
                                                    CardTypeEnum.CT_THREE_DOU,
                                                    5)
                    self.observation[put_card] -= 3
                    self.observation[18 + put_card] += 3
                    self.hand_card_status[put_card] -= 3
                    self.observation[other_put_card] -= 2
                    self.observation[18 + other_put_card] += 2
                    self.hand_card_status[other_put_card] -= 2
                    if ENV_DEBUG:
                        print('Put card %s,%s,%s,%s,%s' %
                              (put_card, put_card, put_card, other_put_card,
                               other_put_card))
        # 两连对
        elif action == ActionTypeEnum.ACTION_PUT_2_DOU.value:
            exist_card = HandCardUtils.find_even_pair(self.hand_card_status,
                                                      k=2)
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                score = HandCardUtils.value_map(exist_card[-1],
                                                CardTypeEnum.CT_DOU, 4)
                log_put = list()
                for put_card in exist_card:
                    self.observation[put_card] -= 2
                    self.observation[18 + put_card] += 2
                    self.hand_card_status[put_card] -= 2
                    log_put.extend([put_card, put_card])
                if ENV_DEBUG:
                    print('Put card %s,%s,%s,%s' % tuple(log_put))
        # 三连对
        elif action == ActionTypeEnum.ACTION_PUT_3_DOU.value:
            exist_card = HandCardUtils.find_even_pair(self.hand_card_status,
                                                      k=3)
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                score = HandCardUtils.value_map(exist_card[-1],
                                                CardTypeEnum.CT_DOU, 6)
                log_put = list()
                for put_card in exist_card:
                    self.observation[put_card] -= 2
                    self.observation[18 + put_card] += 2
                    self.hand_card_status[put_card] -= 2
                    log_put.extend([put_card, put_card])
                if ENV_DEBUG:
                    print('Put card %s,%s,%s,%s,%s,%s' % tuple(log_put))
        # 四连对
        elif action == ActionTypeEnum.ACTION_PUT_4_DOU.value:
            exist_card = HandCardUtils.find_even_pair(self.hand_card_status,
                                                      k=4)
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                score = HandCardUtils.value_map(exist_card[-1],
                                                CardTypeEnum.CT_DOU, 8)
                log_put = list()
                for put_card in exist_card:
                    self.observation[put_card] -= 2
                    self.observation[18 + put_card] += 2
                    self.hand_card_status[put_card] -= 2
                    log_put.extend([put_card, put_card])
                if ENV_DEBUG:
                    print('Put card %s,%s,%s,%s,%s,%s,%s,%s' % tuple(log_put))
        # 五连对
        elif action == ActionTypeEnum.ACTION_PUT_5_DOU.value:
            exist_card = HandCardUtils.find_even_pair(self.hand_card_status,
                                                      k=5)
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                score = HandCardUtils.value_map(exist_card[-1],
                                                CardTypeEnum.CT_DOU, 10)
                log_put = list()
                for put_card in exist_card:
                    self.observation[put_card] -= 2
                    self.observation[18 + put_card] += 2
                    self.hand_card_status[put_card] -= 2
                    log_put.extend([put_card, put_card])
                if ENV_DEBUG:
                    print('Put card %s,%s,%s,%s,%s,%s,%s,%s,%s,%s' %
                          tuple(log_put))
        # 两连三不带
        elif action == ActionTypeEnum.ACTION_PUT_2_THREE.value:
            exist_card = HandCardUtils.find_even_three(self.hand_card_status,
                                                       k=2)
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                score = HandCardUtils.value_map(exist_card[-1],
                                                CardTypeEnum.CT_THREE, 6)
                log_put = list()
                for put_card in exist_card:
                    self.observation[put_card] -= 3
                    self.observation[18 + put_card] += 3
                    self.hand_card_status[put_card] -= 3
                    log_put.extend([put_card, put_card, put_card])
                if ENV_DEBUG:
                    print('Put card %s,%s,%s,%s,%s,%s' % tuple(log_put))
        # 三连三不带
        elif action == ActionTypeEnum.ACTION_PUT_3_THREE.value:
            exist_card = HandCardUtils.find_even_three(self.hand_card_status,
                                                       k=3)
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                score = HandCardUtils.value_map(exist_card[-1],
                                                CardTypeEnum.CT_THREE, 9)
                log_put = list()
                for put_card in exist_card:
                    self.observation[put_card] -= 3
                    self.observation[18 + put_card] += 3
                    self.hand_card_status[put_card] -= 3
                    log_put.extend([put_card, put_card, put_card])
                if ENV_DEBUG:
                    print('Put card %s,%s,%s,%s,%s,%s,%s,%s,%s' %
                          tuple(log_put))
        # 两连三带一
        elif action == ActionTypeEnum.ACTION_PUT_2_THREE_ONE.value:
            K = 2
            exist_card = HandCardUtils.find_even_three(self.hand_card_status,
                                                       k=K)
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                other_card = list(
                    map(
                        lambda x: x[0],
                        filter(lambda x: x[0] not in exist_card,
                               enumerate(self.hand_card_status))))
                if len(other_card) < K:
                    score = ERR_CARD_SCORE
                    done = True
                    if ENV_DEBUG:
                        print('Can not accept the card')
                else:
                    tmp = list()
                    for _ in range(K):
                        rnd = random.randint(0, len(other_card) - 1)
                        tmp_card = other_card[rnd]
                        tmp.append(tmp_card)
                        self.observation[tmp_card] -= 1
                        self.observation[18 + tmp_card] += 1
                        self.hand_card_status[tmp_card] -= 1
                    score = HandCardUtils.value_map(exist_card[-1],
                                                    CardTypeEnum.CT_THREE_ONE,
                                                    3 * K + K)
                    log_put = list()
                    for put_card in exist_card:
                        self.observation[put_card] -= 3
                        self.observation[18 + put_card] += 3
                        self.hand_card_status[put_card] -= 3
                        log_put.extend([put_card, put_card, put_card])
                    log_put.extend(tmp)
                    if ENV_DEBUG:
                        print('Put card %s,%s,%s,%s,%s,%s,%s,%s' %
                              tuple(log_put))
        # 三连三带一
        elif action == ActionTypeEnum.ACTION_PUT_3_THREE_ONE.value:
            K = 3
            exist_card = HandCardUtils.find_even_three(self.hand_card_status,
                                                       k=K)
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                other_card = list(
                    map(
                        lambda x: x[0],
                        filter(lambda x: x[0] not in exist_card,
                               enumerate(self.hand_card_status))))
                if len(other_card) < K:
                    score = ERR_CARD_SCORE
                    done = True
                    if ENV_DEBUG:
                        print('Can not accept the card')
                else:
                    tmp = list()
                    for _ in range(K):
                        rnd = random.randint(0, len(other_card) - 1)
                        tmp_card = other_card[rnd]
                        tmp.append(tmp_card)
                        self.observation[tmp_card] -= 1
                        self.observation[18 + tmp_card] += 1
                        self.hand_card_status[tmp_card] -= 1
                    score = HandCardUtils.value_map(exist_card[-1],
                                                    CardTypeEnum.CT_THREE_ONE,
                                                    3 * K + K)
                    log_put = list()
                    for put_card in exist_card:
                        self.observation[put_card] -= 3
                        self.observation[18 + put_card] += 3
                        self.hand_card_status[put_card] -= 3
                        log_put.extend([put_card, put_card, put_card])
                    log_put.extend(tmp)
                    if ENV_DEBUG:
                        print('Put card %s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s' %
                              tuple(log_put))
        # 两连三带一对
        elif action == ActionTypeEnum.ACTION_PUT_2_THREE_DOU.value:
            K = 2
            exist_card = HandCardUtils.find_even_three(self.hand_card_status,
                                                       k=K)
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                other_card = list(
                    map(
                        lambda x: x[0],
                        filter(lambda x: x[0] not in exist_card and x[1] == 2,
                               enumerate(self.hand_card_status))))
                if len(other_card) < K:
                    score = ERR_CARD_SCORE
                    done = True
                    if ENV_DEBUG:
                        print('Can not accept the card')
                else:
                    tmp = list()
                    for _ in range(K):
                        rnd = random.randint(0, len(other_card) - 1)
                        tmp_card = other_card[rnd]
                        tmp.extend([tmp_card, tmp_card])
                        self.observation[tmp_card] -= 2
                        self.observation[18 + tmp_card] += 2
                        self.hand_card_status[tmp_card] -= 2
                    score = HandCardUtils.value_map(exist_card[-1],
                                                    CardTypeEnum.CT_THREE_DOU,
                                                    3 * K + 2 * K)
                    log_put = list()
                    for put_card in exist_card:
                        self.observation[put_card] -= 3
                        self.observation[18 + put_card] += 3
                        self.hand_card_status[put_card] -= 3
                        log_put.extend([put_card, put_card, put_card])
                    log_put.extend(tmp)
                    if ENV_DEBUG:
                        print('Put card %s,%s,%s,%s,%s,%s,%s,%s,%s,%s' %
                              tuple(log_put))
        # 三连三带一对
        elif action == ActionTypeEnum.ACTION_PUT_3_THREE_DOU.value:
            K = 3
            exist_card = HandCardUtils.find_even_three(self.hand_card_status,
                                                       k=K)
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                other_card = list(
                    map(
                        lambda x: x[0],
                        filter(lambda x: x[0] not in exist_card and x[1] == 2,
                               enumerate(self.hand_card_status))))
                if len(other_card) < K:
                    score = ERR_CARD_SCORE
                    done = True
                    if ENV_DEBUG:
                        print('Can not accept the card')
                else:
                    tmp = list()
                    for _ in range(K):
                        rnd = random.randint(0, len(other_card) - 1)
                        tmp_card = other_card[rnd]
                        tmp.extend([tmp_card, tmp_card])
                        self.observation[tmp_card] -= 2
                        self.observation[18 + tmp_card] += 2
                        self.hand_card_status[tmp_card] -= 2
                    score = HandCardUtils.value_map(exist_card[-1],
                                                    CardTypeEnum.CT_THREE_DOU,
                                                    3 * K + 2 * K)
                    log_put = list()
                    for put_card in exist_card:
                        self.observation[put_card] -= 3
                        self.observation[18 + put_card] += 3
                        self.hand_card_status[put_card] -= 3
                        log_put.extend([put_card, put_card, put_card])
                    log_put.extend(tmp)
                    if ENV_DEBUG:
                        print(
                            'Put card %s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s'
                            % tuple(log_put))
        # 四带二单
        elif action == ActionTypeEnum.ACTION_PUT_FOUR_ONE.value:
            exist_card = list(
                map(
                    lambda x: x[0],
                    filter(lambda x: x[1] == 4,
                           enumerate(self.hand_card_status))))
            if isinstance(exist_card, int):
                exist_card = [exist_card]
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                rnd = random.randint(0, len(exist_card) - 1)
                put_card = exist_card[rnd]
                other_exist_card = list(
                    map(
                        lambda x: x[0],
                        filter(lambda x: x[1] >= 1 and x[0] != put_card,
                               enumerate(self.hand_card_status))))
                if isinstance(other_exist_card, int):
                    other_exist_card = [other_exist_card]
                if len(other_exist_card) <= 1:
                    score = ERR_CARD_SCORE
                    done = True
                    if ENV_DEBUG:
                        print('Can not accept the card')
                else:
                    score = HandCardUtils.value_map(put_card,
                                                    CardTypeEnum.CT_FOUR_ONE,
                                                    6)
                    self.observation[put_card] -= 4
                    self.observation[18 + put_card] += 4
                    self.hand_card_status[put_card] -= 4
                    pre = -1
                    t = 0
                    tmp = list()
                    while t < 2:
                        rnd = random.randint(0, len(other_exist_card) - 1)
                        while True:
                            if rnd != pre:
                                pre = rnd
                                break
                            rnd = random.randint(0, len(other_exist_card) - 1)
                        other_put_card = other_exist_card[rnd]
                        tmp.append(other_put_card)
                        self.observation[other_put_card] -= 1
                        self.observation[18 + other_put_card] += 1
                        self.hand_card_status[other_put_card] -= 1
                        t += 1
                    log_put = [put_card] * 4
                    log_put.extend(tmp)
                    if ENV_DEBUG:
                        print('Put card %s,%s,%s,%s,%s,%s' % tuple(log_put))
        # 四带二对
        elif action == ActionTypeEnum.ACTION_PUT_FOUR_DOU.value:
            exist_card = list(
                map(
                    lambda x: x[0],
                    filter(lambda x: x[1] == 4,
                           enumerate(self.hand_card_status))))
            if isinstance(exist_card, int):
                exist_card = [exist_card]
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                rnd = random.randint(0, len(exist_card) - 1)
                put_card = exist_card[rnd]
                other_exist_card = list(
                    map(
                        lambda x: x[0],
                        filter(lambda x: x[1] >= 2 and x[0] != put_card,
                               enumerate(self.hand_card_status))))
                if isinstance(other_exist_card, int):
                    other_exist_card = [other_exist_card]
                if len(other_exist_card) <= 1:
                    score = ERR_CARD_SCORE
                    done = True
                    if ENV_DEBUG:
                        print('Can not accept the card')
                else:
                    score = HandCardUtils.value_map(put_card,
                                                    CardTypeEnum.CT_FOUR_DOU,
                                                    8)
                    self.observation[put_card] -= 4
                    self.observation[18 + put_card] += 4
                    self.hand_card_status[put_card] -= 4
                    pre = -1
                    t = 0
                    tmp = list()
                    while t < 2:
                        rnd = random.randint(0, len(other_exist_card) - 1)
                        while True:
                            if rnd != pre:
                                pre = rnd
                                break
                            rnd = random.randint(0, len(other_exist_card) - 1)
                        other_put_card = other_exist_card[rnd]
                        tmp.extend([other_put_card, other_put_card])
                        self.observation[other_put_card] -= 2
                        self.observation[18 + other_put_card] += 2
                        self.hand_card_status[other_put_card] -= 2
                        t += 1
                    log_put = [put_card] * 4
                    log_put.extend(tmp)
                    if ENV_DEBUG:
                        print('Put card %s,%s,%s,%s,%s,%s,%s,%s' %
                              tuple(log_put))
        # 连子(5)
        elif action == ActionTypeEnum.ACTION_PUT_5_CONTINUE.value:
            K = 5
            exist_card = HandCardUtils.find_continues(self.hand_card_status,
                                                      k=K)
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                score = HandCardUtils.value_map(exist_card[-1],
                                                CardTypeEnum.CT_CONTINUE, K)
                log_put = list()
                for put_card in exist_card:
                    self.observation[put_card] -= 1
                    self.observation[18 + put_card] += 1
                    self.hand_card_status[put_card] -= 1
                    log_put.extend([put_card])
                if ENV_DEBUG:
                    print('Put card %s,%s,%s,%s,%s' % tuple(log_put))
        # 连子(6)
        elif action == ActionTypeEnum.ACTION_PUT_6_CONTINUE.value:
            K = 6
            exist_card = HandCardUtils.find_continues(self.hand_card_status,
                                                      k=K)
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                score = HandCardUtils.value_map(exist_card[-1],
                                                CardTypeEnum.CT_CONTINUE, K)
                log_put = list()
                for put_card in exist_card:
                    self.observation[put_card] -= 1
                    self.observation[18 + put_card] += 1
                    self.hand_card_status[put_card] -= 1
                    log_put.extend([put_card])
                if ENV_DEBUG:
                    print('Put card %s,%s,%s,%s,%s,%s' % tuple(log_put))

        # 连子(7)
        elif action == ActionTypeEnum.ACTION_PUT_7_CONTINUE.value:
            K = 7
            exist_card = HandCardUtils.find_continues(self.hand_card_status,
                                                      k=K)
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                score = HandCardUtils.value_map(exist_card[-1],
                                                CardTypeEnum.CT_CONTINUE, K)
                log_put = list()
                for put_card in exist_card:
                    self.observation[put_card] -= 1
                    self.observation[18 + put_card] += 1
                    self.hand_card_status[put_card] -= 1
                    log_put.extend([put_card])
                if ENV_DEBUG:
                    print('Put card %s,%s,%s,%s,%s,%s,%s' % tuple(log_put))

        # 连子(8)
        elif action == ActionTypeEnum.ACTION_PUT_8_CONTINUE.value:
            K = 8
            exist_card = HandCardUtils.find_continues(self.hand_card_status,
                                                      k=K)
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                score = HandCardUtils.value_map(exist_card[-1],
                                                CardTypeEnum.CT_CONTINUE, K)
                log_put = list()
                for put_card in exist_card:
                    self.observation[put_card] -= 1
                    self.observation[18 + put_card] += 1
                    self.hand_card_status[put_card] -= 1
                    log_put.extend([put_card])
                if ENV_DEBUG:
                    print('Put card %s,%s,%s,%s,%s,%s,%s,%s' % tuple(log_put))

        # 连子(9)
        elif action == ActionTypeEnum.ACTION_PUT_9_CONTINUE.value:
            K = 9
            exist_card = HandCardUtils.find_continues(self.hand_card_status,
                                                      k=K)
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                score = HandCardUtils.value_map(exist_card[-1],
                                                CardTypeEnum.CT_CONTINUE, K)
                log_put = list()
                for put_card in exist_card:
                    self.observation[put_card] -= 1
                    self.observation[18 + put_card] += 1
                    self.hand_card_status[put_card] -= 1
                    log_put.extend([put_card])
                if ENV_DEBUG:
                    print('Put card %s,%s,%s,%s,%s,%s,%s,%s,%s' %
                          tuple(log_put))

        # 连子(10)
        elif action == ActionTypeEnum.ACTION_PUT_10_CONTINUE.value:
            K = 10
            exist_card = HandCardUtils.find_continues(self.hand_card_status,
                                                      k=K)
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                score = HandCardUtils.value_map(exist_card[-1],
                                                CardTypeEnum.CT_CONTINUE, K)
                log_put = list()
                for put_card in exist_card:
                    self.observation[put_card] -= 1
                    self.observation[18 + put_card] += 1
                    self.hand_card_status[put_card] -= 1
                    log_put.extend([put_card])
                if ENV_DEBUG:
                    print('Put card %s,%s,%s,%s,%s,%s,%s,%s,%s,%s' %
                          tuple(log_put))
        # 连子(11)
        elif action == ActionTypeEnum.ACTION_PUT_11_CONTINUE.value:
            K = 11
            exist_card = HandCardUtils.find_continues(self.hand_card_status,
                                                      k=K)
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                score = HandCardUtils.value_map(exist_card[-1],
                                                CardTypeEnum.CT_CONTINUE, K)
                log_put = list()
                for put_card in exist_card:
                    self.observation[put_card] -= 1
                    self.observation[18 + put_card] += 1
                    self.hand_card_status[put_card] -= 1
                    log_put.extend([put_card])
                if ENV_DEBUG:
                    print('Put card %s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s' %
                          tuple(log_put))
        # 连子(12)
        elif action == ActionTypeEnum.ACTION_PUT_12_CONTINUE.value:
            K = 12
            exist_card = HandCardUtils.find_continues(self.hand_card_status,
                                                      k=K)
            if len(exist_card) == 0:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                score = HandCardUtils.value_map(exist_card[-1],
                                                CardTypeEnum.CT_CONTINUE, K)
                log_put = list()
                for put_card in exist_card:
                    self.observation[put_card] -= 1
                    self.observation[18 + put_card] += 1
                    self.hand_card_status[put_card] -= 1
                    log_put.extend([put_card])
                if ENV_DEBUG:
                    print('Put card %s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s' %
                          tuple(log_put))
        # 炸弹
        elif action == ActionTypeEnum.ACTION_PUT_BOMB.value:
            qu_ja = [CardEnum.QU.value, CardEnum.JA.value]
            comm_bomb = list(
                map(
                    lambda x: x[0],
                    filter(lambda x: x[1] == 4,
                           enumerate(self.hand_card_status))))
            master_bomb = list(
                map(
                    lambda x: x[0],
                    filter(lambda x: x[0] in qu_ja,
                           enumerate(self.hand_card_status))))
            if isinstance(comm_bomb, int):
                comm_bomb = [comm_bomb]
            if isinstance(master_bomb, int):
                master_bomb = [master_bomb]
            if len(comm_bomb) == 0 and len(master_bomb) < 2:
                score = ERR_CARD_SCORE
                done = True
                if ENV_DEBUG:
                    print('Can not accept the card')
            else:
                rnd = len(comm_bomb)
                put_card = qu_ja
                if len(comm_bomb) >= 1 and len(master_bomb) == 2:
                    rnd = random.randint(0, len(comm_bomb))
                elif len(comm_bomb) >= 1:
                    rnd = random.randint(0, len(comm_bomb) - 1)
                if rnd < len(comm_bomb):
                    put_card = comm_bomb[rnd]
                card_count = 4 if isinstance(put_card, int) else 2
                if card_count == 2:
                    score = HandCardUtils.value_map(put_card[-1],
                                                    CardTypeEnum.CT_BOMB,
                                                    card_count)
                    for item in put_card:
                        self.observation[item] -= 1
                        self.observation[18 + item] += 1
                        self.hand_card_status[item] -= 1
                    if ENV_DEBUG:
                        print('Put card %s,%s' % tuple(put_card))
                else:
                    score = HandCardUtils.value_map(put_card,
                                                    CardTypeEnum.CT_BOMB,
                                                    card_count)
                    self.observation[put_card] -= 4
                    self.observation[18 + put_card] += 4
                    self.hand_card_status[put_card] -= 4
                    if ENV_DEBUG:
                        print('Put card %s,%s,%s,%s' %
                              (put_card, put_card, put_card, put_card))
        # 不出
        elif action == ActionTypeEnum.ACTION_NO_PUT.value:
            pass

        if self._is_done():
            score += 1
        if ENV_DEBUG:
            print('Current card: %s' % self.hand_card_status)
            print('Current env: %s' % self.observation)
            print('is game over: %s' % (self._is_done() or done))
        return self.observation[3:], score, self._is_done() or done
 def judge_card_type(self, hand_card):
     """Classify a played hand and describe it in a ``CardTypeStruct``.

     Args:
         hand_card: sequence of exactly 18 per-rank counts; index ``i``
             holds how many cards of rank ``i`` are in the hand.

     Returns:
         A ``CardTypeStruct`` whose ``card_count`` is always
         ``sum(hand_card)``.  ``card_type``, ``primary_item`` and
         ``card_score`` are only assigned when the hand matches one of
         the patterns below; otherwise they keep whatever
         ``CardTypeStruct()`` initialises them to.

     Raises:
         AssertionError: if ``hand_card`` does not have 18 entries, or a
             one-card hand does not contain exactly one rank with count 1.
     """
     assert len(hand_card) == 18, 'the size of hand card status must be 18.'
     result = CardTypeStruct()
     card_count = sum(hand_card)
     result.card_count = card_count

     def _ranks_held(times):
         # Ranks (indices) held exactly `times` times, in ascending order.
         return [rank for rank, held in enumerate(hand_card) if held == times]

     def _assign(card_type, primary_item):
         # Record the recognised pattern together with its mapped score.
         result.card_type = card_type
         result.primary_item = primary_item
         result.card_score = HandCardUtils.value_map(
             primary_item, card_type, card_count)

     find_one = _ranks_held(1)
     find_two = _ranks_held(2)
     find_three = _ranks_held(3)
     find_four = _ranks_held(4)
     two_rank = CardEnum.TW.value
     # NOTE(review): HandCardUtils.is_find presumably checks that the given
     # ranks form a consecutive run - confirm against its definition.

     # Single card
     if card_count == 1:
         assert len(find_one) == 1, 'hand card status error.'
         _assign(CardTypeEnum.CT_ONE, find_one[0])

     # Pair / run of pairs: the hand must consist of pairs only
     if find_two and len(find_two) * 2 == card_count:
         if len(find_two) == 1:
             # A lone pair may be of any rank, including 2; only runs of
             # pairs must exclude it (same rule as the triple branches).
             _assign(CardTypeEnum.CT_DOU, find_two[0])
         elif two_rank not in find_two and HandCardUtils.is_find(find_two):
             _assign(CardTypeEnum.CT_DOU, find_two[-1])

     if find_three:
         # Triple with one single / run of triples with one single each
         if (card_count == 4 and len(find_three) == 1
                 and len(find_one) == 1):
             _assign(CardTypeEnum.CT_THREE_ONE, find_three[0])
         elif (len(find_one) == len(find_three)
               and len(find_three) * 4 == card_count
               and two_rank not in find_three
               and HandCardUtils.is_find(find_three)):
             # Exactly one single per triple, otherwise e.g. two triples
             # plus six singles would pass the card-count check.
             _assign(CardTypeEnum.CT_THREE_ONE, find_three[-1])

         # Triple with one pair / run of triples with one pair each
         if (card_count == 5 and len(find_three) == 1
                 and len(find_two) == 1):
             _assign(CardTypeEnum.CT_THREE_DOU, find_three[0])
         elif (len(find_two) == len(find_three)
               and len(find_three) * 5 == card_count
               and two_rank not in find_three
               and HandCardUtils.is_find(find_three)):
             _assign(CardTypeEnum.CT_THREE_DOU, find_three[-1])

     # Straight: at least five cards, all of them singles
     if card_count >= 5 and len(find_one) == card_count:
         # A straight may not contain 2 or either joker
         excluded = {two_rank, CardEnum.JA.value, CardEnum.QU.value}
         if excluded.isdisjoint(find_one) and HandCardUtils.is_find(find_one):
             _assign(CardTypeEnum.CT_CONTINUE, find_one[-1])

     # Four of a kind with two singles
     if card_count == 6 and len(find_four) == 1 and len(find_one) == 2:
         _assign(CardTypeEnum.CT_FOUR_ONE, find_four[0])

     # Four of a kind with two pairs
     if card_count == 8 and len(find_four) == 1 and len(find_two) == 2:
         _assign(CardTypeEnum.CT_FOUR_DOU, find_four[0])

     # Bare triple / run of bare triples: the hand must be triples only,
     # so a single triple is accepted only for a three-card hand.
     if find_three and len(find_three) * 3 == card_count:
         if len(find_three) == 1:
             _assign(CardTypeEnum.CT_THREE, find_three[0])
         # A run of bare triples may not contain 2
         elif (two_rank not in find_three
               and HandCardUtils.is_find(find_three)):
             _assign(CardTypeEnum.CT_THREE, find_three[-1])

     # Bomb (joker pair); relies on QU sorting before JA in the rank order
     if (card_count == 2 and len(find_one) == 2
             and find_one[0] == CardEnum.QU.value
             and find_one[1] == CardEnum.JA.value):
         _assign(CardTypeEnum.CT_BOMB, find_one[-1])

     # Bomb (ordinary four of a kind)
     if card_count == 4 and len(find_four) == 1:
         _assign(CardTypeEnum.CT_BOMB, find_four[0])

     return result