コード例 #1
0
        def evaluate(index):
            """Return a heuristic score for playing ``cards[index]``.

            Reads ``cards``, ``stacks``, ``NUM_CARDS``, ``STACK_VOL`` and
            ``count_nimmts`` from the enclosing scope.  ``stacks`` must support
            NumPy boolean-mask indexing.  How the caller ranks the returned
            value is not visible from here (presumably lower means a safer
            play — TODO confirm against the caller).
            """
            card = cards[index]
            # Keep only the stacks whose last card is smaller than the played card.
            fits_mask = np.array([pile[-1] < card for pile in stacks])
            choices = stacks[fits_mask]

            # No stack can accept the card: weight the smallest stack penalty
            # by how low the card is.
            if len(choices) == 0:
                return (1.0 - card / NUM_CARDS) * min(
                    count_nimmts(pile) for pile in stacks)

            # Among the fitting stacks, pick the one with the largest last card.
            top_cards = [pile[-1] for pile in choices]
            stack = choices[np.argmax(top_cards)]

            # Chosen stack already holds STACK_VOL cards: its penalty, scaled
            # by the gap between the card and the stack's last card.
            if len(stack) == STACK_VOL:
                return (1.0 -
                        (card - stack[-1]) / NUM_CARDS) * count_nimmts(stack)

            # Stack still has room: gap term divided by the remaining slots.
            score = ((-0.1 + card - stack[-1]) / NUM_CARDS) / (STACK_VOL -
                                                               len(stack))

            # Look at the following card in ``cards``; fall back to NUM_CARDS
            # when ``index`` is the last position.
            if index < len(cards) - 1:
                following = cards[index + 1]
            else:
                following = NUM_CARDS

            # +1 when the stack is one card short of STACK_VOL, otherwise -1.
            sign = (len(stack) == STACK_VOL - 1) * 2 - 1
            return score + sign * (1.0 - (following - card)) / NUM_CARDS
コード例 #2
0
        def evaluate(index):
            """Return a heuristic score for playing ``cards[index]``.

            Reads ``cards``, ``stacks``, ``NUM_CARDS``, ``STACK_VOL`` and
            ``count_nimmts`` from the enclosing scope.  ``stacks`` must support
            NumPy boolean-mask indexing, and each stack must support
            ``stack + [card]``.  How the caller ranks the returned value is not
            visible from here (presumably lower means a safer play — TODO
            confirm against the caller).
            """
            card = cards[index]
            # Keep only the stacks whose last card is smaller than the played card.
            fits_mask = np.array([pile[-1] < card for pile in stacks])
            choices = stacks[fits_mask]

            # No stack can accept the card: weight the smallest stack penalty
            # by how low the card is.
            if len(choices) == 0:
                return (1.0 - card / NUM_CARDS) * min(
                    count_nimmts(pile) for pile in stacks)

            # Among the fitting stacks, pick the one with the largest last card.
            top_cards = [pile[-1] for pile in choices]
            stack = choices[np.argmax(top_cards)]

            # Chosen stack already holds STACK_VOL cards: its penalty, scaled
            # by the gap between the card and the stack's last card.
            if len(stack) == STACK_VOL:
                return (1.0 -
                        (card - stack[-1]) / NUM_CARDS) * count_nimmts(stack)

            # Stack still has room: gap term divided by the remaining slots and
            # weighted by the stack's current penalty.
            score = ((-0.1 + card - stack[-1]) / NUM_CARDS) / (
                STACK_VOL - len(stack)) * count_nimmts(stack)

            # Smallest value above ``card`` among: NUM_CARDS, every stack's last
            # card that exceeds ``card``, and the following entry of ``cards``
            # (skipped when ``card`` equals the last entry of ``cards``).
            candidates = [NUM_CARDS]
            candidates.extend(pile[-1] for pile in stacks if pile[-1] > card)
            if card != cards[-1]:
                candidates.append(cards[index + 1])
            following = min(candidates)

            # Penalty of the stack as it would look with ``card`` appended,
            # scaled by the free gap up to ``following``.
            gap_term = (following - card -
                        1.0) / NUM_CARDS * count_nimmts(stack + [
                            card,
                        ])

            # NOTE(review): 4 is hard-coded here while the other branches use
            # STACK_VOL; presumably this means STACK_VOL - 1 — confirm.
            if len(stack) == 4:
                return score - gap_term
            return score + gap_term
コード例 #3
0
def normalize(dat):
    """Turn one (state, action) sample into a flat list of numeric features.

    Args:
        dat: mapping with an ``'action'`` entry (the card being played) and a
            ``'state'`` mapping that provides ``'card_stacks'`` (four
            stacks), ``'card_status'`` (indexed by ``range(num_cards)``, with
            values used as a row index 0..2), ``'agent_id'`` and
            ``'hand_cards'`` (indexed by ``agent_id``).

    Returns:
        A list of ints and floats.  The feature order is significant and is
        kept exactly as in the original implementation.

    Relies on the module-level names ``pyrl``, ``num_base_ind``, ``ind_2``,
    ``ind_3``, ``num_cards``, ``count_nimmts`` and ``num_nimmt``.
    """
    bits = []
    action = dat['action']
    state = dat['state']
    card_stacks = state['card_stacks']
    card_status = state['card_status']
    agent_id = state['agent_id']
    hand_card = state['hand_cards'][agent_id]

    # For every subset of the four stacks (bitmask 0..15): is the action below
    # ALL of the selected stack tops, and is it below ANY of them?  The empty
    # subset yields (1, 0), matching all([]) / any([]).
    below_top = [action < card_stacks[j][-1] for j in range(4)]
    for mask in range(16):
        selected = [below_top[j] for j in range(4) if (1 << j) & mask]
        bits.append(int(all(selected)))
        bits.append(int(any(selected)))

    # Scaled card values: the action itself, then the top of each stack.
    scale = pyrl.common.num_agent_init_card
    bits.append(1.0 * action / scale)
    for j in range(4):
        bits.append(1.0 * card_stacks[j][-1] / scale)

    # Base indicators, part 1: one-hot of each stack's length over 0..4
    # (a stack of length 5 or more sets none of them).
    tmp = [
        length == len(card_stacks[i])
        for i in range(4)
        for length in range(5)
    ]

    # The stack the action would go onto: the one with the largest top card
    # that is still below the action, or -1 when no stack qualifies.
    target_stack = -1
    for j in range(4):
        top = card_stacks[j][-1]
        if action > top and (target_stack == -1
                             or top > card_stacks[target_stack][-1]):
            target_stack = j

    # Base indicators, part 2: per stack "action is above its top" and
    # "it is the target", plus a final "no target" flag.
    for i in range(4):
        tmp.append(action > card_stacks[i][-1])
        tmp.append(target_stack == i)
    tmp.append(target_stack == -1)

    bits.extend(int(tmp[i]) for i in range(num_base_ind))

    # Pairwise conjunctions of the first 20 indicators with the rest.
    for i in range(20):
        for j in range(20, num_base_ind):
            bits.append(int(tmp[i] and tmp[j]))

    # Conjunctions over index tuples taken from the module-level tables.
    pair_index = ind_2
    triple_index = ind_3
    # NOTE(review): the 4-way conjunction also reads its tuples from ind_3
    # and accesses element [3]; this looks like it was meant to be a separate
    # ind_4 table — confirm before changing.
    quad_index = ind_3
    for i in range(50):
        p = pair_index[i]
        t = triple_index[i]
        q = quad_index[i]
        bits.append(int(tmp[p[0]] and tmp[p[1]]))
        bits.append(int(tmp[t[0]] and tmp[t[1]] and tmp[t[2]]))
        bits.append(int(tmp[q[0]] and tmp[q[1]] and tmp[q[2]] and tmp[q[3]]))

    # Histogram of card status: 3 status rows x 13 buckets, 8 consecutive
    # card indices per bucket.  Floor division keeps the bucket index an int
    # (plain "/" gives a float on Python 3 and fails as a list index).
    status_bucket = [[0] * 13 for _ in range(3)]
    for i in range(num_cards):
        status_bucket[card_status[i]][i // 8] += 1
    bits.extend(status_bucket[0])
    bits.extend(status_bucket[1])
    bits.extend(status_bucket[2])

    # Penalty features for the stack the action lands on.
    if target_stack == -1:
        # No stack accepts the action: the smallest stack penalty, three times
        # (kept triplicated so the feature layout matches the other branch).
        cheapest = min([count_nimmts(s) for s in card_stacks])
        bits.extend([cheapest, cheapest, cheapest])
    else:
        target = card_stacks[target_stack]
        penalty = count_nimmts(target)
        bits.append(penalty)
        bits.append(penalty * int(len(target) >= 5))
        bits.append(penalty * int(len(target) >= 4))

    bits.append(count_nimmts(hand_card))

    # Histogram of cards by status (3 rows) and by num_nimmt value 1..7.
    punish_bucket = [[0] * 7 for _ in range(3)]
    for i in range(num_cards):
        punish_bucket[card_status[i]][num_nimmt(i) - 1] += 1
    bits.extend(punish_bucket[0])
    bits.extend(punish_bucket[1])
    bits.extend(punish_bucket[2])

    # The original author noted 459 features in total; the actual length
    # depends on num_base_ind — TODO confirm.
    return bits
コード例 #4
0
    def policy_min(self, agentEnv):
        """Return the index of the stack with the smallest ``count_nimmts`` value.

        Reads the stacks from ``agentEnv['card_stacks']``.  The result comes
        straight from ``np.argmin``, so ties resolve to the first such stack.
        """
        penalties = [count_nimmts(pile) for pile in agentEnv['card_stacks']]
        return np.argmin(penalties)