Exemple #1
0
    def get_combinations(self, curr_cards_char, last_cards_char):
        """Enumerate ways to split the current hand into action-space moves.

        Hands with more than 10 cards go through ``get_combinations_nosplit``
        on the augmented action space; smaller hands go through
        ``get_combinations_recursive`` on the plain action space.

        Args:
            curr_cards_char: the current hand as a sequence of card characters.
            last_cards_char: the cards to respond to; an empty sequence means
                there is nothing to beat.

        Returns:
            ``(combs, fine_mask)``. ``combs`` is a list of lists of action
            indices. When ``last_cards_char`` is empty, ``fine_mask`` is
            ``None``. Otherwise every combination is prefixed with index 0
            (presumably the "pass" action -- TODO confirm), only combinations
            containing at least one action that beats ``last_cards_char`` are
            kept, and ``fine_mask`` is a boolean array of shape
            ``(len(combs), self.num_actions[1])`` where ``fine_mask[i][j]`` is
            ``True`` exactly when ``combs[i][j]`` is such a beating action.
        """
        responding = len(last_cards_char) > 0
        # When responding, every combination starts with action index 0.
        prefix = [0] if responding else []

        def keep_beating(all_combs, idx_must_be_contained):
            # Drop combinations without any beating action and flag the
            # positions of the beating actions in the survivors.
            kept = [comb for comb in all_combs if not idx_must_be_contained.isdisjoint(comb)]
            # NOTE: builtin ``bool`` replaces ``np.bool``, which was removed in
            # NumPy 1.24.
            flags = np.zeros([len(kept), self.num_actions[1]], dtype=bool)
            for i, comb in enumerate(kept):
                for j, action_idx in enumerate(comb):
                    if action_idx in idx_must_be_contained:
                        flags[i][j] = True
            return kept, flags

        if len(curr_cards_char) > 10:
            card_mask = Card.char2onehot60(curr_cards_char).astype(np.uint8)
            mask = augment_action_space_onehot60
            # A row is invalid when it needs a one-hot slot the hand lacks.
            a = np.expand_dims(1 - card_mask, 0) * mask
            invalid_row_idx = set(np.where(a > 0)[0])
            if not responding:
                invalid_row_idx.add(0)

            valid_row_idx = [i for i in range(len(augment_action_space)) if i not in invalid_row_idx]

            mask = mask[valid_row_idx, :]
            # Row position in the filtered mask -> index in the augmented space.
            idx_mapping = dict(zip(range(mask.shape[0]), valid_row_idx))

            # augment mask
            # TODO: known issue: 555444666 will not decompose into 5554 and 66644
            combs = get_combinations_nosplit(mask, card_mask)
            combs = [prefix + [clamp_action_idx(idx_mapping[idx]) for idx in comb] for comb in combs]

            if not responding:
                return combs, None

            last_group = CardGroup.to_cardgroup(last_cards_char)
            # NOTE(review): these are un-clamped augmented indices while the
            # combinations hold clamped ones -- kept as in the original.
            idx_must_be_contained = {
                idx for idx in valid_row_idx
                if CardGroup.to_cardgroup(augment_action_space[idx]).bigger_than(last_group)
            }
            return keep_beating(combs, idx_must_be_contained)

        mask = get_mask_onehot60(curr_cards_char, action_space, None).reshape(len(action_space), 15, 4).sum(-1).astype(
            np.uint8)
        valid = mask.sum(-1) > 0
        cards_target = Card.char2onehot60(curr_cards_char).reshape(-1, 4).sum(-1).astype(np.uint8)
        # do not feed empty to C++, which will cause infinite loop
        combs = get_combinations_recursive(mask[valid, :], cards_target)
        # Row position in the filtered mask -> index in the action space.
        idx_mapping = dict(zip(range(valid.shape[0]), np.where(valid)[0]))

        combs = [prefix + [idx_mapping[idx] for idx in comb] for comb in combs]

        if not responding:
            return combs, None

        # Index 0 is prepended to every combination, so it must be considered
        # when collecting the beating actions.
        valid[0] = True
        last_group = CardGroup.to_cardgroup(last_cards_char)
        idx_must_be_contained = {
            idx for idx in range(len(action_space))
            if valid[idx] and CardGroup.to_cardgroup(action_space[idx]).bigger_than(last_group)
        }
        return keep_beating(combs, idx_must_be_contained)
def dancing_link():
    """Debug driver: time the no-split combination search on a fixed hand.

    Runs ``get_combinations_nosplit`` for the hand 3-3-3-4-4-4 against the
    previous play 3-3, keeps the combinations that contain at least one action
    beating that play, and prints the intermediate results and elapsed time.
    """
    import timeit

    env = Pyenv()
    env.reset()
    env.prepare()
    # The dealt hand is fetched but the experiment uses a fixed hand instead.
    env.get_handcards()
    cards = ['3', '3', '3', '4', '4', '4']

    begin = timeit.default_timer()
    hand_onehot = Card.char2onehot60(cards).astype(np.uint8)
    last_cards = ['3', '3']
    all_rows = augment_action_space_onehot60
    # Positive entries mark rows that need a slot the hand does not have.
    overflow = np.expand_dims(1 - hand_onehot, 0) * all_rows
    blocked_rows = set(np.where(overflow > 0)[0])

    # Row 0 is skipped on purpose (range starts at 1).
    valid_row_idx = []
    for row in range(1, len(augment_action_space)):
        if row not in blocked_rows:
            valid_row_idx.append(row)

    idx_must_be_contained = set()
    for row in valid_row_idx:
        candidate = CardGroup.to_cardgroup(augment_action_space[row])
        if candidate.bigger_than(CardGroup.to_cardgroup(last_cards)):
            idx_must_be_contained.add(row)
    print(idx_must_be_contained)

    usable_rows = all_rows[valid_row_idx, :]
    # Position in the filtered matrix -> index in the augmented action space.
    idx_mapping = dict(enumerate(valid_row_idx))

    # augment mask
    # TODO: known issue: 555444666 will not decompose into 5554 and 66644
    raw_combs = get_combinations_nosplit(
        usable_rows, Card.char2onehot60(cards).astype(np.uint8))

    combs = []
    for raw in raw_combs:
        mapped = [clamp_action_idx(idx_mapping[pos]) for pos in raw]
        if not idx_must_be_contained.isdisjoint(mapped):
            combs.append(mapped)

    fine_mask = np.zeros([len(combs), 21])
    for row_no, comb in enumerate(combs):
        for col_no, action_idx in enumerate(comb):
            if action_idx in idx_must_be_contained:
                fine_mask[row_no][col_no] = 1
    print(fine_mask)
    end = timeit.default_timer()
    print(end - begin)

    print(len(combs))
    for comb in combs:
        for action_idx in comb:
            print(action_space[action_idx], end=', ')
        print()
Exemple #3
0
 def get_state_prob(self):
     """Return the unseen-card state split between the two other players.

     The unseen cards are a 60-slot vector of ones (slots 53-55 and 57-59
     cleared) minus the one-hot encoding of the current hand and all three
     play histories. The result is the concatenation of that vector scaled
     by the next player's share of the remaining card count and the same
     vector scaled by the share of the player after that.
     """
     names = self.agent_names
     seat = self.get_current_idx()

     deck = np.ones([60])
     deck[53:56] = 0
     deck[57:60] = 0

     # Everything visible from this seat: own hand plus all played cards.
     known_cards = (self.get_curr_handcards()
                    + self.histories[names[seat]]
                    + self.histories[names[(seat + 1) % 3]]
                    + self.histories[names[(seat + 2) % 3]])
     remain_cards = deck - Card.char2onehot60(known_cards)

     seats = len(names)
     next_cnt = len(self.player_cards[names[(seat + 1) % seats]])
     next_next_cnt = len(self.player_cards[names[(seat + 2) % seats]])
     total_cnt = next_cnt + next_next_cnt

     right_prob_state = remain_cards * (next_cnt / total_cnt)
     left_prob_state = remain_cards * (next_next_cnt / total_cnt)
     return np.concatenate([right_prob_state, left_prob_state])
Exemple #4
0
 def get_data(self):
     """Yield, forever, one-element lists holding a random encoded action.

     Every entry of ``action_space`` is encoded once with
     ``Card.char2onehot60``; each step draws an index with
     ``self.rng.randint(0, len(...))`` and yields the matching encoding.
     """
     encoded_actions = [Card.char2onehot60(a) for a in action_space]
     n_actions = len(encoded_actions)
     while True:
         pick = self.rng.randint(0, n_actions)
         yield [encoded_actions[pick]]
def recursive():
    """Debug driver: time the recursive combination search on a fixed hand.

    Builds the per-rank action mask for the hand J-10-10-7-7-6, runs
    ``get_combinations_recursive``, maps the result back to action-space
    indices, drops into ``pdb`` and then prints every combination followed by
    the average elapsed time.
    """
    import timeit

    env = Pyenv()
    timer_stat = StatCounter()
    for _ in range(1):
        env.reset()
        env.prepare()
        # The dealt hand is fetched and sliced, but a fixed hand is used below.
        env.get_curr_handcards()[:15]
        cards = ['J', '10', '10', '7', '7', '6']

        # last_cards = ['3', '3']
        onehot_mask = get_mask_onehot60(cards, action_space, None)
        # Collapse the 4 slots of each of the 15 ranks into a count.
        mask = onehot_mask.reshape(len(action_space), 15, 4).sum(-1).astype(np.uint8)
        valid = mask.sum(-1) > 0
        cards_target = Card.char2onehot60(cards).reshape(-1, 4).sum(-1).astype(np.uint8)

        started = timeit.default_timer()
        print(cards_target)
        print(mask[valid])
        combs = get_combinations_recursive(mask[valid, :], cards_target)
        print(combs)
        # Position in the filtered mask -> index in the action space.
        idx_mapping = dict(enumerate(np.where(valid)[0]))
        combs = [[idx_mapping[pos] for pos in comb] for comb in combs]
        finished = timeit.default_timer()
        timer_stat.feed(finished - started)

        print(len(combs))
        import pdb
        pdb.set_trace()
        for comb in combs:
            for action_idx in comb:
                print(action_space[action_idx], end=', ')
            print()
    print(timer_stat.average)
Exemple #6
0
 def __init__(self, input_names, output_names):
     """Store the tensor names and pre-encode the whole action space.

     Args:
         input_names: stored unchanged as ``self.input_names``.
         output_names: stored unchanged as ``self.output_names``.
     """
     # One ``Card.char2onehot60`` encoding per action, stacked into an array.
     encoded_actions = [Card.char2onehot60(a) for a in action_space]
     self.action_space_onehot = np.array(encoded_actions)
     self.input_names, self.output_names = input_names, output_names
Exemple #7
0
 def cards_char2embedding(cards_char):
     """Return the ``encoding`` row of the action matching ``cards_char``.

     The cards are encoded with ``Card.char2onehot60`` and compared against
     every row of ``action_space_onehot60``; the first fully matching row
     selects the entry of ``encoding``. Raises ``IndexError`` when no row
     matches.
     """
     query = Card.char2onehot60(cards_char)
     row_matches = np.all(action_space_onehot60 == query, axis=1)
     first_match = np.where(row_matches)[0][0]
     return encoding[first_match]
Exemple #8
0
def data_generator(rng):
    """Endlessly yield supervised training samples from auto-played games.

    An ``Env`` seeded from ``rng`` plays episode after episode through
    ``env.step_auto()``. Each step is turned into one or more 11-field tuples:

        (state, last_out_cards, passive_decision, passive_bomb,
         passive_response, active_decision, active_response, seq_length,
         minor_response, minor_type, mode)

    Only the target fields relevant to the sample are non-zero. The last
    field, ``mode``, tells which target the sample carries:
    0 passive decision, 1 passive bomb, 2 passive response, 3 active decision,
    4 active response, 5 sequence length, 6 minor-card response.

    Args:
        rng: random generator exposing ``randint``; used once to seed ``Env``.
    """
    env = Env(rng.randint(1 << 31))
    # logger.info('called')

    while True:
        env.reset()
        env.prepare()
        r = 0
        # One episode: loop until the environment reports a non-zero result.
        while r == 0:
            # Snapshot everything needed *before* the environment moves.
            last_cards_value = env.get_last_outcards()
            last_cards_char = to_char(last_cards_value)
            last_out_cards = Card.val2onehot60(last_cards_value)
            last_category_idx = env.get_last_outcategory_idx()
            curr_cards_char = to_char(env.get_curr_handcards())
            # No previous cards on the table -> this player leads ("active").
            is_active = True if last_cards_value.size == 0 else False

            s = env.get_state_prob()
            # s = s[:60]
            intention, r, category_idx = env.step_auto()

            # NOTE(review): category 14 is skipped entirely; its meaning is not
            # visible in this snippet -- confirm against the Category enum.
            if category_idx == 14:
                continue
            minor_cards_targets = pick_minor_targets(category_idx,
                                                     to_char(intention))
            # self, state, last_cards, passive_decision_target, passive_bomb_target, passive_response_target,
            # active_decision_target, active_response_target, seq_length_target, minor_response_target, minor_type, mode
            if not is_active:
                # A bomb played on top of a non-bomb: decision 1 + bomb target.
                if category_idx == Category.QUADRIC.value and category_idx != last_category_idx:
                    passive_decision_input = 1
                    passive_bomb_input = intention[0] - 3
                    yield s, last_out_cards, passive_decision_input, 0, 0, 0, 0, 0, 0, 0, 0
                    yield s, last_out_cards, 0, passive_bomb_input, 0, 0, 0, 0, 0, 0, 1

                else:
                    if category_idx == Category.BIGBANG.value:
                        # Decision 2 needs no further target.
                        passive_decision_input = 2
                        yield s, last_out_cards, passive_decision_input, 0, 0, 0, 0, 0, 0, 0, 0
                    else:
                        if category_idx != Category.EMPTY.value:
                            # Regular response: decision 3 + response target.
                            passive_decision_input = 3
                            # OFFSET_ONE
                            # 1st, Feb - remove relative card output since shift is hard for the network to learn
                            passive_response_input = intention[0] - 3
                            if passive_response_input < 0:
                                # Unexpected value: report it and clamp to 0.
                                print("something bad happens")
                                passive_response_input = 0
                            yield s, last_out_cards, passive_decision_input, 0, 0, 0, 0, 0, 0, 0, 0
                            yield s, last_out_cards, 0, 0, passive_response_input, 0, 0, 0, 0, 0, 2
                        else:
                            # Empty move: decision 0.
                            passive_decision_input = 0
                            yield s, last_out_cards, passive_decision_input, 0, 0, 0, 0, 0, 0, 0, 0

            else:
                seq_length = get_seq_length(category_idx, intention)

                # ACTIVE OFFSET ONE!
                active_decision_input = category_idx - 1
                active_response_input = intention[0] - 3
                yield s, last_out_cards, 0, 0, 0, active_decision_input, 0, 0, 0, 0, 3
                yield s, last_out_cards, 0, 0, 0, 0, active_response_input, 0, 0, 0, 4

                if seq_length is not None:
                    # length offset one
                    seq_length_input = seq_length - 1
                    yield s, last_out_cards, 0, 0, 0, 0, 0, seq_length_input, 0, 0, 5

            if minor_cards_targets is not None:
                # Emit one sample per minor card, removing already chosen cards
                # from a private copy of the hand and of the state as we go.
                main_cards = pick_main_cards(category_idx, to_char(intention))
                handcards = curr_cards_char.copy()
                state = s.copy()
                for main_card in main_cards:
                    handcards.remove(main_card)
                cards_onehot = Card.char2onehot60(main_cards)

                # we must make the order in each 4 batch correct...
                discard_onehot_from_s_60(state, cards_onehot)

                is_pair = False
                minor_type = 0
                # For these two categories each minor target is a pair.
                if category_idx == Category.THREE_TWO.value or category_idx == Category.THREE_TWO_LINE.value:
                    is_pair = True
                    minor_type = 1
                for target in minor_cards_targets:
                    target_val = Card.char2value_3_17(target) - 3
                    # The state is copied so later in-place discards do not
                    # alter samples that were already yielded.
                    yield state.copy(
                    ), last_out_cards, 0, 0, 0, 0, 0, 0, target_val, minor_type, 6
                    cards = [target]
                    handcards.remove(target)
                    if is_pair:
                        if target not in handcards:
                            # The second card of the pair is missing from the
                            # hand: dump diagnostics and continue.
                            print('something wrong...')
                            print('minor', target)
                            print('main_cards', main_cards)
                            print('handcards', handcards)
                            print('intention', intention)
                            print('category_idx', category_idx)
                        else:
                            handcards.remove(target)
                            cards.append(target)

                    # correct for one-hot state
                    cards_onehot = Card.char2onehot60(cards)

                    # print(s.shape)
                    # print(cards_onehot.shape)
                    discard_onehot_from_s_60(state, cards_onehot)
Exemple #9
0
def play_one_episode(env, func):
    """Play one auto-stepped episode and score ``func`` against each move.

    The environment chooses every move via ``env.step_auto()``. Before each
    move the state is captured, ``func`` is queried, and the argmax of each
    relevant output head is compared with the target derived from the move
    that was actually played.

    Args:
        env: game environment (``reset``, ``prepare``, ``step_auto``, ...).
        func: predictor taking a list ``[state, last_out_cards, minor_type]``
            and returning seven outputs, in order: passive decision, passive
            bomb, passive response, active decision, active response,
            sequence length, minor response.

    Returns:
        A list of seven ``StatCounter`` objects, one per output head in the
        order above, each fed 1 for a correct prediction and 0 otherwise.
    """
    env.reset()
    env.prepare()
    r = 0
    stats = [StatCounter() for _ in range(7)]
    while r == 0:
        # Snapshot everything needed *before* the environment moves.
        last_cards_value = env.get_last_outcards()
        last_out_cards = Card.val2onehot60(last_cards_value)
        last_category_idx = env.get_last_outcategory_idx()
        curr_cards_char = to_char(env.get_curr_handcards())
        # No previous cards on the table -> this player leads ("active").
        is_active = last_cards_value.size == 0

        s = env.get_state_prob()
        intention, r, category_idx = env.step_auto()

        # NOTE(review): category 14 is skipped entirely; its meaning is not
        # visible in this snippet -- confirm against the Category enum.
        if category_idx == 14:
            continue
        minor_cards_targets = pick_minor_targets(category_idx,
                                                 to_char(intention))

        # Every branch below used the very same query, so it is issued once.
        (passive_decision_prob, passive_bomb_prob, passive_response_prob,
         active_decision_prob, active_response_prob, active_seq_prob,
         _) = func([
             s.reshape(1, -1),
             last_out_cards.reshape(1, -1),
             np.zeros([s.shape[0]])
         ])

        if is_active:
            seq_length = get_seq_length(category_idx, intention)

            # ACTIVE OFFSET ONE!
            active_decision_input = category_idx - 1
            active_response_input = intention[0] - 3
            stats[3].feed(
                int(active_decision_input == np.argmax(active_decision_prob)))
            stats[4].feed(
                int(active_response_input == np.argmax(active_response_prob)))

            if seq_length is not None:
                # length offset one
                seq_length_input = seq_length - 1
                stats[5].feed(
                    int(seq_length_input == np.argmax(active_seq_prob)))
        elif category_idx == Category.QUADRIC.value and category_idx != last_category_idx:
            # A bomb played on top of a non-bomb: decision 1 + bomb target.
            passive_bomb_input = intention[0] - 3
            stats[0].feed(int(1 == np.argmax(passive_decision_prob)))
            stats[1].feed(
                int(passive_bomb_input == np.argmax(passive_bomb_prob)))
        elif category_idx == Category.BIGBANG.value:
            stats[0].feed(int(2 == np.argmax(passive_decision_prob)))
        elif category_idx != Category.EMPTY.value:
            # OFFSET_ONE
            # 1st, Feb - remove relative card output since shift is hard for the network to learn
            passive_response_input = intention[0] - 3
            if passive_response_input < 0:
                # Unexpected value: report it and clamp to 0.
                print("something bad happens")
                passive_response_input = 0
            stats[0].feed(int(3 == np.argmax(passive_decision_prob)))
            stats[2].feed(
                int(passive_response_input == np.argmax(
                    passive_response_prob)))
        else:
            # Empty move: decision 0.
            stats[0].feed(int(0 == np.argmax(passive_decision_prob)))

        if minor_cards_targets is not None:
            # Score one prediction per minor card, removing already chosen
            # cards from a private copy of the hand and of the state.
            main_cards = pick_main_cards(category_idx, to_char(intention))
            handcards = curr_cards_char.copy()
            state = s.copy()
            for main_card in main_cards:
                handcards.remove(main_card)
            cards_onehot = Card.char2onehot60(main_cards)

            # we must make the order in each 4 batch correct...
            discard_onehot_from_s_60(state, cards_onehot)

            # For these two categories each minor target is a pair.
            is_pair = (category_idx == Category.THREE_TWO.value
                       or category_idx == Category.THREE_TWO_LINE.value)
            minor_type = 1 if is_pair else 0
            for target in minor_cards_targets:
                target_val = Card.char2value_3_17(target) - 3
                _, _, _, _, _, _, minor_response_prob = func([
                    state.copy().reshape(1, -1),
                    last_out_cards.reshape(1, -1),
                    np.array([minor_type])
                ])
                stats[6].feed(
                    int(target_val == np.argmax(minor_response_prob)))
                cards = [target]
                handcards.remove(target)
                if is_pair:
                    if target not in handcards:
                        # The second card of the pair is missing from the
                        # hand. The values need %s placeholders: without them
                        # logging fails to format the record.
                        logger.warning('something wrong...')
                        logger.warning('minor %s', target)
                        logger.warning('main_cards %s', main_cards)
                        logger.warning('handcards %s', handcards)
                    else:
                        handcards.remove(target)
                        cards.append(target)

                # correct for one-hot state
                cards_onehot = Card.char2onehot60(cards)
                discard_onehot_from_s_60(state, cards_onehot)
    return stats
Exemple #10
0
    def run(self):
        """Main loop of the simulator process.

        Connects the ZeroMQ sockets to the coordinator, the experience
        receivers and the simulator manager, then loops forever: while
        ``self.toggle.value`` is non-zero it requests a screenshot, waits
        until a known button is detected and reacts to it according to
        ``self.state`` (CALLING or PLAYING). While playing it builds the
        probability state from the cards seen so far, asks
        ``self.predictor`` for the cards to play, forwards transitions to the
        experience sockets and clicks the chosen cards.

        This method never returns.
        """
        logger.info('simulator main loop')
        context = zmq.Context()

        # Every socket carries the simulator name as identity and a
        # high-water mark of 2.
        sim2coord_socket = context.socket(zmq.PUSH)
        sim2coord_socket.setsockopt(zmq.IDENTITY, self.name.encode('utf-8'))
        sim2coord_socket.set_hwm(2)
        sim2coord_socket.connect(self.sim2coord)

        coord2sim_socket = context.socket(zmq.DEALER)
        coord2sim_socket.setsockopt(zmq.IDENTITY, self.name.encode('utf-8'))
        coord2sim_socket.set_hwm(2)
        coord2sim_socket.connect(self.coord2sim)

        # One PUSH socket per address in self.sim2exps; indexed by lord
        # position further below.
        sim2exp_sockets = []
        for sim2exp in self.sim2exps:
            sim2exp_socket = context.socket(zmq.PUSH)
            sim2exp_socket.setsockopt(zmq.IDENTITY, self.name.encode('utf-8'))
            sim2exp_socket.set_hwm(2)
            sim2exp_socket.connect(sim2exp)
            sim2exp_sockets.append(sim2exp_socket)

        sim2mgr_socket = context.socket(zmq.PUSH)
        sim2mgr_socket.setsockopt(zmq.IDENTITY, self.name.encode('utf-8'))
        sim2mgr_socket.set_hwm(2)
        sim2mgr_socket.connect(self.sim2mgr)

        mgr2sim_socket = context.socket(zmq.DEALER)
        mgr2sim_socket.setsockopt(zmq.IDENTITY, self.name.encode('utf-8'))
        mgr2sim_socket.set_hwm(2)
        mgr2sim_socket.connect(self.mgr2sim)

        # while True:
        #     time.sleep(0.3)
        #     print(self.name)
        #     sim2exp_sockets[1].send(dumps([self.name, 'haha']))

        # print('main loop')
        # while True:
        #     time.sleep(0.3)
        #     msg = loads(coord2sim_socket.recv(copy=False).bytes)
        #     print(msg)
        #     sim2coord_socket.send(dumps([self.name, self.agent_names[0], np.arange(10)]))

        # The four request_* helpers send one message to the manager and block
        # until its reply arrives on mgr2sim_socket.
        def request_screen():
            sim2mgr_socket.send(dumps([self.name, SimulatorManager.MSG_TYPE.SCREEN, []]))
            return loads(mgr2sim_socket.recv(copy=False).bytes)

        def request_click(bbox):
            # Click the centre of bbox, shifted by the window origin.
            # NOTE(review): the +6 / +46 offsets are presumably the window
            # border and title bar -- confirm.
            sim2mgr_socket.send(dumps([self.name, SimulatorManager.MSG_TYPE.CLICK, [(bbox[0] + bbox[2]) // 2 + self.window_rect[0] + 6, (bbox[1] + bbox[3]) // 2 + self.window_rect[1] + 46]]))
            return loads(mgr2sim_socket.recv(copy=False).bytes)

        def request_lock():
            sim2mgr_socket.send(dumps([self.name, SimulatorManager.MSG_TYPE.LOCK, []]))
            return loads(mgr2sim_socket.recv(copy=False).bytes)

        def request_unlock():
            sim2mgr_socket.send(dumps([self.name, SimulatorManager.MSG_TYPE.UNLOCK, []]))
            return loads(mgr2sim_socket.recv(copy=False).bytes)

        def spin_lock_on_button():
            # Poll screenshots until a button action is detected, or until the
            # simulator is toggled off (in which case act may be empty).
            act = dict()
            while not act:
                self.current_screen = request_screen()
                # Every polled frame overwrites debug.png.
                cv2.imwrite('debug.png', self.current_screen)
                act = get_current_button_action(self.current_screen)
                if self.toggle.value == 0:
                    break

            return act

        # NOTE(review): discard() is defined but not called anywhere in this
        # method.
        def discard(act, bboxes, idxs):
            def diff(idxs, cards):
                # Positions where the detection result (cards[i] is None or
                # not) disagrees with membership of i in idxs.
                res = []
                for i in range(len(cards)):
                    if cards[i] is not None:
                        if i in idxs:
                            res.append(i)
                    else:
                        if i not in idxs:
                            res.append(i)
                return res

            differences = diff(idxs, get_cards_bboxes(request_screen(), self.templates, bboxes=bboxes)[0])
            print(differences)
            request_lock()
            # Keep clicking the mismatching positions until none are left.
            while len(differences) > 0:
                for d in differences:
                    request_click(bboxes[d])
                # request_click(bboxes[differences[0]])
                # time.sleep(0.3)
                differences = diff(idxs, get_cards_bboxes(request_screen(), self.templates, bboxes=bboxes)[0])
                print(differences)
            if 'chupai' in act:
                request_click(act['chupai'])
            elif 'alone_chupai' in act:
                request_click(act['alone_chupai'])
            elif 'ming_chupai' in act:
                request_click(act['ming_chupai'])
            request_unlock()

        game_cnt = 0
        while True:
            # NOTE(review): psutil is only referenced by the commented-out
            # print below.
            import psutil
            # print('memory usage is: ', psutil.virtual_memory())
            if self.toggle.value == 0:
                # Simulator is switched off: idle.
                time.sleep(0.2)
                continue
            print('new round')
            self.current_screen = request_screen()

            act = spin_lock_on_button()
            if not act:
                continue
            print(act)
            if 'start' in act:
                request_click(act['start'])
                continue
            if self.state == Simulator.State.CALLING:
                # state has changed
                if 'reverse' in act:
                    self.state = Simulator.State.PLAYING
                    self.current_lord_pos = who_is_lord(self.current_screen)
                    # Retry on fresh screenshots until a lord position is
                    # detected or the simulator is toggled off.
                    while self.current_lord_pos < 0:
                        self.current_screen = request_screen()
                        self.current_lord_pos = who_is_lord(self.current_screen)
                        print('current lord pos ', self.current_lord_pos)
                        if self.toggle.value == 0:
                            break
                    continue
                if 'continuous defeat' in act:
                    request_click(act['continuous defeat'])
                    continue
                print('calling', act)
                handcards, _ = get_cards_bboxes(self.current_screen, self.templates, 0)
                cards_value, _ = CEnv.get_cards_value(Card.char2color(handcards))
                print('cards value: ', cards_value)
                # assert 'jiaodizhu' in act
                # Click 'bujiao' for hands valued below 10, else 'jiaodizhu'.
                request_click(act['bujiao']) if cards_value < 10 else request_click(act['jiaodizhu'])
            elif self.state == Simulator.State.PLAYING:
                if 'defeat' in act or 'victory' in act:
                    # Game over: dismiss the dialog, then flush the cached
                    # transition as terminal with reward +1 / -1.
                    request_click(act['defeat'] if 'defeat' in act else act['victory'])
                    if self.cached_msg is None:
                        print('other player wins in one step!!!')
                        continue
                    win = is_win(self.current_screen)
                    state, action, fine_mask = self.cached_msg
                    if win:
                        sim2exp_sockets[self.current_lord_pos].send(dumps([[state, state], action, 1, True, False, [fine_mask, fine_mask]]))
                        self.win_rates[self.agent_names[self.current_lord_pos]].feed(1.)
                    else:
                        sim2exp_sockets[self.current_lord_pos].send(dumps([[state, state], action, -1, True, False, [fine_mask, fine_mask]]))
                        self.win_rates[self.agent_names[self.current_lord_pos]].feed(0.)

                    game_cnt += 1
                    # Log and reset the win rates every 100 finished games.
                    if game_cnt % 100 == 0:
                        for agent in self.agent_names:
                            if self.win_rates[agent].count > 0:
                                logger.info('[last-100]{} win rate: {}'.format(agent, self.win_rates[agent].average))
                                self.win_rates[agent].reset()

                    self.reset_episode()

                    continue
                # test if we have cached msg not sent

                print('playing', act)
                left_cards, _ = get_cards_bboxes(self.current_screen, self.mini_templates, 1)
                right_cards, _ = get_cards_bboxes(self.current_screen, self.mini_templates, 2)
                if None in left_cards or None in right_cards:
                    # Some opponent card was not recognised: click 'buchu' and
                    # retry after a pause.
                    request_click(act['buchu'])
                    time.sleep(1.)
                    continue
                assert None not in left_cards
                assert None not in right_cards
                # history[0] holds our own plays, [1] the right player's and
                # [2] the left player's.
                self.history[1].extend(right_cards)
                self.history[2].extend(left_cards)
                # last_cards = left_cards
                # if not left_cards:
                #     last_cards = right_cards
                # print('last cards', last_cards)
                # 60-slot vector of ones with slots 53-55 and 57-59 cleared.
                total_cards = np.ones([60])
                total_cards[53:56] = 0
                total_cards[57:60] = 0
                handcards, bboxes = get_cards_bboxes(self.current_screen, self.templates, 0)
                handcards = [card for card in handcards if card is not None]
                # Unseen cards = all cards minus own hand and everything played.
                remain_cards = total_cards - Card.char2onehot60(handcards + self.history[0] + self.history[1] + self.history[2])
                print('current handcards: ', handcards)
                # left_cnt, right_cnt = get_opponent_cnts(self.current_screen, self.tiny_templates)
                # print('left cnt: ', left_cnt, 'right cnt: ', right_cnt)
                # Opponent hand sizes derived from the play history.
                # NOTE(review): the +3 is presumably the lord's extra cards --
                # confirm.
                left_cnt = 17 - len(self.history[2])
                right_cnt = 17 - len(self.history[1])
                if self.current_lord_pos == 1:
                    left_cnt += 3
                if self.current_lord_pos == 2:
                    right_cnt += 3
                # assert left_cnt > 0 and right_cnt > 0
                # to be the same as C++ side, right comes before left

                right_prob_state = remain_cards * (right_cnt / (left_cnt + right_cnt))
                left_prob_state = remain_cards * (left_cnt / (left_cnt + right_cnt))
                prob_state = np.concatenate([right_prob_state, left_prob_state])
                # assert prob_state.size == 120
                # assert np.all(prob_state < 1.) and np.all(prob_state >= 0.)
                # print(prob_state)
                intention, buffer_comb, buffer_fine = self.predictor.predict(handcards, [left_cards, right_cards], prob_state, self, sim2coord_socket, coord2sim_socket)
                if self.cached_msg is not None:
                    # Send the previously cached transition now that its next
                    # state is known, followed by the new combination-level
                    # transition.
                    state, action, fine_mask = self.cached_msg
                    sim2exp_sockets[self.current_lord_pos].send(
                                               dumps([[state, buffer_comb[0]], action, 0, False, False,
                                                      [fine_mask, buffer_comb[2]]]))

                    sim2exp_sockets[self.current_lord_pos].send(
                                           dumps([[buffer_comb[0], buffer_fine[0]], buffer_comb[1], 0, False, True,

                                                  [buffer_comb[2], buffer_fine[2]]]))
                # The newest transition is held back until the next step.
                self.cached_msg = buffer_fine

                self.history[0].extend(intention)
                print('intention is: ', intention)
                intention.sort(key=lambda k: Card.cards_to_value[k])
                if len(intention) == 0:
                    request_click(act['buchu'])
                else:
                    # Walk the hand and the sorted intention in parallel to
                    # find which card boxes to click.
                    # NOTE(review): this assumes handcards is ordered like the
                    # sorted intention; otherwise i can run past the end of
                    # handcards -- confirm.
                    i = 0
                    j = 0
                    to_click = []
                    # NOTE(review): to_click_idxs is filled but never read in
                    # this method.
                    to_click_idxs = []
                    while j < len(intention):
                        if handcards[i] == intention[j]:
                            to_click_idxs.append(i)
                            to_click.append(bboxes[i])
                            i += 1
                            j += 1
                        else:
                            i += 1
                    for bbox in to_click:
                        request_click(bbox)
                    time.sleep(0.5)
                    # NOTE(review): hard-coded screen position, presumably the
                    # play button -- confirm.
                    request_click([1310, 760, 1310, 760])
            time.sleep(1.)
Exemple #11
0
def play_one_episode(env, func):
    """Play one episode, letting ``func`` act for the trained roles.

    For roles in ``ROLE_IDS_TO_TRAIN`` the move is the highest-scoring legal
    action of ``func`` (first output when leading, second output when
    responding); the third output is carried over as the recurrent state.
    Every other role is stepped automatically by the environment.

    Args:
        env: game environment.
        func: callable ``(role_id, state, last_cards, lstm_state)`` returning
            ``(active_prob, passive_prob, new_lstm_state)``.

    Returns:
        1 if the final result ``r`` is positive, otherwise 0.
    """
    env.reset()
    # init_cards = np.arange(52)
    # init_cards = np.append(init_cards[::4], init_cards[1::4])
    # env.prepare_manual(init_cards)
    env.prepare()

    r = 0
    lstm_state = np.zeros([1024 * 2])
    while r == 0:
        last_cards_value = env.get_last_outcards()
        last_cards_char = to_char(last_cards_value)
        previous_two = env.get_last_two_cards()
        last_two_cards_onehot = np.concatenate(
            [Card.val2onehot60(previous_two[0]),
             Card.val2onehot60(previous_two[1])])
        curr_cards_char = to_char(env.get_curr_handcards())
        # Nothing on the table means this player leads.
        leading = last_cards_value.size == 0

        # Network state = own hand encoding followed by the probability state.
        prob_state = env.get_state_prob()
        s = np.concatenate([Card.char2onehot60(curr_cards_char), prob_state])

        role_id = env.get_role_ID()

        if role_id not in ROLE_IDS_TO_TRAIN:
            intention, r, _ = env.step_auto()
            continue

        if leading:
            legal = get_mask(curr_cards_char, action_space, None)
            # not valid for active
            legal[0] = 0
            last_input = np.zeros([1, 120])
        else:
            legal = get_mask(curr_cards_char, action_space, last_cards_char)
            last_input = last_two_cards_onehot.reshape(1, -1)

        active_prob, passive_prob, lstm_state = func(
            np.array([role_id]), s.reshape(1, -1), last_input,
            lstm_state.reshape(1, -1))

        # Pick the best legal action; illegal entries are overwritten in place
        # with -1 to avoid numeric difficulty.
        scores = (active_prob if leading else passive_prob)[0]
        scores[legal == 0] = -1
        action_idx = np.argmax(scores)

        # since step auto needs full last card group info, we do not explicitly feed card type
        intention = to_value(action_space[action_idx])
        r, _, _ = env.step_manual(intention)
        assert (intention is not None)
    return int(r > 0)