Example #1
    def get_input_representation(self, embeddings):
        """Get fact (sentence) vectors via embedding, positional encoding and bi-directional GRU"""
        # get word vectors from embedding
        inputs = tf.nn.embedding_lookup(embeddings, self.input_placeholder)

        #https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/rnn/python/ops/rnn_cell.py#L1005
        #https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py#L382

        encoded_inputs = []
        inputs = tf.split(1, self.max_input_len, inputs)
        inputs = [tf.squeeze(i, squeeze_dims=[1]) for i in inputs]
        '''need to change this into tf.while_loop()'''
        '''tf.make_template can solve this sharing issue'''
        shared_bidirectional_dynamic_rnn = tf.make_template(
            'shared_bidirectional_dynamic_rnn',
            tf.nn.bidirectional_dynamic_rnn)

        for idx, raw_i in enumerate(inputs):
            '''MIGHT WANT TO USE A WAVENET/BYTENET ENCODER HERE INSTEAD BECAUSE IT SEEMS TO HAVE IMPLICIT ALIGNMENT'''
            '''https://arxiv.org/pdf/1601.06733.pdf apparently being able to attend to individual words (LSTMN h_states) (as opposed to the last h_state) after attentive encoding only improves results by 0.2%, so it is not worth adding intra-attention after encoding, only during'''

            pre_fact_fw_and_bw, _ = shared_bidirectional_dynamic_rnn(
                self.intra_attention_GRU_cell_fw,
                self.intra_attention_GRU_cell_bw,
                raw_i,
                dtype=np.float32,
                sequence_length=get_seq_length(raw_i))

            #they reversed it twice, once before and once after: look at https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn.py#L672 and L663
            fw_encode = tf.transpose(
                pre_fact_fw_and_bw[0], perm=[1, 0, 2]
            )[-1]  #hidden state at last word of sent when conditioned 0 to _length_
            bw_encode = tf.transpose(
                pre_fact_fw_and_bw[1], perm=[1, 0, 2]
            )[0]  #hidden state at first word of sent when conditioned _length_ to 0

            encoded_inputs.append(fw_encode + bw_encode)

        pre_facts = tf.transpose(tf.pack(encoded_inputs), perm=[1, 0, 2])
        '''WOULD PUTTING INTRA SENTENCE ATTN HERE BE REDUNDANT OR PERFORMANT'''
        '''do you need this scope?'''
        outputs_fw_and_bw, _ = tf.nn.bidirectional_dynamic_rnn(
            self.gru_cell,
            self.gru_cell,
            pre_facts,
            dtype=np.float32,
            sequence_length=get_seq_length(pre_facts))

        fact_vecs = outputs_fw_and_bw[0] + outputs_fw_and_bw[1]

        fact_vecs = tf.nn.dropout(fact_vecs, self.dropout_placeholder)

        return fact_vecs
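The `get_seq_length` helper used in Example #1 is not shown here. A common idiom in TF 1.x code of this vintage, and a plausible reading given that `bidirectional_dynamic_rnn` expects per-example integer lengths, is to count the time steps whose embedded vectors are non-zero (a minimal sketch, assuming padded positions embed to all-zero vectors; not necessarily the repository's implementation):

import tensorflow as tf

def get_seq_length(sequence):
    # sequence: [batch, time, dim]; padded time steps are assumed to be all-zero vectors
    used = tf.sign(tf.reduce_max(tf.abs(sequence), 2))   # 1.0 wherever any feature is non-zero
    length = tf.reduce_sum(used, 1)                      # number of real time steps per example
    return tf.cast(length, tf.int32)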
Example #2
 def step(self, action):
     if self.act == ACT_TYPE.PASSIVE:
         if self.mode == MODE.PASSIVE_DECISION:
             if action == 0 or action == 2:
                 self.finished = True
                 if action == 2:
                     self.intention = np.array([16, 17])
                     self.card_type = Category.BIGBANG.value
                 else:
                     self.card_type = Category.EMPTY.value
                 return
             elif action == 1:
                 self.mode = MODE.PASSIVE_BOMB
                 return
             elif action == 3:
                 self.mode = MODE.PASSIVE_RESPONSE
                 return
             else:
                 raise Exception('unexpected action')
         elif self.mode == MODE.PASSIVE_BOMB:
             # convert to value input
             self.intention = np.array([action + 3] * 4)
             self.finished = True
             self.card_type = Category.QUADRIC.value
             return
         elif self.mode == MODE.PASSIVE_RESPONSE:
             self.intention = give_cards_without_minor(
                 action, self.last_cards_value, self.category, None)
             if self.category == Category.THREE_ONE.value or \
                     self.category == Category.THREE_TWO.value or \
                     self.category == Category.THREE_ONE_LINE.value or \
                     self.category == Category.THREE_TWO_LINE.value or \
                     self.category == Category.FOUR_TWO.value:
                 if self.category == Category.THREE_TWO.value or self.category == Category.THREE_TWO_LINE.value:
                     self.minor_type = 1
                 self.mode = MODE.MINOR_RESPONSE
                 # modify the state for minor cards
                 # discard_onehot_from_s_60(self.prob_state, Card.val2onehot60(self.intention))
                 intention_char = to_char(self.intention)
                 for c in intention_char:
                     self.handcards_char.remove(c)
                 self.minor_length = get_seq_length(self.category,
                                                    self.last_cards_value)
                 if self.minor_length is None:
                     self.minor_length = 2 if self.category == Category.FOUR_TWO.value else 1
                 self.card_type = self.category
                 return
             else:
                 self.finished = True
                 self.card_type = self.category
                 return
         elif self.mode == MODE.MINOR_RESPONSE:
             minor_value_cards = [action + 3] * (1 if self.minor_type == 0 else 2)
             # modify the state for minor cards
             minor_char = to_char(minor_value_cards)
             for c in minor_char:
                 self.handcards_char.remove(c)
             # discard_onehot_from_s_60(self.prob_state, Card.val2onehot60(minor_value_cards))
             self.intention = np.append(self.intention, minor_value_cards)
             assert self.minor_length > 0
             self.minor_length -= 1
             if self.minor_length == 0:
                 self.finished = True
                 return
             else:
                 return
     elif self.act == ACT_TYPE.ACTIVE:
         if self.mode == MODE.ACTIVE_DECISION:
             self.category = action + 1
             self.active_decision = action
             self.mode = MODE.ACTIVE_RESPONSE
             self.card_type = self.category
             return
         elif self.mode == MODE.ACTIVE_RESPONSE:
             if self.category == Category.SINGLE_LINE.value or \
                     self.category == Category.DOUBLE_LINE.value or \
                     self.category == Category.TRIPLE_LINE.value or \
                     self.category == Category.THREE_ONE_LINE.value or \
                     self.category == Category.THREE_TWO_LINE.value:
                 self.active_response = action
                 self.mode = MODE.ACTIVE_SEQ
                 return
             elif self.category == Category.THREE_ONE.value or \
                     self.category == Category.THREE_TWO.value or \
                     self.category == Category.FOUR_TWO.value:
                 if self.category == Category.THREE_TWO.value or self.category == Category.THREE_TWO_LINE.value:
                     self.minor_type = 1
                 self.mode = MODE.MINOR_RESPONSE
                 self.intention = give_cards_without_minor(
                     action, np.array([]), self.category, None)
                 # modify the state for minor cards
                 intention_char = to_char(self.intention)
                 for c in intention_char:
                     self.handcards_char.remove(c)
                 # discard_onehot_from_s_60(self.prob_state, Card.val2onehot60(self.intention))
                 self.minor_length = 2 if self.category == Category.FOUR_TWO.value else 1
                 return
             else:
                 self.intention = give_cards_without_minor(
                     action, np.array([]), self.category, None)
                 self.finished = True
                 return
         elif self.mode == MODE.ACTIVE_SEQ:
             self.minor_length = action + 1
             self.intention = give_cards_without_minor(
                 self.active_response, np.array([]), self.category,
                 action + 1)
             if self.category == Category.THREE_ONE_LINE.value or \
                     self.category == Category.THREE_TWO_LINE.value:
                 if self.category == Category.THREE_TWO.value or self.category == Category.THREE_TWO_LINE.value:
                     self.minor_type = 1
                 self.mode = MODE.MINOR_RESPONSE
                 # modify the state for minor cards
                 intention_char = to_char(self.intention)
                 for c in intention_char:
                     self.handcards_char.remove(c)
                 # discard_onehot_from_s_60(self.prob_state, Card.val2onehot60(self.intention))
             else:
                 self.finished = True
             return
         elif self.mode == MODE.MINOR_RESPONSE:
              minor_value_cards = [action + 3] * (1 if self.minor_type == 0 else 2)
             # modify the state for minor cards
             minor_char = to_char(minor_value_cards)
             for c in minor_char:
                 self.handcards_char.remove(c)
             # discard_onehot_from_s_60(self.prob_state, Card.val2onehot60(minor_value_cards))
             self.intention = np.append(self.intention, minor_value_cards)
             assert self.minor_length > 0
             self.minor_length -= 1
             if self.minor_length == 0:
                 self.finished = True
                 return
             else:
                 return
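In Examples #2-#5, `get_seq_length` has a different signature: it takes a category index and an array of card values and returns the chain length for sequence-type categories, or `None` for everything else (which is why the caller above falls back to 1 or 2 for THREE_ONE/THREE_TWO/FOUR_TWO). A plausible sketch under that assumption, reusing the `Category` enum from these examples; the real helper in the repository may count ranks differently:

def get_seq_length(category, cards_value):
    # chain categories: divide the group size by the number of cards consumed per rank
    if category == Category.SINGLE_LINE.value:
        return cards_value.size
    if category == Category.DOUBLE_LINE.value:
        return cards_value.size // 2
    if category == Category.TRIPLE_LINE.value:
        return cards_value.size // 3
    if category == Category.THREE_ONE_LINE.value:
        return cards_value.size // 4
    if category == Category.THREE_TWO_LINE.value:
        return cards_value.size // 5
    return None   # non-chain categories have no sequence length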
Example #3
def data_generator(rng):
    env = Env(rng.randint(1 << 31))
    # logger.info('called')

    while True:
        env.reset()
        env.prepare()
        r = 0
        while r == 0:
            last_cards_value = env.get_last_outcards()
            last_cards_char = to_char(last_cards_value)
            last_out_cards = Card.val2onehot60(last_cards_value)
            last_category_idx = env.get_last_outcategory_idx()
            curr_cards_char = to_char(env.get_curr_handcards())
            is_active = last_cards_value.size == 0

            s = env.get_state_prob()
            # s = s[:60]
            intention, r, category_idx = env.step_auto()

            if category_idx == 14:
                continue
            minor_cards_targets = pick_minor_targets(category_idx,
                                                     to_char(intention))
            # self, state, last_cards, passive_decision_target, passive_bomb_target, passive_response_target,
            # active_decision_target, active_response_target, seq_length_target, minor_response_target, minor_type, mode
            if not is_active:
                if category_idx == Category.QUADRIC.value and category_idx != last_category_idx:
                    passive_decision_input = 1
                    passive_bomb_input = intention[0] - 3
                    yield s, last_out_cards, passive_decision_input, 0, 0, 0, 0, 0, 0, 0, 0
                    yield s, last_out_cards, 0, passive_bomb_input, 0, 0, 0, 0, 0, 0, 1

                else:
                    if category_idx == Category.BIGBANG.value:
                        passive_decision_input = 2
                        yield s, last_out_cards, passive_decision_input, 0, 0, 0, 0, 0, 0, 0, 0
                    else:
                        if category_idx != Category.EMPTY.value:
                            passive_decision_input = 3
                            # OFFSET_ONE
                            # 1st, Feb - remove relative card output since shift is hard for the network to learn
                            passive_response_input = intention[0] - 3
                            if passive_response_input < 0:
                                print("something bad happens")
                                passive_response_input = 0
                            yield s, last_out_cards, passive_decision_input, 0, 0, 0, 0, 0, 0, 0, 0
                            yield s, last_out_cards, 0, 0, passive_response_input, 0, 0, 0, 0, 0, 2
                        else:
                            passive_decision_input = 0
                            yield s, last_out_cards, passive_decision_input, 0, 0, 0, 0, 0, 0, 0, 0

            else:
                seq_length = get_seq_length(category_idx, intention)

                # ACTIVE OFFSET ONE!
                active_decision_input = category_idx - 1
                active_response_input = intention[0] - 3
                yield s, last_out_cards, 0, 0, 0, active_decision_input, 0, 0, 0, 0, 3
                yield s, last_out_cards, 0, 0, 0, 0, active_response_input, 0, 0, 0, 4

                if seq_length is not None:
                    # length offset one
                    seq_length_input = seq_length - 1
                    yield s, last_out_cards, 0, 0, 0, 0, 0, seq_length_input, 0, 0, 5

            if minor_cards_targets is not None:
                main_cards = pick_main_cards(category_idx, to_char(intention))
                handcards = curr_cards_char.copy()
                state = s.copy()
                for main_card in main_cards:
                    handcards.remove(main_card)
                cards_onehot = Card.char2onehot60(main_cards)

                # we must make the order in each 4 batch correct...
                discard_onehot_from_s_60(state, cards_onehot)

                is_pair = False
                minor_type = 0
                if category_idx == Category.THREE_TWO.value or category_idx == Category.THREE_TWO_LINE.value:
                    is_pair = True
                    minor_type = 1
                for target in minor_cards_targets:
                    target_val = Card.char2value_3_17(target) - 3
                    yield state.copy(), last_out_cards, 0, 0, 0, 0, 0, 0, target_val, minor_type, 6
                    cards = [target]
                    handcards.remove(target)
                    if is_pair:
                        if target not in handcards:
                            print('something wrong...')
                            print('minor', target)
                            print('main_cards', main_cards)
                            print('handcards', handcards)
                            print('intention', intention)
                            print('category_idx', category_idx)
                        else:
                            handcards.remove(target)
                            cards.append(target)

                    # correct for one-hot state
                    cards_onehot = Card.char2onehot60(cards)

                    # print(s.shape)
                    # print(cards_onehot.shape)
                    discard_onehot_from_s_60(state, cards_onehot)
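Each yield above is an 11-tuple: state, one-hot last out-cards, the seven per-head targets (passive decision, passive bomb, passive response, active decision, active response, sequence length, minor response), the minor type, and a trailing mode id selecting which output head the sample trains. A minimal sketch of a consumer that buckets samples by mode before batching (the function name and batch size here are illustrative, not from the repository):

from collections import defaultdict

def mode_batches(rng, batch_size=32):
    gen = data_generator(rng)
    buckets = defaultdict(list)
    while True:
        sample = next(gen)
        mode = sample[-1]                    # last field picks the output head
        buckets[mode].append(sample)
        if len(buckets[mode]) == batch_size:
            yield mode, buckets.pop(mode)    # emit a homogeneous batch for that head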
Example #4
def play_one_episode(env, func):
    def take_action_from_prob(prob, mask):
        prob = prob[0]
        # to avoid numeric difficulty
        prob[mask == 0] = -1
        return np.argmax(prob)

    # return char minor cards output
    def inference_minor_util60(role_id, handcards, num, is_pair, dup_mask, main_cards_char):
        for main_card in main_cards_char:
            handcards.remove(main_card)

        s = get_mask(handcards, action_space, None).astype(np.float32)
        outputs = []
        minor_type = 1 if is_pair else 0
        for i in range(num):
            input_single, input_pair, _, _ = get_masks(handcards, None)
            _, _, _, _, _, _, minor_response_prob = func(
                [np.array([role_id]), s.reshape(1, -1), np.zeros([1, 9085]), np.array([minor_type])]
            )

            # give minor cards
            mask = None
            if is_pair:
                mask = np.concatenate([input_pair, [0, 0]]) * dup_mask
            else:
                mask = input_single * dup_mask

            minor_response = take_action_from_prob(minor_response_prob, mask)
            dup_mask[minor_response] = 0

            # convert network output to char cards
            handcards.remove(to_char(minor_response + 3))
            if is_pair:
                handcards.remove(to_char(minor_response + 3))
            s = get_mask(handcards, action_space, None).astype(np.float32)

            # save to output
            outputs.append(to_char(minor_response + 3))
            if is_pair:
                outputs.append(to_char(minor_response + 3))
        return outputs

    def inference_minor_cards60(role_id, category, s, handcards, seq_length, dup_mask, main_cards_char):
        if category == Category.THREE_ONE.value:
            return inference_minor_util60(role_id, handcards, 1, False, dup_mask, main_cards_char)
        if category == Category.THREE_TWO.value:
            return inference_minor_util60(role_id, handcards, 1, True, dup_mask, main_cards_char)
        if category == Category.THREE_ONE_LINE.value:
            return inference_minor_util60(role_id, handcards, seq_length, False, dup_mask, main_cards_char)
        if category == Category.THREE_TWO_LINE.value:
            return inference_minor_util60(role_id, handcards, seq_length, True, dup_mask, main_cards_char)
        if category == Category.FOUR_TWO.value:
            return inference_minor_util60(role_id, handcards, 2, False, dup_mask, main_cards_char)

    env.reset()
    init_cards = np.arange(21)
    # init_cards = np.append(init_cards[::4], init_cards[1::4])
    env.prepare_manual(init_cards)
    r = 0
    while r == 0:
        last_cards_value = env.get_last_outcards()
        last_cards_char = to_char(last_cards_value)
        last_out_cards = Card.val2onehot60(last_cards_value)
        last_category_idx = env.get_last_outcategory_idx()
        curr_cards_char = to_char(env.get_curr_handcards())
        is_active = last_cards_value.size == 0

        s = get_mask(curr_cards_char, action_space, None if is_active else last_cards_char).astype(np.float32)
        last_state = get_mask(last_cards_char, action_space, None).astype(np.float32)
        # print(s.shape)

        role_id = env.get_role_ID()
        # print('%s current cards' % ('lord' if role_id == 2 else 'farmer'), curr_cards_char)

        intention = None
        if role_id == 2:
            if is_active:

                # first get mask
                decision_mask, response_mask, _, length_mask = get_mask_alter(curr_cards_char, [], last_category_idx)

                _, _, _, active_decision_prob, active_response_prob, active_seq_prob, _ = func(
                    [np.array([role_id]), s.reshape(1, -1), np.zeros([1, 9085]), np.zeros([s.shape[0]])]
                )

                # make decision depending on output
                active_decision = take_action_from_prob(active_decision_prob, decision_mask)

                active_category_idx = active_decision + 1

                # get response
                active_response = take_action_from_prob(active_response_prob, response_mask[active_decision])

                seq_length = 0
                # next sequence length
                if active_category_idx == Category.SINGLE_LINE.value or \
                        active_category_idx == Category.DOUBLE_LINE.value or \
                        active_category_idx == Category.TRIPLE_LINE.value or \
                        active_category_idx == Category.THREE_ONE_LINE.value or \
                        active_category_idx == Category.THREE_TWO_LINE.value:
                    seq_length = take_action_from_prob(active_seq_prob, length_mask[active_decision][active_response]) + 1

                # give main cards
                intention = give_cards_without_minor(active_response, last_cards_value, active_category_idx, seq_length)

                # then give minor cards
                if active_category_idx == Category.THREE_ONE.value or \
                        active_category_idx == Category.THREE_TWO.value or \
                        active_category_idx == Category.THREE_ONE_LINE.value or \
                        active_category_idx == Category.THREE_TWO_LINE.value or \
                        active_category_idx == Category.FOUR_TWO.value:
                    dup_mask = np.ones([15])
                    if seq_length > 0:
                        for i in range(seq_length):
                            dup_mask[intention[0] - 3 + i] = 0
                    else:
                        dup_mask[intention[0] - 3] = 0
                    intention = np.concatenate([intention,
                                                to_value(inference_minor_cards60(role_id, active_category_idx, s.copy(),
                                                                                 curr_cards_char.copy(), seq_length,
                                                                                 dup_mask, to_char(intention)))])
            else:
                # print(to_char(last_cards_value), is_bomb, last_category_idx)
                decision_mask, response_mask, bomb_mask, _ = get_mask_alter(curr_cards_char, to_char(last_cards_value),
                                                                            last_category_idx)

                passive_decision_prob, passive_bomb_prob, passive_response_prob, _, _, _, _ = func(
                    [np.array([role_id]), s.reshape(1, -1), last_state.reshape(1, -1), np.zeros([s.shape[0]])])

                passive_decision = take_action_from_prob(passive_decision_prob, decision_mask)

                if passive_decision == 0:
                    intention = np.array([])
                elif passive_decision == 1:

                    passive_bomb = take_action_from_prob(passive_bomb_prob, bomb_mask)

                    # converting 0-based index to 3-based value
                    intention = np.array([passive_bomb + 3] * 4)

                elif passive_decision == 2:
                    intention = np.array([16, 17])
                elif passive_decision == 3:
                    passive_response = take_action_from_prob(passive_response_prob, response_mask)

                    intention = give_cards_without_minor(passive_response, last_cards_value, last_category_idx, None)
                    if last_category_idx == Category.THREE_ONE.value or \
                            last_category_idx == Category.THREE_TWO.value or \
                            last_category_idx == Category.THREE_ONE_LINE.value or \
                            last_category_idx == Category.THREE_TWO_LINE.value or \
                            last_category_idx == Category.FOUR_TWO.value:
                        dup_mask = np.ones([15])
                        seq_length = get_seq_length(last_category_idx, last_cards_value)
                        if seq_length:
                            for i in range(seq_length):
                                dup_mask[intention[0] - 3 + i] = 0
                        else:
                            dup_mask[intention[0] - 3] = 0
                        intention = np.concatenate([intention,
                                                    to_value(inference_minor_cards60(role_id, last_category_idx, s.copy(),
                                                                                     curr_cards_char.copy(), seq_length,
                                                                                     dup_mask, to_char(intention)))])
            # since step auto needs full last card group info, we do not explicitly feed card type
            assert intention is not None
            r, _, _ = env.step_manual(intention)
            # print('lord gives', to_char(intention))
        else:
            intention, r, _ = env.step_auto()
            # print('farmer gives', to_char(intention))
    # if r > 0:
    #     print('farmer wins')
    # else:
    #     print('lord wins')
    return int(r > 0)
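`play_one_episode` returns 1 when the farmers win (`r > 0`) and 0 when the lord wins, so a win rate can be estimated by averaging over many episodes. A minimal driver sketch; the `Env` seed and the predictor `func` are assumed to come from the surrounding evaluation code:

def evaluate_farmer_winrate(func, num_episodes=100, seed=0):
    env = Env(seed)                          # Env is the game environment used above
    wins = sum(play_one_episode(env, func) for _ in range(num_episodes))
    return wins / float(num_episodes)        # fraction of episodes won by the farmers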
Example #5
def play_one_episode(env, func):
    env.reset()
    env.prepare()
    r = 0
    stats = [StatCounter() for _ in range(7)]
    while r == 0:
        last_cards_value = env.get_last_outcards()
        last_cards_char = to_char(last_cards_value)
        last_out_cards = Card.val2onehot60(last_cards_value)
        last_category_idx = env.get_last_outcategory_idx()
        curr_cards_char = to_char(env.get_curr_handcards())
        is_active = last_cards_value.size == 0

        s = env.get_state_prob()
        intention, r, category_idx = env.step_auto()

        if category_idx == 14:
            continue
        minor_cards_targets = pick_minor_targets(category_idx,
                                                 to_char(intention))

        if not is_active:
            if category_idx == Category.QUADRIC.value and category_idx != last_category_idx:
                passive_decision_input = 1
                passive_bomb_input = intention[0] - 3
                passive_decision_prob, passive_bomb_prob, _, _, _, _, _ = func(
                    [
                        s.reshape(1, -1),
                        last_out_cards.reshape(1, -1),
                        np.zeros([s.shape[0]])
                    ])
                stats[0].feed(
                    int(passive_decision_input == np.argmax(
                        passive_decision_prob)))
                stats[1].feed(
                    int(passive_bomb_input == np.argmax(passive_bomb_prob)))

            else:
                if category_idx == Category.BIGBANG.value:
                    passive_decision_input = 2
                    passive_decision_prob, _, _, _, _, _, _ = func([
                        s.reshape(1, -1),
                        last_out_cards.reshape(1, -1),
                        np.zeros([s.shape[0]])
                    ])
                    stats[0].feed(
                        int(passive_decision_input == np.argmax(
                            passive_decision_prob)))
                else:
                    if category_idx != Category.EMPTY.value:
                        passive_decision_input = 3
                        # OFFSET_ONE
                        # 1st, Feb - remove relative card output since shift is hard for the network to learn
                        passive_response_input = intention[0] - 3
                        if passive_response_input < 0:
                            print("something bad happens")
                            passive_response_input = 0
                        passive_decision_prob, _, passive_response_prob, _, _, _, _ = func(
                            [
                                s.reshape(1, -1),
                                last_out_cards.reshape(1, -1),
                                np.zeros([s.shape[0]])
                            ])
                        stats[0].feed(
                            int(passive_decision_input == np.argmax(
                                passive_decision_prob)))
                        stats[2].feed(
                            int(passive_response_input == np.argmax(
                                passive_response_prob)))
                    else:
                        passive_decision_input = 0
                        passive_decision_prob, _, _, _, _, _, _ = func([
                            s.reshape(1, -1),
                            last_out_cards.reshape(1, -1),
                            np.zeros([s.shape[0]])
                        ])
                        stats[0].feed(
                            int(passive_decision_input == np.argmax(
                                passive_decision_prob)))

        else:
            seq_length = get_seq_length(category_idx, intention)

            # ACTIVE OFFSET ONE!
            active_decision_input = category_idx - 1
            active_response_input = intention[0] - 3
            _, _, _, active_decision_prob, active_response_prob, active_seq_prob, _ = func(
                [
                    s.reshape(1, -1),
                    last_out_cards.reshape(1, -1),
                    np.zeros([s.shape[0]])
                ])

            stats[3].feed(
                int(active_decision_input == np.argmax(active_decision_prob)))
            stats[4].feed(
                int(active_response_input == np.argmax(active_response_prob)))

            if seq_length is not None:
                # length offset one
                seq_length_input = seq_length - 1
                stats[5].feed(
                    int(seq_length_input == np.argmax(active_seq_prob)))

        if minor_cards_targets is not None:
            main_cards = pick_main_cards(category_idx, to_char(intention))
            handcards = curr_cards_char.copy()
            state = s.copy()
            for main_card in main_cards:
                handcards.remove(main_card)
            cards_onehot = Card.char2onehot60(main_cards)

            # we must make the order in each 4 batch correct...
            discard_onehot_from_s_60(state, cards_onehot)

            is_pair = False
            minor_type = 0
            if category_idx == Category.THREE_TWO.value or category_idx == Category.THREE_TWO_LINE.value:
                is_pair = True
                minor_type = 1
            for target in minor_cards_targets:
                target_val = Card.char2value_3_17(target) - 3
                _, _, _, _, _, _, minor_response_prob = func([
                    state.copy().reshape(1, -1),
                    last_out_cards.reshape(1, -1),
                    np.array([minor_type])
                ])
                stats[6].feed(
                    int(target_val == np.argmax(minor_response_prob)))
                cards = [target]
                handcards.remove(target)
                if is_pair:
                    if target not in handcards:
                        logger.warn('something wrong...')
                        logger.warn('minor: {}'.format(target))
                        logger.warn('main_cards: {}'.format(main_cards))
                        logger.warn('handcards: {}'.format(handcards))
                    else:
                        handcards.remove(target)
                        cards.append(target)

                # correct for one-hot state
                cards_onehot = Card.char2onehot60(cards)

                # print(s.shape)
                # print(cards_onehot.shape)
                discard_onehot_from_s_60(state, cards_onehot)
    return stats
Example #6
    def get_input_representation(self, embeddings):
        """Get fact (sentence) vectors via embedding, positional encoding and bi-directional GRU"""
        # get word vectors from embedding
        inputs = tf.nn.embedding_lookup(embeddings, self.input_placeholder)
        '''
        # use encoding to get sentence representation
        inputs = tf.reduce_sum(inputs * self.encoding, 2)
        '''

        #https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/rnn/python/ops/rnn_cell.py#L1005
        #https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py#L382
        '''EXPERIMENT WITH PUTTING INTRA SENTENCE ATTN ON bi-directional WRAPPER AS OPPOSED TO GRU WRAPPER'''
        '''and maybe allow reverse phase during birnn to attend to forward phases h_states'''

        #print self.intra_attention_GRU_cell_fw
        encoded_inputs = []
        inputs = tf.split(1, self.max_input_len, inputs)
        inputs = [tf.squeeze(i, squeeze_dims=[1]) for i in inputs]
        '''need to change this into tf.while_loop()'''
        '''tf.make_template can solve this sharing issue'''
        shared_bidirectional_dynamic_rnn = tf.make_template(
            'shared_bidirectional_dynamic_rnn',
            tf.nn.bidirectional_dynamic_rnn)

        for idx, raw_i in enumerate(inputs):
            #for idx, raw_i in enumerate(inputs[1:3]):
            '''MIGHT WANT TO USE A WAVENET/BYTENET ENCODER HERE INSTEAD BECAUSE IT SEEMS TO HAVE IMPLICIT ALIGNMENT'''
            '''https://arxiv.org/pdf/1601.06733.pdf apparently being able to attend to individual words (LSTMN h_states) (as opposed to the last h_state) after attentive encoding only improves results by 0.2%, so it is not worth adding intra-attention after encoding, only during'''
            '''need to check this: why does output yield (100, 46, 80) but the (final) state yield (?, 80)? shouldn't final_state be (100, 80)?'''
            #_, pre_fact_fw_and_bw = shared_bidirectional_dynamic_rnn(self.intra_attention_GRU_cell_fw, self.intra_attention_GRU_cell_bw, raw_i, dtype=np.float32, sequence_length=get_seq_length(raw_i))
            #pre_fact_fw_and_bw, _ = shared_bidirectional_dynamic_rnn(self.intra_attention_GRU_cell_fw, self.intra_attention_GRU_cell_fw, raw_i, dtype=np.float32, sequence_length=get_seq_length(raw_i))
            pre_fact_fw_and_bw, _ = shared_bidirectional_dynamic_rnn(
                self.intra_attention_GRU_cell_fw,
                self.intra_attention_GRU_cell_bw,
                raw_i,
                dtype=np.float32,
                sequence_length=get_seq_length(raw_i))
            '''HOW IS BIRNN TYING SUPPOSED TO WORK? IN RNNSEARCH, WEIGHTS ARE UNTIED, BUT WORD EMBEDDING INPUTS ARE TIED: arxiv.org/pdf/1409.0473v7.pdf'''
            '''^MAYBE ADD AN AUXILIARY TASK OF AUTOENCODING SENTENCES (LIKE IN NEURAL REASONER) TO PREVENT OVERFITTING'''

            #they reversed it twice, once before and once after: look at https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/rnn.py#L672 and L663
            fw_encode = tf.transpose(
                pre_fact_fw_and_bw[0], perm=[1, 0, 2]
            )[-1]  #hidden state at last word of sent when conditioned 0 to _length_
            bw_encode = tf.transpose(
                pre_fact_fw_and_bw[1], perm=[1, 0, 2]
            )[0]  #hidden state at first word of sent when conditioned _length_ to 0

            encoded_inputs.append(fw_encode + bw_encode)

        pre_facts = tf.transpose(tf.pack(encoded_inputs), perm=[1, 0, 2])
        '''WOULD PUTTING INTRA SENTENCE ATTN HERE BE REDUNDANT OR PERFORMANT'''
        '''do you need this scope?'''
        #with tf.variable_scope("input_fusion", reuse=True, initializer=_xavier_weight_init()):
        outputs_fw_and_bw, _ = tf.nn.bidirectional_dynamic_rnn(
            self.gru_cell,
            self.gru_cell,
            pre_facts,
            dtype=np.float32,
            sequence_length=get_seq_length(pre_facts))

        fact_vecs = outputs_fw_and_bw[0] + outputs_fw_and_bw[1]

        fact_vecs = tf.nn.dropout(fact_vecs, self.dropout_placeholder)

        return fact_vecs
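The docstring above promises a positional encoding, and the commented-out `inputs * self.encoding` suggests `self.encoding` is a position-encoding matrix in the style of End-To-End Memory Networks (Sukhbaatar et al., 2015), broadcast over the batch and fact dimensions and summed over the word axis. A sketch of how such a matrix is commonly built; this is an assumption about what `self.encoding` holds, not code taken from this repository:

import numpy as np

def position_encoding(sentence_len, embedding_size):
    # encoding[j, k]: weight for word position j, embedding dimension k
    encoding = np.ones((embedding_size, sentence_len), dtype=np.float32)
    for k in range(1, embedding_size + 1):
        for j in range(1, sentence_len + 1):
            encoding[k - 1, j - 1] = (k - (embedding_size + 1) / 2.0) * (j - (sentence_len + 1) / 2.0)
    encoding = 1.0 + 4.0 * encoding / embedding_size / sentence_len
    return np.transpose(encoding)            # shape [sentence_len, embedding_size]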