def get_input_representation(self, embeddings):
    """Encode every input sentence and fuse the sentence vectors with a bi-directional RNN.

    Word ids from ``self.input_placeholder`` are embedded, split along axis 1
    into ``self.max_input_len`` slices, and each slice is encoded by a
    bi-directional RNN built from ``self.intra_attention_GRU_cell_fw`` and
    ``self.intra_attention_GRU_cell_bw``.  The per-slice encodings are then
    passed through a second bi-directional RNN that uses ``self.gru_cell``
    for both directions, and dropout is applied to the result.

    Args:
        embeddings: table handed to ``tf.nn.embedding_lookup``.

    Returns:
        Forward plus backward outputs of the second RNN after
        ``tf.nn.dropout`` with ``self.dropout_placeholder``.
    """
    # Word vectors for every token id.
    word_vectors = tf.nn.embedding_lookup(embeddings, self.input_placeholder)

    # Background on the attention cell:
    # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/rnn/python/ops/rnn_cell.py#L1005
    # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py#L382

    # One tensor per slice of axis 1 (legacy argument order:
    # split_dim, num_split, value), with the split axis squeezed away.
    pieces = tf.split(1, self.max_input_len, word_vectors)
    sentence_inputs = [tf.squeeze(piece, squeeze_dims=[1]) for piece in pieces]

    # TODO (from the original author): turn the Python loop below into
    # tf.while_loop().  tf.make_template is what lets every iteration share
    # the same RNN variables.
    shared_bidirectional_dynamic_rnn = tf.make_template(
        'shared_bidirectional_dynamic_rnn', tf.nn.bidirectional_dynamic_rnn)

    sentence_codes = []
    for sentence in sentence_inputs:
        # Author notes: a WaveNet/ByteNet encoder might be preferable here
        # because it seems to have implicit alignment.  Per
        # https://arxiv.org/pdf/1601.06733.pdf, attending to individual word
        # states after attentive encoding only improves results by 0.2%, so
        # intra-attention is applied during encoding only.
        lengths = get_seq_length(sentence)
        (forward_out, backward_out), _ = shared_bidirectional_dynamic_rnn(
            self.intra_attention_GRU_cell_fw,
            self.intra_attention_GRU_cell_bw,
            sentence,
            dtype=np.float32,
            sequence_length=lengths)

        # The backward outputs are reversed twice inside TensorFlow (see
        # tensorflow/python/ops/rnn.py#L672 and L663), so index 0 of the
        # backward output lines up with the first word.
        # NOTE(review): with sequence_length set, outputs past a sequence's
        # length are zeroed by dynamic_rnn, so [-1] may pick a zero vector for
        # sentences shorter than the padded length -- confirm.
        last_forward = tf.transpose(forward_out, perm=[1, 0, 2])[-1]
        first_backward = tf.transpose(backward_out, perm=[1, 0, 2])[0]
        sentence_codes.append(last_forward + first_backward)

    # Stack the per-sentence codes and swap the first two axes.
    pre_facts = tf.transpose(tf.pack(sentence_codes), perm=[1, 0, 2])

    # Open questions from the author: would intra-sentence attention here be
    # redundant or performant, and is a dedicated variable scope needed?
    fusion_lengths = get_seq_length(pre_facts)
    (fused_forward, fused_backward), _ = tf.nn.bidirectional_dynamic_rnn(
        self.gru_cell,
        self.gru_cell,
        pre_facts,
        dtype=np.float32,
        sequence_length=fusion_lengths)

    return tf.nn.dropout(fused_forward + fused_backward,
                         self.dropout_placeholder)
def step(self, action):
    """Advance the card-selection state machine by one sub-decision.

    The meaning of ``action`` depends on ``self.act`` and ``self.mode``:

    * PASSIVE_DECISION: 0 and 2 finish the move (2 sets the intention to
      ``[16, 17]``), 1 switches to PASSIVE_BOMB, 3 switches to
      PASSIVE_RESPONSE; anything else raises.
    * PASSIVE_BOMB: the intention becomes four copies of ``action + 3``.
    * PASSIVE_RESPONSE / ACTIVE_RESPONSE / ACTIVE_SEQ: the main cards are
      produced by ``give_cards_without_minor``; categories that carry minor
      cards continue in MINOR_RESPONSE, others finish.
    * ACTIVE_DECISION: the category becomes ``action + 1``.
    * MINOR_RESPONSE: one minor card (or pair) of value ``action + 3`` is
      appended to the intention until ``self.minor_length`` reaches zero.

    Progress is recorded on ``self`` (``mode``, ``intention``, ``finished``,
    ``card_type``, ``minor_type``, ``minor_length``, ``handcards_char``).

    Returns:
        None.

    Raises:
        Exception: for an action outside 0..3 in PASSIVE_DECISION mode.
    """
    acting_passively = self.act == ACT_TYPE.PASSIVE
    if not acting_passively and not (self.act == ACT_TYPE.ACTIVE):
        return

    # Minor-card selection is identical for passive and active moves.
    if self.mode == MODE.MINOR_RESPONSE:
        copies = 1 if self.minor_type == 0 else 2
        minor_value_cards = [action + 3] * copies
        # Remove the chosen minor cards from the tracked hand.
        for card in to_char(minor_value_cards):
            self.handcards_char.remove(card)
        # discard_onehot_from_s_60(self.prob_state, Card.val2onehot60(minor_value_cards))
        self.intention = np.append(self.intention, minor_value_cards)
        assert self.minor_length > 0
        self.minor_length -= 1
        if self.minor_length == 0:
            self.finished = True
        return

    if acting_passively:
        if self.mode == MODE.PASSIVE_DECISION:
            if action == 2:
                self.finished = True
                self.intention = np.array([16, 17])
                self.card_type = Category.BIGBANG.value
            elif action == 0:
                self.finished = True
                self.card_type = Category.EMPTY.value
            elif action == 1:
                self.mode = MODE.PASSIVE_BOMB
            elif action == 3:
                self.mode = MODE.PASSIVE_RESPONSE
            else:
                raise Exception('unexpected action')
            return

        if self.mode == MODE.PASSIVE_BOMB:
            # Convert the 0-based action to a card value.
            self.intention = np.array([action + 3] * 4)
            self.finished = True
            self.card_type = Category.QUADRIC.value
            return

        if self.mode == MODE.PASSIVE_RESPONSE:
            self.intention = give_cards_without_minor(
                action, self.last_cards_value, self.category, None)
            needs_minor = self.category in (
                Category.THREE_ONE.value,
                Category.THREE_TWO.value,
                Category.THREE_ONE_LINE.value,
                Category.THREE_TWO_LINE.value,
                Category.FOUR_TWO.value,
            )
            if not needs_minor:
                self.finished = True
                self.card_type = self.category
                return

            if self.category in (Category.THREE_TWO.value,
                                 Category.THREE_TWO_LINE.value):
                self.minor_type = 1
            self.mode = MODE.MINOR_RESPONSE
            # Take the main cards out of the hand before minors are chosen.
            # discard_onehot_from_s_60(self.prob_state, Card.val2onehot60(self.intention))
            for card in to_char(self.intention):
                self.handcards_char.remove(card)
            self.minor_length = get_seq_length(self.category,
                                               self.last_cards_value)
            if self.minor_length is None:
                if self.category == Category.FOUR_TWO.value:
                    self.minor_length = 2
                else:
                    self.minor_length = 1
            self.card_type = self.category
            return

        return

    # Active move from here on.
    if self.mode == MODE.ACTIVE_DECISION:
        self.category = action + 1
        self.active_decision = action
        self.mode = MODE.ACTIVE_RESPONSE
        self.card_type = self.category
        return

    if self.mode == MODE.ACTIVE_RESPONSE:
        if self.category in (Category.SINGLE_LINE.value,
                             Category.DOUBLE_LINE.value,
                             Category.TRIPLE_LINE.value,
                             Category.THREE_ONE_LINE.value,
                             Category.THREE_TWO_LINE.value):
            # Sequences need one more step to pick the length.
            self.active_response = action
            self.mode = MODE.ACTIVE_SEQ
            return

        if self.category in (Category.THREE_ONE.value,
                             Category.THREE_TWO.value,
                             Category.FOUR_TWO.value):
            if self.category in (Category.THREE_TWO.value,
                                 Category.THREE_TWO_LINE.value):
                self.minor_type = 1
            self.mode = MODE.MINOR_RESPONSE
            self.intention = give_cards_without_minor(
                action, np.array([]), self.category, None)
            # Take the main cards out of the hand before minors are chosen.
            for card in to_char(self.intention):
                self.handcards_char.remove(card)
            # discard_onehot_from_s_60(self.prob_state, Card.val2onehot60(self.intention))
            if self.category == Category.FOUR_TWO.value:
                self.minor_length = 2
            else:
                self.minor_length = 1
            return

        self.intention = give_cards_without_minor(
            action, np.array([]), self.category, None)
        self.finished = True
        return

    if self.mode == MODE.ACTIVE_SEQ:
        self.minor_length = action + 1
        self.intention = give_cards_without_minor(
            self.active_response, np.array([]), self.category, action + 1)
        if self.category in (Category.THREE_ONE_LINE.value,
                             Category.THREE_TWO_LINE.value):
            if self.category in (Category.THREE_TWO.value,
                                 Category.THREE_TWO_LINE.value):
                self.minor_type = 1
            self.mode = MODE.MINOR_RESPONSE
            # Take the main cards out of the hand before minors are chosen.
            for card in to_char(self.intention):
                self.handcards_char.remove(card)
            # discard_onehot_from_s_60(self.prob_state, Card.val2onehot60(self.intention))
        else:
            self.finished = True
        return
def data_generator(rng):
    """Yield supervised training samples from games played by ``env.step_auto``.

    Every sample is an 11-tuple::

        (state, last_cards,
         passive_decision_target, passive_bomb_target, passive_response_target,
         active_decision_target, active_response_target, seq_length_target,
         minor_response_target, minor_type, mode)

    Only the target that belongs to ``mode`` is filled in; the rest are 0.
    Modes 0-2 are the passive decision / bomb / response heads, 3-5 the
    active decision / response / sequence-length heads, and 6 is the
    minor-card head.

    Args:
        rng: object providing ``randint``; used once to seed the environment.

    The generator never terminates: it starts a new game whenever one ends.
    """

    def sample(state, last_cards, slot, target, mode, minor_type=0):
        # Seven target slots sit between the two inputs and (minor_type, mode);
        # only ``slot`` is set.
        targets = [0] * 7
        targets[slot] = target
        return (state, last_cards) + tuple(targets) + (minor_type, mode)

    env = Env(rng.randint(1 << 31))
    # logger.info('called')
    while True:
        env.reset()
        env.prepare()
        r = 0
        while r == 0:
            last_cards_value = env.get_last_outcards()
            last_cards_char = to_char(last_cards_value)
            last_out_cards = Card.val2onehot60(last_cards_value)
            last_category_idx = env.get_last_outcategory_idx()
            curr_cards_char = to_char(env.get_curr_handcards())
            is_active = last_cards_value.size == 0

            s = env.get_state_prob()
            # s = s[:60]
            intention, r, category_idx = env.step_auto()

            # Category 14 produces no training sample.
            if category_idx == 14:
                continue
            minor_cards_targets = pick_minor_targets(category_idx,
                                                     to_char(intention))

            if is_active:
                seq_length = get_seq_length(category_idx, intention)

                # ACTIVE OFFSET ONE!
                active_decision_input = category_idx - 1
                active_response_input = intention[0] - 3
                yield sample(s, last_out_cards, 3, active_decision_input, 3)
                yield sample(s, last_out_cards, 4, active_response_input, 4)

                if seq_length is not None:
                    # length offset one
                    seq_length_input = seq_length - 1
                    yield sample(s, last_out_cards, 5, seq_length_input, 5)
            elif (category_idx == Category.QUADRIC.value
                  and category_idx != last_category_idx):
                # A bomb played over a non-bomb: decision 1, then the bomb rank.
                passive_decision_input = 1
                passive_bomb_input = intention[0] - 3
                yield sample(s, last_out_cards, 0, passive_decision_input, 0)
                yield sample(s, last_out_cards, 1, passive_bomb_input, 1)
            elif category_idx == Category.BIGBANG.value:
                passive_decision_input = 2
                yield sample(s, last_out_cards, 0, passive_decision_input, 0)
            elif category_idx != Category.EMPTY.value:
                passive_decision_input = 3
                # OFFSET_ONE
                # 1st, Feb - remove relative card output since shift is hard for the network to learn
                passive_response_input = intention[0] - 3
                if passive_response_input < 0:
                    print("something bad happens")
                    passive_response_input = 0
                yield sample(s, last_out_cards, 0, passive_decision_input, 0)
                yield sample(s, last_out_cards, 2, passive_response_input, 2)
            else:
                passive_decision_input = 0
                yield sample(s, last_out_cards, 0, passive_decision_input, 0)

            if minor_cards_targets is None:
                continue

            # Minor cards are predicted one at a time on a state from which
            # the main cards (and earlier minors) have been removed.
            main_cards = pick_main_cards(category_idx, to_char(intention))
            handcards = curr_cards_char.copy()
            state = s.copy()
            for main_card in main_cards:
                handcards.remove(main_card)
            cards_onehot = Card.char2onehot60(main_cards)

            # we must make the order in each 4 batch correct...
            discard_onehot_from_s_60(state, cards_onehot)

            is_pair = category_idx in (Category.THREE_TWO.value,
                                       Category.THREE_TWO_LINE.value)
            minor_type = 1 if is_pair else 0

            for target in minor_cards_targets:
                target_val = Card.char2value_3_17(target) - 3
                yield sample(state.copy(), last_out_cards, 6, target_val, 6,
                             minor_type)

                cards = [target]
                handcards.remove(target)
                if is_pair:
                    if target not in handcards:
                        print('something wrong...')
                        print('minor', target)
                        print('main_cards', main_cards)
                        print('handcards', handcards)
                        print('intention', intention)
                        print('category_idx', category_idx)
                    else:
                        handcards.remove(target)
                        cards.append(target)

                # correct for one-hot state
                cards_onehot = Card.char2onehot60(cards)
                # print(s.shape)
                # print(cards_onehot.shape)
                discard_onehot_from_s_60(state, cards_onehot)
def play_one_episode(env, func):
    """Play one game in which ``func`` chooses the cards for role 2.

    The other roles are played by ``env.step_auto``.  ``func`` is called with
    a list of four inputs (role id, own-hand state, last-cards state, minor
    type) and must return seven outputs, unpacked here as passive decision,
    passive bomb, passive response, active decision, active response,
    active sequence length and minor response.

    Args:
        env: game environment; must provide ``reset``, ``prepare_manual``,
            ``get_last_outcards``, ``get_last_outcategory_idx``,
            ``get_curr_handcards``, ``get_role_ID``, ``step_manual`` and
            ``step_auto``.
        func: predictor described above.

    Returns:
        ``int(r > 0)`` for the final reward ``r``.  According to the original
        debugging comments, ``r > 0`` means the farmers won and role 2 is the
        lord.

    Raises:
        AssertionError: if no intention was produced for role 2.
    """
    minor_categories = (
        Category.THREE_ONE.value,
        Category.THREE_TWO.value,
        Category.THREE_ONE_LINE.value,
        Category.THREE_TWO_LINE.value,
        Category.FOUR_TWO.value,
    )
    line_categories = (
        Category.SINGLE_LINE.value,
        Category.DOUBLE_LINE.value,
        Category.TRIPLE_LINE.value,
        Category.THREE_ONE_LINE.value,
        Category.THREE_TWO_LINE.value,
    )

    def take_action_from_prob(prob, mask):
        """Return the argmax of ``prob[0]`` over entries whose mask is non-zero.

        Masked entries of ``prob[0]`` are overwritten with -1 in place.
        """
        prob = prob[0]
        # to avoid numeric difficulty
        prob[mask == 0] = -1
        return np.argmax(prob)

    # return char minor cards output
    def inference_minor_util60(role_id, handcards, num, is_pair, dup_mask, main_cards_char):
        """Pick ``num`` minor cards (or pairs), one network query per pick.

        ``handcards`` and ``dup_mask`` are modified in place.
        """
        for main_card in main_cards_char:
            handcards.remove(main_card)

        s = get_mask(handcards, action_space, None).astype(np.float32)
        outputs = []
        minor_type = 1 if is_pair else 0
        for _ in range(num):
            input_single, input_pair, _, _ = get_masks(handcards, None)
            _, _, _, _, _, _, minor_response_prob = func(
                [np.array([role_id]), s.reshape(1, -1), np.zeros([1, 9085]), np.array([minor_type])]
            )

            # give minor cards
            if is_pair:
                # The pair mask is extended by two zero entries before it is
                # combined with dup_mask.
                mask = np.concatenate([input_pair, [0, 0]]) * dup_mask
            else:
                mask = input_single * dup_mask

            minor_response = take_action_from_prob(minor_response_prob, mask)
            # Never pick the same rank twice.
            dup_mask[minor_response] = 0

            # convert network output to char cards
            handcards.remove(to_char(minor_response + 3))
            if is_pair:
                handcards.remove(to_char(minor_response + 3))
            s = get_mask(handcards, action_space, None).astype(np.float32)

            # save to output
            outputs.append(to_char(minor_response + 3))
            if is_pair:
                outputs.append(to_char(minor_response + 3))
        return outputs

    def inference_minor_cards60(role_id, category, s, handcards, seq_length, dup_mask, main_cards_char):
        """Dispatch to ``inference_minor_util60`` with the count and pair flag for ``category``.

        ``s`` is accepted but not used.  Returns None for other categories.
        """
        if category == Category.THREE_ONE.value:
            return inference_minor_util60(role_id, handcards, 1, False, dup_mask, main_cards_char)
        if category == Category.THREE_TWO.value:
            return inference_minor_util60(role_id, handcards, 1, True, dup_mask, main_cards_char)
        if category == Category.THREE_ONE_LINE.value:
            return inference_minor_util60(role_id, handcards, seq_length, False, dup_mask, main_cards_char)
        if category == Category.THREE_TWO_LINE.value:
            return inference_minor_util60(role_id, handcards, seq_length, True, dup_mask, main_cards_char)
        if category == Category.FOUR_TWO.value:
            return inference_minor_util60(role_id, handcards, 2, False, dup_mask, main_cards_char)

    env.reset()
    init_cards = np.arange(21)
    # init_cards = np.append(init_cards[::4], init_cards[1::4])
    env.prepare_manual(init_cards)
    r = 0
    while r == 0:
        last_cards_value = env.get_last_outcards()
        last_cards_char = to_char(last_cards_value)
        last_out_cards = Card.val2onehot60(last_cards_value)
        last_category_idx = env.get_last_outcategory_idx()
        curr_cards_char = to_char(env.get_curr_handcards())
        # No cards on the table means this player leads.
        is_active = last_cards_value.size == 0

        s = get_mask(curr_cards_char, action_space, None if is_active else last_cards_char).astype(np.float32)
        last_state = get_mask(last_cards_char, action_space, None).astype(np.float32)
        # print(s.shape)

        role_id = env.get_role_ID()
        # print('%s current cards' % ('lord' if role_id == 2 else 'farmer'), curr_cards_char)

        intention = None
        if role_id == 2:
            if is_active:
                # first get mask
                decision_mask, response_mask, _, length_mask = get_mask_alter(curr_cards_char, [], last_category_idx)

                _, _, _, active_decision_prob, active_response_prob, active_seq_prob, _ = func(
                    [np.array([role_id]), s.reshape(1, -1), np.zeros([1, 9085]), np.zeros([s.shape[0]])]
                )

                # make decision depending on output
                active_decision = take_action_from_prob(active_decision_prob, decision_mask)
                active_category_idx = active_decision + 1

                # get response
                active_response = take_action_from_prob(active_response_prob, response_mask[active_decision])

                seq_length = 0
                # next sequence length
                if active_category_idx in line_categories:
                    seq_length = take_action_from_prob(active_seq_prob, length_mask[active_decision][active_response]) + 1

                # give main cards
                intention = give_cards_without_minor(active_response, last_cards_value, active_category_idx, seq_length)

                # then give minor cards
                if active_category_idx in minor_categories:
                    # Ranks used by the main cards cannot be minor cards.
                    dup_mask = np.ones([15])
                    if seq_length > 0:
                        for i in range(seq_length):
                            dup_mask[intention[0] - 3 + i] = 0
                    else:
                        dup_mask[intention[0] - 3] = 0
                    intention = np.concatenate([intention, to_value(inference_minor_cards60(role_id, active_category_idx, s.copy(), curr_cards_char.copy(), seq_length, dup_mask, to_char(intention)))])
            else:
                # print(to_char(last_cards_value), is_bomb, last_category_idx)
                decision_mask, response_mask, bomb_mask, _ = get_mask_alter(curr_cards_char, to_char(last_cards_value), last_category_idx)

                passive_decision_prob, passive_bomb_prob, passive_response_prob, _, _, _, _ = func(
                    [np.array([role_id]), s.reshape(1, -1), last_state.reshape(1, -1), np.zeros([s.shape[0]])])

                passive_decision = take_action_from_prob(passive_decision_prob, decision_mask)

                if passive_decision == 0:
                    intention = np.array([])
                elif passive_decision == 1:
                    passive_bomb = take_action_from_prob(passive_bomb_prob, bomb_mask)

                    # converting 0-based index to 3-based value
                    intention = np.array([passive_bomb + 3] * 4)
                elif passive_decision == 2:
                    intention = np.array([16, 17])
                elif passive_decision == 3:
                    passive_response = take_action_from_prob(passive_response_prob, response_mask)

                    intention = give_cards_without_minor(passive_response, last_cards_value, last_category_idx, None)
                    if last_category_idx in minor_categories:
                        # Ranks used by the main cards cannot be minor cards.
                        dup_mask = np.ones([15])
                        seq_length = get_seq_length(last_category_idx, last_cards_value)
                        if seq_length:
                            for i in range(seq_length):
                                dup_mask[intention[0] - 3 + i] = 0
                        else:
                            dup_mask[intention[0] - 3] = 0
                        intention = np.concatenate([intention, to_value(inference_minor_cards60(role_id, last_category_idx, s.copy(), curr_cards_char.copy(), seq_length, dup_mask, to_char(intention)))])

            # Check before the intention is used: the original asserted only
            # after step_manual had already consumed it.
            assert (intention is not None)
            # since step auto needs full last card group info, we do not explicitly feed card type
            r, _, _ = env.step_manual(intention)
            # print('lord gives', to_char(intention))
        else:
            intention, r, _ = env.step_auto()
            # print('farmer gives', to_char(intention))
    # if r > 0:
    #     print('farmer wins')
    # else:
    #     print('lord wins')
    return int(r > 0)
def play_one_episode(env, func):
    """Play one auto-played game and record how often ``func`` predicts each move.

    Every move made by ``env.step_auto`` is turned into the targets of the
    heads it exercises, ``func`` is queried on the pre-move state, and each
    comparison ``target == argmax(prediction)`` is fed to a counter as 0 or 1.

    ``func`` is called with a list of three inputs (state, last played cards,
    minor type) and must return seven outputs: passive decision, passive
    bomb, passive response, active decision, active response, active
    sequence length and minor response.

    Args:
        env: game environment; must provide ``reset``, ``prepare``,
            ``get_last_outcards``, ``get_last_outcategory_idx``,
            ``get_curr_handcards``, ``get_state_prob`` and ``step_auto``.
        func: predictor described above.

    Returns:
        A list of seven ``StatCounter`` objects, indexed in the same order as
        the outputs of ``func``.
    """

    def hit(target, prob):
        # 1 when the most probable class equals the target, else 0.
        return int(target == np.argmax(prob))

    env.reset()
    env.prepare()
    r = 0
    stats = [StatCounter() for _ in range(7)]
    while r == 0:
        last_cards_value = env.get_last_outcards()
        last_cards_char = to_char(last_cards_value)
        last_out_cards = Card.val2onehot60(last_cards_value)
        last_category_idx = env.get_last_outcategory_idx()
        curr_cards_char = to_char(env.get_curr_handcards())
        # No cards on the table means this player leads.
        is_active = last_cards_value.size == 0

        s = env.get_state_prob()
        intention, r, category_idx = env.step_auto()

        # Category 14 is not evaluated.
        if category_idx == 14:
            continue
        minor_cards_targets = pick_minor_targets(category_idx,
                                                 to_char(intention))

        if not is_active:
            if (category_idx == Category.QUADRIC.value
                    and category_idx != last_category_idx):
                passive_decision_input = 1
                passive_bomb_input = intention[0] - 3
                passive_decision_prob, passive_bomb_prob, _, _, _, _, _ = func([
                    s.reshape(1, -1),
                    last_out_cards.reshape(1, -1),
                    np.zeros([s.shape[0]])
                ])
                stats[0].feed(hit(passive_decision_input, passive_decision_prob))
                stats[1].feed(hit(passive_bomb_input, passive_bomb_prob))
            elif category_idx == Category.BIGBANG.value:
                passive_decision_input = 2
                passive_decision_prob, _, _, _, _, _, _ = func([
                    s.reshape(1, -1),
                    last_out_cards.reshape(1, -1),
                    np.zeros([s.shape[0]])
                ])
                stats[0].feed(hit(passive_decision_input, passive_decision_prob))
            elif category_idx != Category.EMPTY.value:
                passive_decision_input = 3
                # OFFSET_ONE
                # 1st, Feb - remove relative card output since shift is hard for the network to learn
                passive_response_input = intention[0] - 3
                if passive_response_input < 0:
                    print("something bad happens")
                    passive_response_input = 0
                passive_decision_prob, _, passive_response_prob, _, _, _, _ = func([
                    s.reshape(1, -1),
                    last_out_cards.reshape(1, -1),
                    np.zeros([s.shape[0]])
                ])
                stats[0].feed(hit(passive_decision_input, passive_decision_prob))
                stats[2].feed(hit(passive_response_input, passive_response_prob))
            else:
                passive_decision_input = 0
                passive_decision_prob, _, _, _, _, _, _ = func([
                    s.reshape(1, -1),
                    last_out_cards.reshape(1, -1),
                    np.zeros([s.shape[0]])
                ])
                stats[0].feed(hit(passive_decision_input, passive_decision_prob))
        else:
            seq_length = get_seq_length(category_idx, intention)

            # ACTIVE OFFSET ONE!
            active_decision_input = category_idx - 1
            active_response_input = intention[0] - 3
            _, _, _, active_decision_prob, active_response_prob, active_seq_prob, _ = func([
                s.reshape(1, -1),
                last_out_cards.reshape(1, -1),
                np.zeros([s.shape[0]])
            ])
            stats[3].feed(hit(active_decision_input, active_decision_prob))
            stats[4].feed(hit(active_response_input, active_response_prob))

            if seq_length is not None:
                # length offset one
                seq_length_input = seq_length - 1
                stats[5].feed(hit(seq_length_input, active_seq_prob))

        if minor_cards_targets is not None:
            # Minor cards are evaluated one at a time on a state from which
            # the main cards (and earlier minors) have been removed.
            main_cards = pick_main_cards(category_idx, to_char(intention))
            handcards = curr_cards_char.copy()
            state = s.copy()
            for main_card in main_cards:
                handcards.remove(main_card)
            cards_onehot = Card.char2onehot60(main_cards)

            # we must make the order in each 4 batch correct...
            discard_onehot_from_s_60(state, cards_onehot)

            is_pair = category_idx in (Category.THREE_TWO.value,
                                       Category.THREE_TWO_LINE.value)
            minor_type = 1 if is_pair else 0

            for target in minor_cards_targets:
                target_val = Card.char2value_3_17(target) - 3
                _, _, _, _, _, _, minor_response_prob = func([
                    state.copy().reshape(1, -1),
                    last_out_cards.reshape(1, -1),
                    np.array([minor_type])
                ])
                stats[6].feed(hit(target_val, minor_response_prob))

                cards = [target]
                handcards.remove(target)
                if is_pair:
                    if target not in handcards:
                        # The values are formatted into the message itself:
                        # passing them as extra positional arguments to a
                        # message without placeholders makes a logging-style
                        # logger report a formatting error instead.
                        logger.warn('something wrong...')
                        logger.warn('minor {}'.format(target))
                        logger.warn('main_cards {}'.format(main_cards))
                        logger.warn('handcards {}'.format(handcards))
                    else:
                        handcards.remove(target)
                        cards.append(target)

                # correct for one-hot state
                cards_onehot = Card.char2onehot60(cards)
                # print(s.shape)
                # print(cards_onehot.shape)
                discard_onehot_from_s_60(state, cards_onehot)
    return stats
def get_input_representation(self, embeddings):
    """Encode every input sentence and fuse the sentence vectors with a bi-directional RNN.

    Word ids from ``self.input_placeholder`` are embedded and split along
    axis 1 into ``self.max_input_len`` slices.  Each slice is encoded by a
    bi-directional RNN built from ``self.intra_attention_GRU_cell_fw`` and
    ``self.intra_attention_GRU_cell_bw``; the encodings are then fused by a
    second bi-directional RNN that uses ``self.gru_cell`` for both
    directions, followed by dropout.  The positional-encoding reduction
    (``tf.reduce_sum(inputs * self.encoding, 2)``) is disabled.

    Args:
        embeddings: table handed to ``tf.nn.embedding_lookup``.

    Returns:
        Forward plus backward outputs of the second RNN after
        ``tf.nn.dropout`` with ``self.dropout_placeholder``.
    """
    # get word vectors from embedding
    embedded = tf.nn.embedding_lookup(embeddings, self.input_placeholder)

    # Disabled alternative: use encoding to get sentence representation
    #     inputs = tf.reduce_sum(inputs * self.encoding, 2)

    # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/rnn/python/ops/rnn_cell.py#L1005
    # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py#L382
    # Ideas noted by the author:
    #   - experiment with putting intra-sentence attention on the
    #     bi-directional wrapper as opposed to the GRU wrapper;
    #   - maybe allow the reverse phase of the birnn to attend to the forward
    #     phase's h_states.

    # Need to change the per-sentence loop into tf.while_loop();
    # tf.make_template solves the variable-sharing issue in the meantime.
    shared_bidirectional_dynamic_rnn = tf.make_template(
        'shared_bidirectional_dynamic_rnn', tf.nn.bidirectional_dynamic_rnn)

    def encode_sentence(raw_sentence):
        # Might want to use a WaveNet/ByteNet encoder here instead because it
        # seems to have implicit alignment.
        # https://arxiv.org/pdf/1601.06733.pdf: being able to attend to
        # individual words (LSTMN h_states), as opposed to the last h_state,
        # after attentive encoding only improves by 0.2%, so intra-attention
        # is not worth adding after encoding, only during.
        # Open question from the author: why do the outputs have shape
        # (100, 46, 80) while the final state has (?, 80) rather than
        # (100, 80)?  Earlier attempts used the final states instead of the
        # outputs, or passed the forward cell for both directions.
        directional_outputs, _ = shared_bidirectional_dynamic_rnn(
            self.intra_attention_GRU_cell_fw,
            self.intra_attention_GRU_cell_bw,
            raw_sentence,
            dtype=np.float32,
            sequence_length=get_seq_length(raw_sentence))

        # How is birnn tying supposed to work?  In RNNsearch the weights are
        # untied but the word-embedding inputs are tied:
        # arxiv.org/pdf/1409.0473v7.pdf.  Maybe add an auxiliary task of
        # autoencoding sentences (like in Neural Reasoner) to prevent
        # overfitting.

        # TensorFlow reverses the backward pass twice, once before and once
        # after: see tensorflow/python/ops/rnn.py#L672 and L663.
        # NOTE(review): with sequence_length set, outputs past a sequence's
        # length are zeroed by dynamic_rnn, so [-1] may pick a zero vector for
        # sentences shorter than the padded length -- confirm.
        # hidden state at last word of sent when conditioned 0 to _length_
        forward_code = tf.transpose(directional_outputs[0], perm=[1, 0, 2])[-1]
        # hidden state at first word of sent when conditioned _length_ to 0
        backward_code = tf.transpose(directional_outputs[1], perm=[1, 0, 2])[0]
        return forward_code + backward_code

    # One tensor per slice of axis 1 (legacy argument order: split_dim,
    # num_split, value), with the split axis squeezed away.
    sentences = [
        tf.squeeze(chunk, squeeze_dims=[1])
        for chunk in tf.split(1, self.max_input_len, embedded)
    ]
    encoded_inputs = [encode_sentence(sentence) for sentence in sentences]
    pre_facts = tf.transpose(tf.pack(encoded_inputs), perm=[1, 0, 2])

    # Would putting intra-sentence attention here be redundant or performant?
    # Do you need this scope?
    # with tf.variable_scope("input_fusion", reuse=True, initializer=_xavier_weight_init()):
    fused_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        self.gru_cell,
        self.gru_cell,
        pre_facts,
        dtype=np.float32,
        sequence_length=get_seq_length(pre_facts))

    fact_vecs = fused_outputs[0] + fused_outputs[1]
    return tf.nn.dropout(fact_vecs, self.dropout_placeholder)