def encode_decode(self):
    """Build the actor network: encode the input sequence, then decode a tour
    with a 3-step-memory pointer mechanism.

    Side effects (graph construction only):
        self.tour      -- [batch_size, max_length+1] city indices; the first
                          index is appended again at the end (return to start).
        self.log_prob  -- sum over decoding steps of log p(idx_t) (for REINFORCE).
        self.entropies -- sum over decoding steps of the policy entropy.
        Also emits 'log_prob_mean' and 'entropies_mean' scalar summaries.
    """
    # Embed raw inputs, then run the (self-attentive) encoder stack.
    actor_embedding = embed_seq(input_seq=self.input_, from_=self.dimension,
                                to_=self.input_embed, is_training=self.is_training,
                                BN=True, initializer=self.initializer)
    actor_encoding = encode_seq(input_seq=actor_embedding, input_dim=self.input_embed,
                                num_stacks=self.num_stacks, num_heads=self.num_heads,
                                num_neurons=self.num_neurons, is_training=self.is_training)
    # At inference the encoder presumably runs on a single instance; tile it so
    # the decoder can sample batch_size candidate tours in parallel.
    if not self.is_training:  # idiomatic form of `== False`
        actor_encoding = tf.tile(actor_encoding, [self.batch_size, 1, 1])

    idx_list, log_probs, entropies = [], [], []  # tour indices, log-probs, entropies
    mask = tf.zeros((self.batch_size, self.max_length))  # 1.0 marks already-visited actions

    n_hidden = actor_encoding.get_shape().as_list()[2]  # == input_embed

    # Pointer-attention parameters.
    W_ref = tf.get_variable("W_ref", [1, n_hidden, self.num_units], initializer=self.initializer)
    W_q = tf.get_variable("W_q", [self.query_dim, self.num_units], initializer=self.initializer)
    v = tf.get_variable("v", [self.num_units], initializer=self.initializer)

    # Pre-project the encoder states once; they are the attention "references"
    # for every decoding step. [batch_size, seq_length, num_units]
    encoded_ref = tf.nn.conv1d(actor_encoding, W_ref, 1, "VALID")

    # The decoder query is built from the last three visited cities' encodings.
    query1 = tf.zeros((self.batch_size, n_hidden))  # most recent state
    query2 = tf.zeros((self.batch_size, n_hidden))  # previous state
    query3 = tf.zeros((self.batch_size, n_hidden))  # previous previous state
    W_1 = tf.get_variable("W_1", [n_hidden, self.query_dim], initializer=self.initializer)
    W_2 = tf.get_variable("W_2", [n_hidden, self.query_dim], initializer=self.initializer)
    W_3 = tf.get_variable("W_3", [n_hidden, self.query_dim], initializer=self.initializer)

    for step in range(self.max_length):  # sample one city per step from the pointer
        query = tf.nn.relu(tf.matmul(query1, W_1) + tf.matmul(query2, W_2)
                           + tf.matmul(query3, W_3))
        logits = pointer(encoded_ref=encoded_ref, query=query, mask=mask,
                         W_ref=W_ref, W_q=W_q, v=v,
                         C=config.C, temperature=config.temperature)

        prob = distr.Categorical(logits)  # logits are the masked scores
        idx = prob.sample()

        idx_list.append(idx)                    # chosen city index
        log_probs.append(prob.log_prob(idx))    # log-prob of that choice
        entropies.append(prob.entropy())        # policy entropy at this step
        mask = mask + tf.one_hot(idx, self.max_length)  # forbid revisiting

        # Gather the encoding of the chosen city and shift the query history.
        idx_ = tf.stack([tf.range(self.batch_size, dtype=tf.int32), idx], 1)
        query3 = query2
        query2 = query1
        query1 = tf.gather_nd(actor_encoding, idx_)

    idx_list.append(idx_list[0])  # close the tour: return to the start city
    self.tour = tf.stack(idx_list, axis=1)  # [batch_size, max_length+1] permutations
    self.log_prob = tf.add_n(log_probs)  # total log-probability, for backprop
    self.entropies = tf.add_n(entropies)
    tf.summary.scalar('log_prob_mean', tf.reduce_mean(self.log_prob))
    tf.summary.scalar('entropies_mean', tf.reduce_mean(self.entropies))
def r_net(self):
    """Build the R-Net style reading-comprehension graph.

    Pipeline: stacked BiGRU question encoder -> context encoder initialized
    from the question states -> question-aware attention layer -> context
    self-attention layer -> pointer network producing start/end scores.

    Side effect: sets self.pos_scores = [start_pos_scores, end_pos_scores].
    """
    hps = self._hps

    # --- Question encoder: num_layers stacked BiGRUs. -----------------------
    with tf.variable_scope('question_encoding'):
        q_rep = self.question_inputs
        q_states = []  # final (fw, bw) state per layer, reused below
        for layer in xrange(hps.num_layers):
            with tf.variable_scope('layer%d' % layer):
                q_cell = tf.contrib.rnn.GRUCell(hps.size)
                q_rep, q_state = tf.nn.bidirectional_dynamic_rnn(
                    q_cell, q_cell, q_rep,
                    sequence_length=self.question_lens, dtype=self.dtype)
                q_rep = tf.concat(q_rep, axis=-1)  # join fw/bw outputs
                q_states.append(q_state)
        assert q_rep.get_shape()[-1].value == 2 * hps.size

    # --- Context encoder, seeded with the question's final states. ----------
    with tf.variable_scope('context_encoding'):
        c_rep = self.context_inputs
        for layer in xrange(hps.num_layers):
            with tf.variable_scope('layer%d' % layer):
                c_cell = tf.contrib.rnn.GRUCell(hps.size)
                c_rep, c_state = tf.nn.bidirectional_dynamic_rnn(
                    c_cell, c_cell, c_rep,
                    initial_state_fw=q_states[layer][0],
                    initial_state_bw=q_states[layer][1],
                    sequence_length=self.context_lens)
                c_rep = tf.concat(c_rep, axis=-1)
        assert c_rep.get_shape()[-1].value == 2 * hps.size

    # --- Question-aware layer: attend context over question, fuse, re-encode.
    with tf.variable_scope('question_aware'):
        q_a_cell = tf.contrib.rnn.GRUCell(hps.size)
        context_q = multihead_attention(c_rep, q_rep)
        fused = sfu(c_rep, context_q)
        c_rep, state = tf.nn.bidirectional_dynamic_rnn(
            q_a_cell, q_a_cell, fused, self.context_lens, dtype=self.dtype)
        c_rep = tf.concat(c_rep, axis=-1)

    # --- Self-attention over the context, same fuse-and-encode pattern. -----
    with tf.variable_scope('self_attention'):
        s_a_cell = tf.contrib.rnn.GRUCell(hps.size)
        context_c = multihead_attention(c_rep, c_rep)
        fused = sfu(c_rep, context_c)
        c_rep, state = tf.nn.bidirectional_dynamic_rnn(
            s_a_cell, s_a_cell, fused, self.context_lens, dtype=self.dtype)
        c_rep = tf.concat(c_rep, axis=-1)
        # NOTE(review): an earlier revision applied dropout here in train mode
        # (tf.nn.dropout(c_rep, 1.0 - hps.dropout_rate)); left disabled.
        assert c_rep.get_shape()[-1].value == 2 * hps.size

    # --- Pointer output: one warm-up step on the question, then start/end. ---
    with tf.variable_scope('output_layer'):
        answer_cell = tf.contrib.rnn.GRUCell(2 * hps.size)
        with tf.variable_scope('pointer'):
            v_q = tf.get_variable('question_parameters',
                                  [hps.batch_size, 2 * hps.size],
                                  self.dtype, tf.truncated_normal_initializer())
            # Initial pointer state comes from attending the question with v_q.
            _, state = pointer(q_rep, v_q, answer_cell)
            tf.get_variable_scope().reuse_variables()  # share pointer weights
            start_pos_scores, state = pointer(c_rep, state, answer_cell)
            tf.get_variable_scope().reuse_variables()
            end_pos_scores, state = pointer(c_rep, state, answer_cell)
            self.pos_scores = [start_pos_scores, end_pos_scores]