def _score_context_linear(self, h, context, checklist):
    '''
    Score every context item from the state h and the context itself.

    The inputs (plus the checklist when self.checklist is set) are handed to
    batch_linear as one feature list and passed through activation; the
    result is then projected down to one scalar per context item.
    h: (batch_size, context_len, rnn_size)
    context: (batch_size, context_len, context_size)
    checklist: (batch_size, context_len, 1); only used when self.checklist is set
    Return context_scores (batch_size, context_len)
    '''
    hidden_size = self.rnn_size
    inputs = [h, context, checklist] if self.checklist else [h, context]
    with tf.variable_scope('ScoreContextLinear'):
        with tf.variable_scope('Combine'):
            # (batch_size, context_len, hidden_size)
            hidden = activation(batch_linear(inputs, hidden_size, False))
        with tf.variable_scope('Project'):
            # (batch_size, context_len, 1)
            projected = batch_linear(hidden, 1, False)
            # Drop the trailing singleton axis: (batch_size, context_len)
            scores = tf.squeeze(projected, [2])
    return scores
def _build_output(self, output_dict):
    '''
    Turn the RNN outputs stored under output_dict['outputs'] into logits
    over the vocab (self.num_symbols entries per step).
    '''
    # (batch_size, seq_len, output_size) once the first two dims are swapped
    batch_major = transpose_first_two_dims(output_dict['outputs'])
    return batch_linear(batch_major, self.num_symbols, True)
def _score_context_bilinear(self, h, context):
    '''
    Bilinear scoring: map h into the context space with batch_linear, then
    take the dot-product with each context vector.
    h: (batch_size, context_len, rnn_size)
    context: (batch_size, context_len, context_size)
    Return context_scores (batch_size, context_len)
    '''
    with tf.variable_scope('ScoreContextBilinear'):
        # The static size of the last context axis sets the projection width.
        target_size = context.get_shape().as_list()[-1]
        # (batch_size, context_len, context_size)
        projected = batch_linear(h, target_size, False)
        # Element-wise product summed over the feature axis:
        # (batch_size, context_len)
        scores = tf.reduce_sum(tf.mul(projected, context), 2)
    return scores
def embed_path(self, node_embedding, edge_embedding, paths):
    '''
    Embed each path from its edge label (column 1) and its last node id
    (column 2); the first node id (column 0) is not used here.
    node_embedding: (batch_size, num_nodes, node_embed_size)
    edge_embedding: (num_edge_label, edge_embed_size)
    paths: each path is a tuple of (node_id, edge_label, node_id).
        (batch_size, num_paths, 3)
    Return the activated batch_linear output of size
    self.config.node_embed_size for every path.
    '''
    edge_labels = paths[:, :, 1]
    end_nodes = paths[:, :, 2]
    # The edge table is shared across the batch; the node table is per example.
    edge_embeds = tf.nn.embedding_lookup(edge_embedding, edge_labels)
    node_embeds = batch_embedding_lookup(node_embedding, end_nodes)
    combined = batch_linear(
        [edge_embeds, node_embeds], self.config.node_embed_size, True)
    return activation(combined)
def _update_utterance(self, entity_indices, utterance, curr_utterances):
    '''
    Merge a new utterance vector into the per-node utterance matrix.

    We first transform utterance into a dense matrix of the same size as
    curr_utterances (the utterance is written into the rows named by
    entity_indices), then combine the two: a sigmoid-gated interpolation
    when self.config.learned_decay is set, otherwise
    curr_utterances * self.config.decay + new_utterance.
    entity_indices: entity ids corresponding to rows to be updated in
        curr_utterances (batch_size, entity_cache_size)
    utterance: hidden states from the RNN (batch_size, utterance_size)
    curr_utterances: current utterance matrix
        (batch_size, num_nodes, utterance_size)
    Return a tensor with the shape of curr_utterances.
    NOTE: each curr_utterance matrix should have a row (e.g. the last one) as
    padded utterance. Padded entities in entity_indices correspond to the
    padded utterance. This is handled by GraphBatch during construction of
    the input data.
    Side effect: with learned_decay, self.update_initialized is set to True
    after the first call.
    '''
    entity_inds_shape = tf.shape(entity_indices)
    B = entity_inds_shape[0]  # batch_size is a variable
    E = entity_inds_shape[1]  # number of entities to be updated
    U = self.config.utterance_size
    # Construct indices corresponding to each entry to be updated in
    # curr_utterances, which has shape (batch_size, num_nodes, utterance_size).
    # Each row of the indices matrix specifies (batch_id, node_id, utterance_dim).
    # All three index columns are flattened to length B * E * U in the same
    # (batch, entity, dim) order so that they line up row by row.
    batch_inds = tf.reshape(
        tf.tile(tf.reshape(tf.range(B), [-1, 1]), [1, E * U]), [-1, 1])
    node_inds = tf.reshape(
        tf.tile(tf.reshape(entity_indices, [-1, 1]), [1, U]), [-1, 1])
    utterance_inds = tf.reshape(tf.tile(tf.range(U), [E * B]), [-1, 1])
    inds = tf.concat(1, [batch_inds, node_inds, utterance_inds])
    # Repeat utterance for each entity, flattened to match the rows of inds
    utterance = tf.reshape(tf.tile(utterance, [1, E]), [-1])
    # validate_indices=False: presumably needed because padded entities can
    # repeat the same (batch, node) row -- TODO confirm against GraphBatch.
    new_utterance = tf.sparse_to_dense(inds, tf.shape(curr_utterances), utterance, validate_indices=False)
    if self.config.learned_decay:
        # reuse is False on the first call and True afterwards, so later
        # calls use the same 'UpdateUtterance' scope with reuse enabled.
        with tf.variable_scope('UpdateUtterance', reuse=self.update_initialized):
            weight = tf.sigmoid(
                batch_linear(
                    tf.concat(2, [curr_utterances, new_utterance]), 1, True))  # (batch_size, num_nodes, 1)
            # NOTE(review): nesting reconstructed from a whitespace-collapsed
            # source; confirm the flag update belongs inside this scope.
            if not self.update_initialized:
                self.update_initialized = True
    if self.config.learned_decay:
        # Gated interpolation between the old and the new utterance
        return tf.mul(1 - weight, curr_utterances) + tf.mul(
            weight, new_utterance)
    else:
        # Fixed decay of the old utterance plus the new one
        return curr_utterances * self.config.decay + new_utterance
def select(self, init_output, context):
    '''
    Softly select context items given init_output.

    init_output is repeated along the context axis, concatenated with
    context and mapped by batch_linear to one score per item. The sigmoid of
    that score gates the item; the gated items are summed and divided by the
    total gate mass (plus EPS).
    init_output: presumably (batch_size, rnn_size) -- TODO confirm
    context: (batch_size, context_len, context_size)
    Return
        selected_context: (batch_size, context_size)
        selection_scores: pre-sigmoid scores (batch_size, context_len)
    '''
    num_items = tf.shape(context)[1]
    # (batch_size, context_len, rnn_size)
    tiled_state = tf.tile(tf.expand_dims(init_output, 1), [1, num_items, 1])
    with tf.variable_scope('SelectEntity'):
        # (batch_size, context_len, 1)
        gate_logits = batch_linear(
            tf.concat(2, [tiled_state, context]), 1, True)
    # The returned scores are taken before the sigmoid is applied.
    selection_scores = tf.squeeze(gate_logits, [2])
    gates = tf.sigmoid(gate_logits)
    # (batch_size, context_size)
    gated_sum = tf.reduce_sum(tf.mul(gates, context), 1)
    # Normalize by the total gate mass
    gate_mass = tf.reduce_sum(gates, 1) + EPS
    selected_context = tf.div(gated_sum, gate_mass)
    return selected_context, selection_scores
def _build_output(self, output_dict):
    '''
    Combine the vocab logits and the attention scores into one set of
    logits, weighted by a sigmoid gate computed from the RNN outputs.

    Both score sets are log-normalized over their last axis and shifted by
    the log of their gate probability (vocab vs. copy) before concatenation.
    Return
        logits: (batch_size, seq_len, num_symbols + num_nodes)
        log gate probabilities [vocab, copy]: (batch_size, seq_len, 2)
    '''
    # (batch_size, seq_len, num_symbols)
    raw_vocab = super(GatedCopyGraphDecoder, self)._build_output(output_dict)
    # (batch_size, seq_len, num_nodes)
    attn_scores = transpose_first_two_dims(output_dict['attn_scores'])
    # (batch_size, seq_len, output_size)
    rnn_outputs = transpose_first_two_dims(output_dict['outputs'])

    with tf.variable_scope('Gating'):
        # (batch_size, seq_len, 1)
        prob_vocab = tf.sigmoid(batch_linear(rnn_outputs, 1, True))
    prob_copy = 1 - prob_vocab
    # EPS is added inside both logs before taking them.
    log_prob_vocab = tf.log(prob_vocab + EPS)
    log_prob_copy = tf.log(prob_copy + EPS)

    # Reweight the vocab and attn distribution and convert them to logits
    vocab_log_norm = tf.reduce_logsumexp(raw_vocab, 2, keep_dims=True)
    vocab_logits = log_prob_vocab + raw_vocab - vocab_log_norm
    attn_log_norm = tf.reduce_logsumexp(attn_scores, 2, keep_dims=True)
    attn_logits = log_prob_copy + attn_scores - attn_log_norm

    combined_logits = tf.concat(2, [vocab_logits, attn_logits])
    gate_log_probs = tf.concat(2, [log_prob_vocab, log_prob_copy])
    return combined_logits, gate_log_probs