예제 #1
 def _score_context_linear(self, h, context, checklist):
     """Score every context position with a small feed-forward network.

     Concatenate state h and context (plus the checklist when
     ``self.checklist`` is set), combine them into a vector, then project
     that vector to a scalar.

     Args:
         h: (batch_size, context_len, rnn_size)
         context: (batch_size, context_len, context_size)
         checklist: (batch_size, context_len, 1); only fed to the network
             when ``self.checklist`` is truthy.

     Returns:
         context_scores: (batch_size, context_len)
     """
     attn_size = self.rnn_size
     with tf.variable_scope('ScoreContextLinear'):
         with tf.variable_scope('Combine'):
             # The two feature sets are mutually exclusive; without the
             # ``else`` the checklist variant would always be overwritten.
             if self.checklist:
                 feature = [h, context, checklist]
             else:
                 feature = [h, context]
             attns = activation(batch_linear(
                 feature, attn_size,
                 False))  # (batch_size, context_len, attn_size)
         with tf.variable_scope('Project'):
             attns = tf.squeeze(batch_linear(attns, 1, False),
                                [2])  # (batch_size, context_len)
             # Disabled alternative: subtract a learned scalar multiple of
             # the checklist from the scores.
             #if self.checklist:
             #    weight = tf.get_variable('cl_weight', [])
             #    attns = attns - tf.scalar_mul(weight, tf.squeeze(checklist, [2]))
     return attns
예제 #2
 def _build_output(self, output_dict):
     """Take RNN outputs and produce logits over the vocab.

     Args:
         output_dict: mapping that holds the RNN outputs under the key
             ``'outputs'``. The first two dimensions are swapped before
             projection, so the input is presumably time-major
             (seq_len, batch_size, output_size) -- TODO confirm.

     Returns:
         logits with ``self.num_symbols`` entries on the last axis, i.e.
         (batch_size, seq_len, num_symbols) given the shape noted below.
     """
     outputs = output_dict['outputs']
     outputs = transpose_first_two_dims(
         outputs)  # (batch_size, seq_len, output_size)
     logits = batch_linear(outputs, self.num_symbols, True)
     # Repetition penalty is currently disabled.
     #logits = BasicDecoder.penalize_repetition(logits)
     return logits
예제 #3
 def _score_context_bilinear(self, h, context):
     """Project h to context_size then do dot-product with context.

     Args:
         h: (batch_size, context_len, rnn_size)
         context: (batch_size, context_len, context_size)

     Returns:
         context_scores: (batch_size, context_len)
     """
     # The projection width is read from the static shape of ``context``,
     # so its last dimension must be known at graph-construction time.
     context_size = context.get_shape().as_list()[-1]
     with tf.variable_scope('ScoreContextBilinear'):
         h = batch_linear(h, context_size,
                          False)  # (batch_size, context_len, context_size)
         # Element-wise product summed over the feature axis is a
         # per-position dot product.
         attns = tf.reduce_sum(tf.mul(h, context),
                               2)  # (batch_size, context_len)
     return attns
예제 #4
 def embed_path(self, node_embedding, edge_embedding, paths):
     """Compute embedding of a path (edge_label, node_id).

     Args:
         node_embedding: (batch_size, num_nodes, node_embed_size)
         edge_embedding: (num_edge_label, edge_embed_size)
         paths: (batch_size, num_paths, 3); each path is a tuple of
             (node_id, edge_label, node_id). Only the edge label
             (index 1) and the second node id (index 2) are used here.

     Returns:
         Path embeddings whose last dimension is
         ``self.config.node_embed_size``; presumably
         (batch_size, num_paths, node_embed_size) -- TODO confirm against
         ``batch_linear``.
     """
     edge_embeds = tf.nn.embedding_lookup(edge_embedding, paths[:, :, 1])
     node_embeds = batch_embedding_lookup(node_embedding, paths[:, :, 2])
     path_embed_size = self.config.node_embed_size
     path_embeds = activation(
         batch_linear([edge_embeds, node_embeds], path_embed_size, True))
     return path_embeds
예제 #5
    def _update_utterance(self, entity_indices, utterance, curr_utterances):
        """Merge a new utterance into the per-node utterance matrix.

        We first transform utterance into a dense matrix of the same size as
        curr_utterances, then combine the two (a learned gate when
        ``self.config.learned_decay`` is set, otherwise a fixed decay plus
        sum).

        Args:
            entity_indices: entity ids corresponding to rows to be updated in
                curr_utterances, (batch_size, entity_cache_size).
            utterance: hidden states from the RNN,
                (batch_size, utterance_size).
            curr_utterances: current utterance matrix,
                (batch_size, num_nodes, utterance_size).

        Returns:
            The updated utterance matrix, same shape as curr_utterances.

        NOTE: each curr_utterance matrix should have a row (e.g. the last
        one) as padded utterance. Padded entities in entity_indices
        correspond to the padded utterance. This is handled by GraphBatch
        during construction of the input data.
        """
        entity_inds_shape = tf.shape(entity_indices)
        B = entity_inds_shape[0]  # batch_size is a variable
        E = entity_inds_shape[1]  # number of entities to be updated
        U = self.config.utterance_size
        # Construct indices corresponding to each entry to be updated in
        # curr_utterances, which has shape
        # (batch_size, num_nodes, utterance_size). Therefore each row in the
        # indices matrix specifies (batch_id, node_id, utterance_dim).
        batch_inds = tf.reshape(
            tf.tile(tf.reshape(tf.range(B), [-1, 1]), [1, E * U]), [-1, 1])
        node_inds = tf.reshape(
            tf.tile(tf.reshape(entity_indices, [-1, 1]), [1, U]), [-1, 1])
        utterance_inds = tf.reshape(tf.tile(tf.range(U), [E * B]), [-1, 1])
        inds = tf.concat(1, [batch_inds, node_inds, utterance_inds])

        # Repeat utterance for each entity
        utterance = tf.reshape(tf.tile(utterance, [1, E]), [-1])
        # NOTE(review): the arguments after ``inds`` were missing from the
        # source and are reconstructed from the docstring ("dense matrix of
        # the same size as curr_utterances"). Index validation is turned off
        # because the rows are neither sorted nor unique (padded entities
        # share one row) -- confirm against the upstream implementation.
        new_utterance = tf.sparse_to_dense(
            inds, tf.shape(curr_utterances), utterance,
            validate_indices=False)

        if self.config.learned_decay:
            # NOTE(review): the ``reuse`` argument and the ``batch_linear``
            # call were missing from the source; they are reconstructed from
            # the ``update_initialized`` flag below and from how sibling
            # methods build a 1-unit gate -- confirm.
            with tf.variable_scope('UpdateUtterance',
                                   reuse=self.update_initialized):
                weight = tf.sigmoid(
                    batch_linear(
                        tf.concat(2, [curr_utterances, new_utterance]), 1,
                        True))  # (batch_size, num_nodes, 1)
                # After the first call the gate variables exist, so later
                # calls must reuse them instead of creating new ones.
                if not self.update_initialized:
                    self.update_initialized = True
            return tf.mul(1 - weight, curr_utterances) + tf.mul(
                weight, new_utterance)
        else:
            return curr_utterances * self.config.decay + new_utterance
예제 #6
 def select(self, init_output, context):
     """Softly select context entries with per-entry sigmoid gates.

     Args:
         init_output: tensor that is expanded at axis 1 and tiled along the
             context axis; (batch_size, rnn_size) per the shape comment
             below.
         context: (batch_size, context_len, context_size)

     Returns:
         A pair ``(selected_context, selection_scores)``:
         selected_context is the gate-weighted sum of ``context`` over the
         context axis divided by the total gate mass (plus EPS),
         (batch_size, context_size); selection_scores are the pre-sigmoid
         gate values with the trailing unit axis squeezed out.
     """
     num_entries = tf.shape(context)[1]
     # Broadcast the initial output so every context entry is paired with it.
     expanded_state = tf.expand_dims(init_output, 1)
     tiled_state = tf.tile(
         expanded_state,
         [1, num_entries, 1])  # (batch_size, context_len, rnn_size)
     with tf.variable_scope('SelectEntity'):
         gate_inputs = tf.concat(2, [tiled_state, context])
         gate_logits = batch_linear(gate_inputs, 1,
                                    True)  # (batch_size, context_len, 1)
         selection_scores = tf.squeeze(gate_logits, [2])
         gates = tf.sigmoid(gate_logits)
         weighted_sum = tf.reduce_sum(tf.mul(gates, context),
                                      1)  # (batch_size, context_size)
         # Normalize by the total gate mass; EPS guards against division by
         # zero when every gate is (numerically) closed.
         gate_mass = tf.reduce_sum(gates, 1) + EPS
         selected_context = tf.div(weighted_sum, gate_mass)
     return selected_context, selection_scores
예제 #7
 def _build_output(self, output_dict):
     """Mix vocabulary logits and attention (copy) scores with a learned gate.

     A sigmoid gate computed from the RNN outputs gives the probability of
     generating from the vocabulary; its complement is the probability of
     copying. Each distribution is log-normalized over its last axis and
     shifted by the log of its gate probability before concatenation.

     Args:
         output_dict: mapping with at least the keys ``'outputs'`` and
             ``'attn_scores'``; it is also passed to the parent class's
             ``_build_output``.

     Returns:
         A pair ``(logits, gate_log_probs)``: the gated vocab and copy
         log-scores concatenated on axis 2, and the log gate probabilities
         ``[log_prob_vocab, log_prob_copy]`` concatenated on axis 2.
     """
     base_logits = super(GatedCopyGraphDecoder, self)._build_output(
         output_dict)  # (batch_size, seq_len, num_symbols)
     copy_scores = transpose_first_two_dims(
         output_dict['attn_scores'])  # (batch_size, seq_len, num_nodes)
     decoder_states = transpose_first_two_dims(
         output_dict['outputs'])  # (batch_size, seq_len, output_size)

     with tf.variable_scope('Gating'):
         gate_logit = batch_linear(decoder_states, 1, True)
         prob_vocab = tf.sigmoid(gate_logit)  # (batch_size, seq_len, 1)
         prob_copy = 1 - prob_vocab
         # EPS keeps the logs finite when the gate saturates.
         log_prob_vocab = tf.log(prob_vocab + EPS)
         log_prob_copy = tf.log(prob_copy + EPS)

     # Reweight the vocab and attn distribution and convert them to logits
     vocab_log_norm = tf.reduce_logsumexp(base_logits, 2, keep_dims=True)
     copy_log_norm = tf.reduce_logsumexp(copy_scores, 2, keep_dims=True)
     gated_vocab = log_prob_vocab + base_logits - vocab_log_norm
     gated_copy = log_prob_copy + copy_scores - copy_log_norm

     combined_logits = tf.concat(2, [gated_vocab, gated_copy])
     gate_log_probs = tf.concat(2, [log_prob_vocab, log_prob_copy])
     return combined_logits, gate_log_probs