Example 1
def question_encoder(question, hparams, name="encoder"):
    """Question encoder, run LSTM encoder and get the last output as encoding."""
    with tf.variable_scope(name, "encoder", values=[question]):
        question = common_layers.flatten4d3d(question)
        padding = common_attention.embedding_to_padding(question)
        length = common_attention.padding_to_length(padding)

        max_question_length = hparams.max_question_length
        question = question[:, :max_question_length, :]
        actual_question_length = common_layers.shape_list(question)[1]
        length = tf.minimum(length, max_question_length)
        padding = [[0, 0], [0, max_question_length - actual_question_length],
                   [0, 0]]
        question = tf.pad(question, padding)
        question_shape = question.get_shape().as_list()
        question_shape[1] = max_question_length
        question.set_shape(question_shape)

        # apply tanh dropout on question embedding
        question = tf.tanh(question)
        question = tf.nn.dropout(question, keep_prob=1. - hparams.dropout)

        question = [question[:, i, :] for i in range(max_question_length)]

        # rnn_layers = [_get_rnn_cell(hparams)
        #               for _ in range(hparams.num_rnn_layers)]
        # rnn_multi_cell = tf.nn.rnn_cell.MultiRNNCell(rnn_layers)
        rnn_cell = _get_rnn_cell(hparams)
        # outputs, _ = tf.nn.dynamic_rnn(
        #     rnn_cell, question, length, dtype=tf.float32)
        _, state = tf.nn.static_rnn(rnn_cell,
                                    question,
                                    sequence_length=length,
                                    dtype=tf.float32)
        # outputs = [tf.expand_dims(output, axis=1) for output in outputs]
        # outputs = tf.concat(outputs, axis=1)

        # utils.collect_named_outputs("vqa_attention_debug", "question_output",
        #                             outputs)
        # utils.collect_named_outputs("vqa_attention_debug", "question_state",
        #                             state.h)

        # batch_size = common_layers.shape_list(outputs)[0]
        # row_indices = tf.range(batch_size)
        # # length - 1 as index
        # indices = tf.transpose([row_indices, tf.maximum(length-1, 0)])
        # last_output = tf.gather_nd(outputs, indices)

        # utils.collect_named_outputs("vqa_attention_debug",
        #                             "question_final_output", last_output)

    return state.h
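
These snippets assume TensorFlow 1.x plus the tensor2tensor helpers (common_layers, common_attention) and a module-level _get_rnn_cell helper. The sketch below shows one hypothetical way to wire the encoder up; the _get_rnn_cell stub and the hidden_size hparam are assumptions for illustration, not the original repo's definitions.

import tensorflow as tf
from tensor2tensor.layers import common_attention
from tensor2tensor.layers import common_layers


def _get_rnn_cell(hparams):
    # Hypothetical stand-in for the repo's helper: a single LSTM cell sized
    # by an assumed hparams.hidden_size field.
    return tf.nn.rnn_cell.BasicLSTMCell(hparams.hidden_size)


hparams = tf.contrib.training.HParams(
    max_question_length=15, dropout=0.1, hidden_size=512)
# Embedded question tokens, 4-D as expected by flatten4d3d:
# [batch, length, 1, emb_dim].
question = tf.placeholder(tf.float32, [None, None, 1, 512])
encoding = question_encoder(question, hparams)  # [batch, hidden_size]
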
Example 2
def question_encoder(question, hparams, name="encoder"):
  """Question encoder, run LSTM encoder and get the last output as encoding."""
  with tf.variable_scope(name, "encoder", values=[question]):
    question = common_layers.flatten4d3d(question)
    padding = common_attention.embedding_to_padding(question)
    length = common_attention.padding_to_length(padding)

    max_question_length = hparams.max_question_length
    question = question[:, :max_question_length, :]
    actual_question_length = common_layers.shape_list(question)[1]
    length = tf.minimum(length, max_question_length)
    padding = [[0, 0],
               [0, max_question_length-actual_question_length],
               [0, 0]]
    question = tf.pad(question, padding)
    question_shape = question.get_shape().as_list()
    question_shape[1] = max_question_length
    question.set_shape(question_shape)

    # apply tanh dropout on question embedding
    question = tf.tanh(question)
    question = tf.nn.dropout(question, keep_prob=1.-hparams.dropout)

    question = [question[:, i, :] for i in range(max_question_length)]

    # rnn_layers = [_get_rnn_cell(hparams)
    #               for _ in range(hparams.num_rnn_layers)]
    # rnn_multi_cell = tf.contrib.rnn.MultiRNNCell(rnn_layers)
    rnn_cell = _get_rnn_cell(hparams)
    # outputs, _ = tf.nn.dynamic_rnn(
    #     rnn_cell, question, length, dtype=tf.float32)
    _, state = tf.nn.static_rnn(rnn_cell, question, sequence_length=length,
                                dtype=tf.float32)
    # outputs = [tf.expand_dims(output, axis=1) for output in outputs]
    # outputs = tf.concat(outputs, axis=1)

    # utils.collect_named_outputs("vqa_attention_debug", "question_output",
    #                             outputs)
    # utils.collect_named_outputs("vqa_attention_debug", "question_state",
    #                             state.h)

    # batch_size = common_layers.shape_list(outputs)[0]
    # row_indices = tf.range(batch_size)
    # # length - 1 as index
    # indices = tf.transpose([row_indices, tf.maximum(length-1, 0)])
    # last_output = tf.gather_nd(outputs, indices)

    # utils.collect_named_outputs("vqa_attention_debug",
    #                             "question_final_output", last_output)

  return state.h
Example 3
def question_encoder(question, hparams, name="encoder"):
    """Question encoder, run LSTM encoder and get the last output as encoding."""
    with tf.variable_scope(name, "encoder", values=[question]):
        question = common_layers.flatten4d3d(question)
        padding = common_attention.embedding_to_padding(question)
        length = common_attention.padding_to_length(padding)

        max_question_length = hparams.max_question_length
        question = question[:, :max_question_length, :]
        actual_question_length = common_layers.shape_list(question)[1]
        length = tf.minimum(length, max_question_length)
        padding = [[0, 0], [0, max_question_length - actual_question_length],
                   [0, 0]]
        question = tf.pad(question, padding)
        question_shape = question.get_shape().as_list()
        question_shape[1] = max_question_length
        question.set_shape(question_shape)

        question = [question[:, i, :] for i in range(max_question_length)]

        # rnn_layers = [_get_rnn_cell(hparams)
        #               for _ in range(hparams.num_rnn_layers)]
        # rnn_multi_cell = tf.contrib.rnn.MultiRNNCell(rnn_layers)
        rnn_cell = _get_rnn_cell(hparams)
        # outputs, _ = tf.nn.dynamic_rnn(
        #     rnn_cell, question, length, dtype=tf.float32)
        outputs, _ = tf.nn.static_rnn(rnn_cell,
                                      question,
                                      sequence_length=length,
                                      dtype=tf.float32)
        outputs = [tf.expand_dims(output, axis=1) for output in outputs]
        outputs = tf.concat(outputs, axis=1)

        batch_size = common_layers.shape_list(outputs)[0]
        row_indices = tf.range(batch_size)
        # length - 1 as index
        indices = tf.transpose([row_indices, tf.maximum(length - 1, 0)])
        last_output = tf.gather_nd(outputs, indices)

    return last_output
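
The last-output extraction in this variant relies on tf.gather_nd with one (row, length - 1) index pair per example. A tiny standalone illustration (TensorFlow 1.x, toy values chosen here, not from the original repo):

import tensorflow as tf

# outputs: [batch=2, time=3, dim=1]; length marks how many steps are valid.
outputs = tf.constant([[[1.], [2.], [3.]],
                       [[4.], [5.], [6.]]])
length = tf.constant([2, 3])
row_indices = tf.range(tf.shape(outputs)[0])
# Pair each row with its last valid timestep (length - 1, floored at 0).
indices = tf.transpose([row_indices, tf.maximum(length - 1, 0)])
last_output = tf.gather_nd(outputs, indices)
with tf.Session() as sess:
    print(sess.run(last_output))  # [[2.], [6.]]
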
Example 4

    def compute_knowledge_selection_and_loss(self, features, encoder_output,
                                             fact_embedding, fact_lengths,
                                             margin, num_negative_samples):
        """Compute knowledge selection and loss.

    Args:
      features: features.
      encoder_output: <tf.float32>[batch_size, input_length, hidden_dim]
      fact_embedding: <tf.float32>[batch_size*triple_num, max_triple_length,
        emb_dim]
      fact_lengths: # <tf.int32>[batch_size*triple_num]
      margin: integer value for max margin in TransE loss,
      num_negative_samples: shuffle and sample multiple negative examples for
      the TransE loss

    Returns:
      knowledge_weights:
      knowledge_loss:
    """
        hparams = self._hparams
        encoder_output_shape = common_layers.shape_list(encoder_output)
        encoder_hidden_dim = encoder_output_shape[-1]
        inputs = features["inputs"]
        # <tf.float32>[batch_size, input_length, emb_dim]
        inputs = tf.squeeze(inputs, 2)
        # <tf.float32>[batch_size, input_length]
        context_padding = common_attention.embedding_to_padding(inputs)
        # <tf.float32>[batch_size]
        context_lens = tf.to_float(
            common_attention.padding_to_length(context_padding))
        # <tf.float32>[batch_size, 1]
        context_lens = tf.expand_dims(context_lens, -1)
        # Compute context vector summary.
        # <tf.float32>[batch_size, hidden_dim]
        context_vector_summary = compute_summary_embedding(
            encoder_output, context_lens, hparams)
        knowledge_encoder_output = compute_average_embedding(
            fact_embedding, fact_lengths)
        # <tf.float32>[batch_size, triple_num, emb_dim]
        knowledge_encoder_output = tf.reshape(
            knowledge_encoder_output,
            [-1, self.triple_num, encoder_hidden_dim])
        original_knowledge_encoder_output = knowledge_encoder_output
        if hparams.similarity_fuction == "dot_product":
            triple_logits = tf.squeeze(
                tf.matmul(knowledge_encoder_output,
                          tf.expand_dims(context_vector_summary, 2)), -1)
        elif hparams.similarity_fuction == "bilinear":
            # Tile the context vector summary.
            # <tf.float32>[batch_size, triple_num*hidden_dim]
            tiled_context_vector = tf.tile(context_vector_summary,
                                           [1, self.triple_num])
            # <tf.float32>[batch_size, triple_num, hidden_dim]
            context_vector = tf.reshape(
                tiled_context_vector,
                [-1, self.triple_num, encoder_hidden_dim])
            # compute outer product
            context_vector = tf.expand_dims(context_vector, -1)
            knowledge_encoder_output = tf.expand_dims(knowledge_encoder_output,
                                                      2)
            # <tf.float32>[batch_size, triple_num, hidden_dim, hidden_dim]
            outer_product = tf.matmul(context_vector, knowledge_encoder_output)
            outer_product = tf.reshape(
                outer_product,
                [-1, self.triple_num, encoder_hidden_dim * encoder_hidden_dim])
            triple_logits = tf.squeeze(
                tf.layers.dense(outer_product, 1, name="knolwedge_final_mlp"),
                -1)

        avg_triple_loss = 0.0
        triple_labels = features["triple_labels"]

        subject_mask = tf.reshape(
            features["subject_mask"],
            [-1, self.triple_num, hparams.max_triple_length])
        subject_mask = tf.reshape(subject_mask,
                                  [-1, hparams.max_triple_length])

        predicate_mask = tf.reshape(
            features["predicate_mask"],
            [-1, self.triple_num, hparams.max_triple_length])
        predicate_mask = tf.reshape(predicate_mask,
                                    [-1, hparams.max_triple_length])

        object_mask = tf.reshape(
            features["object_mask"],
            [-1, self.triple_num, hparams.max_triple_length])
        object_mask = tf.reshape(object_mask, [-1, hparams.max_triple_length])

        # Each mask has shape [batch_size*triple_num, max_triple_length]; the
        # masked averages below have shape [batch_size*triple_num, emb_dim].
        subject_length = tf.cast(
            tf.expand_dims(tf.reduce_sum(subject_mask, -1), 1),
            tf.float32)  # [batch_size*triple_num, 1]
        object_length = tf.cast(
            tf.expand_dims(tf.reduce_sum(object_mask, -1), 1), tf.float32)
        predicate_length = tf.cast(
            tf.expand_dims(tf.reduce_sum(predicate_mask, -1), 1), tf.float32)

        # expand dimension 2 to be able to broadcast
        subject_mask = tf.cast(tf.expand_dims(subject_mask, 2), tf.float32)
        predicate_mask = tf.cast(tf.expand_dims(predicate_mask, 2), tf.float32)
        object_mask = tf.cast(tf.expand_dims(object_mask, 2), tf.float32)

        subject_vect = tf.reduce_sum(tf.multiply(
            fact_embedding, subject_mask), 1) / (
                subject_length +
                tf.broadcast_to(tf.constant([1e-5]), tf.shape(subject_length)))
        object_vect = tf.reduce_sum(tf.multiply(
            fact_embedding, object_mask), 1) / (
                object_length +
                tf.broadcast_to(tf.constant([1e-5]), tf.shape(object_length)))
        predicate_vect = tf.reduce_sum(
            tf.multiply(fact_embedding, predicate_mask),
            1) / (predicate_length + tf.broadcast_to(
                tf.constant([1e-5]), tf.shape(predicate_length)))

        # Shuffled rows to generate adversarial samples
        shuffled_subject_vect = []
        shuffled_object_vect = []

        for _ in range(num_negative_samples):
            shuffled_subject_vect += [
                tf.gather(
                    subject_vect,
                    tf.random.shuffle(tf.range(tf.shape(subject_vect)[0])))
            ]  # [bs*tn,d]
            shuffled_object_vect += [
                tf.gather(
                    object_vect,
                    tf.random.shuffle(tf.range(tf.shape(object_vect)[0])))
            ]  # [bs*tn,d]

        # KB pretraining loss

        positive_loss = tf.reduce_mean(
            tf.squared_difference(subject_vect + predicate_vect, object_vect))
        negative_loss = 0
        for n_adv in range(num_negative_samples):
            negative_loss += tf.reduce_mean(
                tf.squared_difference(
                    shuffled_subject_vect[n_adv] + predicate_vect,
                    object_vect))
            negative_loss += tf.reduce_mean(
                tf.squared_difference(subject_vect + predicate_vect,
                                      shuffled_object_vect[n_adv]))

        # TransE Loss

        negative_loss = negative_loss / (2 * num_negative_samples)

        transe_loss = tf.clip_by_value(margin + positive_loss - negative_loss,
                                       clip_value_min=0,
                                       clip_value_max=100)
        if hparams.mode != tf.estimator.ModeKeys.PREDICT:
            triple_losses = tf.nn.weighted_cross_entropy_with_logits(
                labels=triple_labels,
                logits=triple_logits,
                pos_weight=hparams.pos_weight)
            avg_triple_loss = tf.reduce_mean(triple_losses)
            tf.summary.scalar("triple_loss", avg_triple_loss)

        return triple_logits, avg_triple_loss, original_knowledge_encoder_output, transe_loss
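
Beyond the selection logits, this variant also pre-trains the fact embeddings with a TransE-style max-margin objective: subject + predicate should land close to object, while row-shuffled subjects and objects act as negatives. A minimal standalone sketch of that loss follows, with illustrative shapes and random tensors standing in for the real subject/predicate/object vectors.

import tensorflow as tf

# Illustrative stand-ins: [batch_size*triple_num, emb_dim] head/relation/tail.
subject_vect = tf.random.normal([8, 64])
predicate_vect = tf.random.normal([8, 64])
object_vect = tf.random.normal([8, 64])
margin = 1.0
num_negative_samples = 2

positive_loss = tf.reduce_mean(
    tf.squared_difference(subject_vect + predicate_vect, object_vect))
negative_loss = 0.0
for _ in range(num_negative_samples):
    # Shuffle rows independently to build corrupted (negative) triples.
    shuffled_subject = tf.gather(
        subject_vect, tf.random.shuffle(tf.range(tf.shape(subject_vect)[0])))
    shuffled_object = tf.gather(
        object_vect, tf.random.shuffle(tf.range(tf.shape(object_vect)[0])))
    negative_loss += tf.reduce_mean(
        tf.squared_difference(shuffled_subject + predicate_vect, object_vect))
    negative_loss += tf.reduce_mean(
        tf.squared_difference(subject_vect + predicate_vect, shuffled_object))
negative_loss /= (2 * num_negative_samples)

# Hinge at zero (clipped above at 100, as in the example).
transe_loss = tf.clip_by_value(margin + positive_loss - negative_loss,
                               clip_value_min=0, clip_value_max=100)
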
Example 5
  def compute_knowledge_selection_and_loss(self, features, encoder_output,
                                           fact_embedding, fact_lengths):
    """Compute knowledge selection and loss.

    Args:
      features: features.
      encoder_output: <tf.float32>[batch_size, input_length, hidden_dim]
      fact_embedding: <tf.float32>[batch_size*max_triple_num, max_triple_length,
        emb_dim]
      fact_lengths: <tf.int32>[batch_size*max_triple_num]

    Returns:
      triple_logits: <tf.float32>[batch_size, triple_num] knowledge selection
        logits.
      avg_triple_loss: scalar knowledge selection loss.
      original_knowledge_encoder_output: <tf.float32>[batch_size, triple_num,
        emb_dim] averaged fact embeddings.
    """
    hparams = self._hparams
    encoder_output_shape = common_layers.shape_list(encoder_output)
    encoder_hidden_dim = encoder_output_shape[-1]
    inputs = features["inputs"]
    # <tf.float32>[batch_size, input_length, emb_dim]
    inputs = tf.squeeze(inputs, 2)
    # <tf.float32>[batch_size, input_length]
    context_padding = common_attention.embedding_to_padding(inputs)
    # <tf.float32>[batch_size]
    context_lens = tf.to_float(
        common_attention.padding_to_length(context_padding))
    # <tf.float32>[batch_size, 1]
    context_lens = tf.expand_dims(context_lens, -1)
    # Compute context vector summary.
    # <tf.float32>[batch_size, hidden_dim]
    context_vector_summary = compute_summary_embedding(encoder_output,
                                                       context_lens, hparams)
    knowledge_encoder_output = compute_average_embedding(
        fact_embedding, fact_lengths)
    # <tf.float32>[batch_size, triple_num, emb_dim]
    knowledge_encoder_output = tf.reshape(
        knowledge_encoder_output, [-1, self.triple_num, encoder_hidden_dim])
    original_knowledge_encoder_output = knowledge_encoder_output
    if hparams.similarity_fuction == "dot_product":
      triple_logits = tf.squeeze(
          tf.matmul(knowledge_encoder_output,
                    tf.expand_dims(context_vector_summary, 2)), -1)
    elif hparams.similarity_fuction == "bilinear":
      # Tile the context vector summary.
      # <tf.float32>[batch_size, max_triple_num*hidden_dim]
      tiled_context_vector = tf.tile(context_vector_summary,
                                     [1, self.triple_num])
      # <tf.float32>[batch_size, max_triple_num, hidden_dim]
      context_vector = tf.reshape(tiled_context_vector,
                                  [-1, self.triple_num, encoder_hidden_dim])
      # compute outer product
      context_vector = tf.expand_dims(context_vector, -1)
      knowledge_encoder_output = tf.expand_dims(knowledge_encoder_output, 2)
      # <tf.float32>[batch_size, max_triple_num, hidden_dim, hidden_dim]
      outer_product = tf.matmul(context_vector, knowledge_encoder_output)
      outer_product = tf.reshape(
          outer_product,
          [-1, self.triple_num, encoder_hidden_dim * encoder_hidden_dim])
      triple_logits = tf.squeeze(
          tf.layers.dense(outer_product, 1, name="knolwedge_final_mlp"), -1)

    avg_triple_loss = 0.0
    triple_labels = features["triple_labels"]
    triple_labels = triple_labels[:, :self.triple_num]
    if hparams.mode != tf.estimator.ModeKeys.PREDICT:
      triple_losses = tf.nn.weighted_cross_entropy_with_logits(
          labels=triple_labels,
          logits=triple_logits,
          pos_weight=hparams.pos_weight)
      avg_triple_loss = tf.reduce_mean(triple_losses)
      tf.summary.scalar("triple_loss", avg_triple_loss)

    return triple_logits, avg_triple_loss, original_knowledge_encoder_output
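
For the "dot_product" branch above, each fact vector is scored against the context summary with a batched matmul, giving one logit per triple. A short sketch with illustrative shapes (random tensors stand in for the real encoder outputs, not taken from the original repo):

import tensorflow as tf

batch_size, triple_num, hidden_dim = 4, 10, 128
# [batch_size, hidden_dim] summary of the context encoder output.
context_vector_summary = tf.random.normal([batch_size, hidden_dim])
# [batch_size, triple_num, hidden_dim] averaged fact embeddings.
knowledge_encoder_output = tf.random.normal(
    [batch_size, triple_num, hidden_dim])

# [b, t, h] x [b, h, 1] -> [b, t, 1], squeezed to one logit per triple.
triple_logits = tf.squeeze(
    tf.matmul(knowledge_encoder_output,
              tf.expand_dims(context_vector_summary, 2)), -1)
# triple_logits: [batch_size, triple_num]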