Example 1
    def predict(self, inputs, **kwargs):
        """Predicts the resulting tensors.

        Args:
          inputs: A dictionary of input tensors keyed by names.

        Returns:
          predictions: A dictionary of prediction tensors keyed by name.
        """
        is_training = self._is_training
        options = self._model_proto

        (answer_choices, answer_choices_len,
         answer_label) = (inputs[InputFields.answer_choices_with_question],
                          inputs[InputFields.answer_choices_with_question_len],
                          inputs[InputFields.answer_label])
        batch_size = answer_choices.shape[0]

        # Convert tokens to ids.
        token_to_id_layer = token_to_id.TokenToIdLayer(options.vocab_file,
                                                       options.unk_token_id)
        answer_choices_token_ids = token_to_id_layer(answer_choices)
        answer_choices_token_ids_reshaped = tf.reshape(
            answer_choices_token_ids, [batch_size * NUM_CHOICES, -1])

        # Convert word ids to embedding vectors.
        glove_embedding_array = create_embedding_matrix(
            options.glove_file, options.vocab_file)
        embedding = tf.get_variable('word/embedding',
                                    initializer=glove_embedding_array,
                                    trainable=True)
        answer_choices_embs_reshaped = tf.nn.embedding_lookup(
            embedding, answer_choices_token_ids_reshaped, max_norm=None)

        # Encode the sequence using BiLSTM model.
        with tf.variable_scope('answer_choice_encoder'):
            _, answer_choices_feature_reshaped = rnn.RNN(
                answer_choices_embs_reshaped,
                tf.reshape(answer_choices_len, [batch_size * NUM_CHOICES]),
                options.rnn_config,
                is_training=is_training)
        answer_choices_feature = tf.reshape(answer_choices_feature_reshaped,
                                            [batch_size, NUM_CHOICES, -1])

        # Classification layer.
        output = tf.compat.v1.layers.dense(answer_choices_feature,
                                           units=1,
                                           activation=None)
        output = tf.squeeze(output, axis=-1)

        return {FIELD_ANSWER_PREDICTION: output}
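
This example scores the answer choices by flattening the per-choice tensors into a single [batch * NUM_CHOICES, ...] batch, running one shared encoder over them, and folding the result back to [batch, NUM_CHOICES, ...]. Below is a minimal sketch of that reshape pattern, with a hypothetical encoder_fn standing in for rnn.RNN; it is an illustration under those assumptions, not code from the repository.

import tensorflow as tf

NUM_CHOICES = 4  # Assumption: each question is paired with four answer choices.

def encode_choices(choice_embs, choice_len, encoder_fn):
  """choice_embs: [batch, NUM_CHOICES, max_len, dims]; choice_len: [batch, NUM_CHOICES]."""
  batch_size = choice_embs.shape[0]
  # Flatten so a single encoder processes every (question, choice) pair.
  flat_embs = tf.reshape(choice_embs,
                         [batch_size * NUM_CHOICES, -1, choice_embs.shape[-1]])
  flat_len = tf.reshape(choice_len, [batch_size * NUM_CHOICES])
  flat_features = encoder_fn(flat_embs, flat_len)  # [batch * NUM_CHOICES, dims]
  # Fold back so each example keeps one feature vector per choice.
  return tf.reshape(flat_features, [batch_size, NUM_CHOICES, -1])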
Example 2
  def _recognition_to_cognition(self, question_inp_features, question_len,
                                answer_inp_features, answer_len,
                                object_features, num_objects):
    """Creates the `RecognitionToCognition` network.

    Args:
      question_inp_features: Input question features, a [batch*NUM_CHOICES,
        max_question_len, feature_dims] float tensor.
      question_len: Question length, a [batch*NUM_CHOICES] int tensor.
      answer_inp_features: Input answer features, a [batch*NUM_CHOICES,
        max_answer_len, feature_dims] float tensor.
      answer_len: Answer length, a [batch*NUM_CHOICES] int tensor.
      object_features: Object features, a [batch, max_num_objects, object_dims]
        float tensor.
      num_objects: A [batch] int tensor.

    Returns:
      final_features: A [batch, output_dims] float tensor.
      answer_seq_features: Contextualized answer features, a [batch*NUM_CHOICES,
        max_answer_len, feature_dims] float tensor.
    """
    is_training = self._is_training
    options = self._model_proto

    (question_max_len, answer_max_len) = (tf.shape(question_inp_features)[1],
                                          tf.shape(answer_inp_features)[1])
    batch_size = object_features.shape[0]
    max_num_objects = tf.shape(object_features)[1]

    # Encode the sequence using BiLSTM model.
    with tf.variable_scope('grounding_encoder'):
      question_seq_features, _ = rnn.RNN(question_inp_features,
                                         question_len,
                                         options.rnn_config,
                                         is_training=is_training)

    with tf.variable_scope('grounding_encoder', reuse=True):
      answer_seq_features, _ = rnn.RNN(answer_inp_features,
                                       answer_len,
                                       options.rnn_config,
                                       is_training=is_training)

    # Get the question features attended by the answers.
    #   qa_mask: [batch*NUM_CHOICES, question_max_len, 1].
    #   qa_similarity: [batch*NUM_CHOICES, question_max_len, answer_max_len].
    #   qa_attention_weights: [batch*NUM_CHOICES, question_max_len, answer_max_len].
    #   attended_question: [batch*NUM_CHOICES, answer_max_len, feature_dims].
    qa_mask = tf.expand_dims(
        tf.sequence_mask(question_len, question_max_len, dtype=tf.float32), 2)
    with tf.variable_scope('qa_bilinear'):
      qa_similarity = attention_ops.bilinear(question_seq_features,
                                             answer_seq_features)
    qa_attention_weights = masked_ops.masked_softmax(data=qa_similarity,
                                                     mask=qa_mask,
                                                     dim=1)
    attended_question = tf.einsum('bqa,bqd->bad', qa_attention_weights,
                                  question_seq_features)

    # Attention over the objects.
    #   oa_mask: [batch*NUM_CHOICES, max_num_objects, 1].
    #   oa_similarity: [batch*NUM_CHOICES, max_num_objects, answer_max_len].
    #   oa_attention_weights: [batch*NUM_CHOICES, max_num_objects, answer_max_len].
    #   attended_objects: [batch*NUM_CHOICES, answer_max_len, object_dims].

    tile_fn = lambda x: tf.gather(tf.expand_dims(x, 1), [0] * NUM_CHOICES,
                                  axis=1)
    object_features = tf.reshape(
        tile_fn(object_features),
        [batch_size * NUM_CHOICES, -1, object_features.shape[-1]])
    num_objects = tf.reshape(tile_fn(num_objects), [-1])

    oa_mask = tf.expand_dims(
        tf.sequence_mask(num_objects, max_num_objects, dtype=tf.float32), 2)
    with tf.variable_scope('oa_bilinear'):
      oa_similarity = attention_ops.bilinear(object_features,
                                             answer_seq_features)
    oa_attention_weights = masked_ops.masked_softmax(data=oa_similarity,
                                                     mask=oa_mask,
                                                     dim=1)
    attended_objects = tf.einsum('boa,bod->bad', oa_attention_weights,
                                 object_features)

    # Reasoning module.
    reasoning_inp_features = tf.concat(
        [answer_seq_features, attended_question, attended_objects], -1)

    with tf.variable_scope('reasoning'):
      reasoning_seq_features, _ = rnn.RNN(reasoning_inp_features,
                                          answer_len,
                                          options.rnn_config,
                                          is_training=is_training)

    # Pool features from the sequence.
    pooling_fn = (masked_ops.masked_max_nd
                  if options.use_max_pooling else masked_ops.masked_avg_nd)

    final_seq_features = tf.concat([
        reasoning_seq_features, answer_seq_features, attended_question,
        attended_objects
    ], -1)
    final_features = pooling_fn(data=final_seq_features,
                                mask=tf.sequence_mask(answer_len,
                                                      answer_max_len,
                                                      dtype=tf.float32),
                                dim=1)

    # Export summaries.
    tf.compat.v1.summary.histogram('attention/qa_similarity', qa_similarity)
    tf.compat.v1.summary.histogram('attention/oa_similarity', oa_similarity)
    return (tf.squeeze(final_features, 1), answer_seq_features)
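
The attention steps above rely on masked_ops.masked_softmax followed by an einsum contraction. Here is a minimal sketch of what the masked softmax is assumed to compute (normalize the logits along dim while giving padded positions effectively zero weight), plus a comment tracing the contraction that pools the question features; this is an assumption about the helper, not the repository's implementation.

import tensorflow as tf

def masked_softmax(data, mask, dim):
  """data: float logits; mask: broadcastable 0/1 floats; dim: normalization axis."""
  # Push masked-out positions toward -inf so they vanish after the softmax.
  masked_logits = data + -1e9 * (1.0 - mask)
  return tf.nn.softmax(masked_logits, axis=dim)

# With qa_attention_weights: [batch*NUM_CHOICES, question_max_len, answer_max_len]
# and question_seq_features: [batch*NUM_CHOICES, question_max_len, feature_dims],
# the contraction 'bqa,bqd->bad' sums over the question axis and yields one
# attended question vector per answer token:
# [batch*NUM_CHOICES, answer_max_len, feature_dims].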
Example 3
    def predict(self, inputs, **kwargs):
        """Predicts the resulting tensors.

        Args:
          inputs: A dictionary of input tensors keyed by names.

        Returns:
          predictions: A dictionary of prediction tensors keyed by name.
        """
        is_training = self._is_training
        options = self._model_proto

        (num_objects, object_bboxes, object_labels, object_scores,
         object_features) = (inputs[InputFields.num_objects],
                             inputs[InputFields.object_bboxes],
                             inputs[InputFields.object_labels],
                             inputs[InputFields.object_scores],
                             inputs[InputFields.object_features])
        (answer_choices, answer_choices_len,
         answer_label) = (inputs[InputFields.answer_choices_with_question],
                          inputs[InputFields.answer_choices_with_question_len],
                          inputs[InputFields.answer_label])
        batch_size = answer_choices.shape[0]

        # Image feature.
        object_masks = tf.sequence_mask(num_objects,
                                        tf.shape(object_bboxes)[1],
                                        dtype=tf.float32)
        # object_features = tf.compat.v1.layers.dense(object_features,
        #                                             units=512,
        #                                             activation=tf.nn.tanh)
        image_feature = masked_ops.masked_avg_nd(object_features,
                                                 object_masks,
                                                 dim=1)

        # Convert tokens to ids.
        token_to_id_layer = token_to_id.TokenToIdLayer(options.vocab_file,
                                                       options.unk_token_id)
        answer_choices_token_ids = token_to_id_layer(answer_choices)
        answer_choices_token_ids_reshaped = tf.reshape(
            answer_choices_token_ids, [batch_size * NUM_CHOICES, -1])

        # Convert word ids to embedding vectors.
        glove_embedding_array = create_embedding_matrix(
            options.glove_file, options.vocab_file)
        embedding = tf.get_variable('word/embedding',
                                    initializer=glove_embedding_array,
                                    trainable=True)
        answer_choices_embs_reshaped = tf.nn.embedding_lookup(
            embedding, answer_choices_token_ids_reshaped, max_norm=None)

        # Encode the sequence using BiLSTM model.
        with tf.variable_scope('answer_choice_encoder'):
            _, answer_choices_feature_reshaped = rnn.RNN(
                answer_choices_embs_reshaped,
                tf.reshape(answer_choices_len, [batch_size * NUM_CHOICES]),
                options.rnn_config,
                is_training=is_training)
        answer_choices_feature = tf.reshape(answer_choices_feature_reshaped,
                                            [batch_size, NUM_CHOICES, -1])
        fused_features = tf.concat([
            answer_choices_feature,
            tf.tile(image_feature, [1, NUM_CHOICES, 1])
        ], -1)
        output = tf.compat.v1.layers.dense(fused_features,
                                           units=512,
                                           activation=tf.nn.relu6)
        output = tf.compat.v1.layers.dense(output, units=1, activation=None)
        output = tf.squeeze(output, axis=-1)

        return {FIELD_ANSWER_PREDICTION: output}
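
This example pools a single image feature from the detected objects with masked_ops.masked_avg_nd. The sketch below shows one plausible reading of that op: an average over dim that ignores padded entries and keeps the reduced axis, which is why the result can be tiled along axis 1 above. It is an assumption about the helper, not its actual implementation.

import tensorflow as tf

def masked_avg_nd(data, mask, dim, epsilon=1e-8):
  """data: [..., n, d] floats; mask: [..., n] floats in {0, 1}."""
  mask = tf.expand_dims(mask, -1)                              # [..., n, 1]
  total = tf.reduce_sum(data * mask, axis=dim, keepdims=True)  # masked sum
  count = tf.reduce_sum(mask, axis=dim, keepdims=True)         # valid entries
  return total / tf.maximum(count, epsilon)                    # [..., 1, d]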
Example 4
    def generate_adversarial_masks(self,
                                   choice_ids,
                                   choice_lengths,
                                   question_lengths,
                                   labels,
                                   hard=True):
        """Masked language modeling."""
        options = self._model_proto
        is_training = self._is_training

        batch_size = choice_ids.shape[0]
        max_choice_len = tf.shape(choice_ids)[-1]

        with tf.variable_scope('adversarial'):
            # Lookup for token embeddings.
            # Note: DO NOT share it with BERT; use a brand-new embedding matrix instead.
            with tf.variable_scope("embeddings", reuse=False):
                (choice_embeddings_reshaped,
                 _) = bert_modeling.embedding_lookup(
                     input_ids=tf.reshape(choice_ids,
                                          [batch_size * NUM_CHOICES, -1]),
                     vocab_size=self._bert_config.vocab_size,
                     embedding_size=self._bert_config.hidden_size,
                     initializer_range=self._bert_config.initializer_range,
                     word_embedding_name="word_embeddings",
                     use_one_hot_embeddings=False,
                     word_embedding_trainable=options.
                     adversarial_train_word_embedding)
                choice_lengths_reshaped = tf.reshape(choice_lengths, [-1])

            # Create label embedding.
            if options.use_label_embedding:
                full_label_embeddings = tf.get_variable(
                    name='label_embedding',
                    shape=[2, self._bert_config.hidden_size],
                    initializer=bert_modeling.create_initializer(
                        self._bert_config.initializer_range))
                one_hot_labels = tf.one_hot(labels,
                                            NUM_CHOICES,
                                            on_value=1,
                                            off_value=0)
                label_embeddings = tf.nn.embedding_lookup(
                    full_label_embeddings, one_hot_labels)
                label_embeddings_reshaped = tf.reshape(label_embeddings, [
                    batch_size * NUM_CHOICES, 1, self._bert_config.hidden_size
                ])

                choice_embeddings_reshaped += label_embeddings_reshaped

            # Layer norm.
            choice_embeddings_reshaped = bert_modeling.layer_norm_and_dropout(
                choice_embeddings_reshaped,
                dropout_prob=self._bert_config.hidden_dropout_prob)

            # RNN.
            choice_features_reshaped, _ = rnn.RNN(
                choice_embeddings_reshaped,
                choice_lengths_reshaped,
                options=options.adversarial_rnn,
                is_training=is_training)

            # Fully-connected layer
            choice_features = tf.reshape(choice_features_reshaped, [
                batch_size, NUM_CHOICES, -1, choice_features_reshaped.shape[-1]
            ])
            choice_shortcut_logits = slim.fully_connected(choice_features,
                                                          num_outputs=1,
                                                          activation_fn=None,
                                                          scope='logits')
            choice_shortcut_logits = tf.multiply(
                options.adversarial_logits_scale,
                tf.squeeze(choice_shortcut_logits, -1))
        # END - with tf.variable_scope('adversarial'):

        # Gumbel-Softmax to get the probable shortcut.
        choice_masks = tf.logical_and(
            tf.sequence_mask(choice_lengths, maxlen=max_choice_len),
            tf.logical_not(
                tf.sequence_mask(question_lengths, maxlen=max_choice_len)))
        choice_masks = tf.cast(choice_masks, tf.float32)

        temperature = tf.Variable(options.temperature_init_value,
                                  name='adversarial/temperature_var',
                                  trainable=options.temperature_trainable,
                                  dtype=tf.float32)
        temperature = tf.maximum(temperature, EPSILON)

        tf.summary.histogram('shortcut/logits', choice_shortcut_logits)
        tf.summary.scalar('metrics/temperature', temperature)

        choice_shortcut_logits = choice_shortcut_logits - \
            INF * (1.0 - choice_masks)
        tf.summary.histogram('shortcut/probas',
                             tf.nn.softmax(choice_shortcut_logits))

        a_sample = RelaxedOneHotCategorical(temperature,
                                            logits=choice_shortcut_logits,
                                            allow_nan_stats=False).sample()

        if hard:
            k = tf.shape(choice_shortcut_logits)[-1]
            a_hard_sample = tf.cast(tf.one_hot(tf.argmax(a_sample, -1), k),
                                    a_sample.dtype)
            a_sample = tf.stop_gradient(a_hard_sample - a_sample) + a_sample

        # Returns the mask sampled from the distribution.
        return a_sample, choice_shortcut_logits, choice_features, temperature, choice_masks
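
When hard=True, the sample is discretized with the straight-through trick: the forward pass uses a hard one-hot vector, while gradients flow through the relaxed Gumbel-Softmax sample. A minimal, self-contained sketch of that trick follows; it restates the three lines above as a standalone helper rather than adding anything from outside the example.

import tensorflow as tf

def straight_through(soft_sample):
  """soft_sample: [..., k] relaxed one-hot sample, e.g. from RelaxedOneHotCategorical."""
  k = tf.shape(soft_sample)[-1]
  hard_sample = tf.cast(tf.one_hot(tf.argmax(soft_sample, -1), k),
                        soft_sample.dtype)
  # Forward value equals hard_sample; the gradient is taken w.r.t. soft_sample.
  return tf.stop_gradient(hard_sample - soft_sample) + soft_sample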
Example 5
  def predict(self, inputs, **kwargs):
    """Predicts the resulting tensors.

    Args:
      inputs: A dictionary of input tensors keyed by names.

    Returns:
      predictions: A dictionary of prediction tensors keyed by name.
    """
    options = self._model_proto
    is_training = self._is_training

    token_to_id_layer = token_to_id.TokenToIdLayer(options.vocab_file,
                                                   options.unk_token_id)
    fc_scope_fn = hyperparams.build_hyperparams(options.fc_hyperparams,
                                                is_training)

    # Extract input fields.
    (question, question_len, answer_choices,
     answer_choices_len) = (inputs[InputFields.question],
                            inputs[InputFields.question_len],
                            inputs[InputFields.answer_choices],
                            inputs[InputFields.answer_choices_len])
    batch_size = answer_choices.shape[0]

    # Convert question tokens into token ids.
    question_token_ids = token_to_id_layer(question)

    # Convert answer choice tokens into token ids.
    answer_choices_token_ids = token_to_id_layer(answer_choices)
    answer_choices_token_ids = tf.reshape(answer_choices_token_ids,
                                          [batch_size * NUM_CHOICES, -1])
    answer_choices_len = tf.reshape(answer_choices_len,
                                    [batch_size * NUM_CHOICES])

    # Convert word ids to embedding vectors.
    glove_embedding_array = create_embedding_matrix(options.glove_file,
                                                    options.vocab_file)
    embedding = tf.get_variable('word/embedding',
                                initializer=glove_embedding_array,
                                trainable=True)
    question_embs = tf.nn.embedding_lookup(embedding,
                                           question_token_ids,
                                           max_norm=None)
    answer_choices_embs = tf.nn.embedding_lookup(embedding,
                                                 answer_choices_token_ids,
                                                 max_norm=None)

    # Tile the question embeddings.
    question_embs = tf.gather(tf.expand_dims(question_embs, 1),
                              [0] * NUM_CHOICES,
                              axis=1)
    question_embs = tf.reshape(
        question_embs, [batch_size * NUM_CHOICES, -1, question_embs.shape[-1]])
    question_len = tf.gather(tf.expand_dims(question_len, 1), [0] * NUM_CHOICES,
                             axis=1)
    question_len = tf.reshape(question_len, [batch_size * NUM_CHOICES])

    # Encode the sequence using BiLSTM model.
    with tf.variable_scope('question_encoder'):
      _, question_features = rnn.RNN(question_embs,
                                     question_len,
                                     options.rnn_config,
                                     is_training=is_training)
    with tf.variable_scope('answer_choice_encoder'):
      _, answer_features = rnn.RNN(answer_choices_embs,
                                   answer_choices_len,
                                   options.rnn_config,
                                   is_training=is_training)
    final_features = tf.concat(
        [answer_features, answer_features * question_features], axis=-1)

    # MLP.
    with slim.arg_scope(fc_scope_fn()):
      with tf.variable_scope('classification'):
        with tf.variable_scope('hidden'):
          output = tf.contrib.layers.fully_connected(final_features,
                                                     num_outputs=1024,
                                                     activation_fn=tf.nn.relu)
          output = tf.contrib.layers.dropout(
              output,
              keep_prob=options.dropout_keep_prob,
              is_training=is_training)
        with tf.variable_scope('output'):
          output = tf.contrib.layers.fully_connected(output,
                                                     num_outputs=1,
                                                     activation_fn=None)
          output = tf.reshape(output, [batch_size, NUM_CHOICES])

    return {
        FIELD_ANSWER_PREDICTION: output,
    }
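
The question embeddings above are repeated once per answer choice with a tf.gather over a newly inserted axis, which is equivalent to tf.tile along that axis. Below is a minimal sketch of that idiom wrapped as a reusable helper (a hypothetical tile_per_choice, not part of the repository), shown under the assumption of four answer choices.

import tensorflow as tf

NUM_CHOICES = 4  # Assumption: four answer choices per question.

def tile_per_choice(x):
  """x: [batch, ...]; returns the rows repeated to [batch * NUM_CHOICES, ...]."""
  # Insert a choice axis, repeat index 0 NUM_CHOICES times, then flatten it out.
  tiled = tf.gather(tf.expand_dims(x, 1), [0] * NUM_CHOICES, axis=1)
  new_shape = tf.concat([[-1], tf.shape(x)[1:]], axis=0)
  return tf.reshape(tiled, new_shape)

# Example: question_embs [batch, max_len, dims] -> [batch * NUM_CHOICES, max_len, dims];
#          question_len  [batch]                -> [batch * NUM_CHOICES].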