Esempio n. 1
0
    def test_masked_avg_nd(self):
        """Checks `ops.masked_avg_nd` on partially and fully masked inputs."""
        # Two batch entries, three rows each, two features per row.
        data = [[[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]],
                [[7.0, 8.0], [9.0, 10.0], [11.0, 12.0]]]

        # Each case pairs a 0/1 mask with the expected result.
        cases = [
            # Rows 0 and 2 kept in the first entry, row 1 in the second.
            ([[1, 0, 1], [0, 1, 0]], [[[3, 4]], [[9, 10]]]),
            # Nothing kept: the expected result is all zeros.
            ([[0, 0, 0], [0, 0, 0]], [[[0, 0]], [[0, 0]]]),
        ]
        for mask_values, expected in cases:
            mask = tf.convert_to_tensor(mask_values, dtype=tf.float32)
            self.assertAllClose(ops.masked_avg_nd(data=data, mask=mask),
                                expected)
Esempio n. 2
0
    def _encode_knowledge(self,
                          tokens,
                          tokens_len,
                          vocab_file,
                          glove_file,
                          slim_fc_scope,
                          default_dims=128,
                          is_training=True):
        """Embeds knowledge tokens and averages them over the knowledge axis.

        Args:
          tokens: A [batch, max_sentence_len, max_knowledge_len] int tensor.
          tokens_len: A [batch, max_sentence_len] int tensor.
          vocab_file: Vocabulary file, forwarded to `_create_embedding_matrix`.
          glove_file: GloVe file, forwarded to `_create_embedding_matrix`.
          slim_fc_scope: Slim arg scope applied to the optional projection
            layer.
          default_dims: Forwarded to `_create_embedding_matrix`; also the
            width the embedding is projected to when its last dimension
            differs.
          is_training: Not referenced by this method.

        Returns:
          A [batch, max_sentence_len, dims] float tensor.
        """
        initial_embedding = _create_embedding_matrix(
            glove_file, vocab_file, default_dims=default_dims)
        embedding_table = tf.get_variable('knowledge/embedding',
                                          initializer=initial_embedding,
                                          trainable=True)

        # Linearly project the whole table when its width is not the
        # requested one.
        width_mismatch = embedding_table.shape[-1] != default_dims
        if width_mismatch:
            with slim.arg_scope(slim_fc_scope):
                embedding_table = slim.fully_connected(
                    embedding_table,
                    num_outputs=default_dims,
                    activation_fn=None,
                    scope='glove_projection')

        embedded_tokens = tf.nn.embedding_lookup(embedding_table,
                                                 tokens,
                                                 max_norm=None)

        # 1.0 for real knowledge tokens, 0.0 for padding positions.
        max_knowledge_len = tf.shape(tokens)[2]
        valid_mask = tf.sequence_mask(lengths=tokens_len,
                                      maxlen=max_knowledge_len,
                                      dtype=tf.float32)

        # Average over the knowledge axis, then squeeze that axis away.
        pooled = masked_ops.masked_avg_nd(data=embedded_tokens,
                                          mask=valid_mask,
                                          dim=2)
        return tf.squeeze(pooled, axis=2)
Esempio n. 3
0
    def predict(self, inputs, **kwargs):
        """Predicts the resulting tensors.

        Averages the object features of each example, encodes every answer
        choice with the RNN configured in the model proto, concatenates both
        and scores each choice with a two-layer dense head.

        Args:
          inputs: A dictionary of input tensors keyed by names. This method
            reads `InputFields.num_objects`, `InputFields.object_bboxes`,
            `InputFields.object_features`,
            `InputFields.answer_choices_with_question` and
            `InputFields.answer_choices_with_question_len`.
          **kwargs: Not used.

        Returns:
          predictions: A dictionary of prediction tensors keyed by name. It
            holds the per-choice scores under `FIELD_ANSWER_PREDICTION`.
        """
        is_training = self._is_training
        options = self._model_proto

        num_objects = inputs[InputFields.num_objects]
        object_bboxes = inputs[InputFields.object_bboxes]
        object_features = inputs[InputFields.object_features]
        answer_choices = inputs[InputFields.answer_choices_with_question]
        answer_choices_len = inputs[
            InputFields.answer_choices_with_question_len]
        # NOTE(review): relies on a statically known batch dimension.
        batch_size = answer_choices.shape[0]

        # Image feature: masked average over the valid objects only.
        object_masks = tf.sequence_mask(num_objects,
                                        tf.shape(object_bboxes)[1],
                                        dtype=tf.float32)
        image_feature = masked_ops.masked_avg_nd(object_features,
                                                 object_masks,
                                                 dim=1)

        # Convert tokens to ids.
        token_to_id_layer = token_to_id.TokenToIdLayer(options.vocab_file,
                                                       options.unk_token_id)
        answer_choices_token_ids = token_to_id_layer(answer_choices)
        # Fold the choice axis into the batch axis for the encoder.
        answer_choices_token_ids_reshaped = tf.reshape(
            answer_choices_token_ids, [batch_size * NUM_CHOICES, -1])

        # Convert word ids to embedding vectors.
        glove_embedding_array = create_embedding_matrix(
            options.glove_file, options.vocab_file)
        embedding = tf.get_variable('word/embedding',
                                    initializer=glove_embedding_array,
                                    trainable=True)
        answer_choices_embs_reshaped = tf.nn.embedding_lookup(
            embedding, answer_choices_token_ids_reshaped, max_norm=None)

        # Encode each choice with the RNN configured by `options.rnn_config`.
        with tf.variable_scope('answer_choice_encoder'):
            _, answer_choices_feature_reshaped = rnn.RNN(
                answer_choices_embs_reshaped,
                tf.reshape(answer_choices_len, [batch_size * NUM_CHOICES]),
                options.rnn_config,
                is_training=is_training)
        # Restore the separate choice axis.
        answer_choices_feature = tf.reshape(answer_choices_feature_reshaped,
                                            [batch_size, NUM_CHOICES, -1])

        # Repeat the image feature for every choice and fuse it with the
        # choice encoding. A new name is used so the `inputs` argument is not
        # rebound.
        fused_feature = tf.concat([
            answer_choices_feature,
            tf.tile(image_feature, [1, NUM_CHOICES, 1])
        ], -1)

        # Two-layer scoring head. The final layer must consume the hidden
        # activations; feeding it the fused feature again would discard the
        # hidden layer.
        hidden = tf.compat.v1.layers.dense(fused_feature,
                                           units=512,
                                           activation=tf.nn.relu6)
        output = tf.compat.v1.layers.dense(hidden, units=1, activation=None)
        output = tf.squeeze(output, axis=-1)

        return {FIELD_ANSWER_PREDICTION: output}
Esempio n. 4
0
File: vbert.py Progetto: yekeren/VCR
    def predict(self, inputs, **kwargs):
        """Scores answer choices from pooled region features and BERT output.

        Args:
          inputs: A dictionary of input tensors keyed by names.

        Returns:
          predictions: A dictionary of prediction tensors keyed by name; the
            per-choice scores are stored under `FIELD_ANSWER_PREDICTION`.
        """
        is_training = self._is_training
        options = self._model_proto

        image = inputs[InputFields.img_data]
        height = inputs[InputFields.img_height]
        width = inputs[InputFields.img_width]
        num_objects = inputs[InputFields.num_objects]
        raw_bboxes = inputs[InputFields.object_bboxes]
        object_labels = inputs[InputFields.object_labels]
        object_scores = inputs[InputFields.object_scores]
        answer_choices = inputs[InputFields.answer_choices_with_question]
        answer_choices_len = inputs[
            InputFields.answer_choices_with_question_len]
        # Looked up but not consumed anywhere below.
        answer_label = inputs[InputFields.answer_label]

        # NOTE(review): relies on a statically known batch dimension.
        batch_size = image.shape[0]

        # Draw the detections on the images and export them as a summary.
        image_batch_shape = tf.shape(image)
        batch_height = image_batch_shape[1]
        batch_width = image_batch_shape[2]
        batch_bboxes = _to_batch_coordinates(raw_bboxes, height, width,
                                             batch_height, batch_width)
        annotated_image = visualization.draw_bounding_boxes_on_image_tensors(
            image, num_objects, batch_bboxes, object_labels, object_scores)
        tf.summary.image('vcr/detection', annotated_image, max_outputs=10)

        # Per-box Fast R-CNN features, averaged over the valid boxes only.
        image_float = tf.cast(image, tf.float32)
        region_features = fast_rcnn.FastRCNN(image_float,
                                             batch_bboxes,
                                             options=options.fast_rcnn_config,
                                             is_training=is_training)
        max_num_boxes = tf.shape(batch_bboxes)[1]
        box_mask = tf.sequence_mask(num_objects,
                                    max_num_boxes,
                                    dtype=tf.float32)
        image_feature = masked_ops.masked_avg_nd(region_features,
                                                 box_mask,
                                                 dim=1)

        # Token strings -> ids, with the choice axis folded into the batch.
        flat_choice_shape = [batch_size * NUM_CHOICES, -1]
        to_ids = token_to_id.TokenToIdLayer(options.bert_vocab_file,
                                            options.bert_unk_token_id)
        choice_ids = to_ids(answer_choices)
        choice_ids_flat = tf.reshape(choice_ids, flat_choice_shape)

        max_choice_len = tf.shape(answer_choices)[-1]
        choice_mask = tf.sequence_mask(answer_choices_len,
                                       maxlen=max_choice_len)
        choice_mask_flat = tf.reshape(choice_mask, flat_choice_shape)

        # Run BERT on the flattened choices and take its pooled output.
        bert_config = BertConfig.from_json_file(options.bert_config_file)
        bert_model = BertModel(bert_config,
                               is_training,
                               input_ids=choice_ids_flat,
                               input_mask=choice_mask_flat)
        pooled_flat = bert_model.get_pooled_output()
        choice_feature = tf.reshape(pooled_flat,
                                    [batch_size, NUM_CHOICES, -1])

        # NOTE(review): `assignment_map` is computed but not used in this
        # method; confirm where the BERT checkpoint is actually restored.
        assignment_map, _ = get_assignment_map_from_checkpoint(
            tf.global_variables(), options.bert_checkpoint_file)

        # Repeat the image feature per choice and append it to the BERT one.
        image_feature_tiled = tf.tile(image_feature, [1, NUM_CHOICES, 1])
        fused_feature = tf.concat([choice_feature, image_feature_tiled], -1)

        # Single linear layer producing one score per choice.
        scores = tf.compat.v1.layers.dense(fused_feature,
                                           units=1,
                                           activation=None)
        scores = tf.squeeze(scores, axis=-1)

        return {FIELD_ANSWER_PREDICTION: scores}