コード例 #1
0
def _update_decoded_example(decoded_example, options):
    """Repacks raw decoded features into nested, id-encoded dictionaries.

    The three detection fields are popped and replaced by one 'detections'
    entry, the first two entries of the image shape are stored as
    'img_height' and 'img_width', and the question plus every answer /
    rationale choice is replaced by the `to_dict()` of a `MixedSequence`
    built from its token ids and its tag field.

    Args:
      decoded_example: A tensor dictionary keyed by name. Modified in place.
      options: An instance of reader_pb2.Reader.

    Returns:
      decoded_example: The same instance with content modified.
    """
    text_to_id = token_to_id.TokenToIdLayer(
        options.vocab_file, options.out_of_vocabulary_token_id)
    # 0 is passed as the layer's second argument (presumably the
    # out-of-vocabulary id for detection classes -- confirm).
    class_to_id = token_to_id.TokenToIdLayer(options.detection_vocab_file, 0)

    # Image shape.
    shape = tf.shape(decoded_example['img_data'])

    # Bounding boxes.
    boxes = decoded_example.pop('detection_boxes')
    class_ids = class_to_id(decoded_example.pop('detection_classes'))
    scores = decoded_example.pop('detection_scores')
    detections = Detections(boxes, class_ids, scores)

    decoded_example['img_height'] = shape[0]
    decoded_example['img_width'] = shape[1]
    decoded_example['detections'] = detections.to_dict()

    # Answer and rationale choices: the raw token entry is popped and the
    # same key is re-used for the packed dictionary.
    for idx in range(NUM_CHOICES):
        answer_key = 'answer_choice_%i' % idx
        rationale_key = 'rationale_choice_%i' % idx

        answer = MixedSequence(
            text_to_id(decoded_example.pop(answer_key)),
            decoded_example.pop('answer_choice_tag_%i' % idx))
        rationale = MixedSequence(
            text_to_id(decoded_example.pop(rationale_key)),
            decoded_example.pop('rationale_choice_tag_%i' % idx))

        decoded_example[answer_key] = answer.to_dict()
        decoded_example[rationale_key] = rationale.to_dict()

    # Question.
    question_ids = text_to_id(decoded_example.pop('question'))
    question = MixedSequence(question_ids,
                             decoded_example.pop('question_tag'))
    decoded_example['question'] = question.to_dict()

    return decoded_example
コード例 #2
0
def initialize(options, dt):
    """Fills `dt` with vocabulary, embedding and bias members.

    Args:
      options: A model_pb2.Cap2SGPreprocess proto.
      dt: A DataTuple instance; its attributes are assigned in place.

    Returns:
      dt: The same DataTuple instance that was passed in.

    Raises:
      ValueError: If `options` is not a Cap2SGPreprocess proto or `dt` is
        not a DataTuple.
    """
    if not isinstance(options, model_pb2.Cap2SGPreprocess):
        raise ValueError('Options has to be a Cap2SGPreprocess proto.')
    if not isinstance(dt, DataTuple):
        raise ValueError('Invalid DataTuple object.')

    # GloVe data first, then the token <-> id mappings that depend on it.
    glove = _load_glove_data(options.glove_vocabulary_file,
                             options.glove_embedding_file)
    token2id, id2token = _read_vocabulary(options.vocabulary_file, glove,
                                          options.minimum_frequency)

    dt.vocab_size = len(token2id)
    dt.token2id_func = token_to_id.TokenToIdLayer(token2id, oov_id=0)
    dt.id2token_func = id_to_token.IdToTokenLayer(id2token, oov='OOV')

    # Word embeddings: a variable when trainable, a constant otherwise.
    dt.dims = options.embedding_dims
    initial_embeddings = _initialize_from_glove(glove, token2id, dt.dims)
    if options.embedding_trainable:
        dt.embeddings = tf.get_variable(
            'embeddings',
            initializer=initial_embeddings,
            trainable=options.embedding_trainable)
    else:
        dt.embeddings = tf.constant(initial_embeddings, name='embeddings')

    def _lookup(x):
        # Reads `dt.embeddings` at call time, not at definition time.
        return tf.nn.embedding_lookup(dt.embeddings, x)

    dt.embedding_func = _lookup

    # Class biases.
    biases = _initialize_biases(dt.embeddings, options.bias_mode)
    dt.bias_entity, dt.bias_attribute, dt.bias_relation = biases
    return dt
コード例 #3
0
ファイル: finetune_cc.py プロジェクト: yekeren/VCR
    def __init__(self, model_proto, is_training):
        """Sets up the token-id layer, BERT config, FC scope and field names.

        Args:
          model_proto: A model_pb2.FinetuneCC proto.
          is_training: Forwarded to the base class and to
            `hyperparams.build_hyperparams`.

        Raises:
          ValueError: If `model_proto` is not a model_pb2.FinetuneCC.
        """
        super(FinetuneCC, self).__init__(model_proto, is_training)

        if not isinstance(model_proto, model_pb2.FinetuneCC):
            raise ValueError('Options has to be an FinetuneCC proto.')

        self._token_to_id_func = token_to_id.TokenToIdLayer(
            model_proto.bert_vocab_file, model_proto.bert_unk_token_id)
        self._bert_config = BertConfig.from_json_file(
            model_proto.bert_config_file)

        fc_scope_fn = hyperparams.build_hyperparams(
            model_proto.fc_hyperparams, is_training)
        self._slim_fc_scope = fc_scope_fn()

        # The rationale and answer settings read parallel sets of fields.
        if model_proto.rationale_model:
            fields = (
                InputFields.rationale_label,
                InputFields.rationale_choices_with_question,
                InputFields.rationale_choices_with_question_tag,
                InputFields.rationale_choices_with_question_len,
            )
        else:
            fields = (
                InputFields.answer_label,
                InputFields.answer_choices_with_question,
                InputFields.answer_choices_with_question_tag,
                InputFields.answer_choices_with_question_len,
            )
        (self._field_label, self._field_choices, self._field_choices_tag,
         self._field_choices_len) = fields
コード例 #4
0
def _get_class_embedding_vectors(label,
                                 vocab_file,
                                 vocab_size,
                                 embedding_dims=300,
                                 scope='object_embedding',
                                 max_norm=None):
    """Looks up a trainable embedding vector for each class label.

    Labels are converted to ids through `vocab_file`, with `vocab_size` passed
    as the unknown-token id. The embedding table has `vocab_size + 1` rows,
    so that id is a valid row index.

    Args:
      label: A string tensor.
      vocab_file: Path to the vocabulary file.
      vocab_size: Size of the vocabulary.
      embedding_dims: Dimensions of the embedding vectors.
      scope: Variable scope under which the 'weights' table is created.
      max_norm: Forwarded to `tf.nn.embedding_lookup`.

    Returns:
      label_embedding: Embedding of the label.
    """
    to_id = token_to_id.TokenToIdLayer(vocab_file, unk_token_id=vocab_size)
    label_ids = to_id(label)

    table_shape = [vocab_size + 1, embedding_dims]
    with tf.variable_scope(scope):
        table = tf.get_variable('weights', shape=table_shape, trainable=True)

    return tf.nn.embedding_lookup(table, label_ids, max_norm=max_norm)
コード例 #5
0
def _get_class_embedding_vectors(label,
                                 vocab_file,
                                 embeddings_index,
                                 init_width=0.03):
  """Looks up class-label embeddings in a pre-initialized trainable table.

  The table returned by `_create_embedding_matrix` is extended with one
  extra, uniformly sampled row. The index of that row (the original row
  count) is passed to the token-to-id layer as the unknown-token id.

  Args:
    label: A string tensor.
    vocab_file: Path to the vocabulary file.
    embeddings_index: Forwarded to `_create_embedding_matrix`.
    init_width: Half-width of the uniform range used for the extra row; also
      forwarded to `_create_embedding_matrix`.

  Returns:
    label_embedding: Embedding of the label.
  """
  pretrained = _create_embedding_matrix(embeddings_index, vocab_file,
                                        init_width)
  num_known, dims = pretrained.shape

  # Extra row appended after the known entries.
  unk_row = np.random.uniform(-init_width, init_width, (1, dims))
  table = np.concatenate([pretrained, unk_row.astype(np.float32)])

  embedding = tf.get_variable('object/embedding',
                              initializer=table,
                              trainable=True)

  to_id = token_to_id.TokenToIdLayer(vocab_file, unk_token_id=num_known)
  label_ids = to_id(label)
  return tf.nn.embedding_lookup(embedding, label_ids, max_norm=_MAX_NORM)
コード例 #6
0
    def predict(self, inputs, **kwargs):
        """Computes answer logits and registers the BERT checkpoint restore.

        Args:
          inputs: A dictionary of input tensors keyed by names.
          **kwargs: Not referenced in this method.

        Returns:
          predictions: A dictionary holding the answer logits under
            FIELD_ANSWER_PREDICTION.
        """
        options = self._model_proto
        is_training = self._is_training

        # Layers and configuration handed to the logit computation.
        token_to_id_layer = token_to_id.TokenToIdLayer(
            options.bert_vocab_file, options.bert_unk_token_id)
        bert_config = BertConfig.from_json_file(options.bert_config_file)
        fc_scope_fn = hyperparams.build_hyperparams(options.fc_hyperparams,
                                                    is_training)
        slim_fc_scope = fc_scope_fn()

        # Prediction.
        choices = inputs[self._field_answer_choices]
        choices_len = inputs[self._field_answer_choices_len]
        answer_logits = self._predict_logits(choices, choices_len,
                                             token_to_id_layer, bert_config,
                                             slim_fc_scope,
                                             options.dropout_keep_prob,
                                             is_training)

        # Restore from checkpoint; the map is built from the global
        # variables that exist at this point.
        checkpoint_file = options.bert_checkpoint_file
        assignment_map, _ = get_assignment_map_from_checkpoint(
            tf.global_variables(), checkpoint_file)
        tf.compat.v1.train.init_from_checkpoint(checkpoint_file,
                                                assignment_map)

        return {FIELD_ANSWER_PREDICTION: answer_logits}
コード例 #7
0
ファイル: token_to_id_test.py プロジェクト: yekeren/WSSGG
    def test_token_to_id(self):
        """Tokens in the dict map to their ids; all others map to 97."""
        layer = token_to_id.TokenToIdLayer({'hello': 5, 'world': 11}, 97)

        cases = [
            (['hello', ',', 'world', '!'], [5, 97, 11, 97]),
            (['hell', ',', 'world', '!!'], [97, 97, 11, 97]),
        ]
        for tokens, expected_ids in cases:
            output = layer(tf.convert_to_tensor(tokens))
            self.assertAllEqual(output, expected_ids)
コード例 #8
0
ファイル: bert.py プロジェクト: yekeren/VCR
    def predict(self, inputs, **kwargs):
        """Scores each answer choice from BERT's pooled output.

        Args:
          inputs: A dictionary of input tensors keyed by names.
          **kwargs: Not referenced in this method.

        Returns:
          predictions: A dictionary holding the per-choice scores under
            FIELD_ANSWER_PREDICTION.
        """
        options = self._model_proto
        is_training = self._is_training

        choices = inputs[InputFields.answer_choices_with_question]
        choices_len = inputs[
            InputFields.answer_choices_with_question_len]
        # The label is looked up but not used when building predictions.
        answer_label = inputs[InputFields.answer_label]

        # Create model layers.
        to_id = token_to_id.TokenToIdLayer(options.bert_vocab_file,
                                           options.bert_unk_token_id)

        # Flatten the choice axis into the leading axis.
        batch_size = choices.shape[0]
        flat_shape = [batch_size * NUM_CHOICES, -1]

        token_ids = to_id(choices)
        flat_token_ids = tf.reshape(token_ids, flat_shape)

        max_len = tf.shape(choices)[-1]
        mask = tf.sequence_mask(choices_len, maxlen=max_len)
        flat_mask = tf.reshape(mask, flat_shape)

        # Bert prediction.
        bert_config = BertConfig.from_json_file(options.bert_config_file)
        bert_model = BertModel(bert_config,
                               is_training,
                               input_ids=flat_token_ids,
                               input_mask=flat_mask)

        flat_cls_feature = bert_model.get_pooled_output()
        cls_feature = tf.reshape(flat_cls_feature,
                                 [batch_size, NUM_CHOICES, -1])

        # Restore from checkpoint; this runs before the dense layer below
        # is created, so that layer is not among the global variables used
        # to build the map.
        checkpoint_file = options.bert_checkpoint_file
        assignment_map, _ = get_assignment_map_from_checkpoint(
            tf.global_variables(), checkpoint_file)
        tf.compat.v1.train.init_from_checkpoint(checkpoint_file,
                                                assignment_map)

        # Classification layer: one score per choice.
        scores = tf.compat.v1.layers.dense(cls_feature,
                                           units=1,
                                           activation=None)
        return {FIELD_ANSWER_PREDICTION: tf.squeeze(scores, axis=-1)}
コード例 #9
0
    def test_token_to_id_bert(self):
        """Checks ids produced from the cased BERT vocabulary file."""
        layer = token_to_id.TokenToIdLayer(
            'data/bert/keras/cased_L-12_H-768_A-12/vocab.txt',
            unk_token_id=100)

        cases = (
            (['hello', ',', 'world', '!'], [19082, 117, 1362, 106]),
            # '!!' is expected to come back as the unknown-token id.
            (['hello', ',', 'world', '!!'], [19082, 117, 1362, 100]),
        )
        for tokens, expected_ids in cases:
            output = layer(tf.convert_to_tensor(tokens))
            self.assertAllEqual(output, expected_ids)
コード例 #10
0
    def test_token_to_id_2d(self):
        """Checks id lookup on a 2-D string tensor using a temp vocab file.

        The temporary vocabulary file is removed in a `finally` clause so it
        does not leak on disk when the lookup or the assertion fails.
        """
        vocab_file = self._create_temp_vocab_file()
        try:
            test_layer = token_to_id.TokenToIdLayer(vocab_file,
                                                    unk_token_id=4)

            output = test_layer(
                tf.convert_to_tensor([['hello', ',', 'world', '!'],
                                      ['hell', ',', 'world', '!!']]))
            self.assertAllEqual(output, [[2, 6, 3, 7], [4, 6, 3, 4]])
        finally:
            os.unlink(vocab_file)

        # Kept outside `finally` so it cannot mask an earlier failure.
        self.assertFalse(os.path.exists(vocab_file))
コード例 #11
0
ファイル: bilstm_glove.py プロジェクト: yekeren/VCR
    def predict(self, inputs, **kwargs):
        """Scores each answer choice with an RNN over word embeddings.

        Args:
          inputs: A dictionary of input tensors keyed by names.
          **kwargs: Not referenced in this method.

        Returns:
          predictions: A dictionary holding the per-choice scores under
            FIELD_ANSWER_PREDICTION.
        """
        options = self._model_proto
        is_training = self._is_training

        choices = inputs[InputFields.answer_choices_with_question]
        choices_len = inputs[
            InputFields.answer_choices_with_question_len]
        # The label is looked up but not used when building predictions.
        answer_label = inputs[InputFields.answer_label]

        batch_size = choices.shape[0]
        num_rows = batch_size * NUM_CHOICES

        # Convert tokens to ids and flatten the choice axis.
        to_id = token_to_id.TokenToIdLayer(options.vocab_file,
                                           options.unk_token_id)
        token_ids = to_id(choices)
        flat_token_ids = tf.reshape(token_ids, [num_rows, -1])

        # Convert word ids to embedding vectors.
        glove_matrix = create_embedding_matrix(options.glove_file,
                                               options.vocab_file)
        embedding = tf.get_variable('word/embedding',
                                    initializer=glove_matrix,
                                    trainable=True)
        flat_embs = tf.nn.embedding_lookup(embedding,
                                           flat_token_ids,
                                           max_norm=None)

        # Encode the sequences; the length reshape stays inside the scope.
        with tf.variable_scope('answer_choice_encoder'):
            flat_len = tf.reshape(choices_len, [num_rows])
            _, flat_feature = rnn.RNN(flat_embs,
                                      flat_len,
                                      options.rnn_config,
                                      is_training=is_training)
        choice_feature = tf.reshape(flat_feature,
                                    [batch_size, NUM_CHOICES, -1])

        # Classification layer: one score per choice.
        scores = tf.compat.v1.layers.dense(choice_feature,
                                           units=1,
                                           activation=None)
        return {FIELD_ANSWER_PREDICTION: tf.squeeze(scores, axis=-1)}
コード例 #12
0
ファイル: token_to_id_test.py プロジェクト: yekeren/WSSGG
    def test_token_to_id_2d(self):
        """Checks id lookup on 2-D string tensors with a dict vocabulary."""
        vocab = {'one': 2, 'world': 3, 'dream': 5}
        layer = token_to_id.TokenToIdLayer(vocab, 4)

        cases = [
            ([['hello', ',', 'world', '!'], ['hell', ',', 'world', '!!']],
             [[4, 4, 3, 4], [4, 4, 3, 4]]),
            ([['one', 'world', 'one', 'dream'],
              ['one', 'word', 'one', 'dream']],
             [[2, 3, 2, 5], [2, 4, 2, 5]]),
        ]
        for tokens, expected_ids in cases:
            output = layer(tf.convert_to_tensor(tokens))
            self.assertAllEqual(output, expected_ids)
コード例 #13
0
  def test_masked_lm(self):
    """Smoke test: builds a BERT pretrainer and restores encoder weights.

    There are no assertions; the test passes when the model builds, the
    checkpoint restore runs, and the last trainable variable of the encoder
    can be evaluated and printed.
    """
    example_sentence = [
        'alice', 'became', '[MASK]', 'after', 'felt', 'left', 'out', 'by',
        'her', 'friends.'
    ]
    num_token_predictions = 1

    bert_unk_token_id = 100
    bert_dir = 'data/bert/keras/cased_L-12_H-768_A-12'
    bert_vocab_file = "{}/vocab.txt".format(bert_dir)
    bert_config_file = "{}/bert_config.json".format(bert_dir)
    bert_checkpoint_file = "{}/bert_model.ckpt".format(bert_dir)
    num_classes = 2
    sequence_length = len(example_sentence)

    # NOTE(review): `vocab`, `token_to_id_layer` and `pretrainer_model` are
    # not read again below. They are kept because constructing them may have
    # side effects (file access, graph state) that cannot be ruled out here.
    vocab = load_vocab(bert_vocab_file)

    token_to_id_layer = token_to_id.TokenToIdLayer(bert_vocab_file,
                                                   bert_unk_token_id)

    bert_config = BertConfig.from_json_file(bert_config_file)
    transformer_encoder = get_transformer_encoder(bert_config, sequence_length)

    pretrainer_model = bert_pretrainer.BertPretrainer(
        network=transformer_encoder,
        num_classes=num_classes,
        num_token_predictions=num_token_predictions,
        output='predictions')

    # Only the encoder is tracked by the checkpoint object.
    checkpoint = tf.train.Checkpoint(model=transformer_encoder)
    status = checkpoint.restore(bert_checkpoint_file)

    with tf.compat.v1.Session() as sess:
      status.initialize_or_restore(sess)
      values = sess.run(transformer_encoder.trainable_variables)
      print(values[-1])
コード例 #14
0
    def _predict(self, inputs, **kwargs):
        """Builds the answer-choice prediction tensors.

        Question and answer tokens are embedded, concatenated with object
        features selected by their tags, passed through
        `self._recognition_to_cognition`, and scored by a two-layer
        fully-connected classifier.

        Args:
          inputs: A dictionary of input tensors keyed by names.
          **kwargs: Not referenced in this method.

        Returns:
          predictions: A dictionary of prediction tensors keyed by name. This
            method adds FIELD_ANSWER_PREDICTION and 'image_id'; the same
            dictionary is also handed to `self._recognition_to_cognition`.
        """
        is_training = self._is_training
        options = self._model_proto
        token_to_id_layer = token_to_id.TokenToIdLayer(
            options.embedding_vocab_file, options.embedding_unk_token_id)

        predictions = {}

        # Extract text annotations.
        (question, question_len, answer,
         answer_len) = (inputs[InputFields.question],
                        inputs[InputFields.question_len],
                        inputs[InputFields.answer_choices],
                        inputs[InputFields.answer_choices_len])
        # Static leading dimension of `question`; used for the final reshape.
        batch_size = question.shape[0]

        # Convert word to embedding vectors.
        (question_token_ids, answer_token_ids) = (token_to_id_layer(question),
                                                  token_to_id_layer(answer))
        glove_embedding_array = _create_embedding_matrix(
            options.embedding_glove_file, options.embedding_vocab_file)
        # One trainable table shared by question and answer lookups.
        embedding = tf.get_variable('word/embedding',
                                    initializer=glove_embedding_array,
                                    trainable=True)

        question_embs = tf.nn.embedding_lookup(embedding,
                                               question_token_ids,
                                               max_norm=None)
        answer_embs = tf.nn.embedding_lookup(embedding,
                                             answer_token_ids,
                                             max_norm=None)

        # Trim lengths of the object arrays to `max_num_objects`.
        # NOTE(review): `object_bboxes` and `object_scores` are not read
        # again in this method.
        (num_objects, object_bboxes, object_labels, object_scores,
         object_features, max_num_objects) = _trim_to_max_num_objects(
             inputs[InputFields.num_objects],
             inputs[InputFields.object_bboxes],
             inputs[InputFields.object_labels],
             inputs[InputFields.object_scores],
             inputs[InputFields.object_features],
             max_num_objects=options.max_num_objects)

        # Tags are post-processed with the `max_num_objects` value returned
        # by the trimming step above.
        question_tags = _assign_invalid_tags(inputs[InputFields.question_tag],
                                             max_num_objects)
        answer_tags = _assign_invalid_tags(
            inputs[InputFields.answer_choices_tag], max_num_objects)

        # Merge class label embeddings to the Fast-RCNN features.
        object_embeddings = _get_class_embedding_vectors(
            object_labels, options.label_file, options.label_vocab_size,
            options.label_embedding_dims)
        object_features = _project_object_features(
            object_features,
            object_embeddings,
            output_dims=options.visual_feature_dims,
            dropout_keep_prob=options.dropout_keep_prob,
            is_training=is_training)

        # Reshape answer-related tensors
        # to the shape of [batch_size * NUM_CHOICES, max_seq_len, ...].
        (question_embs, question_tags, question_len, answer_embs, answer_tags,
         answer_len) = _reshape_answer_related_tensors(question_embs,
                                                       question_tags,
                                                       question_len,
                                                       answer_embs,
                                                       answer_tags, answer_len)

        # Ground both the question and the answer choices.
        question_object_features = _ground_tag_using_object_features(
            object_features, question_tags)
        answer_object_features = _ground_tag_using_object_features(
            object_features, answer_tags)
        # RNN inputs are word embeddings concatenated with the grounded
        # object features along the last axis.
        question_rnn_inputs = tf.concat(
            [question_embs, question_object_features], -1)
        answer_rnn_inputs = tf.concat([answer_embs, answer_object_features],
                                      -1)

        # Build the recognition to cognition model.
        # NOTE(review): `answer_seq_features` is not read again in this
        # method.
        final_features, answer_seq_features = self._recognition_to_cognition(
            question_rnn_inputs, question_len, answer_rnn_inputs, answer_len,
            object_features, num_objects, predictions)

        # Compute the joint representation.
        with tf.variable_scope('classification'):
            with tf.variable_scope('hidden'):
                output = tf.contrib.layers.fully_connected(
                    final_features, num_outputs=1024, activation_fn=tf.nn.relu)
            output = tf.contrib.layers.dropout(
                output,
                keep_prob=options.dropout_keep_prob,
                is_training=is_training)
            with tf.variable_scope('output'):
                output = tf.contrib.layers.fully_connected(output,
                                                           num_outputs=1,
                                                           activation_fn=None)

        # One score per (example, choice).
        output = tf.reshape(output, [batch_size, NUM_CHOICES])

        predictions.update({
            FIELD_ANSWER_PREDICTION: output,
            'image_id': inputs[InputFields.img_id]
        })
        return predictions
コード例 #15
0
ファイル: b2t2_frozen.py プロジェクト: yekeren/VCR
    def predict(self, inputs, **kwargs):
        """Scores each answer choice from text tokens plus object features.

        For every (example, choice) pair the token ids of the choice are
        concatenated with the token ids of the object labels, and the
        tag-grounded features are concatenated with the object features.
        Both sequences, together with a matching mask, are passed to
        `self._bert_model`, and a single fully-connected layer maps the
        result to one score per choice.

        Args:
          inputs: A dictionary of input tensors keyed by names.
          **kwargs: Not referenced in this method.

        Returns:
          predictions: A dictionary of prediction tensors keyed by name; it
            holds the scores, reshaped to [batch_size, NUM_CHOICES], under
            FIELD_ANSWER_PREDICTION.
        """
        is_training = self._is_training
        options = self._model_proto

        fc_scope_fn = hyperparams.build_hyperparams(options.fc_hyperparams,
                                                    is_training)

        # NOTE(review): `answer_label` is looked up but not read again in
        # this method.
        (answer_choices, answer_choices_tag, answer_choices_len,
         answer_label) = (inputs[InputFields.answer_choices_with_question],
                          inputs[InputFields.answer_choices_with_question_tag],
                          inputs[InputFields.answer_choices_with_question_len],
                          inputs[InputFields.answer_label])
        # Static leading dimension; used by every reshape below.
        batch_size = answer_choices.shape[0]

        # Trim lengths of the object arrays to `max_num_objects`.
        # NOTE(review): `object_bboxes` and `object_scores` are not read
        # again in this method.
        (num_objects, object_bboxes, object_labels, object_scores,
         object_features, max_num_objects) = _trim_to_max_num_objects(
             inputs[InputFields.num_objects],
             inputs[InputFields.object_bboxes],
             inputs[InputFields.object_labels],
             inputs[InputFields.object_scores],
             inputs[InputFields.object_features],
             max_num_objects=options.max_num_objects)

        answer_choices_tag = _assign_invalid_tags(answer_choices_tag,
                                                  max_num_objects)

        # Project the object features. No class label embeddings are passed
        # to `_project_object_features` in this method.
        object_features = _project_object_features(
            object_features,
            output_dims=options.visual_feature_dims,
            dropout_keep_prob=options.dropout_keep_prob,
            is_training=is_training)
        object_feature_dims = object_features.shape[-1]

        # Convert tokens into token ids.
        token_to_id_layer = token_to_id.TokenToIdLayer(
            options.bert_vocab_file, options.bert_unk_token_id)
        answer_choices_token_ids = token_to_id_layer(answer_choices)
        answer_choices_token_ids = tf.reshape(answer_choices_token_ids,
                                              [batch_size * NUM_CHOICES, -1])
        answer_choices_mask = tf.sequence_mask(
            answer_choices_len, maxlen=tf.shape(answer_choices)[-1])
        answer_choices_mask = tf.reshape(answer_choices_mask,
                                         [batch_size * NUM_CHOICES, -1])

        # Create tag features sequence.
        # NOTE(review): `object_features` has not been tiled per choice yet
        # at this point, while the tags are already flattened to
        # batch_size * NUM_CHOICES rows -- confirm the helper expects this.
        answer_choices_tag = tf.reshape(answer_choices_tag,
                                        [batch_size * NUM_CHOICES, -1])
        answer_choices_tag_features = _ground_tag_using_object_features(
            object_features, answer_choices_tag)

        # Convert class labels into token ids, tile object features.
        # Tiling idiom used three times below: insert a size-1 axis at
        # position 1, gather index 0 NUM_CHOICES times along it, then fold
        # that axis into the leading one.
        object_mask = tf.sequence_mask(num_objects,
                                       maxlen=tf.shape(object_labels)[-1])
        object_mask = tf.gather(tf.expand_dims(object_mask, 1),
                                [0] * NUM_CHOICES,
                                axis=1)
        object_mask = tf.reshape(object_mask, [batch_size * NUM_CHOICES, -1])
        # Object labels go through the same token-to-id layer as the text.
        object_label_token_ids = token_to_id_layer(object_labels)

        object_label_token_ids = tf.gather(tf.expand_dims(
            object_label_token_ids, 1), [0] * NUM_CHOICES,
                                           axis=1)
        object_label_token_ids = tf.reshape(object_label_token_ids,
                                            [batch_size * NUM_CHOICES, -1])
        object_features = tf.gather(tf.expand_dims(object_features, 1),
                                    [0] * NUM_CHOICES,
                                    axis=1)
        object_features = tf.reshape(
            object_features,
            [batch_size * NUM_CHOICES, -1, object_feature_dims])

        # Create Bert model.
        # Ids and mask are concatenated on the last axis; the feature
        # tensors are concatenated on axis 1.
        input_ids = tf.concat(
            [answer_choices_token_ids, object_label_token_ids], -1)
        input_tag_features = tf.concat(
            [answer_choices_tag_features, object_features], 1)
        input_mask = tf.concat([answer_choices_mask, object_mask], -1)

        final_features = self._bert_model(input_ids, input_tag_features,
                                          input_mask)

        # Classification layer.
        with slim.arg_scope(fc_scope_fn()):
            output = tf.contrib.layers.fully_connected(final_features,
                                                       num_outputs=1,
                                                       activation_fn=None)
        output = tf.reshape(output, [batch_size, NUM_CHOICES])
        return {FIELD_ANSWER_PREDICTION: output}
コード例 #16
0
ファイル: vcr_bilstm_concat.py プロジェクト: yekeren/VCR
  def predict(self, inputs, **kwargs):
    """Scores each answer choice with bidirectional LSTM encoders.

    The question is encoded by one bidirectional LSTM. Its returned states
    are tiled NUM_CHOICES times and, when `options.text_feature` equals
    model_pb2.QUESTION_AND_ANSWER, used as the initial state of the
    answer-choice encoder. A dense layer maps each encoded choice to one
    score.

    Args:
      inputs: A dictionary of input tensors keyed by names.
      **kwargs: Not referenced in this method.

    Returns:
      predictions: A dictionary of prediction tensors keyed by name; it holds
        the per-choice scores under FIELD_ANSWER_PREDICTION.
    """
    is_training = self._is_training
    options = self._model_proto

    # NOTE(review): `answer_label` is looked up but not read again in this
    # method.
    (question, question_len, answer_choices, answer_choices_len,
     answer_label) = (inputs[InputFields.question],
                      inputs[InputFields.question_len],
                      inputs[InputFields.answer_choices],
                      inputs[InputFields.answer_choices_len],
                      inputs[InputFields.answer_label])

    # Create model layers.
    token_to_id_layer = token_to_id.TokenToIdLayer(options.vocab_file,
                                                   options.unk_token_id)
    # Falls back to the 'uniform' initializer when no GloVe file is set.
    embeddings_initializer = 'uniform'
    if options.glove_file:
      embeddings_initializer = tf.keras.initializers.Constant(
          create_embedding_matrix(options.glove_file, options.vocab_file,
                                  options.embedding_dims))
    embedding_layer = tf.keras.layers.Embedding(
        options.vocab_size,
        options.embedding_dims,
        embeddings_initializer=embeddings_initializer)
    # Only the question encoder is built with return_state=True.
    question_lstm_layer = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(options.lstm_units,
                             dropout=options.lstm_dropout,
                             recurrent_dropout=options.lstm_recurrent_dropout,
                             return_state=True),
        name='question_bidirectional')
    answer_choice_lstm_layer = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(options.lstm_units,
                             dropout=options.lstm_dropout,
                             recurrent_dropout=options.lstm_recurrent_dropout),
        name='answer_bidirectional')

    # Convert tokens into embeddings.
    batch_size = answer_choices.shape[0]
    (question_token_ids,
     answer_choices_token_ids) = (token_to_id_layer(question),
                                  token_to_id_layer(answer_choices))
    (question_embs,
     answer_choices_embs) = (embedding_layer(question_token_ids),
                             embedding_layer(answer_choices_token_ids))

    # Question LSTM encoder.
    question_mask = tf.sequence_mask(question_len,
                                     maxlen=tf.shape(question)[-1])
    question_outputs = question_lstm_layer(question_embs,
                                           mask=question_mask,
                                           training=is_training)
    # First element is the layer output, the remaining ones are the states.
    # NOTE(review): `question_feature` is not read again in this method.
    question_feature, question_states = (question_outputs[0],
                                         question_outputs[1:])
    # Repeat every state NUM_CHOICES times (gather of index 0 on a new
    # axis 1) and fold the copies into the leading axis.
    question_states_tiled = []
    for question_state in question_states:
      question_state = tf.gather(tf.expand_dims(question_state, axis=1),
                                 indices=[0] * NUM_CHOICES,
                                 axis=1)
      question_states_tiled.append(
          tf.reshape(question_state, [-1, question_state.shape[-1]]))

    # Answer LSTM encoder.
    answer_choices_mask = tf.sequence_mask(answer_choices_len,
                                           maxlen=tf.shape(answer_choices)[-1])
    answer_choices_mask_reshaped = tf.reshape(answer_choices_mask,
                                              [batch_size * NUM_CHOICES, -1])
    answer_choices_embs_reshaped = tf.reshape(
        answer_choices_embs,
        [batch_size * NUM_CHOICES, -1, options.embedding_dims])

    # The tiled question states seed the answer encoder only in the
    # QUESTION_AND_ANSWER setting; otherwise no initial state is passed.
    answer_choices_feature_reshaped = answer_choice_lstm_layer(
        answer_choices_embs_reshaped,
        mask=answer_choices_mask_reshaped,
        training=is_training,
        initial_state=question_states_tiled
        if options.text_feature == model_pb2.QUESTION_AND_ANSWER else None)

    answer_choices_feature = tf.reshape(answer_choices_feature_reshaped,
                                        [batch_size, NUM_CHOICES, -1])

    # One score per choice; the trailing size-1 axis is squeezed away.
    output = tf.keras.layers.Dense(1, activation=None)(answer_choices_feature)
    output = tf.squeeze(output, axis=-1)

    return {FIELD_ANSWER_PREDICTION: output}
コード例 #17
0
def main(_):
    """Extracts BERT features for the annotations assigned to this shard.

    Builds a graph that maps a token sequence to BERT's sequence output and
    pooled output, restores `FLAGS.bert_checkpoint_file`, and writes one
    `.npy` file per annotation to
    `FLAGS.output_bert_feature_dir/<partition>/<annot_id>.npy`.
    Annotations whose output file already exists are skipped, so an
    interrupted job can be re-run.

    Args:
      _: Unused positional argument.
    """
    logging.set_verbosity(logging.INFO)

    for i in range(_NUM_PARTITIONS):
        tf.io.gfile.makedirs(
            os.path.join(FLAGS.output_bert_feature_dir, '%02d' % i))

    # Create Bert model.
    bert_tokenizer = tokenization.FullTokenizer(
        vocab_file=FLAGS.bert_vocab_file, do_lower_case=FLAGS.do_lower_case)

    # Bert prediction. The graph-mode symbols are taken from `tf.compat.v1`
    # throughout, matching the Session / Saver calls below.
    input_placeholder = tf.compat.v1.placeholder(shape=[None],
                                                 dtype=tf.string)
    token_to_id_layer = token_to_id.TokenToIdLayer(FLAGS.bert_vocab_file,
                                                   unk_token_id=UNK)

    bert_config = BertConfig.from_json_file(FLAGS.bert_config_file)
    bert_model = BertModel(bert_config,
                           is_training=False,
                           input_ids=token_to_id_layer(
                               tf.expand_dims(input_placeholder, 0)))
    # The placeholder is fed one sequence at a time, so drop the batch axis.
    sequence_output = bert_model.get_sequence_output()[0]
    pooled_output = bert_model.get_pooled_output()[0]
    saver = tf.compat.v1.train.Saver()

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True

    # The context manager closes the session (and releases its resources)
    # even if feature extraction fails part-way through.
    with tf.compat.v1.Session(config=config) as sess:
        sess.run(tf.compat.v1.tables_initializer())
        saver.restore(sess, FLAGS.bert_checkpoint_file)

        for name in sess.run(tf.compat.v1.report_uninitialized_variables()):
            logging.warning('%s is uninitialized!', name)

        def _bert_fn(sequence):
            return sess.run([sequence_output, pooled_output],
                            feed_dict={input_placeholder: sequence})

        # Load annotations.
        annots = _load_annotations(FLAGS.annotations_jsonl_file)
        logging.info('Loaded %i annotations.', len(annots))

        shard_id, num_shards = FLAGS.shard_id, FLAGS.num_shards
        assert 0 <= shard_id < num_shards

        for idx, annot in enumerate(annots):
            if (idx + 1) % 1000 == 0:
                logging.info('On example %i/%i.', idx + 1, len(annots))

            # Only process the annotations that belong to this shard; the
            # numeric suffix of the annotation id decides the shard.
            annot_id = int(annot['annot_id'].split('-')[-1])
            if annot_id % num_shards != shard_id:
                continue

            # Check npy file.
            part_id = get_partition_id(annot['annot_id'])
            output_file = os.path.join(FLAGS.output_bert_feature_dir,
                                       '%02d' % part_id,
                                       annot['annot_id'] + '.npy')
            if os.path.isfile(output_file):
                logging.info('%s is there.', output_file)
                continue

            # Compute the BERT outputs and save them.
            bert_outputs = _create_bert_embeddings(annot, bert_tokenizer,
                                                   FLAGS.do_lower_case,
                                                   _bert_fn)
            with open(output_file, 'wb') as f:
                np.save(f, bert_outputs)

    logging.info('Done')
コード例 #18
0
def _update_decoded_example(decoded_example, options):
  """Turns a decoded example into padded, vocabulary-mapped model inputs.

  Per-choice answer / rationale features are popped from the example and
  batched through `_pad_sequences`. "Mixed" sequences are also built by
  joining the question (plus the labelled answer, for rationales) to each
  choice with `[SEP]` tokens and `-1` tags. Token features are passed through
  the vocabulary lookup layer before being written back.

  Args:
    decoded_example: A tensor dictionary keyed by name.
    options: An instance of reader_pb2.Reader.

  Returns:
    decoded_example: The same instance with content modified.
  """
  vocab_lookup = token_to_id.TokenToIdLayer(
      options.vocab_file, options.out_of_vocabulary_token_id)

  def _pop_per_choice(key_prefix):
    # Choice features are keyed `<prefix>_1` ... `<prefix>_<NUM_CHOICES>`.
    return [
        decoded_example.pop(key_prefix + '_%i' % choice_id)
        for choice_id in range(1, 1 + NUM_CHOICES)
    ]

  def _repeat_per_choice(tensor, multiples):
    # Prepends an axis and tiles the tensor along it.
    return tf.tile(tf.expand_dims(tensor, 0), multiples)

  # Detections.
  boxes = decoded_example[InputFields.detection_boxes]
  classes = decoded_example[InputFields.detection_classes]
  box_count = tf.shape(boxes)[0]

  # Question.
  question_tokens = decoded_example[InputFields.question]
  question_tags = decoded_example[InputFields.question_tag]
  question_size = tf.shape(question_tokens)[0]

  # Plain answer choices.
  answer_tokens_list = _pop_per_choice(TFExampleFields.answer_choice)
  answer_tags_list = _pop_per_choice(TFExampleFields.answer_choice_tag)
  answers, answer_sizes = _pad_sequences(answer_tokens_list)
  answer_tags, _ = _pad_sequences(answer_tags_list, -1)

  # Plain rationale choices.
  rationale_tokens_list = _pop_per_choice(TFExampleFields.rationale_choice)
  rationale_tags_list = _pop_per_choice(TFExampleFields.rationale_choice_tag)
  rationales, rationale_sizes = _pad_sequences(rationale_tokens_list)
  rationale_tags, _ = _pad_sequences(rationale_tags_list, -1)

  # The labelled answer, cut back to its own length.
  label = decoded_example[InputFields.answer_label]
  gold_size = answer_sizes[label]
  gold_tokens = answers[label][:gold_size]
  gold_tags = answer_tags[label][:gold_size]

  # question [SEP] answer-choice.
  sep_token, sep_tag = ['[SEP]'], [-1]
  qa_tokens_list = [
      tf.concat([question_tokens, sep_token, tokens], 0)
      for tokens in answer_tokens_list
  ]
  qa_tags_list = [
      tf.concat([question_tags, sep_tag, tags], 0)
      for tags in answer_tags_list
  ]
  qa_tokens, qa_sizes = _pad_sequences(qa_tokens_list)
  qa_tags, _ = _pad_sequences(qa_tags_list, pad=-1)

  # question [SEP] labelled-answer [SEP] rationale-choice.
  qar_tokens_list = [
      tf.concat(
          [question_tokens, sep_token, gold_tokens, sep_token, tokens], 0)
      for tokens in rationale_tokens_list
  ]
  qar_tags_list = [
      tf.concat([question_tags, sep_tag, gold_tags, sep_tag, tags], 0)
      for tags in rationale_tags_list
  ]
  qar_tokens, qar_sizes = _pad_sequences(qar_tokens_list)
  qar_tags, _ = _pad_sequences(qar_tags_list, pad=-1)

  # The image is stored back unchanged, together with its height and width.
  image = decoded_example[InputFields.img_data]
  image_dims = tf.shape(image)

  decoded_example.update({
      InputFields.img_data: image,
      InputFields.img_height: image_dims[0],
      InputFields.img_width: image_dims[1],
      InputFields.num_detections: box_count,
      InputFields.detection_classes: vocab_lookup(classes),
      InputFields.question:
          _repeat_per_choice(vocab_lookup(question_tokens), [NUM_CHOICES, 1]),
      InputFields.question_tag:
          _repeat_per_choice(question_tags, [NUM_CHOICES, 1]),
      InputFields.question_len:
          _repeat_per_choice(question_size, [NUM_CHOICES]),
      InputFields.answer_len:
          _repeat_per_choice(gold_size, [NUM_CHOICES]),
      InputFields.answer_choices: vocab_lookup(answers),
      InputFields.answer_choices_tag: answer_tags,
      InputFields.answer_choices_len: answer_sizes,
      InputFields.rationale_choices: vocab_lookup(rationales),
      InputFields.rationale_choices_tag: rationale_tags,
      InputFields.rationale_choices_len: rationale_sizes,
      InputFields.mixed_answer_choices: vocab_lookup(qa_tokens),
      InputFields.mixed_answer_choices_tag: qa_tags,
      InputFields.mixed_answer_choices_len: qa_sizes,
      InputFields.mixed_rationale_choices: vocab_lookup(qar_tokens),
      InputFields.mixed_rationale_choices_tag: qar_tags,
      InputFields.mixed_rationale_choices_len: qar_sizes,
  })

  return decoded_example
コード例 #19
0
ファイル: vcr_bilstm_bert.py プロジェクト: yekeren/VCR
  def predict(self, inputs, **kwargs):
    """Scores every answer choice using BERT embeddings and a BiLSTM.

    Args:
      inputs: A dictionary of input tensors keyed by names.

    Returns:
      predictions: A dictionary of prediction tensors keyed by name.
    """
    training = self._is_training
    model_options = self._model_proto

    choices = inputs[InputFields.answer_choices_with_question]
    choices_len = inputs[InputFields.answer_choices_with_question_len]
    # The label is read from `inputs` but not used by this method.
    _ = inputs[InputFields.answer_label]

    # Model layers: vocabulary lookup, BERT encoder, and the BiLSTM.
    vocab_lookup = token_to_id.TokenToIdLayer(model_options.bert_vocab_file,
                                              model_options.bert_unk_token_id)

    self.transformer_encoder = get_transformer_encoder(
        BertConfig.from_json_file(model_options.bert_config_file), None)

    bert_checkpoint = tf.train.Checkpoint(model=self.transformer_encoder)
    self.transformer_encoder_load_status = bert_checkpoint.restore(
        model_options.bert_checkpoint_file)

    bilstm = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(
            model_options.lstm_units,
            dropout=model_options.lstm_dropout,
            recurrent_dropout=model_options.lstm_recurrent_dropout),
        name='answer_bidirectional')

    # Fold the choice axis into the batch axis for the encoders.
    batch_size = choices.shape[0]
    flat_token_ids = tf.reshape(vocab_lookup(choices),
                                [batch_size * NUM_CHOICES, -1])

    flat_mask = tf.reshape(
        tf.sequence_mask(choices_len, maxlen=tf.shape(choices)[-1]),
        [batch_size * NUM_CHOICES, -1])

    # Third encoder input is all zeros, shaped like the token ids.
    zero_ids = tf.zeros_like(flat_token_ids, dtype=tf.int32)
    flat_embs, _ = self.transformer_encoder(
        [flat_token_ids, flat_mask, zero_ids], training=training)

    flat_features = bilstm(flat_embs, mask=flat_mask, training=training)

    # Restore the choice axis and project each choice to a single logit.
    features = tf.reshape(flat_features, [batch_size, NUM_CHOICES, -1])
    scorer = tf.keras.layers.Dense(1, activation=None)
    logits = tf.squeeze(scorer(features), axis=-1)

    return {FIELD_ANSWER_PREDICTION: logits}
コード例 #20
0
ファイル: vibilstm_glove.py プロジェクト: yekeren/VCR
    def predict(self, inputs, **kwargs):
        """Predicts the resulting tensors.

        Each answer choice is embedded with trainable vectors initialized
        from GloVe, encoded by `rnn.RNN`, concatenated with the masked
        average of the object features, and scored by a hidden layer
        followed by a single-unit logit layer.

        Args:
          inputs: A dictionary of input tensors keyed by names.

        Returns:
          predictions: A dictionary of prediction tensors keyed by name.
        """
        is_training = self._is_training
        options = self._model_proto

        (num_objects, object_bboxes, object_labels, object_scores,
         object_features) = (inputs[InputFields.num_objects],
                             inputs[InputFields.object_bboxes],
                             inputs[InputFields.object_labels],
                             inputs[InputFields.object_scores],
                             inputs[InputFields.object_features])
        (answer_choices, answer_choices_len,
         answer_label) = (inputs[InputFields.answer_choices_with_question],
                          inputs[InputFields.answer_choices_with_question_len],
                          inputs[InputFields.answer_label])
        batch_size = answer_choices.shape[0]

        # Image feature: average of the object features over the valid
        # (unpadded) objects.
        object_masks = tf.sequence_mask(num_objects,
                                        tf.shape(object_bboxes)[1],
                                        dtype=tf.float32)
        image_feature = masked_ops.masked_avg_nd(object_features,
                                                 object_masks,
                                                 dim=1)

        # Convert tokens to ids.
        token_to_id_layer = token_to_id.TokenToIdLayer(options.vocab_file,
                                                       options.unk_token_id)
        answer_choices_token_ids = token_to_id_layer(answer_choices)
        answer_choices_token_ids_reshaped = tf.reshape(
            answer_choices_token_ids, [batch_size * NUM_CHOICES, -1])

        # Convert word ids to embedding vectors.
        glove_embedding_array = create_embedding_matrix(
            options.glove_file, options.vocab_file)
        embedding = tf.get_variable('word/embedding',
                                    initializer=glove_embedding_array,
                                    trainable=True)
        answer_choices_embs_reshaped = tf.nn.embedding_lookup(
            embedding, answer_choices_token_ids_reshaped, max_norm=None)

        # Encode the sequence using the configured RNN.
        with tf.variable_scope('answer_choice_encoder'):
            _, answer_choices_feature_reshaped = rnn.RNN(
                answer_choices_embs_reshaped,
                tf.reshape(answer_choices_len, [batch_size * NUM_CHOICES]),
                options.rnn_config,
                is_training=is_training)
        answer_choices_feature = tf.reshape(answer_choices_feature_reshaped,
                                            [batch_size, NUM_CHOICES, -1])

        # Pair every answer-choice feature with the image feature.
        # NOTE(review): the tile assumes `image_feature` keeps a length-1
        # axis at position 1 -- confirm against `masked_avg_nd`.
        inputs = tf.concat([
            answer_choices_feature,
            tf.tile(image_feature, [1, NUM_CHOICES, 1])
        ], -1)

        # Hidden layer followed by the logit layer. The logit layer must
        # consume the hidden activations; previously it was fed `inputs`,
        # which left the hidden layer as dead computation.
        # NOTE(review): this changes the logit kernel shape, so checkpoints
        # saved from the previous graph will not restore.
        output = tf.compat.v1.layers.dense(inputs,
                                           units=512,
                                           activation=tf.nn.relu6)
        output = tf.compat.v1.layers.dense(output, units=1, activation=None)
        output = tf.squeeze(output, axis=-1)

        return {FIELD_ANSWER_PREDICTION: output}
コード例 #21
0
ファイル: compatible_qa.py プロジェクト: yekeren/VCR
  def predict(self, inputs, **kwargs):
    """Scores answer choices from question / answer encodings.

    Args:
      inputs: A dictionary of input tensors keyed by names.

    Returns:
      predictions: A dictionary of prediction tensors keyed by name.
    """
    model_options = self._model_proto
    training = self._is_training

    vocab_lookup = token_to_id.TokenToIdLayer(model_options.vocab_file,
                                              model_options.unk_token_id)
    fc_scope_fn = hyperparams.build_hyperparams(model_options.fc_hyperparams,
                                                training)

    def _repeat_per_choice(tensor):
      # Inserts axis 1 and gathers index 0 NUM_CHOICES times along it.
      return tf.gather(tf.expand_dims(tensor, 1), [0] * NUM_CHOICES, axis=1)

    # Input fields.
    question = inputs[InputFields.question]
    question_len = inputs[InputFields.question_len]
    choices = inputs[InputFields.answer_choices]
    choices_len = inputs[InputFields.answer_choices_len]
    batch_size = choices.shape[0]

    # Tokens -> ids; answer choices are folded into the batch axis.
    question_ids = vocab_lookup(question)
    choice_ids = tf.reshape(vocab_lookup(choices),
                            [batch_size * NUM_CHOICES, -1])
    choices_len = tf.reshape(choices_len, [batch_size * NUM_CHOICES])

    # Ids -> embedding vectors, initialized from GloVe and trainable.
    embedding = tf.get_variable(
        'word/embedding',
        initializer=create_embedding_matrix(model_options.glove_file,
                                            model_options.vocab_file),
        trainable=True)
    question_embs = tf.nn.embedding_lookup(embedding,
                                           question_ids,
                                           max_norm=None)
    choice_embs = tf.nn.embedding_lookup(embedding, choice_ids, max_norm=None)

    # Repeat the question once per choice so it lines up with the choices.
    tiled_question_embs = _repeat_per_choice(question_embs)
    question_embs = tf.reshape(
        tiled_question_embs,
        [batch_size * NUM_CHOICES, -1, tiled_question_embs.shape[-1]])
    question_len = tf.reshape(_repeat_per_choice(question_len),
                              [batch_size * NUM_CHOICES])

    # Separate encoders for the question and for the answer choices.
    with tf.variable_scope('question_encoder'):
      _, question_features = rnn.RNN(question_embs,
                                     question_len,
                                     model_options.rnn_config,
                                     is_training=training)
    with tf.variable_scope('answer_choice_encoder'):
      _, choice_features = rnn.RNN(choice_embs,
                                   choices_len,
                                   model_options.rnn_config,
                                   is_training=training)
    joint_features = tf.concat(
        [choice_features, choice_features * question_features], axis=-1)

    # MLP classifier.
    with slim.arg_scope(fc_scope_fn()):
      with tf.variable_scope('classification'):
        with tf.variable_scope('hidden'):
          hidden = tf.contrib.layers.fully_connected(joint_features,
                                                     num_outputs=1024,
                                                     activation_fn=tf.nn.relu)
          hidden = tf.contrib.layers.dropout(
              hidden,
              keep_prob=model_options.dropout_keep_prob,
              is_training=training)
        with tf.variable_scope('output'):
          logits = tf.contrib.layers.fully_connected(hidden,
                                                     num_outputs=1,
                                                     activation_fn=None)
          logits = tf.reshape(logits, [batch_size, NUM_CHOICES])

    return {FIELD_ANSWER_PREDICTION: logits}
コード例 #22
0
  def _predict(self, inputs, **kwargs):
    """Predicts answer scores with and without an adversarial answer mask.

    Runs `self._predict_answer` three times on the same question / answer /
    object inputs: once unmodified, and twice with the answer embeddings
    multiplied by the mask returned from `self._adversarial_masking`. The two
    masked runs differ only in where `tf.stop_gradient` is applied.

    Args:
      inputs: A dictionary of input tensors keyed by names.

    Returns:
      predictions: A dictionary of prediction tensors keyed by name. Besides
        whatever `_adversarial_masking` adds to it, it holds 'temperature',
        'image_id', 'question', 'answer_choices', 'shortcut_mask' and the
        three answer predictions (original, adversarial-R2C,
        adversarial-mask), each reshaped to [batch_size, NUM_CHOICES].
    """
    is_training = self._is_training
    options = self._model_proto
    token_to_id_layer = token_to_id.TokenToIdLayer(
        options.embedding_vocab_file, options.embedding_unk_token_id)

    # Passed to `_adversarial_masking` below and extended at the end.
    predictions = {}

    # Extract text annotations.
    question_len = inputs[InputFields.question_len]
    question_token_ids = token_to_id_layer(inputs[InputFields.question])

    answer_len = inputs[InputFields.answer_choices_len]
    answer_token_ids = token_to_id_layer(inputs[InputFields.answer_choices])

    batch_size = question_token_ids.shape[0]

    # Load GloVe data.
    embeddings_index = _load_embeddings(options.embedding_glove_file)
    # NOTE(review): `embedding_dims` is not used again in this method.
    embedding_dims = embeddings_index['the'].shape[-1]

    # Convert word to embedding vectors.
    embedding = tf.get_variable('word/embedding',
                                initializer=_create_embedding_matrix(
                                    embeddings_index,
                                    options.embedding_vocab_file),
                                trainable=True)
    embed_fn = lambda x: tf.nn.embedding_lookup(embedding, x, max_norm=_MAX_NORM)

    question_embs = embed_fn(question_token_ids)
    answer_embs = embed_fn(answer_token_ids)

    # Trim lengths of the object arrays to `max_num_objects`.
    (num_objects, object_bboxes, object_labels, object_scores, object_features,
     max_num_objects) = _trim_to_max_num_objects(
         inputs[InputFields.num_objects],
         inputs[InputFields.object_bboxes],
         inputs[InputFields.object_labels],
         inputs[InputFields.object_scores],
         inputs[InputFields.object_features],
         max_num_objects=options.max_num_objects)

    # Presumably marks tags that point past the trimmed object list as
    # invalid -- TODO confirm against `_assign_invalid_tags`.
    question_tags = _assign_invalid_tags(inputs[InputFields.question_tag],
                                         max_num_objects)
    answer_tags = _assign_invalid_tags(inputs[InputFields.answer_choices_tag],
                                       max_num_objects)

    # Merge class label embeddings to the Fast-RCNN features.

    object_features = _project_object_features(
        object_features,
        object_embeddings=_get_class_embedding_vectors(object_labels,
                                                       options.label_file,
                                                       embeddings_index),
        output_dims=options.visual_feature_dims,
        dropout_keep_prob=options.dropout_keep_prob,
        is_training=is_training)

    # Reshape answer-related tensors
    # to the shape of [batch_size * NUM_CHOICES, max_seq_len, ...].
    (question_embs, question_tags, question_len, answer_embs, answer_tags,
     answer_len) = _reshape_answer_related_tensors(question_embs, question_tags,
                                                   question_len, answer_embs,
                                                   answer_tags, answer_len)

    # Adversarial masking. All inputs are wrapped in `tf.stop_gradient`, so
    # no gradient reaches the embeddings through the masking branch.
    with tf.variable_scope('adversarial_masking'):
      # Disabled experiment: a separate embedding table for the masking
      # branch. Kept for reference.
      # adv_embedding = tf.get_variable('word/adv_embedding',
      #                                 initializer=_create_embedding_matrix(
      #                                     embeddings_index,
      #                                     options.embedding_vocab_file),
      #                                 trainable=True)
      # adv_embed_fn = lambda x: tf.nn.embedding_lookup(adv_embedding, x, max_norm=_MAX_NORM)

      # adv_question_embs = adv_embed_fn(question_token_ids)
      # adv_answer_embs = adv_embed_fn(answer_token_ids)

      # (adv_question_embs,
      #  adv_answer_embs) = _reshape_answer_tensors(adv_question_embs,
      #                                             adv_answer_embs)

      answer_shortcut_mask, temperature = self._adversarial_masking(
          tf.stop_gradient(question_embs), tf.stop_gradient(question_len),
          tf.stop_gradient(answer_embs), tf.stop_gradient(answer_len),
          is_training, predictions)

    # Ground both the question and the answer choices.
    question_object_features = _ground_tag_using_object_features(
        object_features, question_tags)
    answer_object_features = _ground_tag_using_object_features(
        object_features, answer_tags)

    # 0. Original prediction.
    outputs = self._predict_answer(question_embs, question_object_features,
                                   question_len, answer_embs,
                                   answer_object_features, answer_len,
                                   object_features, num_objects)

    # Re-enter the current variable scope with reuse=True so the two calls
    # below reuse existing variables instead of creating new ones.
    with tf.variable_scope(tf.get_variable_scope(), reuse=True):

      # 1. Optimize the R2C with adversarial attack.
      # The mask is held constant (stop_gradient); the other inputs are not.
      outputs_adv_r2c = self._predict_answer(
          question_embs, question_object_features, question_len,
          tf.multiply(answer_embs, tf.stop_gradient(answer_shortcut_mask)),
          answer_object_features, answer_len, object_features, num_objects)

      # 2. Optimize the mask.
      # Every input except the mask is wrapped in stop_gradient.
      outputs_adv_mask = self._predict_answer(
          tf.stop_gradient(question_embs),
          tf.stop_gradient(question_object_features),
          tf.stop_gradient(question_len),
          tf.multiply(tf.stop_gradient(answer_embs), answer_shortcut_mask),
          tf.stop_gradient(answer_object_features),
          tf.stop_gradient(answer_len), tf.stop_gradient(object_features),
          tf.stop_gradient(num_objects))

    predictions.update({
        'temperature':
            1.0 * temperature,
        'image_id':
            inputs[InputFields.img_id],
        'question':
            inputs[InputFields.question],
        'answer_choices':
            inputs[InputFields.answer_choices],
        'shortcut_mask':
            tf.reshape(answer_shortcut_mask, [batch_size, NUM_CHOICES, -1]),
        FIELD_ANSWER_PREDICTION_ORI:
            tf.reshape(outputs, [batch_size, NUM_CHOICES]),
        FIELD_ANSWER_PREDICTION_ADV_R2C:
            tf.reshape(outputs_adv_r2c, [batch_size, NUM_CHOICES]),
        FIELD_ANSWER_PREDICTION_ADV_MASK:
            tf.reshape(outputs_adv_mask, [batch_size, NUM_CHOICES]),
    })
    return predictions
コード例 #23
0
def _update_decoded_example(decoded_example, options):
    """Turns a decoded example into padded, vocabulary-mapped model inputs.

    Per-choice answer / rationale features are popped from the example and
    batched through `_pad_sequences`. "Mixed" sequences are also built by
    joining the question (plus the labelled answer, for rationales) to each
    choice with `[SEP]` tokens and `-1` tags. The flat detection features
    are reshaped to rows of `options.frcnn_feature_dims` values.

    Args:
      decoded_example: A tensor dictionary keyed by name.
      options: An instance of reader_pb2.Reader.

    Returns:
      decoded_example: The same instance with content modified.
    """
    vocab_lookup = token_to_id.TokenToIdLayer(
        options.vocab_file, options.out_of_vocabulary_token_id)

    def _pop_per_choice(key_prefix):
        # Choice features are keyed `<prefix>_1` ... `<prefix>_<NUM_CHOICES>`.
        return [
            decoded_example.pop(key_prefix + '_%i' % choice_id)
            for choice_id in range(1, 1 + NUM_CHOICES)
        ]

    def _repeat_per_choice(tensor, multiples):
        # Prepends an axis and tiles the tensor along it.
        return tf.tile(tf.expand_dims(tensor, 0), multiples)

    # Detections.
    boxes = decoded_example[InputFields.detection_boxes]
    classes = decoded_example[InputFields.detection_classes]
    box_count = tf.shape(boxes)[0]

    # Fast-RCNN features, one row per detection.
    box_features = tf.reshape(
        decoded_example.pop(TFExampleFields.detection_features),
        [-1, options.frcnn_feature_dims])

    # Question.
    question_tokens = decoded_example[InputFields.question]
    question_tags = decoded_example[InputFields.question_tag]
    question_size = tf.shape(question_tokens)[0]

    # Plain answer choices.
    answer_tokens_list = _pop_per_choice(TFExampleFields.answer_choice)
    answer_tags_list = _pop_per_choice(TFExampleFields.answer_choice_tag)
    answers, answer_sizes = _pad_sequences(answer_tokens_list)
    answer_tags, _ = _pad_sequences(answer_tags_list, -1)

    # Plain rationale choices.
    rationale_tokens_list = _pop_per_choice(TFExampleFields.rationale_choice)
    rationale_tags_list = _pop_per_choice(
        TFExampleFields.rationale_choice_tag)
    rationales, rationale_sizes = _pad_sequences(rationale_tokens_list)
    rationale_tags, _ = _pad_sequences(rationale_tags_list, -1)

    # The labelled answer, cut back to its own length.
    label = decoded_example[InputFields.answer_label]
    gold_size = answer_sizes[label]
    gold_tokens = answers[label][:gold_size]
    gold_tags = answer_tags[label][:gold_size]

    # question [SEP] answer-choice.
    sep_token, sep_tag = ['[SEP]'], [-1]
    qa_tokens_list = [
        tf.concat([question_tokens, sep_token, tokens], 0)
        for tokens in answer_tokens_list
    ]
    qa_tags_list = [
        tf.concat([question_tags, sep_tag, tags], 0)
        for tags in answer_tags_list
    ]
    qa_tokens, qa_sizes = _pad_sequences(qa_tokens_list)
    qa_tags, _ = _pad_sequences(qa_tags_list, pad=-1)

    # question [SEP] labelled-answer [SEP] rationale-choice.
    qar_tokens_list = [
        tf.concat(
            [question_tokens, sep_token, gold_tokens, sep_token, tokens], 0)
        for tokens in rationale_tokens_list
    ]
    qar_tags_list = [
        tf.concat([question_tags, sep_tag, gold_tags, sep_tag, tags], 0)
        for tags in rationale_tags_list
    ]
    qar_tokens, qar_sizes = _pad_sequences(qar_tokens_list)
    qar_tags, _ = _pad_sequences(qar_tags_list, pad=-1)

    decoded_example.update({
        InputFields.num_detections: box_count,
        InputFields.detection_classes: vocab_lookup(classes),
        InputFields.detection_features: box_features,
        InputFields.question:
            _repeat_per_choice(vocab_lookup(question_tokens),
                               [NUM_CHOICES, 1]),
        InputFields.question_tag:
            _repeat_per_choice(question_tags, [NUM_CHOICES, 1]),
        InputFields.question_len:
            _repeat_per_choice(question_size, [NUM_CHOICES]),
        InputFields.answer_choices: vocab_lookup(answers),
        InputFields.answer_choices_tag: answer_tags,
        InputFields.answer_choices_len: answer_sizes,
        InputFields.rationale_choices: vocab_lookup(rationales),
        InputFields.rationale_choices_tag: rationale_tags,
        InputFields.rationale_choices_len: rationale_sizes,
        InputFields.mixed_answer_choices: vocab_lookup(qa_tokens),
        InputFields.mixed_answer_choices_tag: qa_tags,
        InputFields.mixed_answer_choices_len: qa_sizes,
        InputFields.mixed_rationale_choices: vocab_lookup(qar_tokens),
        InputFields.mixed_rationale_choices_tag: qar_tags,
        InputFields.mixed_rationale_choices_len: qar_sizes,
    })

    return decoded_example
コード例 #24
0
    def predict(self, inputs, **kwargs):
        """Scores each answer choice with BERT over text tokens and objects.

        Args:
          inputs: A dictionary of input tensors keyed by names.

        Returns:
          predictions: A dictionary of prediction tensors keyed by name.
        """
        training = self._is_training
        model_options = self._model_proto

        vocab_lookup = token_to_id.TokenToIdLayer(
            model_options.bert_vocab_file, model_options.bert_unk_token_id)
        bert_config = BertConfig.from_json_file(
            model_options.bert_config_file)
        build_fc_scope = hyperparams.build_hyperparams(
            model_options.fc_hyperparams, training)
        fc_scope = build_fc_scope()

        # Object inputs; the trimmed boxes and scores are not used here.
        (num_objects, _, object_labels, _, object_features,
         max_num_objects) = _trim_to_max_num_objects(
             inputs[InputFields.num_detections],
             inputs[InputFields.detection_boxes],
             inputs[InputFields.detection_classes],
             inputs[InputFields.detection_scores],
             inputs[InputFields.detection_features],
             max_num_objects=model_options.max_num_objects)

        # Object features are projected to the BERT hidden size.
        object_features = _predict_object_embeddings(
            object_features,
            bert_config.hidden_size,
            fc_scope,
            keep_prob=model_options.dropout_keep_prob,
            is_training=training)

        # Text inputs; which fields are read is configured on the instance.
        choices = inputs[self._field_answer_choices]
        choice_tags = inputs[self._field_answer_choices_tag]
        choice_lens = inputs[self._field_answer_choices_len]
        batch_size = choices.shape[0]

        choice_tags = _assign_invalid_tags(choice_tags, max_num_objects)

        # Token ids and mask, with the choice axis folded into the batch.
        flat_token_ids = tf.reshape(vocab_lookup(choices),
                                    [batch_size * NUM_CHOICES, -1])
        flat_mask = tf.reshape(
            tf.sequence_mask(choice_lens, maxlen=tf.shape(choices)[-1]),
            [batch_size * NUM_CHOICES, -1])

        # Per-token tag features looked up from the object features.
        flat_tags = tf.reshape(choice_tags, [batch_size * NUM_CHOICES, -1])
        tag_embeddings = _ground_tag_using_object_features(
            object_features, flat_tags)

        tiled_masks, tiled_ids, tiled_features = _tile_objects(
            num_objects, vocab_lookup(object_labels), object_features)

        # BERT inputs: the text sequence followed by the object sequence.
        bert_ids = tf.concat([flat_token_ids, tiled_ids], -1)
        bert_tag_embeddings = tf.concat([tag_embeddings, tiled_features], 1)
        bert_mask = tf.concat([flat_mask, tiled_masks], -1)

        bert_output = self._bert_model(
            bert_ids,
            bert_tag_embeddings,
            bert_mask,
            bert_config,
            bert_checkpoint_file=model_options.bert_checkpoint_file,
            is_training=training)

        # Classification layer: one logit per (example, choice).
        with slim.arg_scope(fc_scope):
            logits = slim.fully_connected(bert_output,
                                          num_outputs=1,
                                          activation_fn=None,
                                          scope='logits')
            logits = tf.reshape(logits, [batch_size, NUM_CHOICES])

        return {FIELD_ANSWER_PREDICTION: logits}
コード例 #25
0
ファイル: vbert.py プロジェクト: yekeren/VCR
    def predict(self, inputs, **kwargs):
        """Predicts answer logits from BERT text features and image features.

        Draws the object boxes onto the image for an image summary, extracts
        per-object features with `fast_rcnn.FastRCNN`, averages them using a
        mask built from `num_objects`, encodes every answer choice with
        BERT, and scores the concatenation of both features with a single
        dense unit.

        Side effects: adds the 'vcr/detection' image summary and registers
        the BERT checkpoint as the initializer of the matching variables.

        Args:
          inputs: A dictionary of input tensors keyed by names.
          **kwargs: Not used by this method.

        Returns:
          predictions: A dictionary mapping FIELD_ANSWER_PREDICTION to the
            per-choice logits (the trailing unit dimension is squeezed away).
        """
        is_training = self._is_training
        options = self._model_proto

        (image, height, width, num_objects, object_bboxes, object_labels,
         object_scores, answer_choices,
         answer_choices_len) = (
             inputs[InputFields.img_data],
             inputs[InputFields.img_height],
             inputs[InputFields.img_width],
             inputs[InputFields.num_objects],
             inputs[InputFields.object_bboxes],
             inputs[InputFields.object_labels],
             inputs[InputFields.object_scores],
             inputs[InputFields.answer_choices_with_question],
             inputs[InputFields.answer_choices_with_question_len])

        # Visualize image and object bboxes.
        # NOTE(review): the static batch dimension is used in the reshapes
        # below, so it presumably has to be known at graph-construction time.
        batch_size = image.shape[0]

        image_batch_shape = tf.shape(image)
        object_bboxes = _to_batch_coordinates(object_bboxes, height, width,
                                              image_batch_shape[1],
                                              image_batch_shape[2])
        image_with_boxes = visualization.draw_bounding_boxes_on_image_tensors(
            image, num_objects, object_bboxes, object_labels, object_scores)
        tf.summary.image('vcr/detection', image_with_boxes, max_outputs=10)

        # Extract FRCNN feature.
        frcnn_features = fast_rcnn.FastRCNN(tf.cast(image, tf.float32),
                                            object_bboxes,
                                            options=options.fast_rcnn_config,
                                            is_training=is_training)
        object_masks = tf.sequence_mask(num_objects,
                                        tf.shape(object_bboxes)[1],
                                        dtype=tf.float32)
        image_feature = masked_ops.masked_avg_nd(frcnn_features,
                                                 object_masks,
                                                 dim=1)

        # Convert tokens into token ids.
        token_to_id_layer = token_to_id.TokenToIdLayer(
            options.bert_vocab_file, options.bert_unk_token_id)
        answer_choices_token_ids = token_to_id_layer(answer_choices)
        answer_choices_token_ids_reshaped = tf.reshape(
            answer_choices_token_ids, [batch_size * NUM_CHOICES, -1])

        answer_choices_mask = tf.sequence_mask(
            answer_choices_len, maxlen=tf.shape(answer_choices)[-1])
        answer_choices_mask_reshaped = tf.reshape(
            answer_choices_mask, [batch_size * NUM_CHOICES, -1])

        # Bert prediction.
        bert_config = BertConfig.from_json_file(options.bert_config_file)
        bert_model = BertModel(bert_config,
                               is_training,
                               input_ids=answer_choices_token_ids_reshaped,
                               input_mask=answer_choices_mask_reshaped)

        answer_choices_cls_feature_reshaped = bert_model.get_pooled_output()
        answer_choices_cls_feature = tf.reshape(
            answer_choices_cls_feature_reshaped, [batch_size, NUM_CHOICES, -1])

        # Initialize the BERT variables from the pre-trained checkpoint.
        # Building the assignment map alone has no effect; it has to be
        # handed to `init_from_checkpoint` for the weights to be loaded.
        # This runs before the dense layer below is created, so only the
        # variables that exist so far are considered for the map.
        assignment_map, _ = get_assignment_map_from_checkpoint(
            tf.global_variables(), options.bert_checkpoint_file)
        tf.compat.v1.train.init_from_checkpoint(options.bert_checkpoint_file,
                                                assignment_map)

        # Fuse image feature.
        # The three tile multiples require `image_feature` to be rank 3;
        # presumably [batch, 1, dims] -- TODO confirm against masked_avg_nd.
        image_feature_tiled = tf.tile(image_feature, [1, NUM_CHOICES, 1])
        answer_choices_cls_feature = tf.concat(
            [answer_choices_cls_feature, image_feature_tiled], -1)

        # Classification layer.
        output = tf.compat.v1.layers.dense(answer_choices_cls_feature,
                                           units=1,
                                           activation=None)
        output = tf.squeeze(output, axis=-1)

        return {FIELD_ANSWER_PREDICTION: output}