Example #1
    def create_and_check_model_with_global_attention_mask(
            self, config, input_ids, token_type_ids, input_mask,
            sequence_labels, token_labels, choice_labels):
        config.return_dict = True
        model = TFLongformerModel(config=config)
        half_input_mask_length = shape_list(input_mask)[-1] // 2
        global_attention_mask = tf.concat(
            [
                tf.zeros_like(input_mask)[:, :half_input_mask_length],
                tf.ones_like(input_mask)[:, half_input_mask_length:],
            ],
            axis=-1,
        )

        result = model(
            input_ids,
            attention_mask=input_mask,
            global_attention_mask=global_attention_mask,
            token_type_ids=token_type_ids,
        )
        result = model(input_ids,
                       token_type_ids=token_type_ids,
                       global_attention_mask=global_attention_mask)
        result = model(input_ids, global_attention_mask=global_attention_mask)

        self.parent.assertListEqual(
            shape_list(result.last_hidden_state),
            [self.batch_size, self.seq_length, self.hidden_size])
        self.parent.assertListEqual(shape_list(result.pooler_output),
                                    [self.batch_size, self.hidden_size])
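The same kind of global_attention_mask can be built for real tokenized input. Below is a minimal stand-alone sketch, not part of the test above, that puts global attention on the first (<s>) token only; the checkpoint name and the final print are illustrative:

import tensorflow as tf
from transformers import LongformerTokenizer, TFLongformerModel

tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")
model = TFLongformerModel.from_pretrained("allenai/longformer-base-4096")

inputs = tokenizer("Hello, my dog is cute", return_tensors="tf")
# 1 marks positions with global attention, 0 keeps local (sliding-window) attention
global_attention_mask = tf.concat(
    [
        tf.ones_like(inputs["input_ids"])[:, :1],
        tf.zeros_like(inputs["input_ids"])[:, 1:],
    ],
    axis=-1,
)
outputs = model(
    inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    global_attention_mask=global_attention_mask,
)
print(outputs[0].shape)  # (batch_size, sequence_length, hidden_size)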
Example #2
    def test_inference_no_head_long(self):
        model = TFLongformerModel.from_pretrained(
            "allenai/longformer-base-4096")

        # 'Hello world! ' repeated 1000 times
        input_ids = tf.convert_to_tensor(
            [[0] + [20920, 232, 328, 1437] * 1000 + [2]],
            dtype=tf.dtypes.int32)

        attention_mask = tf.ones(shape_list(input_ids), dtype=tf.dtypes.int32)
        global_attention_mask = tf.zeros(shape_list(input_ids),
                                         dtype=tf.dtypes.int32)
        # Set global attention on a few random positions
        global_attention_mask = tf.tensor_scatter_nd_update(
            global_attention_mask, tf.constant([[0, 1], [0, 4], [0, 21]]),
            tf.constant([1, 1, 1]))

        output = model(input_ids,
                       attention_mask=attention_mask,
                       global_attention_mask=global_attention_mask)[0]

        expected_output_sum = tf.constant(74585.875)
        expected_output_mean = tf.constant(0.024267)

        # assert close
        tf.debugging.assert_near(tf.reduce_sum(output),
                                 expected_output_sum,
                                 rtol=1e-4)
        tf.debugging.assert_near(tf.reduce_mean(output),
                                 expected_output_mean,
                                 rtol=1e-4)
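The tf.tensor_scatter_nd_update call above writes the value 1 at the listed [batch, position] indices of the all-zeros mask, i.e. it turns on global attention at positions 1, 4 and 21 of the 4002-token input. A tiny self-contained illustration of that behaviour with toy shapes:

import tensorflow as tf

mask = tf.zeros((1, 8), dtype=tf.dtypes.int32)
mask = tf.tensor_scatter_nd_update(mask, tf.constant([[0, 1], [0, 4]]), tf.constant([1, 1]))
print(mask.numpy())  # [[0 1 0 0 1 0 0 0]]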
Example #3
    def test_layer_local_attn(self):
        model = TFLongformerModel.from_pretrained(
            "patrickvonplaten/longformer-random-tiny")
        layer = model.longformer.encoder.layer[0].attention.self_attention
        hidden_states = self._get_hidden_states()
        batch_size, seq_length, hidden_size = hidden_states.shape

        attention_mask = tf.zeros((batch_size, seq_length),
                                  dtype=tf.dtypes.float32)
        is_index_global_attn = tf.math.greater(attention_mask, 1)
        is_global_attn = tf.math.reduce_any(is_index_global_attn)

        attention_mask = tf.where(
            tf.range(4)[None, :, None, None] > 1, -10000.0,
            attention_mask[:, :, None, None])
        is_index_masked = tf.math.less(attention_mask[:, :, 0, 0], 0)

        layer_head_mask = None

        output_hidden_states = layer([
            hidden_states, attention_mask, layer_head_mask, is_index_masked,
            is_index_global_attn, is_global_attn
        ])[0]

        expected_slice = tf.convert_to_tensor([
            0.00188, 0.012196, -0.017051, -0.025571, -0.02996, 0.017297,
            -0.011521, 0.004848
        ],
                                              dtype=tf.dtypes.float32)

        self.assertEqual(output_hidden_states.shape, (1, 4, 8))
        tf.debugging.assert_near(output_hidden_states[0, 1],
                                 expected_slice,
                                 rtol=1e-3)
Example #4
def create_model():
    # Longformer encoder
    encoder = TFLongformerModel.from_pretrained('weights.h5')

    # QA Model - Reproducing HuggingFace like QA model architecture
    input_ids = layers.Input(shape=(MAX_LEN,), dtype=tf.int32)
    #token_type_ids = layers.Input(shape=(MAX_LEN,), dtype=tf.int32)
    attention_mask = layers.Input(shape=(MAX_LEN,), dtype=tf.int32)
    embedding = encoder(
        input_ids, attention_mask=attention_mask
    )[0]

    start_logits = layers.Dense(1, name="start_logit", use_bias=False)(embedding)
    start_logits = layers.Flatten()(start_logits)

    end_logits = layers.Dense(1, name="end_logit", use_bias=False)(embedding)
    end_logits = layers.Flatten()(end_logits)

    start_probs = layers.Activation(keras.activations.softmax)(start_logits)
    end_probs = layers.Activation(keras.activations.softmax)(end_logits)

    model = keras.Model(
        inputs=[input_ids, attention_mask],
        outputs=[start_probs, end_probs],
    )
    loss = keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    optimizer = keras.optimizers.Adam(lr=LR)
    model.compile(optimizer=optimizer, loss=[loss, loss])
    return model
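Since the start and end logits pass through a softmax Activation, the loss is built with from_logits=False. A hedged usage sketch for training the model above; MAX_LEN, LR and the arrays used below are assumed to be defined elsewhere in the surrounding notebook, and the names are illustrative:

model = create_model()
model.summary()

# train_input_ids / train_attention_mask: int arrays of shape (num_samples, MAX_LEN)
# start_positions / end_positions: int arrays of shape (num_samples,)
model.fit(
    [train_input_ids, train_attention_mask],
    [start_positions, end_positions],
    epochs=3,
    batch_size=4,
)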
Example #5
    def __init__(
            self,
            pretrained_model_name_or_path='allenai/longformer-base-4096',
            reduce_output='cls_pooled',
            trainable=True,
            num_tokens=None,
            **kwargs
    ):
        super(LongformerEncoder, self).__init__()
        try:
            from transformers import TFLongformerModel
        except ModuleNotFoundError:
            logger.error(
                ' transformers is not installed. '
                'In order to install all text feature dependencies run '
                'pip install ludwig[text]'
            )
            sys.exit(-1)

        self.transformer = TFLongformerModel.from_pretrained(
            pretrained_model_name_or_path
        )
        self.reduce_output = reduce_output
        if not self.reduce_output == 'cls_pooled':
            self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
        self.transformer.trainable = trainable
        self.transformer.resize_token_embeddings(num_tokens)
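The encoder's call method is not part of this snippet. A plausible companion sketch, not Ludwig's actual implementation, showing how reduce_output='cls_pooled' could be honoured:

    def call(self, inputs, training=None, mask=None):
        transformer_outputs = self.transformer(inputs, training=training)
        if self.reduce_output == 'cls_pooled':
            # second output of TFLongformerModel is the pooled <s> representation
            hidden = transformer_outputs[1]
        else:
            # drop <s> and </s> before reducing over the sequence dimension
            hidden = self.reduce_sequence(transformer_outputs[0][:, 1:-1, :])
        return {'encoder_output': hidden}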
Example #6
    def test_layer_global_attn(self):
        model = TFLongformerModel.from_pretrained("patrickvonplaten/longformer-random-tiny", use_cdn=False)
        layer = model.longformer.encoder.layer[0].attention.self_attention
        hidden_states = self._get_hidden_states()

        hidden_states = tf.concat([self._get_hidden_states(), self._get_hidden_states() - 0.5], axis=0)
        batch_size, seq_length, hidden_size = hidden_states.shape

        # create attn mask
        attention_mask_1 = tf.zeros((1, 1, 1, seq_length), dtype=tf.dtypes.float32)
        attention_mask_2 = tf.zeros((1, 1, 1, seq_length), dtype=tf.dtypes.float32)

        attention_mask_1 = tf.where(tf.range(4)[None, None, None, :] > 1, 10000.0, attention_mask_1)
        attention_mask_1 = tf.where(tf.range(4)[None, None, None, :] > 2, -10000.0, attention_mask_1)
        attention_mask_2 = tf.where(tf.range(4)[None, None, None, :] > 0, 10000.0, attention_mask_2)
        attention_mask = tf.concat([attention_mask_1, attention_mask_2], axis=0)

        output_hidden_states = layer([hidden_states, attention_mask, None])[0]

        self.assertEqual(output_hidden_states.shape, (2, 4, 8))
        expected_slice_0 = tf.convert_to_tensor(
            [-0.06508, -0.039306, 0.030934, -0.03417, -0.00656, -0.01553, -0.02088, -0.04938], dtype=tf.dtypes.float32
        )

        expected_slice_1 = tf.convert_to_tensor(
            [-0.04055, -0.038399, 0.0396, -0.03735, -0.03415, 0.01357, 0.00145, -0.05709], dtype=tf.dtypes.float32
        )

        tf.debugging.assert_near(output_hidden_states[0, 2], expected_slice_0, rtol=1e-3)
        tf.debugging.assert_near(output_hidden_states[1, -2], expected_slice_1, rtol=1e-3)
Example #7
    def _test_TFLongformer(self, size, large=False):
        from transformers import LongformerTokenizer, TFLongformerModel
        tokenizer = LongformerTokenizer.from_pretrained(size)
        model = TFLongformerModel.from_pretrained(size)
        input_dict = tokenizer("Hello, my dog is cute", return_tensors="tf")
        spec, input_dict = self.spec_and_pad(input_dict, max_length=512)
        outputs = ["last_hidden_state"]
        self.run_test(model, input_dict, input_signature=spec, outputs=outputs, large=large)
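In the tf2onnx test suite this helper would be driven by a concrete test method; a hypothetical invocation (checkpoint name and large flag are illustrative):

    def test_TFLongformer(self):
        self._test_TFLongformer("allenai/longformer-base-4096", large=True)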
Example #8
    def create_and_check_attention_mask_determinism(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        model = TFLongformerModel(config=config)

        attention_mask = tf.ones(input_ids.shape, dtype=tf.dtypes.int32)
        output_with_mask = model(input_ids, attention_mask=attention_mask)[0]
        output_without_mask = model(input_ids)[0]
        tf.debugging.assert_near(output_with_mask[0, 0, :5], output_without_mask[0, 0, :5], rtol=1e-4)
Example #9
    def create_and_check_model(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        config.return_dict = True
        model = TFLongformerModel(config=config)
        result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
        result = model(input_ids, token_type_ids=token_type_ids)
        result = model(input_ids)

        self.parent.assertListEqual(
            shape_list(result.last_hidden_state), [self.batch_size, self.seq_length, self.hidden_size]
        )
        self.parent.assertListEqual(shape_list(result.pooler_output), [self.batch_size, self.hidden_size])
Example #10
    def test_inference_no_head(self):
        model = TFLongformerModel.from_pretrained("allenai/longformer-base-4096")

        # 'Hello world!'
        input_ids = tf.convert_to_tensor([[0, 20920, 232, 328, 1437, 2]], dtype=tf.dtypes.int32)
        attention_mask = tf.ones(shape_list(input_ids), dtype=tf.dtypes.int32)

        output = model(input_ids, attention_mask=attention_mask)[0]
        output_without_mask = model(input_ids)[0]

        expected_output_slice = tf.convert_to_tensor(
            [0.0549, 0.1087, -0.1119, -0.0368, 0.0250], dtype=tf.dtypes.float32
        )

        tf.debugging.assert_near(output[0, 0, -5:], expected_output_slice, rtol=1e-3)
        tf.debugging.assert_near(output_without_mask[0, 0, -5:], expected_output_slice, rtol=1e-3)
Example #11
    def create_and_check_longformer_model(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        model = TFLongformerModel(config=config)
        sequence_output, pooled_output = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
        sequence_output, pooled_output = model(input_ids, token_type_ids=token_type_ids)
        sequence_output, pooled_output = model(input_ids)

        result = {
            "sequence_output": sequence_output,
            "pooled_output": pooled_output,
        }
        self.parent.assertListEqual(
            shape_list(result["sequence_output"]), [self.batch_size, self.seq_length, self.hidden_size]
        )
        self.parent.assertListEqual(shape_list(result["pooled_output"]), [self.batch_size, self.hidden_size])
Example #12
    def test_layer_local_attn(self):
        model = TFLongformerModel.from_pretrained("patrickvonplaten/longformer-random-tiny", use_cdn=False)
        layer = model.longformer.encoder.layer[0].attention.self_attention
        hidden_states = self._get_hidden_states()
        batch_size, seq_length, hidden_size = hidden_states.shape

        attention_mask = tf.zeros((batch_size, 1, 1, seq_length), dtype=tf.dtypes.float32)
        attention_mask = tf.where(tf.range(4)[None, None, None, :] > 1, -10000.0, attention_mask)

        output_hidden_states = layer([hidden_states, attention_mask, None])[0]

        expected_slice = tf.convert_to_tensor(
            [0.00188, 0.012196, -0.017051, -0.025571, -0.02996, 0.017297, -0.011521, 0.004848], dtype=tf.dtypes.float32
        )

        self.assertEqual(output_hidden_states.shape, (1, 4, 8))
        tf.debugging.assert_near(output_hidden_states[0, 1], expected_slice, rtol=1e-3)
Example #13
    def test_layer_attn_probs(self):
        model = TFLongformerModel.from_pretrained(
            "patrickvonplaten/longformer-random-tiny")
        layer = model.longformer.encoder.layer[0].attention.self_attention
        hidden_states = tf.concat(
            [self._get_hidden_states(),
             self._get_hidden_states() - 0.5],
            axis=0)
        batch_size, seq_length, hidden_size = hidden_states.shape

        # create attn mask
        attention_mask_1 = tf.zeros((1, 1, 1, seq_length),
                                    dtype=tf.dtypes.float32)
        attention_mask_2 = tf.zeros((1, 1, 1, seq_length),
                                    dtype=tf.dtypes.float32)

        attention_mask_1 = tf.where(
            tf.range(4)[None, :, None, None] > 1, 10000.0, attention_mask_1)
        attention_mask_1 = tf.where(
            tf.range(4)[None, :, None, None] > 2, -10000.0, attention_mask_1)
        attention_mask_2 = tf.where(
            tf.range(4)[None, :, None, None] > 0, 10000.0, attention_mask_2)
        attention_mask = tf.concat([attention_mask_1, attention_mask_2],
                                   axis=0)

        is_index_masked = tf.math.less(attention_mask[:, :, 0, 0], 0)
        is_index_global_attn = tf.math.greater(attention_mask[:, :, 0, 0], 0)
        is_global_attn = tf.math.reduce_any(is_index_global_attn)

        layer_head_mask = None

        output_hidden_states, local_attentions, global_attentions = layer([
            hidden_states,
            -tf.math.abs(attention_mask),
            layer_head_mask,
            is_index_masked,
            is_index_global_attn,
            is_global_attn,
        ])

        self.assertEqual(local_attentions.shape, (2, 4, 2, 8))
        self.assertEqual(global_attentions.shape, (2, 2, 3, 4))

        self.assertTrue((local_attentions[0, 2:4, :, :] == 0).numpy().tolist())
        self.assertTrue((local_attentions[1, 1:4, :, :] == 0).numpy().tolist())

        # The weight of all tokens with local attention must sum to 1.
        self.assertTrue((tf.math.abs(
            tf.math.reduce_sum(global_attentions[0, :, :2, :], axis=-1) - 1) <
                         1e-6).numpy().tolist())
        self.assertTrue((tf.math.abs(
            tf.math.reduce_sum(global_attentions[1, :, :1, :], axis=-1) - 1) <
                         1e-6).numpy().tolist())

        tf.debugging.assert_near(
            local_attentions[0, 0, 0, :],
            tf.convert_to_tensor([
                0.3328, 0.0000, 0.0000, 0.0000, 0.0000, 0.3355, 0.3318, 0.0000
            ],
                                 dtype=tf.dtypes.float32),
            rtol=1e-3,
        )

        tf.debugging.assert_near(
            local_attentions[1, 0, 0, :],
            tf.convert_to_tensor([
                0.2492, 0.2502, 0.2502, 0.0000, 0.0000, 0.2505, 0.0000, 0.0000
            ],
                                 dtype=tf.dtypes.float32),
            rtol=1e-3,
        )

        # All the global attention weights must sum to 1.
        self.assertTrue(
            (tf.math.abs(tf.math.reduce_sum(global_attentions, axis=-1) - 1) <
             1e-6).numpy().tolist())

        tf.debugging.assert_near(
            global_attentions[0, 0, 1, :],
            tf.convert_to_tensor([0.2500, 0.2500, 0.2500, 0.2500],
                                 dtype=tf.dtypes.float32),
            rtol=1e-3,
        )
        tf.debugging.assert_near(
            global_attentions[1, 0, 0, :],
            tf.convert_to_tensor([0.2497, 0.2500, 0.2499, 0.2504],
                                 dtype=tf.dtypes.float32),
            rtol=1e-3,
        )
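The per-layer attention probabilities inspected above can also be requested from the full model by passing output_attentions=True. A minimal sketch, with an illustrative checkpoint and input:

import tensorflow as tf
from transformers import LongformerTokenizer, TFLongformerModel

tokenizer = LongformerTokenizer.from_pretrained("allenai/longformer-base-4096")
model = TFLongformerModel.from_pretrained("allenai/longformer-base-4096")

inputs = tokenizer("Hello world!", return_tensors="tf")
global_attention_mask = tf.concat(
    [tf.ones_like(inputs["input_ids"])[:, :1], tf.zeros_like(inputs["input_ids"])[:, 1:]],
    axis=-1,
)
outputs = model(
    inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    global_attention_mask=global_attention_mask,
    output_attentions=True,
    return_dict=True,
)
# outputs.attentions: per-layer local (sliding-window) attention weights
# outputs.global_attentions: per-layer weights of the tokens with global attention
print(len(outputs.attentions), outputs.attentions[0].shape)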