def test_inference_masked_lm(self):
        """Check masked-LM inference on the tiny random Longformer checkpoint.

        Feeds six token ids through ``TFLongformerForMaskedLM`` and verifies
        that the first element of the model output has shape ``[1, 6, 10]``
        and that its leading 3x3 slice matches the recorded reference values
        within an absolute tolerance of 1e-4.
        """
        model = TFLongformerForMaskedLM.from_pretrained(
            "lysandre/tiny-longformer-random")
        input_ids = tf.constant([[0, 1, 2, 3, 4, 5]])
        # First element of the model output (presumably the prediction logits).
        output = model(input_ids)[0]

        expected_shape = [1, 6, 10]
        self.assertEqual(output.shape, expected_shape)

        # Reference values for output[:, :3, :3]; no debug printing here so the
        # test run stays quiet.
        expected_slice = tf.constant([[
            [-0.04926379, 0.0367098, 0.02099686],
            [0.03940692, 0.01547744, -0.01448723],
            [0.03495252, -0.05900355, -0.01675752],
        ]])
        tf.debugging.assert_near(output[:, :3, :3], expected_slice, atol=1e-4)
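    # Example #2 (separator left over from the snippet listing; the stray "0"
    # that followed it was presumably a listing counter, not code)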
    def test_inference_masked_lm_long(self):
        """Check loss and logit statistics of Longformer masked-LM on a long input.

        Runs ``allenai/longformer-base-4096`` on a 4002-token sequence, using
        the input ids as labels, and compares the mean loss plus the sum and
        mean of the prediction scores with recorded reference values
        (relative tolerance 1e-4).
        """
        # 'Hello world! ' repeated 1000 times, wrapped between ids 0 and 2
        hello_world_ids = [20920, 232, 328, 1437]
        token_ids = [0] + hello_world_ids * 1000 + [2]
        input_ids = tf.convert_to_tensor([token_ids], dtype=tf.dtypes.int32)

        model = TFLongformerForMaskedLM.from_pretrained("allenai/longformer-base-4096")
        outputs = model(input_ids, labels=input_ids)

        # Reduce the model outputs to the scalars that are compared below.
        mean_loss = tf.reduce_mean(outputs.loss)
        scores = outputs.logits
        scores_sum = tf.reduce_sum(scores)
        scores_mean = tf.reduce_mean(scores)

        # assert close to the recorded reference values
        tf.debugging.assert_near(mean_loss, tf.constant(0.0073798), rtol=1e-4)
        tf.debugging.assert_near(scores_sum, tf.constant(-610476600.0), rtol=1e-4)
        tf.debugging.assert_near(scores_mean, tf.constant(-3.03477), rtol=1e-4)