def create_and_check_model(self, config, input_ids, bbox, image,
                               token_type_ids, input_mask, sequence_labels,
                               token_labels):
        """Instantiate a LayoutLMv2Model and verify that the last hidden
        state and pooler output have the expected shapes.

        The forward pass is exercised with progressively fewer optional
        inputs; only the final (minimal-argument) result is shape-checked.
        """
        model = LayoutLMv2Model(config=config)
        model.to(torch_device)
        model.eval()

        result = model(input_ids, bbox=bbox, image=image,
                       attention_mask=input_mask,
                       token_type_ids=token_type_ids)
        result = model(input_ids, bbox=bbox, image=image,
                       token_type_ids=token_type_ids)
        result = model(input_ids, bbox=bbox, image=image)

        # LayoutLMv2 appends visual tokens to the text sequence, so the
        # expected length includes the flattened image feature pool.
        visual_tokens = (self.image_feature_pool_shape[0]
                         * self.image_feature_pool_shape[1])
        expected_seq_len = self.seq_length + visual_tokens
        self.parent.assertEqual(
            result.last_hidden_state.shape,
            (self.batch_size, expected_seq_len, self.hidden_size))
        self.parent.assertEqual(result.pooler_output.shape,
                                (self.batch_size, self.hidden_size))
    def test_inference_no_head(self):
        """Integration test: run the base pretrained model (no task head)
        on a prepared batch and check output shapes plus a reference slice
        of the last hidden state."""
        model = LayoutLMv2Model.from_pretrained(
            "microsoft/layoutlmv2-base-uncased").to(torch_device)

        batch = prepare_layoutlmv2_batch_inputs()
        input_ids, bbox, image, attention_mask, token_type_ids = (
            tensor.to(torch_device) for tensor in batch)

        # forward pass
        outputs = model(
            input_ids=input_ids,
            bbox=bbox,
            image=image,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )

        # The sequence output covers text tokens plus the flattened visual
        # feature map that the model appends.
        config = model.config
        visual_tokens = (config.image_feature_pool_shape[0]
                         * config.image_feature_pool_shape[1])
        expected_shape = torch.Size(
            (2, input_ids.shape[1] + visual_tokens, config.hidden_size))
        self.assertEqual(outputs.last_hidden_state.shape, expected_shape)

        # Reference values for the top-left 3x3 corner of the sequence
        # output, compared with a loose tolerance.
        expected_slice = torch.tensor(
            [[-0.1087, 0.0727, -0.3075],
             [0.0799, -0.0427, -0.0751],
             [-0.0367, 0.0480, -0.1358]],
            device=torch_device)
        self.assertTrue(
            torch.allclose(outputs.last_hidden_state[0, :3, :3],
                           expected_slice,
                           atol=1e-3))

        # verify the pooled output shape
        self.assertEqual(outputs.pooler_output.shape,
                         torch.Size((2, config.hidden_size)))
 def test_model_from_pretrained(self):
     """Smoke test: the first archived checkpoint loads into a model."""
     for checkpoint in LAYOUTLMV2_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
         self.assertIsNotNone(LayoutLMv2Model.from_pretrained(checkpoint))