def create_and_check_maskformer_model(self,
                                          config,
                                          pixel_values,
                                          pixel_mask,
                                          output_hidden_states=False):
        """Build a MaskFormerModel from ``config``, run it in eval mode, and
        sanity-check the shapes/presence of its outputs.

        Args:
            config: MaskFormer configuration used to instantiate the model.
            pixel_values: Batched input images tensor.
                # assumes shape (batch_size, channels, height, width) — TODO confirm
            pixel_mask: Padding mask passed alongside the images.
            output_hidden_states: When True, additionally run
                ``check_output_hidden_state`` on the outputs.
        """
        with torch.no_grad():
            model = MaskFormerModel(config=config)
            model.to(torch_device)
            model.eval()

            # First call exercises the (pixel_values, pixel_mask) signature as a
            # smoke test; its result is deliberately discarded.
            output = model(pixel_values=pixel_values, pixel_mask=pixel_mask)
            # Second call (asserted below) requests hidden states so that
            # check_output_hidden_state can inspect them when enabled.
            output = model(pixel_values, output_hidden_states=True)
        # the correct shape of output.transformer_decoder_hidden_states ensures the
        # correctness of the encoder and pixel decoder
        self.parent.assertEqual(
            output.transformer_decoder_last_hidden_state.shape,
            (self.batch_size, self.num_queries, self.mask_feature_size),
        )
        # let's ensure the other two last hidden states exist
        self.parent.assertTrue(
            output.pixel_decoder_last_hidden_state is not None)
        self.parent.assertTrue(output.encoder_last_hidden_state is not None)

        if output_hidden_states:
            self.check_output_hidden_state(output, config)
    def test_inference_no_head(self):
        """Integration test: run the headless pretrained checkpoint on a sample
        image and compare 3x3 slices of each output against reference values."""
        model = MaskFormerModel.from_pretrained(
            self.model_checkpoints).to(torch_device)
        extractor = self.default_feature_extractor
        image = prepare_img()
        batch = extractor(image, return_tensors="pt").to(torch_device)

        shape = batch["pixel_values"].shape
        # The preprocessed size must be divisible by 32
        self.assertTrue(shape[-1] % 32 == 0 and shape[-2] % 32 == 0)
        # Exact expected preprocessed size for this image
        self.assertEqual(shape, (1, 3, 800, 1088))

        with torch.no_grad():
            outputs = model(**batch)

        # Pairs of (actual 3x3 output slice, reference activations) — one entry
        # per model output being verified.
        slice_checks = [
            (
                outputs.encoder_last_hidden_state[0, 0, :3, :3],
                [[-0.0482, 0.9228, 0.4951],
                 [-0.2547, 0.8017, 0.8527],
                 [-0.0069, 0.3385, -0.0089]],
            ),
            (
                outputs.pixel_decoder_last_hidden_state[0, 0, :3, :3],
                [[-0.8422, -0.8434, -0.9718],
                 [-1.0144, -0.5565, -0.4195],
                 [-1.0038, -0.4484, -0.1961]],
            ),
            (
                outputs.transformer_decoder_last_hidden_state[0, :3, :3],
                [[0.2852, -0.0159, 0.9735],
                 [0.6254, 0.1858, 0.8529],
                 [-0.0680, -0.4116, 1.8413]],
            ),
        ]
        for actual, reference in slice_checks:
            expected = torch.tensor(reference).to(torch_device)
            self.assertTrue(torch.allclose(actual, expected, atol=TOLERANCE))
 def test_model_from_pretrained(self):
     for model_name in ["facebook/maskformer-swin-small-coco"]:
         model = MaskFormerModel.from_pretrained(model_name)
         self.assertIsNotNone(model)