def test_with_segmentation_maps_and_loss(self):
        """A forward pass with segmentation maps must yield a (non-None) loss."""
        model = (
            MaskFormerForInstanceSegmentation.from_pretrained(self.model_checkpoints)
            .to(torch_device)
            .eval()
        )
        feature_extractor = self.default_feature_extractor

        dummy_images = [np.zeros((3, 800, 1333)), np.zeros((3, 800, 1333))]
        dummy_maps = [np.zeros((384, 384), dtype=np.float32) for _ in range(2)]
        inputs = feature_extractor(
            dummy_images,
            segmentation_maps=dummy_maps,
            return_tensors="pt",
        )

        # Move the batched pixel tensor and the per-image label lists onto the
        # same device as the model.
        inputs["pixel_values"] = inputs["pixel_values"].to(torch_device)
        for key in ("mask_labels", "class_labels"):
            inputs[key] = [tensor.to(torch_device) for tensor in inputs[key]]

        with torch.no_grad():
            outputs = model(**inputs)

        self.assertTrue(outputs.loss is not None)
    # --- Example 2 (scraper separator: "Esempio n. 2", score 0) ---
    def test_inference_instance_segmentation_head(self):
        """Check the instance-segmentation head's output shapes and reference logits.

        Runs a single prepared image through the pretrained model and compares
        the leading 3x3 slice of both the mask and the class query logits
        against precomputed reference values, within TOLERANCE.
        """
        model = MaskFormerForInstanceSegmentation.from_pretrained(self.model_checkpoints).to(torch_device).eval()
        feature_extractor = self.default_feature_extractor
        image = prepare_img()
        inputs = feature_extractor(image, return_tensors="pt").to(torch_device)
        inputs_shape = inputs["pixel_values"].shape
        # check size is divisible by 32
        self.assertTrue((inputs_shape[-1] % 32) == 0 and (inputs_shape[-2] % 32) == 0)
        # check size
        self.assertEqual(inputs_shape, (1, 3, 800, 1088))

        with torch.no_grad():
            outputs = model(**inputs)

        # masks_queries_logits: spatial dims are 1/4 of the input (encoder compression)
        masks_queries_logits = outputs.masks_queries_logits
        self.assertEqual(
            masks_queries_logits.shape, (1, model.config.num_queries, inputs_shape[-2] // 4, inputs_shape[-1] // 4)
        )
        expected_slice = torch.tensor(
            [[-1.3738, -1.7725, -1.9365], [-1.5978, -1.9869, -2.1524], [-1.5796, -1.9271, -2.0940]]
        ).to(torch_device)
        self.assertTrue(torch.allclose(masks_queries_logits[0, 0, :3, :3], expected_slice, atol=TOLERANCE))

        # class_queries_logits: num_labels + 1 for the "no object" (null) class
        class_queries_logits = outputs.class_queries_logits
        self.assertEqual(class_queries_logits.shape, (1, model.config.num_queries, model.config.num_labels + 1))
        expected_slice = torch.tensor(
            [
                [1.6512e00, -5.2572e00, -3.3519e00],
                [3.6169e-02, -5.9025e00, -2.9313e00],
                [1.0766e-04, -7.7630e00, -5.1263e00],
            ]
        ).to(torch_device)
        # Use the local alias (was outputs.class_queries_logits) for consistency
        # with the masks_queries_logits check above; same tensor either way.
        self.assertTrue(torch.allclose(class_queries_logits[0, :3, :3], expected_slice, atol=TOLERANCE))
    # --- Example 3 (scraper separator: "Esempio n. 3", score 0) ---
    def test_with_annotations_and_loss(self):
        """A forward pass with instance annotations must yield a (non-None) loss."""
        model = MaskFormerForInstanceSegmentation.from_pretrained(
            self.model_checkpoints).to(torch_device).eval()
        feature_extractor = self.default_feature_extractor

        inputs = feature_extractor(
            [np.zeros((3, 800, 1333)),
             np.zeros((3, 800, 1333))],
            annotations=[
                {
                    "masks": np.random.rand(10, 384, 384).astype(np.float32),
                    "labels": np.zeros(10).astype(np.int64)
                },
                {
                    "masks": np.random.rand(10, 384, 384).astype(np.float32),
                    "labels": np.zeros(10).astype(np.int64)
                },
            ],
            return_tensors="pt",
        )

        # Move inputs to the test device. The model was moved above, but the
        # original test left the inputs on CPU, which fails with a
        # device-mismatch error when torch_device is CUDA. Mirrors
        # test_with_segmentation_maps_and_loss.
        # NOTE(review): assumes the annotations path also returns "mask_labels"
        # and "class_labels" lists, like the segmentation_maps path — confirm.
        inputs["pixel_values"] = inputs["pixel_values"].to(torch_device)
        inputs["mask_labels"] = [
            el.to(torch_device) for el in inputs["mask_labels"]
        ]
        inputs["class_labels"] = [
            el.to(torch_device) for el in inputs["class_labels"]
        ]

        with torch.no_grad():
            outputs = model(**inputs)

        self.assertTrue(outputs.loss is not None)
    # --- Example 4 (scraper separator: "Esempio n. 4", score 0) ---
    def test_maskformer(self):
        """Run the image-segmentation pipeline with MaskFormer and compare hashed masks."""
        from transformers import MaskFormerFeatureExtractor, MaskFormerForInstanceSegmentation

        model_id = "facebook/maskformer-swin-base-ade"
        threshold = 0.999

        model = MaskFormerForInstanceSegmentation.from_pretrained(model_id)
        feature_extractor = MaskFormerFeatureExtractor.from_pretrained(model_id)
        image_segmenter = pipeline("image-segmentation", model=model, feature_extractor=feature_extractor)

        dataset = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")
        outputs = image_segmenter(dataset[0]["file"], threshold=threshold)

        # Masks are images; replace each with a stable hash before comparing.
        for entry in outputs:
            entry["mask"] = hashimage(entry["mask"])

        expected = [
            {"mask": "20d1b9480d1dc1501dbdcfdff483e370", "label": "wall", "score": None},
            {"mask": "0f902fbc66a0ff711ea455b0e4943adf", "label": "house", "score": None},
            {"mask": "4537bdc07d47d84b3f8634b7ada37bd4", "label": "grass", "score": None},
            {"mask": "b7ac77dfae44a904b479a0926a2acaf7", "label": "tree", "score": None},
            {"mask": "e9bedd56bd40650fb263ce03eb621079", "label": "plant", "score": None},
            {"mask": "37a609f8c9c1b8db91fbff269f428b20", "label": "road, route", "score": None},
            {"mask": "0d8cdfd63bae8bf6e4344d460a2fa711", "label": "sky", "score": None},
        ]
        self.assertEqual(nested_simplify(outputs, decimals=4), expected)
    # --- Example 5 (scraper separator: "Esempio n. 5", score 0) ---
    def test_model_with_labels(self):
        """A freshly initialized model, given random labels, must return a loss."""
        side = self.model_tester.min_size
        batch = {
            "pixel_values": torch.randn((2, 3, side, side), device=torch_device),
            "mask_labels": torch.randn((2, 10, side, side), device=torch_device),
            "class_labels": torch.zeros(2, 10, device=torch_device).long(),
        }

        model = MaskFormerForInstanceSegmentation(MaskFormerConfig()).to(torch_device)
        outputs = model(**batch)
        self.assertTrue(outputs.loss is not None)
    def create_and_check_maskformer_instance_segmentation_head_model(
            self, config, pixel_values, pixel_mask, mask_labels, class_labels):
        """Build an instance-segmentation head model from `config` and check its
        outputs for three call signatures: keyword args without labels,
        positional pixel values only, and keyword args with labels (which must
        additionally produce a scalar-shaped loss)."""
        model = MaskFormerForInstanceSegmentation(config=config)
        model.to(torch_device)
        model.eval()

        def comm_check_on_output(result):
            # let's still check that all the required stuff is there
            self.parent.assertTrue(
                result.transformer_decoder_last_hidden_state is not None)
            self.parent.assertTrue(
                result.pixel_decoder_last_hidden_state is not None)
            self.parent.assertTrue(
                result.encoder_last_hidden_state is not None)
            # okay, now we need to check the logits shape
            # due to the encoder compression, masks have a //4 spatial size
            self.parent.assertEqual(
                result.masks_queries_logits.shape,
                (self.batch_size, self.num_queries, self.min_size // 4,
                 self.max_size // 4),
            )
            # + 1 for null class
            self.parent.assertEqual(
                result.class_queries_logits.shape,
                (self.batch_size, self.num_queries, self.num_labels + 1))

        with torch.no_grad():
            # Two call signatures are exercised on purpose: the first result is
            # deliberately overwritten — only the positional-call output is
            # inspected below.
            result = model(pixel_values=pixel_values, pixel_mask=pixel_mask)
            result = model(pixel_values)

            comm_check_on_output(result)

            # With labels the model should also compute a loss.
            result = model(pixel_values=pixel_values,
                           pixel_mask=pixel_mask,
                           mask_labels=mask_labels,
                           class_labels=class_labels)

        comm_check_on_output(result)

        self.parent.assertTrue(result.loss is not None)
        # Loss is reduced to a single element.
        self.parent.assertEqual(result.loss.shape, torch.Size([1]))