class ConvNextFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
    """Checks the ConvNext feature extractor's attributes and output shapes."""

    # Evaluated once at class creation; None when is_vision_available() is false.
    feature_extraction_class = ConvNextFeatureExtractor if is_vision_available() else None

    def setUp(self):
        self.feature_extract_tester = ConvNextFeatureExtractionTester(self)

    @property
    def feat_extract_dict(self):
        """Constructor kwargs supplied by the tester object."""
        return self.feature_extract_tester.prepare_feat_extract_dict()

    def _square_output_shape(self, batch_size):
        """Build the expected ``pixel_values`` shape for ``batch_size`` images."""
        tester = self.feature_extract_tester
        return (batch_size, tester.num_channels, tester.size, tester.size)

    def _run_call_checks(self, expected_type, **input_kwargs):
        """Call the extractor on one image and on the batch; verify both shapes.

        ``input_kwargs`` is forwarded to ``prepare_image_inputs`` and selects
        the input flavour (nothing, ``numpify=True`` or ``torchify=True``).
        """
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        inputs = prepare_image_inputs(
            self.feature_extract_tester, equal_resolution=False, **input_kwargs
        )
        for item in inputs:
            self.assertIsInstance(item, expected_type)

        # Single (unbatched) input
        single = extractor(inputs[0], return_tensors="pt").pixel_values
        self.assertEqual(single.shape, self._square_output_shape(1))

        # Whole batch
        batch = extractor(inputs, return_tensors="pt").pixel_values
        self.assertEqual(
            batch.shape,
            self._square_output_shape(self.feature_extract_tester.batch_size),
        )

    def test_feat_extract_properties(self):
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        expected_attributes = (
            "do_resize",
            "size",
            "crop_pct",
            "do_normalize",
            "image_mean",
            "image_std",
        )
        for attribute in expected_attributes:
            self.assertTrue(hasattr(extractor, attribute))

    def test_batch_feature(self):
        # Deliberately empty.
        pass

    def test_call_pil(self):
        # Random PIL images
        self._run_call_checks(Image.Image)

    def test_call_numpy(self):
        # Random numpy arrays
        self._run_call_checks(np.ndarray, numpify=True)

    def test_call_pytorch(self):
        # Random PyTorch tensors
        self._run_call_checks(torch.Tensor, torchify=True)
class SegformerFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
    """Tests the Segformer feature extractor: attributes, shapes and label handling."""

    # Evaluated once at class creation; None when is_vision_available() is false.
    feature_extraction_class = SegformerFeatureExtractor if is_vision_available() else None

    def setUp(self):
        self.feature_extract_tester = SegformerFeatureExtractionTester(self)

    @property
    def feat_extract_dict(self):
        """Constructor kwargs supplied by the tester object."""
        return self.feature_extract_tester.prepare_feat_extract_dict()

    # ------------------------------------------------------------------
    # Private assertion helpers (not collected as tests)
    # ------------------------------------------------------------------
    def _expected_pixel_values_shape(self, batch_size):
        """Return ``(batch_size, num_channels, size, size)`` from the tester."""
        tester = self.feature_extract_tester
        return (batch_size, tester.num_channels, tester.size, tester.size)

    def _expected_labels_shape(self, batch_size):
        """Return ``(batch_size, size, size)`` from the tester."""
        tester = self.feature_extract_tester
        return (batch_size, tester.size, tester.size)

    def _assert_label_range(self, labels, upper_bound):
        """Assert every label lies in ``[0, upper_bound]``.

        Uses the comparison-specific assertions so a failure reports the
        offending value instead of a bare "False is not true".
        """
        self.assertGreaterEqual(labels.min().item(), 0)
        self.assertLessEqual(labels.max().item(), upper_bound)

    def _assert_segmentation_encoding(self, encoding, batch_size):
        """Check shapes, dtype and value range of an image + map encoding."""
        self.assertEqual(
            encoding["pixel_values"].shape,
            self._expected_pixel_values_shape(batch_size),
        )
        self.assertEqual(
            encoding["labels"].shape, self._expected_labels_shape(batch_size)
        )
        self.assertEqual(encoding["labels"].dtype, torch.long)
        self._assert_label_range(encoding["labels"], 255)

    def _assert_call_output_shapes(self, expected_type, **input_kwargs):
        """Run the extractor unbatched and batched and verify ``pixel_values``.

        ``input_kwargs`` is forwarded to ``prepare_image_inputs`` and selects
        the input flavour (nothing, ``numpify=True`` or ``torchify=True``).
        """
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        image_inputs = prepare_image_inputs(
            self.feature_extract_tester, equal_resolution=False, **input_kwargs
        )
        for image in image_inputs:
            self.assertIsInstance(image, expected_type)

        # Test not batched input
        encoded_images = feature_extractor(image_inputs[0], return_tensors="pt").pixel_values
        self.assertEqual(encoded_images.shape, self._expected_pixel_values_shape(1))

        # Test batched
        encoded_images = feature_extractor(image_inputs, return_tensors="pt").pixel_values
        self.assertEqual(
            encoded_images.shape,
            self._expected_pixel_values_shape(self.feature_extract_tester.batch_size),
        )

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------
    def test_feat_extract_properties(self):
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        expected_attributes = (
            "do_resize",
            "size",
            "do_normalize",
            "image_mean",
            "image_std",
            "reduce_labels",
        )
        for attribute in expected_attributes:
            self.assertTrue(hasattr(feature_extractor, attribute))

    def test_batch_feature(self):
        # Deliberately empty.
        pass

    def test_call_pil(self):
        # Random PIL images
        self._assert_call_output_shapes(Image.Image)

    def test_call_numpy(self):
        # Random numpy arrays
        self._assert_call_output_shapes(np.ndarray, numpify=True)

    def test_call_pytorch(self):
        # Random PyTorch tensors
        self._assert_call_output_shapes(torch.Tensor, torchify=True)

    def test_call_segmentation_maps(self):
        # Initialize feature_extractor
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)

        # Create random PyTorch tensors plus an all-zero map matching each
        # image's trailing two dimensions.
        image_inputs = prepare_image_inputs(
            self.feature_extract_tester, equal_resolution=False, torchify=True
        )
        maps = []
        for image in image_inputs:
            self.assertIsInstance(image, torch.Tensor)
            maps.append(torch.zeros(image.shape[-2:]).long())

        # Test not batched input
        encoding = feature_extractor(image_inputs[0], maps[0], return_tensors="pt")
        self._assert_segmentation_encoding(encoding, 1)

        # Test batched
        encoding = feature_extractor(image_inputs, maps, return_tensors="pt")
        self._assert_segmentation_encoding(
            encoding, self.feature_extract_tester.batch_size
        )

        # Test not batched input (PIL images)
        image, segmentation_map = prepare_semantic_single_inputs()
        encoding = feature_extractor(image, segmentation_map, return_tensors="pt")
        self._assert_segmentation_encoding(encoding, 1)

        # Test batched input (PIL images); the expected batch size here is 2
        images, segmentation_maps = prepare_semantic_batch_inputs()
        encoding = feature_extractor(images, segmentation_maps, return_tensors="pt")
        self._assert_segmentation_encoding(encoding, 2)

    def test_reduce_labels(self):
        # Initialize feature_extractor
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)

        # ADE20k has 150 classes, and the background is included, so labels
        # should be between 0 and 150.
        # (The local is named segmentation_map, not `map`, to avoid shadowing
        # the builtin.)
        image, segmentation_map = prepare_semantic_single_inputs()
        encoding = feature_extractor(image, segmentation_map, return_tensors="pt")
        self._assert_label_range(encoding["labels"], 150)

        # With reduce_labels switched on the accepted upper bound becomes 255.
        feature_extractor.reduce_labels = True
        encoding = feature_extractor(image, segmentation_map, return_tensors="pt")
        self._assert_label_range(encoding["labels"], 255)
def default_feature_extractor(self):
    """Load the "google/vit-base-patch16-224" feature extractor.

    Returns None when is_vision_available() is false.
    """
    if not is_vision_available():
        return None
    return ViTFeatureExtractor.from_pretrained("google/vit-base-patch16-224")
class DetrFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
    """Tests the DETR feature extractor: attributes, shapes, padding and COCO targets."""

    # Evaluated once at class creation; None when is_vision_available() is false.
    feature_extraction_class = DetrFeatureExtractor if is_vision_available() else None

    def setUp(self):
        self.feature_extract_tester = DetrFeatureExtractionTester(self)

    @property
    def feat_extract_dict(self):
        """Constructor kwargs supplied by the tester object."""
        return self.feature_extract_tester.prepare_feat_extract_dict()

    # ------------------------------------------------------------------
    # Private assertion helpers (not collected as tests)
    # ------------------------------------------------------------------
    def _assert_allclose(self, actual, expected, **kwargs):
        """Assert ``torch.allclose(actual, expected, **kwargs)``.

        Goes through ``assertTrue`` rather than a bare ``assert`` so the
        check still runs when Python is started with ``-O``.
        """
        self.assertTrue(torch.allclose(actual, expected, **kwargs))

    def _assert_call_output_shapes(self, expected_type, **input_kwargs):
        """Run the extractor unbatched and batched and verify ``pixel_values``.

        Expected height/width come from the tester's ``get_expected_values``.
        ``input_kwargs`` is forwarded to ``prepare_image_inputs`` and selects
        the input flavour (nothing, ``numpify=True`` or ``torchify=True``).
        """
        tester = self.feature_extract_tester
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        image_inputs = prepare_image_inputs(tester, equal_resolution=False, **input_kwargs)
        for image in image_inputs:
            self.assertIsInstance(image, expected_type)

        # Test not batched input
        encoded_images = feature_extractor(image_inputs[0], return_tensors="pt").pixel_values
        expected_height, expected_width = tester.get_expected_values(image_inputs)
        self.assertEqual(
            encoded_images.shape,
            (1, tester.num_channels, expected_height, expected_width),
        )

        # Test batched
        encoded_images = feature_extractor(image_inputs, return_tensors="pt").pixel_values
        expected_height, expected_width = tester.get_expected_values(image_inputs, batched=True)
        self.assertEqual(
            encoded_images.shape,
            (tester.batch_size, tester.num_channels, expected_height, expected_width),
        )

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------
    def test_feat_extract_properties(self):
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        expected_attributes = (
            "image_mean",
            "image_std",
            "do_normalize",
            "do_resize",
            "size",
            "max_size",
        )
        for attribute in expected_attributes:
            self.assertTrue(hasattr(feature_extractor, attribute))

    def test_batch_feature(self):
        # Deliberately empty.
        pass

    def test_call_pil(self):
        # Random PIL images
        self._assert_call_output_shapes(Image.Image)

    def test_call_numpy(self):
        # Random numpy arrays
        self._assert_call_output_shapes(np.ndarray, numpify=True)

    def test_call_pytorch(self):
        # Random PyTorch tensors
        self._assert_call_output_shapes(torch.Tensor, torchify=True)

    def test_equivalence_pad_and_create_pixel_mask(self):
        # Initialize feature_extractors
        feature_extractor_1 = self.feature_extraction_class(**self.feat_extract_dict)
        feature_extractor_2 = self.feature_extraction_class(do_resize=False, do_normalize=False)

        # create random PyTorch tensors
        image_inputs = prepare_image_inputs(
            self.feature_extract_tester, equal_resolution=False, torchify=True
        )
        for image in image_inputs:
            self.assertIsInstance(image, torch.Tensor)

        # Test whether the method "pad_and_create_pixel_mask" and calling the
        # feature extractor (with resizing and normalization disabled) return
        # the same tensors
        encoded_images_with_method = feature_extractor_1.pad_and_create_pixel_mask(
            image_inputs, return_tensors="pt"
        )
        encoded_images = feature_extractor_2(image_inputs, return_tensors="pt")

        self._assert_allclose(
            encoded_images_with_method["pixel_values"], encoded_images["pixel_values"], atol=1e-4
        )
        self._assert_allclose(
            encoded_images_with_method["pixel_mask"], encoded_images["pixel_mask"], atol=1e-4
        )

    @slow
    def test_call_pytorch_with_coco_detection_annotations(self):
        # prepare image and target
        image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
        with open("./tests/fixtures/tests_samples/COCO/coco_annotations.txt", "r") as f:
            target = json.load(f)

        target = {"image_id": 39769, "annotations": target}

        # encode them
        feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50")
        encoding = feature_extractor(images=image, annotations=target, return_tensors="pt")
        labels = encoding["labels"][0]

        # verify pixel values
        expected_shape = torch.Size([1, 3, 800, 1066])
        self.assertEqual(encoding["pixel_values"].shape, expected_shape)

        expected_slice = torch.tensor([0.2796, 0.3138, 0.3481])
        self._assert_allclose(encoding["pixel_values"][0, 0, 0, :3], expected_slice, atol=1e-4)

        # verify area
        expected_area = torch.tensor([5887.9600, 11250.2061, 489353.8438, 837122.7500, 147967.5156, 165732.3438])
        self._assert_allclose(labels["area"], expected_area)
        # verify boxes
        expected_boxes_shape = torch.Size([6, 4])
        self.assertEqual(labels["boxes"].shape, expected_boxes_shape)
        expected_boxes_slice = torch.tensor([0.5503, 0.2765, 0.0604, 0.2215])
        self._assert_allclose(labels["boxes"][0], expected_boxes_slice, atol=1e-3)
        # verify image_id
        expected_image_id = torch.tensor([39769])
        self._assert_allclose(labels["image_id"], expected_image_id)
        # verify is_crowd
        expected_is_crowd = torch.tensor([0, 0, 0, 0, 0, 0])
        self._assert_allclose(labels["iscrowd"], expected_is_crowd)
        # verify class_labels
        expected_class_labels = torch.tensor([75, 75, 63, 65, 17, 17])
        self._assert_allclose(labels["class_labels"], expected_class_labels)
        # verify orig_size
        expected_orig_size = torch.tensor([480, 640])
        self._assert_allclose(labels["orig_size"], expected_orig_size)
        # verify size
        expected_size = torch.tensor([800, 1066])
        self._assert_allclose(labels["size"], expected_size)

    @slow
    def test_call_pytorch_with_coco_panoptic_annotations(self):
        # prepare image, target and masks_path
        image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
        with open("./tests/fixtures/tests_samples/COCO/coco_panoptic_annotations.txt", "r") as f:
            target = json.load(f)

        target = {"file_name": "000000039769.png", "image_id": 39769, "segments_info": target}

        masks_path = pathlib.Path("./tests/fixtures/tests_samples/COCO/coco_panoptic")

        # encode them
        # TODO replace by .from_pretrained facebook/detr-resnet-50-panoptic
        feature_extractor = DetrFeatureExtractor(format="coco_panoptic")
        encoding = feature_extractor(images=image, annotations=target, masks_path=masks_path, return_tensors="pt")
        labels = encoding["labels"][0]

        # verify pixel values
        expected_shape = torch.Size([1, 3, 800, 1066])
        self.assertEqual(encoding["pixel_values"].shape, expected_shape)

        expected_slice = torch.tensor([0.2796, 0.3138, 0.3481])
        self._assert_allclose(encoding["pixel_values"][0, 0, 0, :3], expected_slice, atol=1e-4)

        # verify area
        expected_area = torch.tensor([147979.6875, 165527.0469, 484638.5938, 11292.9375, 5879.6562, 7634.1147])
        self._assert_allclose(labels["area"], expected_area)
        # verify boxes
        expected_boxes_shape = torch.Size([6, 4])
        self.assertEqual(labels["boxes"].shape, expected_boxes_shape)
        expected_boxes_slice = torch.tensor([0.2625, 0.5437, 0.4688, 0.8625])
        self._assert_allclose(labels["boxes"][0], expected_boxes_slice, atol=1e-3)
        # verify image_id
        expected_image_id = torch.tensor([39769])
        self._assert_allclose(labels["image_id"], expected_image_id)
        # verify is_crowd
        expected_is_crowd = torch.tensor([0, 0, 0, 0, 0, 0])
        self._assert_allclose(labels["iscrowd"], expected_is_crowd)
        # verify class_labels
        expected_class_labels = torch.tensor([17, 17, 63, 75, 75, 93])
        self._assert_allclose(labels["class_labels"], expected_class_labels)
        # verify masks
        expected_masks_sum = 822338
        self.assertEqual(labels["masks"].sum().item(), expected_masks_sum)
        # verify orig_size
        expected_orig_size = torch.tensor([480, 640])
        self._assert_allclose(labels["orig_size"], expected_orig_size)
        # verify size
        expected_size = torch.tensor([800, 1066])
        self._assert_allclose(labels["size"], expected_size)
def default_feature_extractor(self):
    """Load the feature extractor for the first RegNet archive-list entry.

    Returns None when is_vision_available() is false; the archive list is
    only indexed on the available path.
    """
    if not is_vision_available():
        return None
    checkpoint = REGNET_PRETRAINED_MODEL_ARCHIVE_LIST[0]
    return AutoFeatureExtractor.from_pretrained(checkpoint)
class SegformerFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
    """Tests the Segformer feature extractor: attributes, resizing, cropping and padding."""

    # Evaluated once at class creation; None when is_vision_available() is false.
    feature_extraction_class = SegformerFeatureExtractor if is_vision_available() else None

    def setUp(self):
        self.feature_extract_tester = SegformerFeatureExtractionTester(self)

    @property
    def feat_extract_dict(self):
        """Constructor kwargs supplied by the tester object."""
        return self.feature_extract_tester.prepare_feat_extract_dict()

    # ------------------------------------------------------------------
    # Private assertion helpers (not collected as tests)
    # ------------------------------------------------------------------
    def _expected_cropped_shape(self, batch_size):
        """Return the expected ``pixel_values`` shape for ``batch_size`` images.

        ``crop_size`` is reversed because it is given as (width, height) while
        the encoded tensor ends in (height, width): test_random_crop passes
        ``crop_size=[w - 20, h - 20]`` and expects trailing dimensions
        ``(h - 20, w - 20)``.
        """
        tester = self.feature_extract_tester
        return (batch_size, tester.num_channels, *tester.crop_size[::-1])

    def _assert_call_output_shapes(self, expected_type, feature_extractor=None, **input_kwargs):
        """Run the extractor unbatched and batched and verify ``pixel_values``.

        Args:
            expected_type: type every prepared input must be an instance of.
            feature_extractor: extractor to exercise; when None, one is built
                from ``feat_extract_dict``.
            **input_kwargs: forwarded to ``prepare_image_inputs`` (nothing,
                ``numpify=True`` or ``torchify=True``).
        """
        if feature_extractor is None:
            feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        image_inputs = prepare_image_inputs(
            self.feature_extract_tester, equal_resolution=False, **input_kwargs
        )
        for image in image_inputs:
            self.assertIsInstance(image, expected_type)

        # Test not batched input
        encoded_images = feature_extractor(image_inputs[0], return_tensors="pt").pixel_values
        self.assertEqual(encoded_images.shape, self._expected_cropped_shape(1))

        # Test batched
        encoded_images = feature_extractor(image_inputs, return_tensors="pt").pixel_values
        self.assertEqual(
            encoded_images.shape,
            self._expected_cropped_shape(self.feature_extract_tester.batch_size),
        )

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------
    def test_feat_extract_properties(self):
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        expected_attributes = (
            "do_resize",
            "keep_ratio",
            "image_scale",
            "align",
            "size_divisor",
            "do_random_crop",
            "crop_size",
            "do_normalize",
            "image_mean",
            "image_std",
            "do_pad",
        )
        for attribute in expected_attributes:
            self.assertTrue(hasattr(feature_extractor, attribute))

    def test_batch_feature(self):
        # Deliberately empty.
        pass

    def test_call_pil(self):
        # Random PIL images. The unbatched check now uses the reversed
        # crop_size like every other shape check in this class; previously it
        # used the unreversed value, which only matches for square crops.
        self._assert_call_output_shapes(Image.Image)

    def test_call_numpy(self):
        # Random numpy arrays
        self._assert_call_output_shapes(np.ndarray, numpify=True)

    def test_call_pytorch(self):
        # Random PyTorch tensors
        self._assert_call_output_shapes(torch.Tensor, torchify=True)

    def test_resize(self):
        # Initialize feature_extractor: version 1 (no align, keep_ratio=True)
        feature_extractor = SegformerFeatureExtractor(
            image_scale=(1333, 800), align=False, do_random_crop=False, do_pad=False
        )

        # Create random PyTorch tensor
        image = torch.randn((3, 288, 512))

        # Verify shape
        encoded_images = feature_extractor(image, return_tensors="pt").pixel_values
        expected_shape = (1, 3, 750, 1333)
        self.assertEqual(encoded_images.shape, expected_shape)

        # Initialize feature_extractor: version 2 (keep_ratio=False)
        feature_extractor = SegformerFeatureExtractor(
            image_scale=(1280, 800), align=False, keep_ratio=False, do_random_crop=False, do_pad=False
        )

        # Verify shape
        encoded_images = feature_extractor(image, return_tensors="pt").pixel_values
        expected_shape = (1, 3, 800, 1280)
        self.assertEqual(encoded_images.shape, expected_shape)

    def test_aligned_resize(self):
        # Initialize feature_extractor: version 1
        feature_extractor = SegformerFeatureExtractor(do_random_crop=False, do_pad=False)
        # Create random PyTorch tensor
        image = torch.randn((3, 256, 304))

        # Verify shape
        encoded_images = feature_extractor(image, return_tensors="pt").pixel_values
        expected_shape = (1, 3, 512, 608)
        self.assertEqual(encoded_images.shape, expected_shape)

        # Initialize feature_extractor: version 2
        feature_extractor = SegformerFeatureExtractor(image_scale=(1024, 2048), do_random_crop=False, do_pad=False)
        # create random PyTorch tensor
        image = torch.randn((3, 1024, 2048))

        # Verify shape
        encoded_images = feature_extractor(image, return_tensors="pt").pixel_values
        expected_shape = (1, 3, 1024, 2048)
        self.assertEqual(encoded_images.shape, expected_shape)

    def test_random_crop(self):
        # Imported locally, as in the original test.
        from datasets import load_dataset

        ds = load_dataset("hf-internal-testing/fixtures_ade20k", split="test")

        image = Image.open(ds[0]["file"])
        segmentation_map = Image.open(ds[1]["file"])

        w, h = image.size

        # Initialize feature_extractor
        feature_extractor = SegformerFeatureExtractor(crop_size=[w - 20, h - 20], do_pad=False)

        # Encode image + segmentation map
        encoded_images = feature_extractor(images=image, segmentation_maps=segmentation_map, return_tensors="pt")

        # Verify shape of pixel_values
        self.assertEqual(encoded_images.pixel_values.shape[-2:], (h - 20, w - 20))

        # Verify shape of labels
        self.assertEqual(encoded_images.labels.shape[-2:], (h - 20, w - 20))

    def test_pad(self):
        # Initialize feature_extractor (note that padding should only be applied when random cropping)
        feature_extractor = SegformerFeatureExtractor(
            align=False, do_random_crop=True, crop_size=self.feature_extract_tester.crop_size, do_pad=True
        )
        # Random PyTorch tensors, checked unbatched and batched
        self._assert_call_output_shapes(
            torch.Tensor, feature_extractor=feature_extractor, torchify=True
        )
def default_feature_extractor(self):
    """Load the "microsoft/beit-base-patch16-224" feature extractor.

    Returns None when is_vision_available() is false.
    """
    if not is_vision_available():
        return None
    return BeitFeatureExtractor.from_pretrained("microsoft/beit-base-patch16-224")
def default_feature_extractor(self):
    """Load the "facebook/data2vec-vision-base-ft1k" feature extractor.

    Returns None when is_vision_available() is false.
    """
    if not is_vision_available():
        return None
    return BeitFeatureExtractor.from_pretrained("facebook/data2vec-vision-base-ft1k")
def default_feature_extractor(self):
    """Load the "microsoft/swin-tiny-patch4-window7-224" feature extractor.

    Returns None when is_vision_available() is false.
    """
    if not is_vision_available():
        return None
    return AutoFeatureExtractor.from_pretrained("microsoft/swin-tiny-patch4-window7-224")
def default_feature_extractor(self):
    """Load the "facebook/convnext-tiny-224" feature extractor.

    Returns None when is_vision_available() is false.
    """
    if not is_vision_available():
        return None
    return AutoFeatureExtractor.from_pretrained("facebook/convnext-tiny-224")
def default_feature_extractor(self):
    """Load the "openai/imagegpt-small" feature extractor.

    Returns None when is_vision_available() is false.
    """
    if not is_vision_available():
        return None
    return ImageGPTFeatureExtractor.from_pretrained("openai/imagegpt-small")
class ViltFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
    """Checks the ViLT feature extractor's attributes, output shapes and padding."""

    # Evaluated once at class creation; None when is_vision_available() is false.
    feature_extraction_class = ViltFeatureExtractor if is_vision_available() else None

    def setUp(self):
        self.feature_extract_tester = ViltFeatureExtractionTester(self)

    @property
    def feat_extract_dict(self):
        """Constructor kwargs supplied by the tester object."""
        return self.feature_extract_tester.prepare_feat_extract_dict()

    def _run_call_checks(self, expected_type, **input_kwargs):
        """Call the extractor on one image and on the batch; verify both shapes.

        Expected height/width come from the tester's ``get_expected_values``.
        ``input_kwargs`` is forwarded to ``prepare_image_inputs`` and selects
        the input flavour (nothing, ``numpify=True`` or ``torchify=True``).
        """
        tester = self.feature_extract_tester
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        inputs = prepare_image_inputs(tester, equal_resolution=False, **input_kwargs)
        for item in inputs:
            self.assertIsInstance(item, expected_type)

        # Single (unbatched) input
        single = extractor(inputs[0], return_tensors="pt").pixel_values
        height, width = tester.get_expected_values(inputs)
        self.assertEqual(single.shape, (1, tester.num_channels, height, width))

        # Whole batch
        batch = extractor(inputs, return_tensors="pt").pixel_values
        height, width = tester.get_expected_values(inputs, batched=True)
        self.assertEqual(
            batch.shape,
            (tester.batch_size, tester.num_channels, height, width),
        )

    def test_feat_extract_properties(self):
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        expected_attributes = (
            "image_mean",
            "image_std",
            "do_normalize",
            "do_resize",
            "size",
            "size_divisor",
        )
        for attribute in expected_attributes:
            self.assertTrue(hasattr(extractor, attribute))

    def test_batch_feature(self):
        # Deliberately empty.
        pass

    def test_call_pil(self):
        # Random PIL images
        self._run_call_checks(Image.Image)

    def test_call_numpy(self):
        # Random numpy arrays
        self._run_call_checks(np.ndarray, numpify=True)

    def test_call_pytorch(self):
        # Random PyTorch tensors
        self._run_call_checks(torch.Tensor, torchify=True)

    def test_equivalence_pad_and_create_pixel_mask(self):
        # Two extractors: one built from the tester's kwargs, one with
        # resizing and normalization switched off.
        configured = self.feature_extraction_class(**self.feat_extract_dict)
        passthrough = self.feature_extraction_class(do_resize=False, do_normalize=False)

        # Random PyTorch tensors
        inputs = prepare_image_inputs(
            self.feature_extract_tester, equal_resolution=False, torchify=True
        )
        for item in inputs:
            self.assertIsInstance(item, torch.Tensor)

        # pad_and_create_pixel_mask and a plain call on the pass-through
        # extractor must yield matching tensors.
        from_method = configured.pad_and_create_pixel_mask(inputs, return_tensors="pt")
        from_call = passthrough(inputs, return_tensors="pt")

        for key in ("pixel_values", "pixel_mask"):
            self.assertTrue(torch.allclose(from_method[key], from_call[key], atol=1e-4))
def default_processor(self):
    """Return the pretrained TrOCR handwritten processor.

    Returns ``None`` when ``is_vision_available()`` is false, in which case
    nothing is loaded.
    """
    if not is_vision_available():
        return None
    return TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
class ImageGPTFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
    """Attribute and serialization tests for ``ImageGPTFeatureExtractor``.

    In every dict comparison the ``"clusters"`` entry is checked with
    ``np.array_equal``; all other entries are checked with ``assertEqual``.
    """

    feature_extraction_class = ImageGPTFeatureExtractor if is_vision_available() else None

    def setUp(self):
        self.feature_extract_tester = ImageGPTFeatureExtractionTester(self)

    @property
    def feat_extract_dict(self):
        """Keyword arguments used to build the feature extractor under test."""
        return self.feature_extract_tester.prepare_feat_extract_dict()

    def _assert_matches_expected(self, expected, actual):
        """Assert that every entry of ``expected`` has an equal entry in ``actual``.

        Args:
            expected: Mapping whose items drive the comparison.
            actual: Mapping that is looked up with each key of ``expected``.
        """
        for key, value in expected.items():
            if key == "clusters":
                self.assertTrue(np.array_equal(value, actual[key]))
            else:
                self.assertEqual(actual[key], value)

    def test_feat_extract_properties(self):
        """The extractor exposes the expected configuration attributes."""
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        self.assertTrue(hasattr(feature_extractor, "clusters"))
        self.assertTrue(hasattr(feature_extractor, "do_resize"))
        self.assertTrue(hasattr(feature_extractor, "size"))
        self.assertTrue(hasattr(feature_extractor, "do_normalize"))

    def test_feat_extract_to_json_string(self):
        """The JSON string produced by the extractor contains the init values."""
        feat_extract = self.feature_extraction_class(**self.feat_extract_dict)
        obj = json.loads(feat_extract.to_json_string())
        self._assert_matches_expected(self.feat_extract_dict, obj)

    def test_feat_extract_to_json_file(self):
        """Writing to a JSON file and reading it back preserves ``to_dict()``."""
        feat_extract_first = self.feature_extraction_class(**self.feat_extract_dict)

        with tempfile.TemporaryDirectory() as tmpdirname:
            json_file_path = os.path.join(tmpdirname, "feat_extract.json")
            feat_extract_first.to_json_file(json_file_path)
            feat_extract_second = self.feature_extraction_class.from_json_file(json_file_path).to_dict()

        feat_extract_first = feat_extract_first.to_dict()
        # Compare against the reloaded dict: checking the first dict against
        # its own items would pass regardless of what was read back.
        self._assert_matches_expected(feat_extract_first, feat_extract_second)

    def test_feat_extract_from_and_save_pretrained(self):
        """``save_pretrained`` followed by ``from_pretrained`` preserves ``to_dict()``."""
        feat_extract_first = self.feature_extraction_class(**self.feat_extract_dict)

        with tempfile.TemporaryDirectory() as tmpdirname:
            feat_extract_first.save_pretrained(tmpdirname)
            feat_extract_second = self.feature_extraction_class.from_pretrained(tmpdirname).to_dict()

        feat_extract_first = feat_extract_first.to_dict()
        # Compare against the reloaded dict: checking the first dict against
        # its own items would pass regardless of what was read back.
        self._assert_matches_expected(feat_extract_first, feat_extract_second)

    @unittest.skip("ImageGPT requires clusters at initialization")
    def test_init_without_params(self):
        pass
from .test_configuration_common import ConfigTester from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor if is_torch_available(): import torch from torch import nn from transformers import ( MODEL_MAPPING, DeiTForImageClassification, DeiTForImageClassificationWithTeacher, DeiTModel, ) from transformers.models.deit.modeling_deit import DEIT_PRETRAINED_MODEL_ARCHIVE_LIST, to_2tuple if is_vision_available(): from PIL import Image from transformers import DeiTFeatureExtractor class DeiTModelTester: def __init__( self, parent, batch_size=13, image_size=30, patch_size=2, num_channels=3, is_training=True, use_labels=True,
def default_feature_extractor(self):
    """Return the pretrained ``facebook/vit-mae-base`` feature extractor.

    Returns ``None`` when ``is_vision_available()`` is false, in which case
    nothing is loaded.
    """
    if not is_vision_available():
        return None
    return ViTFeatureExtractor.from_pretrained("facebook/vit-mae-base")
def default_feature_extractor(self):
    """Return the pretrained distilled DeiT feature extractor.

    Returns ``None`` when ``is_vision_available()`` is false, in which case
    nothing is loaded.
    """
    if not is_vision_available():
        return None
    checkpoint = "facebook/deit-base-distilled-patch16-224"
    return DeiTFeatureExtractor.from_pretrained(checkpoint)
class MaskFormerFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
    """Tests for ``MaskFormerFeatureExtractor``.

    Covers output shapes for PIL / numpy / PyTorch inputs, padding with a
    pixel mask, size divisibility, annotation handling and the
    ``post_process_*`` helpers.
    """

    feature_extraction_class = (
        MaskFormerFeatureExtractor if (is_vision_available() and is_torch_available()) else None
    )

    def setUp(self):
        self.feature_extract_tester = MaskFormerFeatureExtractionTester(self)

    @property
    def feat_extract_dict(self):
        """Keyword arguments used to build the feature extractor under test."""
        return self.feature_extract_tester.prepare_feat_extract_dict()

    def test_feat_extract_properties(self):
        """The extractor exposes the expected configuration attributes."""
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        self.assertTrue(hasattr(feature_extractor, "image_mean"))
        self.assertTrue(hasattr(feature_extractor, "image_std"))
        self.assertTrue(hasattr(feature_extractor, "do_normalize"))
        self.assertTrue(hasattr(feature_extractor, "do_resize"))
        self.assertTrue(hasattr(feature_extractor, "size"))
        self.assertTrue(hasattr(feature_extractor, "max_size"))

    def test_batch_feature(self):
        pass

    def test_call_pil(self):
        """PIL inputs yield ``pixel_values`` with the expected shape."""
        # Initialize feature_extractor
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # create random PIL images
        image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False)
        for image in image_inputs:
            self.assertIsInstance(image, Image.Image)

        # Test not batched input
        encoded_images = feature_extractor(image_inputs[0], return_tensors="pt").pixel_values
        expected_height, expected_width = self.feature_extract_tester.get_expected_values(image_inputs)
        self.assertEqual(
            encoded_images.shape,
            (1, self.feature_extract_tester.num_channels, expected_height, expected_width),
        )

        # Test batched
        expected_height, expected_width = self.feature_extract_tester.get_expected_values(
            image_inputs, batched=True
        )
        encoded_images = feature_extractor(image_inputs, return_tensors="pt").pixel_values
        self.assertEqual(
            encoded_images.shape,
            (
                self.feature_extract_tester.batch_size,
                self.feature_extract_tester.num_channels,
                expected_height,
                expected_width,
            ),
        )

    def test_call_numpy(self):
        """numpy inputs yield ``pixel_values`` with the expected shape."""
        # Initialize feature_extractor
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # create random numpy tensors
        image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, numpify=True)
        for image in image_inputs:
            self.assertIsInstance(image, np.ndarray)

        # Test not batched input
        encoded_images = feature_extractor(image_inputs[0], return_tensors="pt").pixel_values
        expected_height, expected_width = self.feature_extract_tester.get_expected_values(image_inputs)
        self.assertEqual(
            encoded_images.shape,
            (1, self.feature_extract_tester.num_channels, expected_height, expected_width),
        )

        # Test batched
        encoded_images = feature_extractor(image_inputs, return_tensors="pt").pixel_values
        expected_height, expected_width = self.feature_extract_tester.get_expected_values(
            image_inputs, batched=True
        )
        self.assertEqual(
            encoded_images.shape,
            (
                self.feature_extract_tester.batch_size,
                self.feature_extract_tester.num_channels,
                expected_height,
                expected_width,
            ),
        )

    def test_call_pytorch(self):
        """PyTorch inputs yield ``pixel_values`` with the expected shape."""
        # Initialize feature_extractor
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # create random PyTorch tensors
        image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, torchify=True)
        for image in image_inputs:
            self.assertIsInstance(image, torch.Tensor)

        # Test not batched input
        encoded_images = feature_extractor(image_inputs[0], return_tensors="pt").pixel_values
        expected_height, expected_width = self.feature_extract_tester.get_expected_values(image_inputs)
        self.assertEqual(
            encoded_images.shape,
            (1, self.feature_extract_tester.num_channels, expected_height, expected_width),
        )

        # Test batched
        encoded_images = feature_extractor(image_inputs, return_tensors="pt").pixel_values
        expected_height, expected_width = self.feature_extract_tester.get_expected_values(
            image_inputs, batched=True
        )
        self.assertEqual(
            encoded_images.shape,
            (
                self.feature_extract_tester.batch_size,
                self.feature_extract_tester.num_channels,
                expected_height,
                expected_width,
            ),
        )

    def test_equivalence_pad_and_create_pixel_mask(self):
        """``encode_inputs`` matches calling an extractor with resize/normalize off."""
        # Initialize feature_extractors
        feature_extractor_1 = self.feature_extraction_class(**self.feat_extract_dict)
        feature_extractor_2 = self.feature_extraction_class(do_resize=False, do_normalize=False)
        # create random PyTorch tensors
        image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, torchify=True)
        for image in image_inputs:
            self.assertIsInstance(image, torch.Tensor)

        # Test whether the method "encode_inputs" and calling the feature
        # extractor return the same tensors
        encoded_images_with_method = feature_extractor_1.encode_inputs(image_inputs, return_tensors="pt")
        encoded_images = feature_extractor_2(image_inputs, return_tensors="pt")

        self.assertTrue(
            torch.allclose(
                encoded_images_with_method["pixel_values"], encoded_images["pixel_values"], atol=1e-4
            )
        )
        self.assertTrue(
            torch.allclose(
                encoded_images_with_method["pixel_mask"], encoded_images["pixel_mask"], atol=1e-4
            )
        )

    def comm_get_feature_extractor_inputs(self, with_annotations=False):
        """Run the feature extractor on a batch of images and return its output.

        Args:
            with_annotations: When true, one annotation dict per batch item is
                passed along, holding 8 random float32 ``"masks"`` of size
                384x384 and 8 random 0/1 int64 ``"labels"``. Otherwise
                ``None`` is passed as the annotations argument.

        Returns:
            The result of calling the feature extractor with
            ``return_tensors="pt"`` and ``pad_and_return_pixel_mask=True``.
        """
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # prepare image and target
        num_classes = 8
        batch_size = self.feature_extract_tester.batch_size
        annotations = None

        if with_annotations:
            annotations = [
                {
                    "masks": np.random.rand(num_classes, 384, 384).astype(np.float32),
                    "labels": (np.random.rand(num_classes) > 0.5).astype(np.int64),
                }
                for _ in range(batch_size)
            ]

        image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False)

        return feature_extractor(
            image_inputs, annotations, return_tensors="pt", pad_and_return_pixel_mask=True
        )

    def test_with_size_divisibility(self):
        """The last two dims of ``pixel_values`` are multiples of ``size_divisibility``."""
        size_divisibilities = [8, 16, 32]
        weird_input_sizes = [(407, 802), (582, 1094)]
        for size_divisibility in size_divisibilities:
            feat_extract_dict = {**self.feat_extract_dict, "size_divisibility": size_divisibility}
            feature_extractor = self.feature_extraction_class(**feat_extract_dict)
            for weird_input_size in weird_input_sizes:
                inputs = feature_extractor([np.ones((3, *weird_input_size))], return_tensors="pt")
                pixel_values = inputs["pixel_values"]
                # check if divisible
                self.assertEqual(pixel_values.shape[-1] % size_divisibility, 0)
                self.assertEqual(pixel_values.shape[-2] % size_divisibility, 0)

    def test_call_with_numpy_annotations(self):
        """Outputs produced with annotations have consistent batch, mask and label dims."""
        num_classes = 8
        batch_size = self.feature_extract_tester.batch_size

        inputs = self.comm_get_feature_extractor_inputs(with_annotations=True)

        # check the batch_size
        for el in inputs.values():
            self.assertEqual(el.shape[0], batch_size)

        pixel_values = inputs["pixel_values"]
        mask_labels = inputs["mask_labels"]
        class_labels = inputs["class_labels"]

        self.assertEqual(pixel_values.shape[-2], mask_labels.shape[-2])
        self.assertEqual(pixel_values.shape[-1], mask_labels.shape[-1])
        self.assertEqual(mask_labels.shape[1], class_labels.shape[1])
        self.assertEqual(mask_labels.shape[1], num_classes)

    def test_post_process_segmentation(self):
        """``post_process_segmentation`` returns per-class maps, optionally resized."""
        feature_extractor = self.feature_extraction_class()
        outputs = self.feature_extract_tester.get_fake_maskformer_outputs()
        segmentation = feature_extractor.post_process_segmentation(outputs)

        self.assertEqual(
            segmentation.shape,
            (
                self.feature_extract_tester.batch_size,
                self.feature_extract_tester.num_classes,
                self.feature_extract_tester.height,
                self.feature_extract_tester.width,
            ),
        )

        target_size = (1, 4)
        segmentation = feature_extractor.post_process_segmentation(outputs, target_size=target_size)

        self.assertEqual(
            segmentation.shape,
            (
                self.feature_extract_tester.batch_size,
                self.feature_extract_tester.num_classes,
                *target_size,
            ),
        )

    def test_post_process_semantic_segmentation(self):
        """``post_process_semantic_segmentation`` returns one map per batch item."""
        feature_extractor = self.feature_extraction_class()
        outputs = self.feature_extract_tester.get_fake_maskformer_outputs()

        segmentation = feature_extractor.post_process_semantic_segmentation(outputs)

        self.assertEqual(
            segmentation.shape,
            (
                self.feature_extract_tester.batch_size,
                self.feature_extract_tester.height,
                self.feature_extract_tester.width,
            ),
        )

        target_size = (1, 4)
        segmentation = feature_extractor.post_process_semantic_segmentation(
            outputs, target_size=target_size
        )

        self.assertEqual(segmentation.shape, (self.feature_extract_tester.batch_size, *target_size))

    def test_post_process_panoptic_segmentation(self):
        """``post_process_panoptic_segmentation`` returns one result dict per batch item."""
        feature_extractor = self.feature_extraction_class()
        outputs = self.feature_extract_tester.get_fake_maskformer_outputs()
        segmentation = feature_extractor.post_process_panoptic_segmentation(outputs, object_mask_threshold=0)

        self.assertEqual(len(segmentation), self.feature_extract_tester.batch_size)
        for el in segmentation:
            self.assertIn("segmentation", el)
            self.assertIn("segments", el)
            self.assertIsInstance(el["segments"], list)
            self.assertEqual(
                el["segmentation"].shape,
                (self.feature_extract_tester.height, self.feature_extract_tester.width),
            )