class GLPNFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
    """Checks GLPNFeatureExtractor on single PIL, NumPy and PyTorch images."""

    # Resolves to None when is_vision_available() is falsy.
    feature_extraction_class = GLPNFeatureExtractor if is_vision_available() else None

    def setUp(self):
        self.feature_extract_tester = GLPNFeatureExtractionTester(self)

    @property
    def feat_extract_dict(self):
        """Keyword arguments used to build the feature extractor under test."""
        tester = self.feature_extract_tester
        return tester.prepare_feat_extract_dict()

    def test_feat_extract_properties(self):
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        for attribute in ("do_resize", "size_divisor", "resample", "do_rescale"):
            self.assertTrue(hasattr(extractor, attribute))

    def test_batch_feature(self):
        # Intentionally empty.
        # NOTE(review): presumably disables an inherited test of the same name - confirm against the mixin.
        pass

    def _check_single_image_call(self, expected_type, **prepare_kwargs):
        """Encode the first prepared image and check both spatial dims are multiples of `size_divisor`.

        Args:
            expected_type: type every prepared input must be an instance of.
            **prepare_kwargs: extra keyword arguments forwarded to `prepare_image_inputs`.
        """
        # Initialize feature_extractor
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # create random inputs
        images = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, **prepare_kwargs)
        for image in images:
            self.assertIsInstance(image, expected_type)

        # Test not batched input (GLPNFeatureExtractor doesn't support batching)
        pixel_values = extractor(images[0], return_tensors="pt").pixel_values
        divisor = self.feature_extract_tester.size_divisor
        for axis in (-1, -2):
            self.assertEqual(pixel_values.shape[axis] % divisor, 0)

    def test_call_pil(self):
        self._check_single_image_call(Image.Image)

    def test_call_numpy(self):
        self._check_single_image_call(np.ndarray, numpify=True)

    def test_call_pytorch(self):
        self._check_single_image_call(torch.Tensor, torchify=True)
def default_feature_extractor(self):
    """Return the pretrained "apple/mobilevit-xx-small" feature extractor, or None when is_vision_available() is falsy."""
    if not is_vision_available():
        return None
    return MobileViTFeatureExtractor.from_pretrained("apple/mobilevit-xx-small")
def default_feature_extractor(self):
    """Return the pretrained "google/vit-base-patch16-224" feature extractor, or None when is_vision_available() is falsy."""
    if is_vision_available():
        checkpoint = "google/vit-base-patch16-224"
        return ViTFeatureExtractor.from_pretrained(checkpoint)
    return None
class MaskFormerFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
    """Tests for MaskFormerFeatureExtractor: call shapes, segmentation-map inputs and post-processing."""

    # Resolves to None unless both is_vision_available() and is_torch_available() are truthy.
    feature_extraction_class = (
        MaskFormerFeatureExtractor if (is_vision_available() and is_torch_available()) else None
    )

    def setUp(self):
        self.feature_extract_tester = MaskFormerFeatureExtractionTester(self)

    @property
    def feat_extract_dict(self):
        """Keyword arguments used to build the feature extractor under test."""
        return self.feature_extract_tester.prepare_feat_extract_dict()

    def test_feat_extract_properties(self):
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        expected_attributes = (
            "image_mean",
            "image_std",
            "do_normalize",
            "do_resize",
            "size",
            "max_size",
            "ignore_index",
            "num_labels",
        )
        for attribute in expected_attributes:
            self.assertTrue(hasattr(feature_extractor, attribute))

    def test_batch_feature(self):
        # Intentionally empty.
        # NOTE(review): presumably disables an inherited test of the same name - confirm against the mixin.
        pass

    def _check_call_shapes(self, expected_type, **prepare_kwargs):
        """Run the extractor on one image and on the whole batch and check `pixel_values` shapes.

        Args:
            expected_type: type every prepared input must be an instance of.
            **prepare_kwargs: extra keyword arguments forwarded to `prepare_image_inputs`.
        """
        tester = self.feature_extract_tester
        # Initialize feature_extractor
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # create random inputs of varying resolution
        image_inputs = prepare_image_inputs(tester, equal_resolution=False, **prepare_kwargs)
        for image in image_inputs:
            self.assertIsInstance(image, expected_type)

        # Test not batched input
        encoded_images = feature_extractor(image_inputs[0], return_tensors="pt").pixel_values
        expected_height, expected_width = tester.get_expected_values(image_inputs)
        self.assertEqual(
            encoded_images.shape,
            (1, tester.num_channels, expected_height, expected_width),
        )

        # Test batched
        encoded_images = feature_extractor(image_inputs, return_tensors="pt").pixel_values
        expected_height, expected_width = tester.get_expected_values(image_inputs, batched=True)
        self.assertEqual(
            encoded_images.shape,
            (tester.batch_size, tester.num_channels, expected_height, expected_width),
        )

    def test_call_pil(self):
        self._check_call_shapes(Image.Image)

    def test_call_numpy(self):
        self._check_call_shapes(np.ndarray, numpify=True)

    def test_call_pytorch(self):
        self._check_call_shapes(torch.Tensor, torchify=True)

    def test_equivalence_pad_and_create_pixel_mask(self):
        # Initialize feature_extractors
        feature_extractor_1 = self.feature_extraction_class(**self.feat_extract_dict)
        feature_extractor_2 = self.feature_extraction_class(
            do_resize=False, do_normalize=False, num_labels=self.feature_extract_tester.num_classes
        )
        # create random PyTorch tensors
        image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, torchify=True)
        for image in image_inputs:
            self.assertIsInstance(image, torch.Tensor)

        # Test whether the method "encode_inputs" and calling the feature extractor return the same tensors
        encoded_images_with_method = feature_extractor_1.encode_inputs(image_inputs, return_tensors="pt")
        encoded_images = feature_extractor_2(image_inputs, return_tensors="pt")

        for key in ("pixel_values", "pixel_mask"):
            self.assertTrue(torch.allclose(encoded_images_with_method[key], encoded_images[key], atol=1e-4))

    def comm_get_feature_extractor_inputs(
        self, with_segmentation_maps=False, is_instance_map=False, segmentation_type="np"
    ):
        """Build extractor inputs, optionally with random segmentation maps.

        Args:
            with_segmentation_maps: when truthy, one random 384x384 uint8 map is generated per batch item.
            is_instance_map: when truthy, the random id range is doubled and an
                `instance_id_to_semantic_id` mapping covering that range is passed along.
            segmentation_type: `"pil"` converts the maps to PIL images; any other value keeps NumPy arrays.

        Returns:
            The output of calling the feature extractor with `return_tensors="pt"` and
            `pad_and_return_pixel_mask=True`.
        """
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # prepare image and target
        batch_size = self.feature_extract_tester.batch_size
        num_labels = self.feature_extract_tester.num_labels
        annotations = None
        instance_id_to_semantic_id = None
        if with_segmentation_maps:
            high = num_labels
            if is_instance_map:
                # Two instance ids per semantic label. The original `high * 2` was a
                # discarded expression, so the id range was never actually doubled.
                high *= 2
                labels_expanded = list(range(num_labels)) * 2
                instance_id_to_semantic_id = dict(enumerate(labels_expanded))
            annotations = [np.random.randint(0, high, (384, 384)).astype(np.uint8) for _ in range(batch_size)]
            if segmentation_type == "pil":
                annotations = [Image.fromarray(annotation) for annotation in annotations]

        image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False)
        inputs = feature_extractor(
            image_inputs,
            annotations,
            return_tensors="pt",
            instance_id_to_semantic_id=instance_id_to_semantic_id,
            pad_and_return_pixel_mask=True,
        )

        return inputs

    def test_init_without_params(self):
        # Intentionally empty.
        # NOTE(review): presumably disables an inherited test of the same name - confirm against the mixin.
        pass

    def test_with_size_divisibility(self):
        size_divisibilities = [8, 16, 32]
        weird_input_sizes = [(407, 802), (582, 1094)]
        for size_divisibility in size_divisibilities:
            feat_extract_dict = {**self.feat_extract_dict, "size_divisibility": size_divisibility}
            feature_extractor = self.feature_extraction_class(**feat_extract_dict)
            for weird_input_size in weird_input_sizes:
                inputs = feature_extractor([np.ones((3, *weird_input_size))], return_tensors="pt")
                pixel_values = inputs["pixel_values"]
                # check if divisible
                self.assertEqual(pixel_values.shape[-1] % size_divisibility, 0)
                self.assertEqual(pixel_values.shape[-2] % size_divisibility, 0)

    def test_call_with_segmentation_maps(self):
        def common(is_instance_map=False, segmentation_type=None):
            inputs = self.comm_get_feature_extractor_inputs(
                with_segmentation_maps=True, is_instance_map=is_instance_map, segmentation_type=segmentation_type
            )

            mask_labels = inputs["mask_labels"]
            class_labels = inputs["class_labels"]
            pixel_values = inputs["pixel_values"]

            # check the batch_size
            for mask_label, class_label in zip(mask_labels, class_labels):
                self.assertEqual(mask_label.shape[0], class_label.shape[0])
                # this ensures padding has happened
                self.assertEqual(mask_label.shape[1:], pixel_values.shape[2:])

        common()
        common(is_instance_map=True)
        common(is_instance_map=False, segmentation_type="pil")
        common(is_instance_map=True, segmentation_type="pil")

    def test_post_process_segmentation(self):
        tester = self.feature_extract_tester
        feature_extractor = self.feature_extraction_class(num_labels=tester.num_classes)
        outputs = tester.get_fake_maskformer_outputs()
        segmentation = feature_extractor.post_process_segmentation(outputs)

        self.assertEqual(
            segmentation.shape,
            (tester.batch_size, tester.num_classes, tester.height, tester.width),
        )

        target_size = (1, 4)
        segmentation = feature_extractor.post_process_segmentation(outputs, target_size=target_size)

        self.assertEqual(
            segmentation.shape,
            (tester.batch_size, tester.num_classes, *target_size),
        )

    def test_post_process_semantic_segmentation(self):
        tester = self.feature_extract_tester
        feature_extractor = self.feature_extraction_class(num_labels=tester.num_classes)
        outputs = tester.get_fake_maskformer_outputs()

        segmentation = feature_extractor.post_process_semantic_segmentation(outputs)

        self.assertEqual(
            segmentation.shape,
            (tester.batch_size, tester.height, tester.width),
        )

        target_size = (1, 4)
        segmentation = feature_extractor.post_process_semantic_segmentation(outputs, target_size=target_size)

        self.assertEqual(segmentation.shape, (tester.batch_size, *target_size))

    def test_post_process_panoptic_segmentation(self):
        tester = self.feature_extract_tester
        feature_extractor = self.feature_extraction_class(num_labels=tester.num_classes)
        outputs = tester.get_fake_maskformer_outputs()
        segmentation = feature_extractor.post_process_panoptic_segmentation(outputs, object_mask_threshold=0)

        self.assertEqual(len(segmentation), tester.batch_size)
        for el in segmentation:
            self.assertIn("segmentation", el)
            self.assertIn("segments", el)
            self.assertIsInstance(el["segments"], list)
            self.assertEqual(el["segmentation"].shape, (tester.height, tester.width))
from transformers import ViTMAEConfig from transformers.testing_utils import require_torch, require_vision, slow, torch_device from transformers.utils import cached_property, is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor if is_torch_available(): import torch from torch import nn from transformers import ViTMAEForPreTraining, ViTMAEModel from transformers.models.vit.modeling_vit import VIT_PRETRAINED_MODEL_ARCHIVE_LIST if is_vision_available(): from PIL import Image from transformers import ViTFeatureExtractor class ViTMAEModelTester: def __init__( self, parent, batch_size=13, image_size=30, patch_size=2, num_channels=3, is_training=True, use_labels=True,
class ConvNextFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
    """Checks ConvNextFeatureExtractor output shapes for PIL, NumPy and PyTorch inputs."""

    # Resolves to None when is_vision_available() is falsy.
    feature_extraction_class = ConvNextFeatureExtractor if is_vision_available() else None

    def setUp(self):
        self.feature_extract_tester = ConvNextFeatureExtractionTester(self)

    @property
    def feat_extract_dict(self):
        """Keyword arguments used to build the feature extractor under test."""
        tester = self.feature_extract_tester
        return tester.prepare_feat_extract_dict()

    def test_feat_extract_properties(self):
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        for attribute in ("do_resize", "size", "crop_pct", "do_normalize", "image_mean", "image_std"):
            self.assertTrue(hasattr(extractor, attribute))

    def test_batch_feature(self):
        # Intentionally empty.
        # NOTE(review): presumably disables an inherited test of the same name - confirm against the mixin.
        pass

    def _expected_shape(self, batch_size):
        """Return the expected `pixel_values` shape: (batch_size, num_channels, size, size)."""
        tester = self.feature_extract_tester
        return (batch_size, tester.num_channels, tester.size, tester.size)

    def _check_call(self, expected_type, **prepare_kwargs):
        """Encode one image, then the whole batch, and compare `pixel_values` shapes.

        Args:
            expected_type: type every prepared input must be an instance of.
            **prepare_kwargs: extra keyword arguments forwarded to `prepare_image_inputs`.
        """
        # Initialize feature_extractor
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # create random inputs
        images = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, **prepare_kwargs)
        for image in images:
            self.assertIsInstance(image, expected_type)

        # Test not batched input
        single = extractor(images[0], return_tensors="pt").pixel_values
        self.assertEqual(single.shape, self._expected_shape(1))

        # Test batched
        batched = extractor(images, return_tensors="pt").pixel_values
        self.assertEqual(batched.shape, self._expected_shape(self.feature_extract_tester.batch_size))

    def test_call_pil(self):
        self._check_call(Image.Image)

    def test_call_numpy(self):
        self._check_call(np.ndarray, numpify=True)

    def test_call_pytorch(self):
        self._check_call(torch.Tensor, torchify=True)
class FlavaFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
    """Checks FlavaFeatureExtractor outputs: pixel values, image masks and codebook pixels."""

    # Resolves to None when is_vision_available() is falsy.
    feature_extraction_class = FlavaFeatureExtractor if is_vision_available() else None
    maxDiff = None

    def setUp(self):
        self.feature_extract_tester = FlavaFeatureExtractionTester(self)

    @property
    def feat_extract_dict(self):
        """Keyword arguments used to build the feature extractor under test."""
        tester = self.feature_extract_tester
        return tester.prepare_feat_extract_dict()

    def test_feat_extract_properties(self):
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        attribute_names = (
            "image_mean",
            "image_std",
            "do_normalize",
            "do_resize",
            "resample",
            "crop_size",
            "do_center_crop",
            "masking_generator",
            "codebook_do_resize",
            "codebook_size",
            "codebook_resample",
            "codebook_do_center_crop",
            "codebook_crop_size",
            "codebook_do_map_pixels",
            "codebook_do_normalize",
            "codebook_image_mean",
            "codebook_image_std",
        )
        for attribute in attribute_names:
            self.assertTrue(hasattr(extractor, attribute))

    def test_batch_feature(self):
        # Intentionally empty.
        # NOTE(review): presumably disables an inherited test of the same name - confirm against the mixin.
        pass

    def _pixel_shape(self, batch_size):
        """Expected `pixel_values` shape for `batch_size` images."""
        tester = self.feature_extract_tester
        height, width = tester.get_expected_image_size()
        return (batch_size, tester.num_channels, height, width)

    def _mask_shape(self):
        """Expected `bool_masked_pos` shape for a full batch."""
        tester = self.feature_extract_tester
        height, width = tester.get_expected_mask_size()
        return (tester.batch_size, height, width)

    def _codebook_shape(self, batch_size):
        """Expected `codebook_pixel_values` shape for `batch_size` images."""
        tester = self.feature_extract_tester
        height, width = tester.get_expected_codebook_image_size()
        return (batch_size, tester.num_channels, height, width)

    def test_call_pil(self):
        # Initialize feature_extractor
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # create random PIL images
        images = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False)
        for image in images:
            self.assertIsInstance(image, Image.Image)

        # Test not batched input
        encoded = extractor(images[0], return_tensors="pt")
        # Test no bool masked pos
        self.assertNotIn("bool_masked_pos", encoded)
        self.assertEqual(encoded.pixel_values.shape, self._pixel_shape(1))

        # Test batched
        encoded = extractor(images, return_tensors="pt")
        expected = self._pixel_shape(self.feature_extract_tester.batch_size)
        # Test no bool masked pos
        self.assertNotIn("bool_masked_pos", encoded)
        self.assertEqual(encoded.pixel_values.shape, expected)

    def _test_call_framework(self, instance_class, prepare_kwargs):
        """Shared body of the NumPy / PyTorch call tests.

        Args:
            instance_class: type every prepared input must be an instance of.
            prepare_kwargs: dict of extra keyword arguments forwarded to `prepare_image_inputs`.
        """
        batch_size = self.feature_extract_tester.batch_size
        # Initialize feature_extractor
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # create random tensors
        images = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, **prepare_kwargs)
        for image in images:
            self.assertIsInstance(image, instance_class)

        # Test not batched input
        encoded = extractor(images[0], return_tensors="pt")
        self.assertEqual(encoded.pixel_values.shape, self._pixel_shape(1))

        # Batched call that also requests the image mask
        encoded = extractor(images, return_image_mask=True, return_tensors="pt")
        self.assertEqual(encoded.pixel_values.shape, self._pixel_shape(batch_size))
        self.assertEqual(encoded.bool_masked_pos.shape, self._mask_shape())

        # Test batched
        pixel_values = extractor(images, return_tensors="pt").pixel_values
        self.assertEqual(pixel_values.shape, self._pixel_shape(batch_size))

        # Test masking
        encoded = extractor(images, return_image_mask=True, return_tensors="pt")
        self.assertEqual(encoded.pixel_values.shape, self._pixel_shape(batch_size))
        self.assertEqual(encoded.bool_masked_pos.shape, self._mask_shape())

    def test_call_numpy(self):
        self._test_call_framework(np.ndarray, prepare_kwargs={"numpify": True})

    def test_call_pytorch(self):
        self._test_call_framework(torch.Tensor, prepare_kwargs={"torchify": True})

    def test_masking(self):
        # Seed first so the mask drawn below is reproducible.
        random.seed(1234)
        # Initialize feature_extractor
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        images = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, torchify=True)

        # Test not batched input
        encoded = extractor(images[0], return_image_mask=True, return_tensors="pt")
        masked_count = encoded.bool_masked_pos.sum().item()
        self.assertEqual(masked_count, 75)

    def test_codebook_pixels(self):
        # Initialize feature_extractor
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # create random PIL images
        images = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False)
        for image in images:
            self.assertIsInstance(image, Image.Image)

        # Test not batched input
        encoded = extractor(images[0], return_codebook_pixels=True, return_tensors="pt")
        self.assertEqual(encoded.codebook_pixel_values.shape, self._codebook_shape(1))

        # Test batched
        encoded = extractor(images, return_codebook_pixels=True, return_tensors="pt")
        self.assertEqual(
            encoded.codebook_pixel_values.shape,
            self._codebook_shape(self.feature_extract_tester.batch_size),
        )
def default_feature_extractor(self):
    """Return the pretrained "microsoft/beit-base-patch16-224" feature extractor, or None when is_vision_available() is falsy."""
    if not is_vision_available():
        return None
    return BeitFeatureExtractor.from_pretrained("microsoft/beit-base-patch16-224")
class MaskFormerFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
    """Tests for MaskFormerFeatureExtractor: call shapes, annotation inputs and post-processing."""

    # Resolves to None unless both is_vision_available() and is_torch_available() are truthy.
    feature_extraction_class = (
        MaskFormerFeatureExtractor if (is_vision_available() and is_torch_available()) else None
    )

    def setUp(self):
        self.feature_extract_tester = MaskFormerFeatureExtractionTester(self)

    @property
    def feat_extract_dict(self):
        """Keyword arguments used to build the feature extractor under test."""
        tester = self.feature_extract_tester
        return tester.prepare_feat_extract_dict()

    def test_feat_extract_properties(self):
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        for attribute in ("image_mean", "image_std", "do_normalize", "do_resize", "size", "max_size"):
            self.assertTrue(hasattr(extractor, attribute))

    def test_batch_feature(self):
        # Intentionally empty.
        # NOTE(review): presumably disables an inherited test of the same name - confirm against the mixin.
        pass

    def _assert_pixel_shape(self, pixel_values, batch_size, height, width):
        """Assert `pixel_values.shape` equals (batch_size, num_channels, height, width)."""
        channels = self.feature_extract_tester.num_channels
        self.assertEqual(pixel_values.shape, (batch_size, channels, height, width))

    def test_call_pil(self):
        tester = self.feature_extract_tester
        # Initialize feature_extractor
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # create random PIL images
        images = prepare_image_inputs(tester, equal_resolution=False)
        for image in images:
            self.assertIsInstance(image, Image.Image)

        # Test not batched input
        pixel_values = extractor(images[0], return_tensors="pt").pixel_values
        height, width = tester.get_expected_values(images)
        self._assert_pixel_shape(pixel_values, 1, height, width)

        # Test batched (expected size is computed before the extractor call here)
        height, width = tester.get_expected_values(images, batched=True)
        pixel_values = extractor(images, return_tensors="pt").pixel_values
        self._assert_pixel_shape(pixel_values, tester.batch_size, height, width)

    def test_call_numpy(self):
        tester = self.feature_extract_tester
        # Initialize feature_extractor
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # create random numpy tensors
        images = prepare_image_inputs(tester, equal_resolution=False, numpify=True)
        for image in images:
            self.assertIsInstance(image, np.ndarray)

        # Test not batched input
        pixel_values = extractor(images[0], return_tensors="pt").pixel_values
        height, width = tester.get_expected_values(images)
        self._assert_pixel_shape(pixel_values, 1, height, width)

        # Test batched
        pixel_values = extractor(images, return_tensors="pt").pixel_values
        height, width = tester.get_expected_values(images, batched=True)
        self._assert_pixel_shape(pixel_values, tester.batch_size, height, width)

    def test_call_pytorch(self):
        tester = self.feature_extract_tester
        # Initialize feature_extractor
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # create random PyTorch tensors
        images = prepare_image_inputs(tester, equal_resolution=False, torchify=True)
        for image in images:
            self.assertIsInstance(image, torch.Tensor)

        # Test not batched input
        pixel_values = extractor(images[0], return_tensors="pt").pixel_values
        height, width = tester.get_expected_values(images)
        self._assert_pixel_shape(pixel_values, 1, height, width)

        # Test batched
        pixel_values = extractor(images, return_tensors="pt").pixel_values
        height, width = tester.get_expected_values(images, batched=True)
        self._assert_pixel_shape(pixel_values, tester.batch_size, height, width)

    def test_equivalence_pad_and_create_pixel_mask(self):
        # Initialize feature_extractors
        configured_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        plain_extractor = self.feature_extraction_class(do_resize=False, do_normalize=False)
        # create random PyTorch tensors
        images = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, torchify=True)
        for image in images:
            self.assertIsInstance(image, torch.Tensor)

        # Test whether the method "pad_and_return_pixel_mask" and calling the feature extractor return the same tensors
        from_method = configured_extractor.encode_inputs(images, return_tensors="pt")
        from_call = plain_extractor(images, return_tensors="pt")

        for key in ("pixel_values", "pixel_mask"):
            self.assertTrue(torch.allclose(from_method[key], from_call[key], atol=1e-4))

    def comm_get_feature_extractor_inputs(self, with_annotations=False):
        """Build extractor inputs, optionally with random per-image annotation dicts.

        Args:
            with_annotations: when truthy, each batch item gets a dict with random
                `"masks"` (8 x 384 x 384, float32) and `"labels"` (8 values, int64).

        Returns:
            The output of calling the feature extractor with `return_tensors="pt"` and
            `pad_and_return_pixel_mask=True`.
        """
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # prepare image and target
        num_classes = 8
        batch_size = self.feature_extract_tester.batch_size
        annotations = None

        if with_annotations:
            annotations = []
            for _ in range(batch_size):
                masks = np.random.rand(num_classes, 384, 384).astype(np.float32)
                labels = (np.random.rand(num_classes) > 0.5).astype(np.int64)
                annotations.append({"masks": masks, "labels": labels})

        images = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False)

        return extractor(images, annotations, return_tensors="pt", pad_and_return_pixel_mask=True)

    def test_with_size_divisibility(self):
        weird_input_sizes = [(407, 802), (582, 1094)]
        for size_divisibility in (8, 16, 32):
            config = dict(self.feat_extract_dict)
            config["size_divisibility"] = size_divisibility
            extractor = self.feature_extraction_class(**config)
            for height, width in weird_input_sizes:
                encoded = extractor([np.ones((3, height, width))], return_tensors="pt")
                pixel_values = encoded["pixel_values"]
                # check if divisible
                for axis in (-1, -2):
                    self.assertEqual(pixel_values.shape[axis] % size_divisibility, 0)

    def test_call_with_numpy_annotations(self):
        num_classes = 8
        batch_size = self.feature_extract_tester.batch_size

        inputs = self.comm_get_feature_extractor_inputs(with_annotations=True)

        # check the batch_size
        for tensor in inputs.values():
            self.assertEqual(tensor.shape[0], batch_size)

        pixel_values = inputs["pixel_values"]
        mask_labels = inputs["mask_labels"]
        class_labels = inputs["class_labels"]

        for axis in (-2, -1):
            self.assertEqual(pixel_values.shape[axis], mask_labels.shape[axis])
        self.assertEqual(mask_labels.shape[1], class_labels.shape[1])
        self.assertEqual(mask_labels.shape[1], num_classes)

    def test_post_process_segmentation(self):
        tester = self.feature_extract_tester
        extractor = self.feature_extraction_class()
        outputs = tester.get_fake_maskformer_outputs()

        segmentation = extractor.post_process_segmentation(outputs)
        full_shape = (tester.batch_size, tester.num_classes, tester.height, tester.width)
        self.assertEqual(segmentation.shape, full_shape)

        target_size = (1, 4)
        segmentation = extractor.post_process_segmentation(outputs, target_size=target_size)
        resized_shape = (tester.batch_size, tester.num_classes, *target_size)
        self.assertEqual(segmentation.shape, resized_shape)

    def test_post_process_semantic_segmentation(self):
        tester = self.feature_extract_tester
        extractor = self.feature_extraction_class()
        outputs = tester.get_fake_maskformer_outputs()

        segmentation = extractor.post_process_semantic_segmentation(outputs)
        self.assertEqual(segmentation.shape, (tester.batch_size, tester.height, tester.width))

        target_size = (1, 4)
        segmentation = extractor.post_process_semantic_segmentation(outputs, target_size=target_size)
        self.assertEqual(segmentation.shape, (tester.batch_size, *target_size))

    def test_post_process_panoptic_segmentation(self):
        tester = self.feature_extract_tester
        extractor = self.feature_extraction_class()
        outputs = tester.get_fake_maskformer_outputs()

        results = extractor.post_process_panoptic_segmentation(outputs, object_mask_threshold=0)

        self.assertEqual(len(results), tester.batch_size)
        for result in results:
            self.assertIn("segmentation", result)
            self.assertIn("segments", result)
            self.assertIs(type(result["segments"]), list)
            self.assertEqual(result["segmentation"].shape, (tester.height, tester.width))
def default_feature_extractor(self):
    """Return the `openai/imagegpt-small` feature extractor, or `None` when vision support is unavailable."""
    if not is_vision_available():
        return None
    return ImageGPTFeatureExtractor.from_pretrained("openai/imagegpt-small")
class DetrFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
    """Unit tests for `DetrFeatureExtractor`.

    Covers attribute presence, output shapes for PIL / NumPy / PyTorch inputs
    (single image and batch), equivalence of `pad_and_create_pixel_mask` with a
    plain call that neither resizes nor normalizes, and two slow tests that
    encode a COCO fixture image together with its annotations.
    """

    feature_extraction_class = DetrFeatureExtractor if is_vision_available() else None

    def setUp(self):
        self.feature_extract_tester = DetrFeatureExtractionTester(self)

    @property
    def feat_extract_dict(self):
        """Keyword arguments used to build the feature extractor under test."""
        return self.feature_extract_tester.prepare_feat_extract_dict()

    def test_feat_extract_properties(self):
        """The extractor exposes every configuration attribute the tests rely on."""
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        self.assertTrue(hasattr(feature_extractor, "image_mean"))
        self.assertTrue(hasattr(feature_extractor, "image_std"))
        self.assertTrue(hasattr(feature_extractor, "do_normalize"))
        self.assertTrue(hasattr(feature_extractor, "do_resize"))
        self.assertTrue(hasattr(feature_extractor, "size"))
        self.assertTrue(hasattr(feature_extractor, "max_size"))

    def test_batch_feature(self):
        # Intentionally a no-op.
        # NOTE(review): presumably disables a same-named test inherited from the mixin - confirm.
        pass

    def test_call_pil(self):
        """PIL inputs produce `pixel_values` of the expected shape, single and batched."""
        # Initialize feature_extractor
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # create random PIL images
        image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False)
        for image in image_inputs:
            self.assertIsInstance(image, Image.Image)

        # Test not batched input
        encoded_images = feature_extractor(image_inputs[0], return_tensors="pt").pixel_values

        expected_height, expected_width = self.feature_extract_tester.get_expected_values(image_inputs)

        self.assertEqual(
            encoded_images.shape,
            (1, self.feature_extract_tester.num_channels, expected_height, expected_width),
        )

        # Test batched
        expected_height, expected_width = self.feature_extract_tester.get_expected_values(image_inputs, batched=True)

        encoded_images = feature_extractor(image_inputs, return_tensors="pt").pixel_values
        self.assertEqual(
            encoded_images.shape,
            (
                self.feature_extract_tester.batch_size,
                self.feature_extract_tester.num_channels,
                expected_height,
                expected_width,
            ),
        )

    def test_call_numpy(self):
        """NumPy inputs produce `pixel_values` of the expected shape, single and batched."""
        # Initialize feature_extractor
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # create random numpy tensors
        image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, numpify=True)
        for image in image_inputs:
            self.assertIsInstance(image, np.ndarray)

        # Test not batched input
        encoded_images = feature_extractor(image_inputs[0], return_tensors="pt").pixel_values

        expected_height, expected_width = self.feature_extract_tester.get_expected_values(image_inputs)

        self.assertEqual(
            encoded_images.shape,
            (1, self.feature_extract_tester.num_channels, expected_height, expected_width),
        )

        # Test batched
        encoded_images = feature_extractor(image_inputs, return_tensors="pt").pixel_values

        expected_height, expected_width = self.feature_extract_tester.get_expected_values(image_inputs, batched=True)

        self.assertEqual(
            encoded_images.shape,
            (
                self.feature_extract_tester.batch_size,
                self.feature_extract_tester.num_channels,
                expected_height,
                expected_width,
            ),
        )

    def test_call_pytorch(self):
        """PyTorch inputs produce `pixel_values` of the expected shape, single and batched."""
        # Initialize feature_extractor
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # create random PyTorch tensors
        image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, torchify=True)
        for image in image_inputs:
            self.assertIsInstance(image, torch.Tensor)

        # Test not batched input
        encoded_images = feature_extractor(image_inputs[0], return_tensors="pt").pixel_values

        expected_height, expected_width = self.feature_extract_tester.get_expected_values(image_inputs)

        self.assertEqual(
            encoded_images.shape,
            (1, self.feature_extract_tester.num_channels, expected_height, expected_width),
        )

        # Test batched
        encoded_images = feature_extractor(image_inputs, return_tensors="pt").pixel_values

        expected_height, expected_width = self.feature_extract_tester.get_expected_values(image_inputs, batched=True)

        self.assertEqual(
            encoded_images.shape,
            (
                self.feature_extract_tester.batch_size,
                self.feature_extract_tester.num_channels,
                expected_height,
                expected_width,
            ),
        )

    def test_equivalence_pad_and_create_pixel_mask(self):
        """`pad_and_create_pixel_mask` matches a call with resizing and normalization disabled."""
        # Initialize feature_extractors
        feature_extractor_1 = self.feature_extraction_class(**self.feat_extract_dict)
        feature_extractor_2 = self.feature_extraction_class(do_resize=False, do_normalize=False)
        # create random PyTorch tensors
        image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, torchify=True)
        for image in image_inputs:
            self.assertIsInstance(image, torch.Tensor)

        # Test whether the method "pad_and_create_pixel_mask" and calling the feature extractor return the same tensors
        encoded_images_with_method = feature_extractor_1.pad_and_create_pixel_mask(image_inputs, return_tensors="pt")
        encoded_images = feature_extractor_2(image_inputs, return_tensors="pt")

        # unittest assertions (not bare `assert`) so the checks survive `python -O`
        self.assertTrue(
            torch.allclose(encoded_images_with_method["pixel_values"], encoded_images["pixel_values"], atol=1e-4)
        )
        self.assertTrue(
            torch.allclose(encoded_images_with_method["pixel_mask"], encoded_images["pixel_mask"], atol=1e-4)
        )

    @slow
    def test_call_pytorch_with_coco_detection_annotations(self):
        """Encode the COCO fixture with detection annotations and check the produced labels."""
        # prepare image and target
        image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
        with open("./tests/fixtures/tests_samples/COCO/coco_annotations.txt", "r") as f:
            target = json.load(f)

        target = {"image_id": 39769, "annotations": target}

        # encode them
        feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50")
        encoding = feature_extractor(images=image, annotations=target, return_tensors="pt")
        labels = encoding["labels"][0]

        # verify pixel values
        expected_shape = torch.Size([1, 3, 800, 1066])
        self.assertEqual(encoding["pixel_values"].shape, expected_shape)

        expected_slice = torch.tensor([0.2796, 0.3138, 0.3481])
        self.assertTrue(torch.allclose(encoding["pixel_values"][0, 0, 0, :3], expected_slice, atol=1e-4))

        # verify area
        expected_area = torch.tensor([5887.9600, 11250.2061, 489353.8438, 837122.7500, 147967.5156, 165732.3438])
        self.assertTrue(torch.allclose(labels["area"], expected_area))
        # verify boxes
        expected_boxes_shape = torch.Size([6, 4])
        self.assertEqual(labels["boxes"].shape, expected_boxes_shape)
        expected_boxes_slice = torch.tensor([0.5503, 0.2765, 0.0604, 0.2215])
        self.assertTrue(torch.allclose(labels["boxes"][0], expected_boxes_slice, atol=1e-3))
        # verify image_id
        expected_image_id = torch.tensor([39769])
        self.assertTrue(torch.allclose(labels["image_id"], expected_image_id))
        # verify is_crowd
        expected_is_crowd = torch.tensor([0, 0, 0, 0, 0, 0])
        self.assertTrue(torch.allclose(labels["iscrowd"], expected_is_crowd))
        # verify class_labels
        expected_class_labels = torch.tensor([75, 75, 63, 65, 17, 17])
        self.assertTrue(torch.allclose(labels["class_labels"], expected_class_labels))
        # verify orig_size
        expected_orig_size = torch.tensor([480, 640])
        self.assertTrue(torch.allclose(labels["orig_size"], expected_orig_size))
        # verify size
        expected_size = torch.tensor([800, 1066])
        self.assertTrue(torch.allclose(labels["size"], expected_size))

    @slow
    def test_call_pytorch_with_coco_panoptic_annotations(self):
        """Encode the COCO fixture with panoptic annotations and check the produced labels."""
        # prepare image, target and masks_path
        image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
        with open("./tests/fixtures/tests_samples/COCO/coco_panoptic_annotations.txt", "r") as f:
            target = json.load(f)

        target = {"file_name": "000000039769.png", "image_id": 39769, "segments_info": target}

        masks_path = pathlib.Path("./tests/fixtures/tests_samples/COCO/coco_panoptic")

        # encode them
        # TODO replace by .from_pretrained facebook/detr-resnet-50-panoptic
        feature_extractor = DetrFeatureExtractor(format="coco_panoptic")
        encoding = feature_extractor(images=image, annotations=target, masks_path=masks_path, return_tensors="pt")
        labels = encoding["labels"][0]

        # verify pixel values
        expected_shape = torch.Size([1, 3, 800, 1066])
        self.assertEqual(encoding["pixel_values"].shape, expected_shape)

        expected_slice = torch.tensor([0.2796, 0.3138, 0.3481])
        self.assertTrue(torch.allclose(encoding["pixel_values"][0, 0, 0, :3], expected_slice, atol=1e-4))

        # verify area
        expected_area = torch.tensor([147979.6875, 165527.0469, 484638.5938, 11292.9375, 5879.6562, 7634.1147])
        self.assertTrue(torch.allclose(labels["area"], expected_area))
        # verify boxes
        expected_boxes_shape = torch.Size([6, 4])
        self.assertEqual(labels["boxes"].shape, expected_boxes_shape)
        expected_boxes_slice = torch.tensor([0.2625, 0.5437, 0.4688, 0.8625])
        self.assertTrue(torch.allclose(labels["boxes"][0], expected_boxes_slice, atol=1e-3))
        # verify image_id
        expected_image_id = torch.tensor([39769])
        self.assertTrue(torch.allclose(labels["image_id"], expected_image_id))
        # verify is_crowd
        expected_is_crowd = torch.tensor([0, 0, 0, 0, 0, 0])
        self.assertTrue(torch.allclose(labels["iscrowd"], expected_is_crowd))
        # verify class_labels
        expected_class_labels = torch.tensor([17, 17, 63, 75, 75, 93])
        self.assertTrue(torch.allclose(labels["class_labels"], expected_class_labels))
        # verify masks
        expected_masks_sum = 822338
        self.assertEqual(labels["masks"].sum().item(), expected_masks_sum)
        # verify orig_size
        expected_orig_size = torch.tensor([480, 640])
        self.assertTrue(torch.allclose(labels["orig_size"], expected_orig_size))
        # verify size
        expected_size = torch.tensor([800, 1066])
        self.assertTrue(torch.allclose(labels["size"], expected_size))
class ImageGPTFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
    """Serialization and attribute tests for `ImageGPTFeatureExtractor`.

    The serialization tests are redefined here because the `clusters` entry is
    compared with `np.array_equal` rather than plain equality.
    """

    feature_extraction_class = ImageGPTFeatureExtractor if is_vision_available() else None

    def setUp(self):
        self.feature_extract_tester = ImageGPTFeatureExtractionTester(self)

    @property
    def feat_extract_dict(self):
        """Keyword arguments used to build the feature extractor under test."""
        return self.feature_extract_tester.prepare_feat_extract_dict()

    def _assert_config_matches(self, expected, actual):
        """Assert that every entry of `expected` is present and equal in `actual`.

        `clusters` is compared element-wise with `np.array_equal`; every other
        key is compared with `assertEqual`.
        """
        for key, value in expected.items():
            if key == "clusters":
                self.assertTrue(np.array_equal(value, actual[key]))
            else:
                self.assertEqual(actual[key], value)

    def test_feat_extract_properties(self):
        """The extractor exposes every configuration attribute the tests rely on."""
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        self.assertTrue(hasattr(feature_extractor, "clusters"))
        self.assertTrue(hasattr(feature_extractor, "do_resize"))
        self.assertTrue(hasattr(feature_extractor, "size"))
        self.assertTrue(hasattr(feature_extractor, "do_normalize"))

    def test_feat_extract_to_json_string(self):
        """The JSON string round-trips every constructor argument."""
        feat_extract = self.feature_extraction_class(**self.feat_extract_dict)
        obj = json.loads(feat_extract.to_json_string())
        self._assert_config_matches(self.feat_extract_dict, obj)

    def test_feat_extract_to_json_file(self):
        """A config written with `to_json_file` reloads to an equal config."""
        feat_extract_first = self.feature_extraction_class(**self.feat_extract_dict)

        with tempfile.TemporaryDirectory() as tmpdirname:
            json_file_path = os.path.join(tmpdirname, "feat_extract.json")
            feat_extract_first.to_json_file(json_file_path)
            feat_extract_second = self.feature_extraction_class.from_json_file(json_file_path).to_dict()

        # Compare against the reloaded config; the previous code compared the
        # first config with itself for every key except "clusters".
        self._assert_config_matches(feat_extract_first.to_dict(), feat_extract_second)

    def test_feat_extract_from_and_save_pretrained(self):
        """A config saved with `save_pretrained` reloads to an equal config."""
        feat_extract_first = self.feature_extraction_class(**self.feat_extract_dict)

        with tempfile.TemporaryDirectory() as tmpdirname:
            feat_extract_first.save_pretrained(tmpdirname)
            feat_extract_second = self.feature_extraction_class.from_pretrained(tmpdirname).to_dict()

        # Compare against the reloaded config; the previous code compared the
        # first config with itself for every key except "clusters".
        self._assert_config_matches(feat_extract_first.to_dict(), feat_extract_second)

    @unittest.skip("ImageGPT requires clusters at initialization")
    def test_init_without_params(self):
        pass
def default_processor(self):
    """Return the `microsoft/trocr-base-handwritten` processor, or `None` when vision support is unavailable."""
    if is_vision_available():
        return TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
    return None
def default_feature_extractor(self):
    """Return the `hustvl/yolos-small` feature extractor, or `None` when vision support is unavailable."""
    if not is_vision_available():
        return None
    return AutoFeatureExtractor.from_pretrained("hustvl/yolos-small")
def default_feature_extractor(self):
    """Return a `LayoutLMv3FeatureExtractor` built with `apply_ocr=False`, or `None` without vision support."""
    if is_vision_available():
        return LayoutLMv3FeatureExtractor(apply_ocr=False)
    return None
def default_feature_extractor(self):
    """Return the Swin tiny (patch4, window7, 224) feature extractor, or `None` without vision support."""
    if not is_vision_available():
        return None
    checkpoint = "microsoft/swin-tiny-patch4-window7-224"
    return AutoFeatureExtractor.from_pretrained(checkpoint)
def default_feature_extractor(self):
    """Return the `facebook/convnext-tiny-224` feature extractor, or `None` when vision support is unavailable."""
    if is_vision_available():
        return ConvNextFeatureExtractor.from_pretrained("facebook/convnext-tiny-224")
    return None
class SegformerFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
    """Unit tests for `SegformerFeatureExtractor`.

    Covers attribute presence, output shapes for PIL / NumPy / PyTorch inputs
    (single image and batch), encoding of segmentation maps into `labels`, and
    the effect of toggling `reduce_labels`.
    """

    feature_extraction_class = SegformerFeatureExtractor if is_vision_available() else None

    def setUp(self):
        self.feature_extract_tester = SegformerFeatureExtractionTester(self)

    @property
    def feat_extract_dict(self):
        """Keyword arguments used to build the feature extractor under test."""
        return self.feature_extract_tester.prepare_feat_extract_dict()

    def _assert_pixel_values_shape(self, pixel_values, batch_size):
        """Assert `pixel_values` has shape (batch_size, num_channels, size, size)."""
        tester = self.feature_extract_tester
        self.assertEqual(
            pixel_values.shape,
            (batch_size, tester.num_channels, tester.size, tester.size),
        )

    def _assert_labels(self, labels, batch_size):
        """Assert `labels` has shape (batch_size, size, size), dtype long and values in [0, 255]."""
        tester = self.feature_extract_tester
        self.assertEqual(labels.shape, (batch_size, tester.size, tester.size))
        self.assertEqual(labels.dtype, torch.long)
        self.assertGreaterEqual(labels.min().item(), 0)
        self.assertLessEqual(labels.max().item(), 255)

    def test_feat_extract_properties(self):
        """The extractor exposes every configuration attribute the tests rely on."""
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        self.assertTrue(hasattr(feature_extractor, "do_resize"))
        self.assertTrue(hasattr(feature_extractor, "size"))
        self.assertTrue(hasattr(feature_extractor, "do_normalize"))
        self.assertTrue(hasattr(feature_extractor, "image_mean"))
        self.assertTrue(hasattr(feature_extractor, "image_std"))
        self.assertTrue(hasattr(feature_extractor, "reduce_labels"))

    def test_batch_feature(self):
        # Intentionally a no-op.
        # NOTE(review): presumably disables a same-named test inherited from the mixin - confirm.
        pass

    def test_call_pil(self):
        """PIL inputs produce `pixel_values` of the expected shape, single and batched."""
        # Initialize feature_extractor
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # create random PIL images
        image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False)
        for image in image_inputs:
            self.assertIsInstance(image, Image.Image)

        # Test not batched input
        encoded_images = feature_extractor(image_inputs[0], return_tensors="pt").pixel_values
        self._assert_pixel_values_shape(encoded_images, 1)

        # Test batched
        encoded_images = feature_extractor(image_inputs, return_tensors="pt").pixel_values
        self._assert_pixel_values_shape(encoded_images, self.feature_extract_tester.batch_size)

    def test_call_numpy(self):
        """NumPy inputs produce `pixel_values` of the expected shape, single and batched."""
        # Initialize feature_extractor
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # create random numpy tensors
        image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, numpify=True)
        for image in image_inputs:
            self.assertIsInstance(image, np.ndarray)

        # Test not batched input
        encoded_images = feature_extractor(image_inputs[0], return_tensors="pt").pixel_values
        self._assert_pixel_values_shape(encoded_images, 1)

        # Test batched
        encoded_images = feature_extractor(image_inputs, return_tensors="pt").pixel_values
        self._assert_pixel_values_shape(encoded_images, self.feature_extract_tester.batch_size)

    def test_call_pytorch(self):
        """PyTorch inputs produce `pixel_values` of the expected shape, single and batched."""
        # Initialize feature_extractor
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # create random PyTorch tensors
        image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, torchify=True)
        for image in image_inputs:
            self.assertIsInstance(image, torch.Tensor)

        # Test not batched input
        encoded_images = feature_extractor(image_inputs[0], return_tensors="pt").pixel_values
        self._assert_pixel_values_shape(encoded_images, 1)

        # Test batched
        encoded_images = feature_extractor(image_inputs, return_tensors="pt").pixel_values
        self._assert_pixel_values_shape(encoded_images, self.feature_extract_tester.batch_size)

    def test_call_segmentation_maps(self):
        """Images passed together with segmentation maps yield well-formed `pixel_values` and `labels`."""
        # Initialize feature_extractor
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # create random PyTorch tensors
        image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, torchify=True)
        maps = []
        for image in image_inputs:
            self.assertIsInstance(image, torch.Tensor)
            # all-zero map with the same spatial dimensions as the image
            maps.append(torch.zeros(image.shape[-2:]).long())

        # Test not batched input
        encoding = feature_extractor(image_inputs[0], maps[0], return_tensors="pt")
        self._assert_pixel_values_shape(encoding["pixel_values"], 1)
        self._assert_labels(encoding["labels"], 1)

        # Test batched
        encoding = feature_extractor(image_inputs, maps, return_tensors="pt")
        self._assert_pixel_values_shape(encoding["pixel_values"], self.feature_extract_tester.batch_size)
        self._assert_labels(encoding["labels"], self.feature_extract_tester.batch_size)

        # Test not batched input (PIL images)
        image, segmentation_map = prepare_semantic_single_inputs()

        encoding = feature_extractor(image, segmentation_map, return_tensors="pt")
        self._assert_pixel_values_shape(encoding["pixel_values"], 1)
        self._assert_labels(encoding["labels"], 1)

        # Test batched input (PIL images)
        images, segmentation_maps = prepare_semantic_batch_inputs()

        encoding = feature_extractor(images, segmentation_maps, return_tensors="pt")
        self._assert_pixel_values_shape(encoding["pixel_values"], 2)
        self._assert_labels(encoding["labels"], 2)

    def test_reduce_labels(self):
        """Label range is [0, 150] by default and within [0, 255] once `reduce_labels` is enabled."""
        # Initialize feature_extractor
        feature_extractor = self.feature_extraction_class(**self.feat_extract_dict)

        # ADE20k has 150 classes, and the background is included, so labels should be between 0 and 150
        # (local named `segmentation_map` to avoid shadowing the builtin `map`)
        image, segmentation_map = prepare_semantic_single_inputs()
        encoding = feature_extractor(image, segmentation_map, return_tensors="pt")
        self.assertGreaterEqual(encoding["labels"].min().item(), 0)
        self.assertLessEqual(encoding["labels"].max().item(), 150)

        feature_extractor.reduce_labels = True
        encoding = feature_extractor(image, segmentation_map, return_tensors="pt")
        self.assertGreaterEqual(encoding["labels"].min().item(), 0)
        self.assertLessEqual(encoding["labels"].max().item(), 255)
class CLIPFeatureExtractionTestFourChannels(FeatureExtractionSavingTestMixin, unittest.TestCase):
    """Tests `CLIPFeatureExtractor` on inputs prepared with four channels.

    The tester is built with `num_channels=4`, while the encoded output is
    expected to have three channels.
    """

    feature_extraction_class = CLIPFeatureExtractor if is_vision_available() else None

    def setUp(self):
        self.feature_extract_tester = CLIPFeatureExtractionTester(self, num_channels=4)
        self.expected_encoded_image_num_channels = 3

    @property
    def feat_extract_dict(self):
        """Keyword arguments used to build the feature extractor under test."""
        return self.feature_extract_tester.prepare_feat_extract_dict()

    def test_feat_extract_properties(self):
        """The extractor exposes every attribute the tests rely on."""
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        required_attributes = (
            "do_resize",
            "size",
            "do_center_crop",
            "center_crop",
            "do_normalize",
            "image_mean",
            "image_std",
            "do_convert_rgb",
        )
        for attribute in required_attributes:
            self.assertTrue(hasattr(extractor, attribute))

    def test_batch_feature(self):
        # Intentionally a no-op.
        # NOTE(review): presumably disables a same-named test inherited from the mixin - confirm.
        pass

    def test_call_pil_four_channels(self):
        """PIL inputs encode to (batch, 3, crop_size, crop_size), single and batched."""
        # Build the extractor under test
        extractor = self.feature_extraction_class(**self.feat_extract_dict)

        # Random PIL images of differing resolutions
        pil_images = self.feature_extract_tester.prepare_inputs(equal_resolution=False)
        for pil_image in pil_images:
            self.assertIsInstance(pil_image, Image.Image)

        # Single image
        single_output = extractor(pil_images[0], return_tensors="pt").pixel_values
        expected_single_shape = (
            1,
            self.expected_encoded_image_num_channels,
            self.feature_extract_tester.crop_size,
            self.feature_extract_tester.crop_size,
        )
        self.assertEqual(single_output.shape, expected_single_shape)

        # Whole batch
        batch_output = extractor(pil_images, return_tensors="pt").pixel_values
        expected_batch_shape = (
            self.feature_extract_tester.batch_size,
            self.expected_encoded_image_num_channels,
            self.feature_extract_tester.crop_size,
            self.feature_extract_tester.crop_size,
        )
        self.assertEqual(batch_output.shape, expected_batch_shape)
def default_feature_extractor(self):
    """Return the extractor for the first checkpoint in `RESNET_PRETRAINED_MODEL_ARCHIVE_LIST`, or `None` without vision support."""
    if not is_vision_available():
        return None
    return AutoFeatureExtractor.from_pretrained(RESNET_PRETRAINED_MODEL_ARCHIVE_LIST[0])
class ViltFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
    """Unit tests for `ViltFeatureExtractor`.

    Covers attribute presence, output shapes for PIL / NumPy / PyTorch inputs
    (single image and batch), and equivalence of `pad_and_create_pixel_mask`
    with a plain call that neither resizes nor normalizes.
    """

    feature_extraction_class = ViltFeatureExtractor if is_vision_available() else None

    def setUp(self):
        self.feature_extract_tester = ViltFeatureExtractionTester(self)

    @property
    def feat_extract_dict(self):
        """Keyword arguments used to build the feature extractor under test."""
        return self.feature_extract_tester.prepare_feat_extract_dict()

    def _assert_single_and_batched_shapes(self, extractor, inputs):
        """Encode the first input alone, then the whole list, checking each output shape."""
        tester = self.feature_extract_tester

        # Single (not batched) input
        single_output = extractor(inputs[0], return_tensors="pt").pixel_values
        height, width = tester.get_expected_values(inputs)
        self.assertEqual(
            single_output.shape,
            (1, tester.num_channels, height, width),
        )

        # Batched input
        batch_output = extractor(inputs, return_tensors="pt").pixel_values
        height, width = tester.get_expected_values(inputs, batched=True)
        self.assertEqual(
            batch_output.shape,
            (tester.batch_size, tester.num_channels, height, width),
        )

    def test_feat_extract_properties(self):
        """The extractor exposes every configuration attribute the tests rely on."""
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        for attribute in ("image_mean", "image_std", "do_normalize", "do_resize", "size", "size_divisor"):
            self.assertTrue(hasattr(extractor, attribute))

    def test_batch_feature(self):
        # Intentionally a no-op.
        # NOTE(review): presumably disables a same-named test inherited from the mixin - confirm.
        pass

    def test_call_pil(self):
        """PIL inputs produce `pixel_values` of the expected shape."""
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # Random PIL images of differing resolutions
        pil_images = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False)
        for pil_image in pil_images:
            self.assertIsInstance(pil_image, Image.Image)

        self._assert_single_and_batched_shapes(extractor, pil_images)

    def test_call_numpy(self):
        """NumPy inputs produce `pixel_values` of the expected shape."""
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # Random NumPy arrays of differing resolutions
        arrays = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, numpify=True)
        for array in arrays:
            self.assertIsInstance(array, np.ndarray)

        self._assert_single_and_batched_shapes(extractor, arrays)

    def test_call_pytorch(self):
        """PyTorch inputs produce `pixel_values` of the expected shape."""
        extractor = self.feature_extraction_class(**self.feat_extract_dict)
        # Random PyTorch tensors of differing resolutions
        tensors = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, torchify=True)
        for tensor in tensors:
            self.assertIsInstance(tensor, torch.Tensor)

        self._assert_single_and_batched_shapes(extractor, tensors)

    def test_equivalence_pad_and_create_pixel_mask(self):
        """`pad_and_create_pixel_mask` matches a call with resizing and normalization disabled."""
        configured_extractor = self.feature_extraction_class(**self.feat_extract_dict)
        passthrough_extractor = self.feature_extraction_class(do_resize=False, do_normalize=False)

        # Random PyTorch tensors of differing resolutions
        tensors = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, torchify=True)
        for tensor in tensors:
            self.assertIsInstance(tensor, torch.Tensor)

        # The explicit padding method and the plain call should return the same tensors
        padded = configured_extractor.pad_and_create_pixel_mask(tensors, return_tensors="pt")
        called = passthrough_extractor(tensors, return_tensors="pt")

        values_match = torch.allclose(padded["pixel_values"], called["pixel_values"], atol=1e-4)
        self.assertTrue(values_match)
        masks_match = torch.allclose(padded["pixel_mask"], called["pixel_mask"], atol=1e-4)
        self.assertTrue(masks_match)
def default_feature_extractor(self):
    """Return the `facebook/data2vec-vision-base-ft1k` feature extractor, or `None` without vision support."""
    if is_vision_available():
        return BeitFeatureExtractor.from_pretrained("facebook/data2vec-vision-base-ft1k")
    return None
def default_feature_extractor(self):
    """Return the `facebook/deit-base-distilled-patch16-224` feature extractor, or `None` without vision support."""
    if not is_vision_available():
        return None
    checkpoint = "facebook/deit-base-distilled-patch16-224"
    return DeiTFeatureExtractor.from_pretrained(checkpoint)
def default_feature_extractor(self):
    """Return the `facebook/vit-mae-base` feature extractor, or `None` when vision support is unavailable."""
    if is_vision_available():
        return ViTFeatureExtractor.from_pretrained("facebook/vit-mae-base")
    return None
def default_feature_extractor(self):
    """Return a `VideoMAEFeatureExtractor` with mean and std of 0.5 per channel, or `None` without vision support."""
    if not is_vision_available():
        return None
    # logits were tested with a different mean and std, so we use the same here
    return VideoMAEFeatureExtractor(
        image_mean=[0.5, 0.5, 0.5],
        image_std=[0.5, 0.5, 0.5],
    )