def test_register_pipeline(self):
    PIPELINE_REGISTRY.register_pipeline(
        "custom-text-classification",
        pipeline_class=PairClassificationPipeline,
        pt_model=AutoModelForSequenceClassification if is_torch_available() else None,
        tf_model=TFAutoModelForSequenceClassification if is_tf_available() else None,
        default={"pt": "hf-internal-testing/tiny-random-distilbert"},
        type="text",
    )
    assert "custom-text-classification" in PIPELINE_REGISTRY.get_supported_tasks()

    _, task_def, _ = PIPELINE_REGISTRY.check_task("custom-text-classification")
    self.assertEqual(task_def["pt"], (AutoModelForSequenceClassification,) if is_torch_available() else ())
    self.assertEqual(task_def["tf"], (TFAutoModelForSequenceClassification,) if is_tf_available() else ())
    self.assertEqual(task_def["type"], "text")
    self.assertEqual(task_def["impl"], PairClassificationPipeline)
    self.assertEqual(task_def["default"], {"model": {"pt": "hf-internal-testing/tiny-random-distilbert"}})

    # Clean registry for next tests.
    del PIPELINE_REGISTRY.supported_tasks["custom-text-classification"]
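# For context, a minimal sketch of the kind of custom pipeline the registration above expects,
# following the documented Pipeline interface (_sanitize_parameters / preprocess / _forward /
# postprocess). This is a hypothetical illustration assuming a PyTorch backend; the actual
# PairClassificationPipeline used by this test is defined elsewhere in the test module and may differ.
from transformers import Pipeline


class PairClassificationPipelineSketch(Pipeline):
    def _sanitize_parameters(self, second_text=None, **kwargs):
        preprocess_kwargs = {}
        if second_text is not None:
            preprocess_kwargs["second_text"] = second_text
        return preprocess_kwargs, {}, {}

    def preprocess(self, text, second_text=None):
        # Tokenize the text pair in the framework the pipeline was loaded with.
        return self.tokenizer(text, text_pair=second_text, return_tensors=self.framework)

    def _forward(self, model_inputs):
        return self.model(**model_inputs)

    def postprocess(self, model_outputs):
        # Return the highest-scoring label together with its probability (torch-style tensor ops).
        logits = model_outputs.logits[0]
        best_class = int(logits.argmax())
        score = float(logits.softmax(-1)[best_class])
        return {"label": self.model.config.id2label[best_class], "score": score}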
def test_find_labels(self):
    if is_torch_available():
        from transformers import BertForPreTraining, BertForQuestionAnswering, BertForSequenceClassification

        self.assertEqual(find_labels(BertForSequenceClassification), ["labels"])
        self.assertEqual(find_labels(BertForPreTraining), ["labels", "next_sentence_label"])
        self.assertEqual(find_labels(BertForQuestionAnswering), ["start_positions", "end_positions"])

    if is_tf_available():
        from transformers import TFBertForPreTraining, TFBertForQuestionAnswering, TFBertForSequenceClassification

        self.assertEqual(find_labels(TFBertForSequenceClassification), ["labels"])
        self.assertEqual(find_labels(TFBertForPreTraining), ["labels", "next_sentence_label"])
        self.assertEqual(find_labels(TFBertForQuestionAnswering), ["start_positions", "end_positions"])

    if is_flax_available():
        # Flax models don't have labels
        from transformers import (
            FlaxBertForPreTraining,
            FlaxBertForQuestionAnswering,
            FlaxBertForSequenceClassification,
        )

        self.assertEqual(find_labels(FlaxBertForSequenceClassification), [])
        self.assertEqual(find_labels(FlaxBertForPreTraining), [])
        self.assertEqual(find_labels(FlaxBertForQuestionAnswering), [])
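# The assertions above follow from how find_labels inspects a model class: it reads the signature
# of the framework-specific forward pass and keeps the argument names that look like labels. A
# rough sketch of that idea (a simplification for illustration, not the library's exact
# implementation):
import inspect


def find_labels_sketch(model_class):
    name = model_class.__name__
    if name.startswith("TF"):
        signature = inspect.signature(model_class.call)
    elif name.startswith("Flax"):
        signature = inspect.signature(model_class.__call__)  # Flax models take no label arguments
    else:
        signature = inspect.signature(model_class.forward)
    if "QuestionAnswering" in name:
        return [p for p in signature.parameters if "label" in p or p in ("start_positions", "end_positions")]
    return [p for p in signature.parameters if "label" in p]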
def convert_pytorch(nlp: Pipeline, opset: int, output: Path, use_external_format: bool):
    """
    Export a PyTorch backed pipeline to ONNX Intermediate Representation (IR).

    Args:
        nlp: The pipeline to be exported
        opset: The actual version of the ONNX operator set to use
        output: Path where the generated ONNX model will be stored
        use_external_format: Split the model definition from its parameters to allow models bigger than 2GB
    """
    if not is_torch_available():
        raise Exception("Cannot convert because PyTorch is not installed. Please install torch first.")

    import torch
    from torch.onnx import export

    from .pytorch_utils import is_torch_less_than_1_11

    print(f"Using framework PyTorch: {torch.__version__}")

    with torch.no_grad():
        input_names, output_names, dynamic_axes, tokens = infer_shapes(nlp, "pt")
        ordered_input_names, model_args = ensure_valid_input(nlp.model, tokens, input_names)

        # PyTorch deprecated the `enable_onnx_checker` and `use_external_data_format` arguments in v1.11,
        # so we check the torch version for backwards compatibility
        if is_torch_less_than_1_11:
            export(
                nlp.model,
                model_args,
                f=output.as_posix(),
                input_names=ordered_input_names,
                output_names=output_names,
                dynamic_axes=dynamic_axes,
                do_constant_folding=True,
                use_external_data_format=use_external_format,
                enable_onnx_checker=True,
                opset_version=opset,
            )
        else:
            export(
                nlp.model,
                model_args,
                f=output.as_posix(),
                input_names=ordered_input_names,
                output_names=output_names,
                dynamic_axes=dynamic_axes,
                do_constant_folding=True,
                opset_version=opset,
            )
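# Usage sketch (illustrative, not part of the module): export a small PyTorch-backed pipeline to
# ONNX. The model name and output path are placeholders, and opset 11 is just a common choice.
from pathlib import Path

from transformers import pipeline

nlp = pipeline("feature-extraction", model="distilbert-base-uncased", framework="pt")
convert_pytorch(nlp, opset=11, output=Path("distilbert.onnx"), use_external_format=False)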
def require_retrieval(test_case):
    """
    Decorator marking a test that requires a set of dependencies necessary to perform retrieval with
    [`RagRetriever`]. These tests are skipped when the respective libraries are not installed.
    """
    if not (is_torch_available() and is_datasets_available() and is_faiss_available()):
        test_case = unittest.skip("test requires PyTorch, datasets and faiss")(test_case)
    return test_case
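# Usage sketch: applied to a test class, the decorator above turns the whole class into a skip
# unless PyTorch, datasets and faiss are all importable. The class below is a hypothetical example,
# not part of the real test suite.
@require_retrieval
class RetrievalDependenciesTest(unittest.TestCase):
    def test_dependencies_present(self):
        self.assertTrue(is_torch_available() and is_datasets_available() and is_faiss_available())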
class OwlViTTextModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = (OwlViTTextModel,) if is_torch_available() else ()
    fx_compatible = False
    test_pruning = False
    test_head_masking = False

    def setUp(self):
        self.model_tester = OwlViTTextModelTester(self)
        self.config_tester = ConfigTester(self, config_class=OwlViTTextConfig, hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    @unittest.skip(reason="OWL-ViT does not support training yet")
    def test_training(self):
        pass

    @unittest.skip(reason="OWL-ViT does not support training yet")
    def test_training_gradient_checkpointing(self):
        pass

    @unittest.skip(reason="OWLVIT does not use inputs_embeds")
    def test_inputs_embeds(self):
        pass

    @unittest.skip(reason="OwlViTTextModel has no base class and is not available in MODEL_MAPPING")
    def test_save_load_fast_init_from_base(self):
        pass

    @unittest.skip(reason="OwlViTTextModel has no base class and is not available in MODEL_MAPPING")
    def test_save_load_fast_init_to_base(self):
        pass

    @slow
    def test_model_from_pretrained(self):
        for model_name in OWLVIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = OwlViTTextModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
class FlavaTextModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = (FlavaTextModel,) if is_torch_available() else ()
    test_pruning = False
    test_head_masking = False
    test_torchscript = False

    def setUp(self):
        self.model_tester = FlavaTextModelTester(self)
        self.config_tester = ConfigTester(self, config_class=FlavaTextConfig, hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_training(self):
        pass

    def test_training_gradient_checkpointing(self):
        pass

    def test_inputs_embeds(self):
        # FLAVA does not use inputs_embeds
        pass

    # skip this test as FlavaTextModel has no base class and is
    # not available in MODEL_MAPPING
    def test_save_load_fast_init_from_base(self):
        pass

    # skip this test as FlavaTextModel has no base class and is
    # not available in MODEL_MAPPING
    def test_save_load_fast_init_to_base(self):
        pass

    @slow
    def test_model_from_pretrained(self):
        for model_name in FLAVA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = FlavaTextModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
def load_graph_from_args(
    pipeline_name: str, framework: str, model: str, tokenizer: Optional[str] = None, **models_kwargs
) -> Pipeline:
    """
    Convert the set of arguments provided through the CLI to an actual pipeline reference (tokenizer + model).

    Args:
        pipeline_name: The kind of pipeline to use (ner, question-answering, etc.)
        framework: The framework to convert the pipeline from ("pt" or "tf")
        model: The model name which will be loaded by the pipeline
        tokenizer: The tokenizer name which will be loaded by the pipeline, defaults to the model's value

    Returns: Pipeline object
    """
    # If no tokenizer provided
    if tokenizer is None:
        tokenizer = model

    # Check the wanted framework is available
    if framework == "pt" and not is_torch_available():
        raise Exception("Cannot convert because PyTorch is not installed. Please install torch first.")
    if framework == "tf" and not is_tf_available():
        raise Exception("Cannot convert because TF is not installed. Please install tensorflow first.")

    print(f"Loading pipeline (model: {model}, tokenizer: {tokenizer})")

    # Allocate tokenizer and model
    return pipeline(pipeline_name, model=model, tokenizer=tokenizer, framework=framework, model_kwargs=models_kwargs)
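# Usage sketch (illustrative, not part of the module): load a PyTorch question-answering pipeline
# the same way the CLI entry point would. The model name is a placeholder.
nlp = load_graph_from_args(
    "question-answering",
    framework="pt",
    model="distilbert-base-cased-distilled-squad",
)
print(type(nlp).__name__)  # e.g. QuestionAnsweringPipeline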
class GroupViTModelTest(ModelTesterMixin, unittest.TestCase): all_model_classes = (GroupViTModel, ) if is_torch_available() else () test_head_masking = False test_pruning = False test_resize_embeddings = False test_attention_outputs = False def setUp(self): self.model_tester = GroupViTModelTester(self) def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) @unittest.skip(reason="hidden_states are tested in individual model tests") def test_hidden_states_output(self): pass @unittest.skip(reason="input_embeds are tested in individual model tests") def test_inputs_embeds(self): pass @unittest.skip(reason="tested in individual model tests") def test_retain_grad_hidden_states_attentions(self): pass @unittest.skip(reason="GroupViTModel does not have input/output embeddings" ) def test_model_common_attributes(self): pass # override as the `logit_scale` parameter initilization is different for GROUPVIT def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) configs_no_init = _config_zero_init(config) for model_class in self.all_model_classes: model = model_class(config=configs_no_init) for name, param in model.named_parameters(): if param.requires_grad: # check if `logit_scale` is initilized as per the original implementation if name == "logit_scale": self.assertAlmostEqual( param.data.item(), np.log(1 / 0.07), delta=1e-3, msg= f"Parameter {name} of model {model_class} seems not properly initialized", ) else: self.assertIn( ((param.data.mean() * 1e9).round() / 1e9).item(), [0.0, 1.0], msg= f"Parameter {name} of model {model_class} seems not properly initialized", ) def _create_and_check_torchscript(self, config, inputs_dict): if not self.test_torchscript: return configs_no_init = _config_zero_init( config) # To be sure we have no Nan configs_no_init.torchscript = True configs_no_init.return_dict = False for model_class in self.all_model_classes: model = model_class(config=configs_no_init) model.to(torch_device) model.eval() try: input_ids = inputs_dict["input_ids"] pixel_values = inputs_dict[ "pixel_values"] # GROUPVIT needs pixel_values traced_model = torch.jit.trace(model, (input_ids, pixel_values)) except RuntimeError: self.fail("Couldn't trace module.") with tempfile.TemporaryDirectory() as tmp_dir_name: pt_file_name = os.path.join(tmp_dir_name, "traced_model.pt") try: torch.jit.save(traced_model, pt_file_name) except Exception: self.fail("Couldn't save module.") try: loaded_model = torch.jit.load(pt_file_name) except Exception: self.fail("Couldn't load module.") model.to(torch_device) model.eval() loaded_model.to(torch_device) loaded_model.eval() model_state_dict = model.state_dict() loaded_model_state_dict = loaded_model.state_dict() self.assertEqual(set(model_state_dict.keys()), set(loaded_model_state_dict.keys())) models_equal = True for layer_name, p1 in model_state_dict.items(): p2 = loaded_model_state_dict[layer_name] if p1.data.ne(p2.data).sum() > 0: models_equal = False self.assertTrue(models_equal) def test_load_vision_text_config(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) # Save GroupViTConfig and check if we can load GroupViTVisionConfig from it with tempfile.TemporaryDirectory() as tmp_dir_name: config.save_pretrained(tmp_dir_name) vision_config = GroupViTVisionConfig.from_pretrained(tmp_dir_name) self.assertDictEqual(config.vision_config.to_dict(), vision_config.to_dict()) # Save 
GroupViTConfig and check if we can load GroupViTTextConfig from it with tempfile.TemporaryDirectory() as tmp_dir_name: config.save_pretrained(tmp_dir_name) text_config = GroupViTTextConfig.from_pretrained(tmp_dir_name) self.assertDictEqual(config.text_config.to_dict(), text_config.to_dict()) @slow def test_model_from_pretrained(self): for model_name in GROUPVIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = GroupViTModel.from_pretrained(model_name) self.assertIsNotNone(model)
class ResNetModelTest(ModelTesterMixin, unittest.TestCase): """ Here we also overwrite some of the tests of test_modeling_common.py, as ResNet does not use input_ids, inputs_embeds, attention_mask and seq_length. """ all_model_classes = ( ResNetModel, ResNetForImageClassification) if is_torch_available() else () test_pruning = False test_resize_embeddings = False test_head_masking = False has_attentions = False def setUp(self): self.model_tester = ResNetModelTester(self) self.config_tester = ConfigTester(self, config_class=ResNetConfig, has_text_modality=False) def test_config(self): self.create_and_test_config_common_properties() self.config_tester.create_and_test_config_to_json_string() self.config_tester.create_and_test_config_to_json_file() self.config_tester.create_and_test_config_from_and_save_pretrained() self.config_tester.create_and_test_config_with_num_labels() self.config_tester.check_config_can_be_init_without_params() self.config_tester.check_config_arguments_init() def create_and_test_config_common_properties(self): return @unittest.skip(reason="ResNet does not output attentions") def test_attention_outputs(self): pass @unittest.skip(reason="ResNet does not use inputs_embeds") def test_inputs_embeds(self): pass @unittest.skip(reason="ResNet does not support input and output embeddings" ) def test_model_common_attributes(self): pass def test_forward_signature(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) signature = inspect.signature(model.forward) # signature.parameters is an OrderedDict => so arg_names order is deterministic arg_names = [*signature.parameters.keys()] expected_arg_names = ["pixel_values"] self.assertListEqual(arg_names[:1], expected_arg_names) def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) for model_class in self.all_model_classes: model = model_class(config=config) for name, module in model.named_modules(): if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)): self.assertTrue( torch.all(module.weight == 1), msg= f"Parameter {name} of model {model_class} seems not properly initialized", ) self.assertTrue( torch.all(module.bias == 0), msg= f"Parameter {name} of model {model_class} seems not properly initialized", ) def test_hidden_states_output(self): def check_hidden_states_output(inputs_dict, config, model_class): model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) hidden_states = outputs.encoder_hidden_states if config.is_encoder_decoder else outputs.hidden_states expected_num_stages = self.model_tester.num_stages self.assertEqual(len(hidden_states), expected_num_stages + 1) # ResNet's feature maps are of shape (batch_size, num_channels, height, width) self.assertListEqual( list(hidden_states[0].shape[-2:]), [ self.model_tester.image_size // 4, self.model_tester.image_size // 4 ], ) config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) layers_type = ["basic", "bottleneck"] for model_class in self.all_model_classes: for layer_type in layers_type: config.layer_type = layer_type inputs_dict["output_hidden_states"] = True check_hidden_states_output(inputs_dict, config, model_class) # check that output_hidden_states also work 
using config del inputs_dict["output_hidden_states"] config.output_hidden_states = True check_hidden_states_output(inputs_dict, config, model_class) def test_for_image_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_image_classification( *config_and_inputs) @slow def test_model_from_pretrained(self): for model_name in RESNET_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = ResNetModel.from_pretrained(model_name) self.assertIsNotNone(model)
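# Worked note on test_hidden_states_output above (a sketch with assumed tester values): ResNet
# reports one feature map for the embedder plus one per stage, so the test expects
# len(hidden_states) == num_stages + 1, and the stem downsamples the input by a factor of 4,
# which gives the image_size // 4 spatial check on the first feature map.
num_stages = 3   # assumed example value for ResNetModelTester
image_size = 32  # assumed example value for ResNetModelTester
assert num_stages + 1 == 4
assert image_size // 4 == 8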
class BeitModelTest(ModelTesterMixin, unittest.TestCase): """ Here we also overwrite some of the tests of test_modeling_common.py, as BEiT does not use input_ids, inputs_embeds, attention_mask and seq_length. """ all_model_classes = ( (BeitModel, BeitForImageClassification, BeitForMaskedImageModeling, BeitForSemanticSegmentation) if is_torch_available() else () ) test_pruning = False test_resize_embeddings = False test_head_masking = False def setUp(self): self.model_tester = BeitModelTester(self) self.config_tester = ConfigTester(self, config_class=BeitConfig, has_text_modality=False, hidden_size=37) def test_config(self): self.config_tester.run_common_tests() def test_inputs_embeds(self): # BEiT does not use inputs_embeds pass def test_model_common_attributes(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) self.assertIsInstance(model.get_input_embeddings(), (nn.Module)) x = model.get_output_embeddings() self.assertTrue(x is None or isinstance(x, nn.Linear)) def test_forward_signature(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) signature = inspect.signature(model.forward) # signature.parameters is an OrderedDict => so arg_names order is deterministic arg_names = [*signature.parameters.keys()] expected_arg_names = ["pixel_values"] self.assertListEqual(arg_names[:1], expected_arg_names) def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) def test_for_image_segmentation(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_image_segmentation(*config_and_inputs) def test_training(self): if not self.model_tester.is_training: return config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config.return_dict = True for model_class in self.all_model_classes: # we don't test BeitForMaskedImageModeling if model_class in [*get_values(MODEL_MAPPING), BeitForMaskedImageModeling]: continue model = model_class(config) model.to(torch_device) model.train() inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) loss = model(**inputs).loss loss.backward() def test_training_gradient_checkpointing(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() if not self.model_tester.is_training: return config.use_cache = False config.return_dict = True for model_class in self.all_model_classes: # we don't test BeitForMaskedImageModeling if ( model_class in [*get_values(MODEL_MAPPING), BeitForMaskedImageModeling] or not model_class.supports_gradient_checkpointing ): continue # TODO: remove the following 3 lines once we have a MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING # this can then be incorporated into _prepare_for_class in test_modeling_common.py elif model_class.__name__ == "BeitForSemanticSegmentation": batch_size, num_channels, height, width = inputs_dict["pixel_values"].shape inputs_dict["labels"] = torch.zeros( [self.model_tester.batch_size, height, width], device=torch_device ).long() model = model_class(config) model.gradient_checkpointing_enable() model.to(torch_device) model.train() inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) loss = model(**inputs).loss loss.backward() def test_initialization(self): config, inputs_dict = 
self.model_tester.prepare_config_and_inputs_for_common() configs_no_init = _config_zero_init(config) for model_class in self.all_model_classes: model = model_class(config=configs_no_init) for name, param in model.named_parameters(): # we skip lambda parameters as these require special initial values # determined by config.layer_scale_init_value if "lambda" in name: continue if param.requires_grad: self.assertIn( ((param.data.mean() * 1e9).round() / 1e9).item(), [0.0, 1.0], msg=f"Parameter {name} of model {model_class} seems not properly initialized", ) def test_attention_outputs(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config.return_dict = True # BEiT has a different seq_length seq_len = self.model_tester.expected_seq_length for model_class in self.all_model_classes: inputs_dict["output_attentions"] = True inputs_dict["output_hidden_states"] = False config.return_dict = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model(**self._prepare_for_class(inputs_dict, model_class)) attentions = outputs.attentions self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) # check that output_attentions also work using config del inputs_dict["output_attentions"] config.output_attentions = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model(**self._prepare_for_class(inputs_dict, model_class)) attentions = outputs.attentions self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) self.assertListEqual( list(attentions[0].shape[-3:]), [self.model_tester.num_attention_heads, seq_len, seq_len], ) out_len = len(outputs) # Check attention is always last and order is fine inputs_dict["output_attentions"] = True inputs_dict["output_hidden_states"] = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model(**self._prepare_for_class(inputs_dict, model_class)) self.assertEqual(out_len + 1, len(outputs)) self_attentions = outputs.attentions self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers) self.assertListEqual( list(self_attentions[0].shape[-3:]), [self.model_tester.num_attention_heads, seq_len, seq_len], ) def test_hidden_states_output(self): def check_hidden_states_output(inputs_dict, config, model_class): model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model(**self._prepare_for_class(inputs_dict, model_class)) hidden_states = outputs.hidden_states expected_num_layers = getattr( self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1 ) self.assertEqual(len(hidden_states), expected_num_layers) # BEiT has a different seq_length seq_length = self.model_tester.expected_seq_length self.assertListEqual( list(hidden_states[0].shape[-2:]), [seq_length, self.model_tester.hidden_size], ) config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: inputs_dict["output_hidden_states"] = True check_hidden_states_output(inputs_dict, config, model_class) # check that output_hidden_states also work using config del inputs_dict["output_hidden_states"] config.output_hidden_states = True check_hidden_states_output(inputs_dict, config, model_class) def test_for_masked_lm(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_masked_lm(*config_and_inputs) def 
test_for_image_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_image_classification(*config_and_inputs) @slow def test_model_from_pretrained(self): for model_name in BEIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = BeitModel.from_pretrained(model_name) self.assertIsNotNone(model)
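# Worked note on the semantic-segmentation labels built in test_training_gradient_checkpointing
# above (sketch with assumed sizes): BeitForSemanticSegmentation is trained against one integer
# class id per pixel, so the labels tensor has shape (batch_size, height, width) and dtype long.
import torch

batch_size, height, width = 2, 16, 16  # assumed example sizes
labels = torch.zeros([batch_size, height, width], dtype=torch.long)
assert labels.shape == (batch_size, height, width) and labels.dtype == torch.long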
class GroupViTVisionModelTest(ModelTesterMixin, unittest.TestCase): """ Here we also overwrite some of the tests of test_modeling_common.py, as GROUPVIT does not use input_ids, inputs_embeds, attention_mask and seq_length. """ all_model_classes = (GroupViTVisionModel, ) if is_torch_available() else () test_pruning = False test_torchscript = False test_resize_embeddings = False test_head_masking = False def setUp(self): self.model_tester = GroupViTVisionModelTester(self) self.config_tester = ConfigTester(self, config_class=GroupViTVisionConfig, has_text_modality=False, hidden_size=37) def test_config(self): self.config_tester.run_common_tests() @unittest.skip(reason="GroupViT does not use inputs_embeds") def test_inputs_embeds(self): pass def test_model_common_attributes(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) self.assertIsInstance(model.get_input_embeddings(), (nn.Module)) x = model.get_output_embeddings() self.assertTrue(x is None or isinstance(x, nn.Linear)) def test_forward_signature(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) signature = inspect.signature(model.forward) # signature.parameters is an OrderedDict => so arg_names order is deterministic arg_names = [*signature.parameters.keys()] expected_arg_names = ["pixel_values"] self.assertListEqual(arg_names[:1], expected_arg_names) def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) def test_attention_outputs(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) config.return_dict = True seq_len = getattr(self.model_tester, "seq_length", None) expected_num_attention_outputs = sum( g > 0 for g in self.model_tester.num_group_tokens) for model_class in self.all_model_classes: inputs_dict["output_attentions"] = True inputs_dict["output_hidden_states"] = False config.return_dict = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) attentions = outputs.attentions # GroupViT returns attention grouping of each stage self.assertEqual( len(attentions), sum(g > 0 for g in self.model_tester.num_group_tokens)) # check that output_attentions also work using config del inputs_dict["output_attentions"] config.output_attentions = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) attentions = outputs.attentions # GroupViT returns attention grouping of each stage self.assertEqual(len(attentions), expected_num_attention_outputs) out_len = len(outputs) # Check attention is always last and order is fine inputs_dict["output_attentions"] = True inputs_dict["output_hidden_states"] = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) added_hidden_states = 1 self.assertEqual(out_len + added_hidden_states, len(outputs)) self_attentions = outputs.attentions # GroupViT returns attention grouping of each stage self.assertEqual(len(self_attentions), expected_num_attention_outputs) for i, self_attn in enumerate(self_attentions): if self_attn is None: continue self.assertListEqual( 
list(self_attentions[i].shape[-2:]), [ self.model_tester.num_output_groups[i], self.model_tester.num_output_groups[i - 1] if i > 0 else seq_len, ], ) def test_training(self): pass def test_training_gradient_checkpointing(self): pass @unittest.skip( reason= "GroupViTVisionModel has no base class and is not available in MODEL_MAPPING" ) def test_save_load_fast_init_from_base(self): pass @unittest.skip( reason= "GroupViTVisionModel has no base class and is not available in MODEL_MAPPING" ) def test_save_load_fast_init_to_base(self): pass # override since the attention mask from GroupViT is not used to compute loss, thus no grad def test_retain_grad_hidden_states_attentions(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) config.output_hidden_states = True config.output_attentions = self.has_attentions # no need to test all models as different heads yield the same functionality model_class = self.all_model_classes[0] model = model_class(config) model.to(torch_device) inputs = self._prepare_for_class(inputs_dict, model_class) outputs = model(**inputs) output = outputs[0] if config.is_encoder_decoder: # Seq2Seq models encoder_hidden_states = outputs.encoder_hidden_states[0] encoder_hidden_states.retain_grad() decoder_hidden_states = outputs.decoder_hidden_states[0] decoder_hidden_states.retain_grad() if self.has_attentions: encoder_attentions = outputs.encoder_attentions[0] encoder_attentions.retain_grad() decoder_attentions = outputs.decoder_attentions[0] decoder_attentions.retain_grad() cross_attentions = outputs.cross_attentions[0] cross_attentions.retain_grad() output.flatten()[0].backward(retain_graph=True) self.assertIsNotNone(encoder_hidden_states.grad) self.assertIsNotNone(decoder_hidden_states.grad) if self.has_attentions: self.assertIsNotNone(encoder_attentions.grad) self.assertIsNotNone(decoder_attentions.grad) self.assertIsNotNone(cross_attentions.grad) else: # Encoder-/Decoder-only models hidden_states = outputs.hidden_states[0] hidden_states.retain_grad() if self.has_attentions: attentions = outputs.attentions[0] attentions.retain_grad() output.flatten()[0].backward(retain_graph=True) self.assertIsNotNone(hidden_states.grad) if self.has_attentions: self.assertIsNone(attentions.grad) @slow def test_model_from_pretrained(self): for model_name in GROUPVIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = GroupViTVisionModel.from_pretrained(model_name) self.assertIsNotNone(model)
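# Worked note on test_attention_outputs above (sketch with assumed tester values): GroupViT emits
# one attention-grouping tensor per stage that actually uses group tokens, so the expected count is
# sum(g > 0 for g in num_group_tokens), and tensor i maps the previous stage's output groups (or the
# patch sequence, for the first stage) onto the current stage's output groups.
num_group_tokens = [64, 8, 0]   # assumed example values; the last stage has no group tokens
num_output_groups = [64, 8, 8]  # assumed example values
expected_num_attention_outputs = sum(g > 0 for g in num_group_tokens)
assert expected_num_attention_outputs == 2
# attentions[1] would then have trailing shape (num_output_groups[1], num_output_groups[0]) == (8, 64)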
class CLIPVisionModelTest(ModelTesterMixin, unittest.TestCase): """ Here we also overwrite some of the tests of test_modeling_common.py, as CLIP does not use input_ids, inputs_embeds, attention_mask and seq_length. """ all_model_classes = (CLIPVisionModel, ) if is_torch_available() else () test_pruning = False test_resize_embeddings = False test_head_masking = False def setUp(self): self.model_tester = CLIPVisionModelTester(self) self.config_tester = ConfigTester(self, config_class=CLIPVisionConfig, has_text_modality=False, hidden_size=37) def test_config(self): self.config_tester.run_common_tests() def test_inputs_embeds(self): # CLIP does not use inputs_embeds pass def test_model_common_attributes(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) self.assertIsInstance(model.get_input_embeddings(), (nn.Module)) x = model.get_output_embeddings() self.assertTrue(x is None or isinstance(x, nn.Linear)) def test_forward_signature(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) signature = inspect.signature(model.forward) # signature.parameters is an OrderedDict => so arg_names order is deterministic arg_names = [*signature.parameters.keys()] expected_arg_names = ["pixel_values"] self.assertListEqual(arg_names[:1], expected_arg_names) def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) def test_attention_outputs(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) config.return_dict = True # in CLIP, the seq_len equals the number of patches + 1 (we add 1 for the [CLS] token) image_size = (self.model_tester.image_size, self.model_tester.image_size) patch_size = (self.model_tester.patch_size, self.model_tester.patch_size) num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0]) seq_len = num_patches + 1 for model_class in self.all_model_classes: inputs_dict["output_attentions"] = True inputs_dict["output_hidden_states"] = False config.return_dict = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) attentions = outputs.attentions self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) # check that output_attentions also work using config del inputs_dict["output_attentions"] config.output_attentions = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) attentions = outputs.attentions self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) out_len = len(outputs) # Check attention is always last and order is fine inputs_dict["output_attentions"] = True inputs_dict["output_hidden_states"] = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) added_hidden_states = 1 self.assertEqual(out_len + added_hidden_states, len(outputs)) self_attentions = outputs.attentions self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers) self.assertListEqual( list(self_attentions[0].shape[-3:]), [self.model_tester.num_attention_heads, seq_len, seq_len], ) def test_hidden_states_output(self): def 
check_hidden_states_output(inputs_dict, config, model_class): model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) hidden_states = outputs.encoder_hidden_states if config.is_encoder_decoder else outputs.hidden_states expected_num_layers = getattr( self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1) self.assertEqual(len(hidden_states), expected_num_layers) # CLIP has a different seq_length image_size = (self.model_tester.image_size, self.model_tester.image_size) patch_size = (self.model_tester.patch_size, self.model_tester.patch_size) num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0]) seq_length = num_patches + 1 self.assertListEqual( list(hidden_states[0].shape[-2:]), [seq_length, self.model_tester.hidden_size], ) config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) for model_class in self.all_model_classes: inputs_dict["output_hidden_states"] = True check_hidden_states_output(inputs_dict, config, model_class) # check that output_hidden_states also work using config del inputs_dict["output_hidden_states"] config.output_hidden_states = True check_hidden_states_output(inputs_dict, config, model_class) def test_training(self): pass def test_training_gradient_checkpointing(self): pass # skip this test as CLIPVisionModel has no base class and is # not available in MODEL_MAPPING def test_save_load_fast_init_from_base(self): pass # skip this test as CLIPVisionModel has no base class and is # not available in MODEL_MAPPING def test_save_load_fast_init_to_base(self): pass @slow def test_model_from_pretrained(self): for model_name in CLIP_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = CLIPVisionModel.from_pretrained(model_name) self.assertIsNotNone(model)
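# Worked example of the seq_len computation used in test_attention_outputs and
# check_hidden_states_output above: the sequence length of the CLIP vision tower is the number of
# image patches plus one [CLS] token. The sizes below are typical CLIP values, not the tester's.
image_size = (224, 224)
patch_size = (32, 32)
num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0])  # 7 * 7 = 49
seq_len = num_patches + 1  # 50, including the [CLS] token
assert seq_len == 50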
class CLIPModelTest(ModelTesterMixin, unittest.TestCase): all_model_classes = (CLIPModel, ) if is_torch_available() else () test_head_masking = False test_pruning = False test_resize_embeddings = False test_attention_outputs = False def setUp(self): self.model_tester = CLIPModelTester(self) def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) # hidden_states are tested in individual model tests def test_hidden_states_output(self): pass # input_embeds are tested in individual model tests def test_inputs_embeds(self): pass # tested in individual model tests def test_retain_grad_hidden_states_attentions(self): pass # CLIPModel does not have input/output embeddings def test_model_common_attributes(self): pass # override as the `logit_scale` parameter initilization is different for CLIP def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) configs_no_init = _config_zero_init(config) for model_class in self.all_model_classes: model = model_class(config=configs_no_init) for name, param in model.named_parameters(): if param.requires_grad: # check if `logit_scale` is initilized as per the original implementation if name == "logit_scale": self.assertAlmostEqual( param.data.item(), np.log(1 / 0.07), delta=1e-3, msg= f"Parameter {name} of model {model_class} seems not properly initialized", ) else: self.assertIn( ((param.data.mean() * 1e9).round() / 1e9).item(), [0.0, 1.0], msg= f"Parameter {name} of model {model_class} seems not properly initialized", ) def _create_and_check_torchscript(self, config, inputs_dict): if not self.test_torchscript: return configs_no_init = _config_zero_init( config) # To be sure we have no Nan configs_no_init.torchscript = True configs_no_init.return_dict = False for model_class in self.all_model_classes: model = model_class(config=configs_no_init) model.to(torch_device) model.eval() try: input_ids = inputs_dict["input_ids"] pixel_values = inputs_dict[ "pixel_values"] # CLIP needs pixel_values traced_model = torch.jit.trace(model, (input_ids, pixel_values)) except RuntimeError: self.fail("Couldn't trace module.") with tempfile.TemporaryDirectory() as tmp_dir_name: pt_file_name = os.path.join(tmp_dir_name, "traced_model.pt") try: torch.jit.save(traced_model, pt_file_name) except Exception: self.fail("Couldn't save module.") try: loaded_model = torch.jit.load(pt_file_name) except Exception: self.fail("Couldn't load module.") model.to(torch_device) model.eval() loaded_model.to(torch_device) loaded_model.eval() model_state_dict = model.state_dict() loaded_model_state_dict = loaded_model.state_dict() self.assertEqual(set(model_state_dict.keys()), set(loaded_model_state_dict.keys())) models_equal = True for layer_name, p1 in model_state_dict.items(): p2 = loaded_model_state_dict[layer_name] if p1.data.ne(p2.data).sum() > 0: models_equal = False self.assertTrue(models_equal) def test_load_vision_text_config(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) # Save CLIPConfig and check if we can load CLIPVisionConfig from it with tempfile.TemporaryDirectory() as tmp_dir_name: config.save_pretrained(tmp_dir_name) vision_config = CLIPVisionConfig.from_pretrained(tmp_dir_name) self.assertDictEqual(config.vision_config.to_dict(), vision_config.to_dict()) # Save CLIPConfig and check if we can load CLIPTextConfig from it with tempfile.TemporaryDirectory() as tmp_dir_name: 
config.save_pretrained(tmp_dir_name) text_config = CLIPTextConfig.from_pretrained(tmp_dir_name) self.assertDictEqual(config.text_config.to_dict(), text_config.to_dict()) # overwrite from common since FlaxCLIPModel returns nested output # which is not supported in the common test @is_pt_flax_cross_test def test_equivalence_pt_to_flax(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) for model_class in self.all_model_classes: with self.subTest(model_class.__name__): # load PyTorch class pt_model = model_class(config).eval() # Flax models don't use the `use_cache` option and cache is not returned as a default. # So we disable `use_cache` here for PyTorch model. pt_model.config.use_cache = False fx_model_class_name = "Flax" + model_class.__name__ if not hasattr(transformers, fx_model_class_name): return fx_model_class = getattr(transformers, fx_model_class_name) # load Flax class fx_model = fx_model_class(config, dtype=jnp.float32) # make sure only flax inputs are forward that actually exist in function args fx_input_keys = inspect.signature( fx_model.__call__).parameters.keys() # prepare inputs pt_inputs = self._prepare_for_class(inputs_dict, model_class) # remove function args that don't exist in Flax pt_inputs = { k: v for k, v in pt_inputs.items() if k in fx_input_keys } fx_state = convert_pytorch_state_dict_to_flax( pt_model.state_dict(), fx_model) fx_model.params = fx_state with torch.no_grad(): pt_outputs = pt_model(**pt_inputs).to_tuple() # convert inputs to Flax fx_inputs = { k: np.array(v) for k, v in pt_inputs.items() if torch.is_tensor(v) } fx_outputs = fx_model(**fx_inputs).to_tuple() self.assertEqual( len(fx_outputs), len(pt_outputs), "Output lengths differ between Flax and PyTorch") for fx_output, pt_output in zip(fx_outputs[:4], pt_outputs[:4]): self.assert_almost_equals(fx_output, pt_output.numpy(), 4e-2) with tempfile.TemporaryDirectory() as tmpdirname: pt_model.save_pretrained(tmpdirname) fx_model_loaded = fx_model_class.from_pretrained( tmpdirname, from_pt=True) fx_outputs_loaded = fx_model_loaded(**fx_inputs).to_tuple() self.assertEqual( len(fx_outputs_loaded), len(pt_outputs), "Output lengths differ between Flax and PyTorch") for fx_output_loaded, pt_output in zip(fx_outputs_loaded[:4], pt_outputs[:4]): self.assert_almost_equals(fx_output_loaded, pt_output.numpy(), 4e-2) # overwrite from common since FlaxCLIPModel returns nested output # which is not supported in the common test @is_pt_flax_cross_test def test_equivalence_flax_to_pt(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) for model_class in self.all_model_classes: with self.subTest(model_class.__name__): # load corresponding PyTorch class pt_model = model_class(config).eval() # So we disable `use_cache` here for PyTorch model. 
pt_model.config.use_cache = False fx_model_class_name = "Flax" + model_class.__name__ if not hasattr(transformers, fx_model_class_name): # no flax model exists for this class return fx_model_class = getattr(transformers, fx_model_class_name) # load Flax class fx_model = fx_model_class(config, dtype=jnp.float32) # make sure only flax inputs are forward that actually exist in function args fx_input_keys = inspect.signature( fx_model.__call__).parameters.keys() pt_model = load_flax_weights_in_pytorch_model( pt_model, fx_model.params) # make sure weights are tied in PyTorch pt_model.tie_weights() # prepare inputs pt_inputs = self._prepare_for_class(inputs_dict, model_class) # remove function args that don't exist in Flax pt_inputs = { k: v for k, v in pt_inputs.items() if k in fx_input_keys } with torch.no_grad(): pt_outputs = pt_model(**pt_inputs).to_tuple() fx_inputs = { k: np.array(v) for k, v in pt_inputs.items() if torch.is_tensor(v) } fx_outputs = fx_model(**fx_inputs).to_tuple() self.assertEqual( len(fx_outputs), len(pt_outputs), "Output lengths differ between Flax and PyTorch") for fx_output, pt_output in zip(fx_outputs[:4], pt_outputs[:4]): self.assert_almost_equals(fx_output, pt_output.numpy(), 4e-2) with tempfile.TemporaryDirectory() as tmpdirname: fx_model.save_pretrained(tmpdirname) pt_model_loaded = model_class.from_pretrained( tmpdirname, from_flax=True) with torch.no_grad(): pt_outputs_loaded = pt_model_loaded(**pt_inputs).to_tuple() self.assertEqual( len(fx_outputs), len(pt_outputs_loaded), "Output lengths differ between Flax and PyTorch") for fx_output, pt_output in zip(fx_outputs[:4], pt_outputs_loaded[:4]): self.assert_almost_equals(fx_output, pt_output.numpy(), 4e-2) @slow def test_model_from_pretrained(self): for model_name in CLIP_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = CLIPModel.from_pretrained(model_name) self.assertIsNotNone(model)
class DeiTModelTest(ModelTesterMixin, unittest.TestCase): """ Here we also overwrite some of the tests of test_modeling_common.py, as DeiT does not use input_ids, inputs_embeds, attention_mask and seq_length. """ all_model_classes = (( DeiTModel, DeiTForImageClassification, DeiTForImageClassificationWithTeacher, DeiTForMaskedImageModeling, ) if is_torch_available() else ()) test_pruning = False test_resize_embeddings = False test_head_masking = False def setUp(self): self.model_tester = DeiTModelTester(self) self.config_tester = ConfigTester(self, config_class=DeiTConfig, has_text_modality=False, hidden_size=37) def test_config(self): self.config_tester.run_common_tests() @unittest.skip(reason="DeiT does not use inputs_embeds") def test_inputs_embeds(self): pass def test_model_common_attributes(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) self.assertIsInstance(model.get_input_embeddings(), (nn.Module)) x = model.get_output_embeddings() self.assertTrue(x is None or isinstance(x, nn.Linear)) def test_forward_signature(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) signature = inspect.signature(model.forward) # signature.parameters is an OrderedDict => so arg_names order is deterministic arg_names = [*signature.parameters.keys()] expected_arg_names = ["pixel_values"] self.assertListEqual(arg_names[:1], expected_arg_names) def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) def test_for_image_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_image_classification( *config_and_inputs) # special case for DeiTForImageClassificationWithTeacher model def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels) if return_labels: if model_class.__name__ == "DeiTForImageClassificationWithTeacher": del inputs_dict["labels"] return inputs_dict def test_training(self): if not self.model_tester.is_training: return config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) config.return_dict = True for model_class in self.all_model_classes: # DeiTForImageClassificationWithTeacher supports inference-only if (model_class in get_values(MODEL_MAPPING) or model_class.__name__ == "DeiTForImageClassificationWithTeacher"): continue model = model_class(config) model.to(torch_device) model.train() inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) loss = model(**inputs).loss loss.backward() def test_training_gradient_checkpointing(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) if not self.model_tester.is_training: return config.use_cache = False config.return_dict = True for model_class in self.all_model_classes: if model_class in get_values( MODEL_MAPPING ) or not model_class.supports_gradient_checkpointing: continue # DeiTForImageClassificationWithTeacher supports inference-only if model_class.__name__ == "DeiTForImageClassificationWithTeacher": continue model = model_class(config) model.gradient_checkpointing_enable() model.to(torch_device) model.train() inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) loss = 
model(**inputs).loss loss.backward() def test_problem_types(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) problem_types = [ { "title": "multi_label_classification", "num_labels": 2, "dtype": torch.float }, { "title": "single_label_classification", "num_labels": 1, "dtype": torch.long }, { "title": "regression", "num_labels": 1, "dtype": torch.float }, ] for model_class in self.all_model_classes: if (model_class not in [ *get_values(MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING), *get_values(MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING), ] or model_class.__name__ == "DeiTForImageClassificationWithTeacher"): continue for problem_type in problem_types: with self.subTest( msg= f"Testing {model_class} with {problem_type['title']}"): config.problem_type = problem_type["title"] config.num_labels = problem_type["num_labels"] model = model_class(config) model.to(torch_device) model.train() inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) if problem_type["num_labels"] > 1: inputs["labels"] = inputs["labels"].unsqueeze( 1).repeat(1, problem_type["num_labels"]) inputs["labels"] = inputs["labels"].to( problem_type["dtype"]) # This tests that we do not trigger the warning form PyTorch "Using a target size that is different # to the input size. This will likely lead to incorrect results due to broadcasting. Please ensure # they have the same size." which is a symptom something in wrong for the regression problem. # See https://github.com/huggingface/transformers/issues/11780 with warnings.catch_warnings(record=True) as warning_list: loss = model(**inputs).loss for w in warning_list: if "Using a target size that is different to the input size" in str( w.message): raise ValueError( f"Something is going wrong in the regression problem: intercepted {w.message}" ) loss.backward() @slow def test_model_from_pretrained(self): for model_name in DEIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = DeiTModel.from_pretrained(model_name) self.assertIsNotNone(model)
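# Worked example of the label reshaping in test_problem_types above: for
# "multi_label_classification" with num_labels=2, a batch of integer labels is expanded to a float
# matrix of shape (batch_size, num_labels) before being passed to the model.
import torch

labels = torch.tensor([1, 0, 1])                                 # shape (3,)
multi_labels = labels.unsqueeze(1).repeat(1, 2).to(torch.float)  # shape (3, 2)
assert multi_labels.shape == (3, 2) and multi_labels.dtype == torch.float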
class OwlViTVisionModelTest(ModelTesterMixin, unittest.TestCase): """ Here we also overwrite some of the tests of test_modeling_common.py, as OWLVIT does not use input_ids, inputs_embeds, attention_mask and seq_length. """ all_model_classes = (OwlViTVisionModel, ) if is_torch_available() else () fx_compatible = False test_pruning = False test_resize_embeddings = False test_head_masking = False def setUp(self): self.model_tester = OwlViTVisionModelTester(self) self.config_tester = ConfigTester(self, config_class=OwlViTVisionConfig, has_text_modality=False, hidden_size=37) def test_config(self): self.config_tester.run_common_tests() @unittest.skip(reason="OWLVIT does not use inputs_embeds") def test_inputs_embeds(self): pass def test_model_common_attributes(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) self.assertIsInstance(model.get_input_embeddings(), (nn.Module)) x = model.get_output_embeddings() self.assertTrue(x is None or isinstance(x, nn.Linear)) def test_forward_signature(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) signature = inspect.signature(model.forward) # signature.parameters is an OrderedDict => so arg_names order is deterministic arg_names = [*signature.parameters.keys()] expected_arg_names = ["pixel_values"] self.assertListEqual(arg_names[:1], expected_arg_names) def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) @unittest.skip(reason="OWL-ViT does not support training yet") def test_training(self): pass @unittest.skip(reason="OWL-ViT does not support training yet") def test_training_gradient_checkpointing(self): pass @unittest.skip( reason= "OwlViTVisionModel has no base class and is not available in MODEL_MAPPING" ) def test_save_load_fast_init_from_base(self): pass @unittest.skip( reason= "OwlViTVisionModel has no base class and is not available in MODEL_MAPPING" ) def test_save_load_fast_init_to_base(self): pass @slow def test_model_from_pretrained(self): for model_name in OWLVIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = OwlViTVisionModel.from_pretrained(model_name) self.assertIsNotNone(model)
class LayoutLMv3ModelTest(ModelTesterMixin, unittest.TestCase): test_pruning = False test_torchscript = False test_mismatched_shapes = False all_model_classes = (( LayoutLMv3Model, LayoutLMv3ForSequenceClassification, LayoutLMv3ForTokenClassification, LayoutLMv3ForQuestionAnswering, ) if is_torch_available() else ()) def setUp(self): self.model_tester = LayoutLMv3ModelTester(self) self.config_tester = ConfigTester(self, config_class=LayoutLMv3Config, hidden_size=37) def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): inputs_dict = copy.deepcopy(inputs_dict) if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING): inputs_dict = { k: v.unsqueeze(1).expand(-1, self.model_tester.num_choices, -1).contiguous() if isinstance(v, torch.Tensor) and v.ndim > 1 else v for k, v in inputs_dict.items() } if return_labels: if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING): inputs_dict["labels"] = torch.ones( self.model_tester.batch_size, dtype=torch.long, device=torch_device) elif model_class in get_values( MODEL_FOR_QUESTION_ANSWERING_MAPPING): inputs_dict["start_positions"] = torch.zeros( self.model_tester.batch_size, dtype=torch.long, device=torch_device) inputs_dict["end_positions"] = torch.zeros( self.model_tester.batch_size, dtype=torch.long, device=torch_device) elif model_class in [ *get_values(MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING), ]: inputs_dict["labels"] = torch.zeros( self.model_tester.batch_size, dtype=torch.long, device=torch_device) elif model_class in [ *get_values(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING), ]: inputs_dict["labels"] = torch.zeros( (self.model_tester.batch_size, self.model_tester.text_seq_length), dtype=torch.long, device=torch_device, ) return inputs_dict def test_config(self): self.config_tester.run_common_tests() def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) def test_model_various_embeddings(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() for type in ["absolute", "relative_key", "relative_key_query"]: config_and_inputs[0].position_embedding_type = type self.model_tester.create_and_check_model(*config_and_inputs) def test_for_sequence_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_sequence_classification( *config_and_inputs) def test_for_token_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_token_classification( *config_and_inputs) def test_for_question_answering(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_question_answering( *config_and_inputs) @slow def test_model_from_pretrained(self): for model_name in LAYOUTLMV3_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = LayoutLMv3Model.from_pretrained(model_name) self.assertIsNotNone(model)
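# Worked example of the multiple-choice expansion in _prepare_for_class above: every 2D tensor in
# the inputs is repeated along a new num_choices dimension, turning (batch_size, seq_len) into
# (batch_size, num_choices, seq_len). The sizes below are illustrative.
import torch

batch_size, seq_len, num_choices = 2, 5, 4
input_ids = torch.ones(batch_size, seq_len, dtype=torch.long)
expanded = input_ids.unsqueeze(1).expand(-1, num_choices, -1).contiguous()
assert expanded.shape == (batch_size, num_choices, seq_len)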
class RagTestMixin: all_model_classes = ( (RagModel, RagTokenForGeneration, RagSequenceForGeneration) if is_torch_available() and is_datasets_available() and is_faiss_available() else () ) retrieval_vector_size = 32 n_docs = 3 max_combined_length = 16 def setUp(self): self.tmpdirname = tempfile.mkdtemp() # DPR tok vocab_tokens = [ "[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]", "want", "##want", "##ed", "wa", "un", "runn", "##ing", ",", "low", "lowest", ] dpr_tokenizer_path = os.path.join(self.tmpdirname, "dpr_tokenizer") os.makedirs(dpr_tokenizer_path, exist_ok=True) self.vocab_file = os.path.join(dpr_tokenizer_path, DPR_VOCAB_FILES_NAMES["vocab_file"]) with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer: vocab_writer.write("".join([x + "\n" for x in vocab_tokens])) # BART tok vocab = [ "l", "o", "w", "e", "r", "s", "t", "i", "d", "n", "\u0120", "\u0120l", "\u0120n", "\u0120lo", "\u0120low", "er", "\u0120lowest", "\u0120newer", "\u0120wider", "<unk>", ] vocab_tokens = dict(zip(vocab, range(len(vocab)))) merges = ["#version: 0.2", "\u0120 l", "\u0120l o", "\u0120lo w", "e r", ""] self.special_tokens_map = {"unk_token": "<unk>"} bart_tokenizer_path = os.path.join(self.tmpdirname, "bart_tokenizer") os.makedirs(bart_tokenizer_path, exist_ok=True) self.vocab_file = os.path.join(bart_tokenizer_path, BART_VOCAB_FILES_NAMES["vocab_file"]) self.merges_file = os.path.join(bart_tokenizer_path, BART_VOCAB_FILES_NAMES["merges_file"]) with open(self.vocab_file, "w", encoding="utf-8") as fp: fp.write(json.dumps(vocab_tokens) + "\n") with open(self.merges_file, "w", encoding="utf-8") as fp: fp.write("\n".join(merges)) t5_tokenizer = T5Tokenizer(T5_SAMPLE_VOCAB) t5_tokenizer_path = os.path.join(self.tmpdirname, "t5_tokenizer") t5_tokenizer.save_pretrained(t5_tokenizer_path) @cached_property def dpr_tokenizer(self) -> DPRQuestionEncoderTokenizer: return DPRQuestionEncoderTokenizer.from_pretrained(os.path.join(self.tmpdirname, "dpr_tokenizer")) @cached_property def dpr_ctx_encoder_tokenizer(self) -> DPRContextEncoderTokenizer: return DPRContextEncoderTokenizer.from_pretrained(os.path.join(self.tmpdirname, "dpr_tokenizer")) @cached_property def bart_tokenizer(self) -> BartTokenizer: return BartTokenizer.from_pretrained(os.path.join(self.tmpdirname, "bart_tokenizer")) @cached_property def t5_tokenizer(self) -> BartTokenizer: return T5Tokenizer.from_pretrained(os.path.join(self.tmpdirname, "t5_tokenizer")) def tearDown(self): shutil.rmtree(self.tmpdirname) def get_retriever(self, config): dataset = Dataset.from_dict( { "id": ["0", "1", "3"], "text": ["foo", "bar", "qux"], "title": ["Foo", "Bar", "Qux"], "embeddings": [ np.ones(self.retrieval_vector_size), 2 * np.ones(self.retrieval_vector_size), 3 * np.ones(self.retrieval_vector_size), ], } ) dataset.add_faiss_index("embeddings", string_factory="Flat", metric_type=faiss.METRIC_INNER_PRODUCT) tokenizer = self.bart_tokenizer if config.generator.model_type == "bart" else self.t5_tokenizer with patch("transformers.models.rag.retrieval_rag.load_dataset") as mock_load_dataset: mock_load_dataset.return_value = dataset retriever = RagRetriever( config, question_encoder_tokenizer=self.dpr_tokenizer, generator_tokenizer=tokenizer, ) return retriever def check_model_with_retriever( self, config, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, **kwargs ): self.assertIsNotNone(config.question_encoder) self.assertIsNotNone(config.generator) for model_class in self.all_model_classes: model = model_class(config, 
retriever=self.get_retriever(config)).to(torch_device) model.eval() self.assertTrue(model.config.is_encoder_decoder) outputs = model( input_ids=input_ids, attention_mask=attention_mask, decoder_input_ids=decoder_input_ids, decoder_attention_mask=decoder_attention_mask, ) # logits self.assertEqual( outputs.logits.shape, (self.n_docs * decoder_input_ids.shape[0], decoder_input_ids.shape[1], config.generator.vocab_size), ) # generator encoder last hidden states self.assertEqual( outputs.generator_enc_last_hidden_state.shape, (self.n_docs * decoder_input_ids.shape[0], self.max_combined_length, config.generator.hidden_size), ) # doc scores self.assertEqual(outputs.doc_scores.shape, (input_ids.shape[0], self.n_docs)) def check_model_with_end2end_retriever( self, config, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, **kwargs ): self.assertIsNotNone(config.question_encoder) self.assertIsNotNone(config.generator) context_encoder_tokenizer = self.dpr_ctx_encoder_tokenizer dpr_context_encoder = DPRContextEncoder(config.question_encoder) # dpr is a twin tower retriever = self.get_retriever(config) retriever.set_ctx_encoder_tokenizer(context_encoder_tokenizer) # setting the ctx_encoder_tokenizer. for model_class in [RagTokenForGeneration, RagSequenceForGeneration]: model = model_class(config, retriever=retriever) model.set_context_encoder_for_training(dpr_context_encoder) # set the context_encoder for training model.to(torch_device) model.eval() self.assertTrue(model.config.is_encoder_decoder) outputs = model( input_ids=input_ids, attention_mask=attention_mask, decoder_input_ids=decoder_input_ids, decoder_attention_mask=decoder_attention_mask, ) # logits self.assertEqual( outputs.logits.shape, (self.n_docs * decoder_input_ids.shape[0], decoder_input_ids.shape[1], config.generator.vocab_size), ) # generator encoder last hidden states self.assertEqual( outputs.generator_enc_last_hidden_state.shape, (self.n_docs * decoder_input_ids.shape[0], self.max_combined_length, config.generator.hidden_size), ) # doc scores self.assertEqual(outputs.doc_scores.shape, (input_ids.shape[0], self.n_docs)) def check_model_generate_from_context_input_ids( self, config, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, **kwargs ): self.assertIsNotNone(config.question_encoder) self.assertIsNotNone(config.generator) retriever = self.get_retriever(config) for model_class in self.all_model_classes: model = model_class(config).to(torch_device) model.eval() self.assertTrue(model.config.is_encoder_decoder) question_hidden_states = model.question_encoder(input_ids, attention_mask=attention_mask)[0] out = retriever( input_ids, question_hidden_states.cpu().detach().to(torch.float32).numpy(), prefix=config.generator.prefix, return_tensors="pt", ) context_input_ids, context_attention_mask, retrieved_doc_embeds = ( out["context_input_ids"], out["context_attention_mask"], out["retrieved_doc_embeds"], ) # cast retrieved_doc_embeds = retrieved_doc_embeds.to(question_hidden_states) context_input_ids = context_input_ids.to(input_ids) context_attention_mask = context_attention_mask.to(input_ids) # compute doc_scores doc_scores = torch.bmm(question_hidden_states.unsqueeze(1), retrieved_doc_embeds.transpose(1, 2)).squeeze( 1 ) outputs = model.generate( context_input_ids=context_input_ids, context_attention_mask=context_attention_mask, doc_scores=doc_scores, do_deduplication=True, ) self.assertIsNotNone(outputs) def check_model_generate( self, config, input_ids, attention_mask, decoder_input_ids, 
decoder_attention_mask, **kwargs ): self.assertIsNotNone(config.question_encoder) self.assertIsNotNone(config.generator) for model_class in self.all_model_classes[1:]: model = model_class(config, retriever=self.get_retriever(config)).to(torch_device) model.eval() self.assertTrue(model.config.is_encoder_decoder) outputs = model.generate( input_ids=input_ids, num_beams=2, num_return_sequences=2, decoder_start_token_id=config.generator.eos_token_id, ) self.assertIsNotNone(outputs) def check_model_without_retriever( self, config, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, **kwargs ): self.assertIsNotNone(config.question_encoder) self.assertIsNotNone(config.generator) retriever = self.get_retriever(config) for model_class in self.all_model_classes: model = model_class(config).to(torch_device) model.eval() self.assertTrue(model.config.is_encoder_decoder) question_hidden_states = model.question_encoder(input_ids, attention_mask=attention_mask)[0] out = retriever( input_ids, question_hidden_states.cpu().detach().to(torch.float32).numpy(), prefix=config.generator.prefix, return_tensors="pt", ) context_input_ids, context_attention_mask, retrieved_doc_embeds = ( out["context_input_ids"], out["context_attention_mask"], out["retrieved_doc_embeds"], ) # cast retrieved_doc_embeds = retrieved_doc_embeds.to(question_hidden_states) context_input_ids = context_input_ids.to(input_ids) context_attention_mask = context_attention_mask.to(input_ids) # compute doc_scores doc_scores = torch.bmm(question_hidden_states.unsqueeze(1), retrieved_doc_embeds.transpose(1, 2)).squeeze( 1 ) outputs = model( context_input_ids=context_input_ids, context_attention_mask=context_attention_mask, doc_scores=doc_scores, decoder_input_ids=decoder_input_ids, decoder_attention_mask=decoder_attention_mask, ) # logits self.assertEqual( outputs.logits.shape, (self.n_docs * decoder_input_ids.shape[0], decoder_input_ids.shape[1], config.generator.vocab_size), ) # generator encoder last hidden states self.assertEqual( outputs.generator_enc_last_hidden_state.shape, (self.n_docs * decoder_input_ids.shape[0], self.max_combined_length, config.generator.hidden_size), ) # doc scores self.assertEqual(outputs.doc_scores.shape, (input_ids.shape[0], self.n_docs)) def check_model_custom_n_docs( self, config, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, n_docs, **kwargs ): self.assertIsNotNone(config.question_encoder) self.assertIsNotNone(config.generator) retriever = self.get_retriever(config) for model_class in self.all_model_classes: model = model_class(config).to(torch_device) model.eval() self.assertTrue(model.config.is_encoder_decoder) question_hidden_states = model.question_encoder(input_ids, attention_mask=attention_mask)[0] out = retriever( input_ids, question_hidden_states.cpu().detach().to(torch.float32).numpy(), prefix=config.generator.prefix, return_tensors="pt", n_docs=n_docs, ) context_input_ids, context_attention_mask, retrieved_doc_embeds = ( out["context_input_ids"], out["context_attention_mask"], out["retrieved_doc_embeds"], ) # cast retrieved_doc_embeds = retrieved_doc_embeds.to(question_hidden_states) context_input_ids = context_input_ids.to(input_ids) context_attention_mask = context_attention_mask.to(input_ids) # compute doc_scores doc_scores = torch.bmm(question_hidden_states.unsqueeze(1), retrieved_doc_embeds.transpose(1, 2)).squeeze( 1 ) outputs = model( context_input_ids=context_input_ids, context_attention_mask=context_attention_mask, doc_scores=doc_scores, 
decoder_input_ids=decoder_input_ids, decoder_attention_mask=decoder_attention_mask, n_docs=n_docs, ) # logits self.assertEqual( outputs.logits.shape, (n_docs * decoder_input_ids.shape[0], decoder_input_ids.shape[1], config.generator.vocab_size), ) # generator encoder last hidden states self.assertEqual( outputs.generator_enc_last_hidden_state.shape, (n_docs * decoder_input_ids.shape[0], self.max_combined_length, config.generator.hidden_size), ) # doc scores self.assertEqual(outputs.doc_scores.shape, (input_ids.shape[0], n_docs)) def check_model_with_mismatch_n_docs_value( self, config, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, retriever_n_docs, generator_n_docs, **kwargs ): self.assertIsNotNone(config.question_encoder) self.assertIsNotNone(config.generator) retriever = self.get_retriever(config) for model_class in self.all_model_classes: model = model_class(config).to(torch_device) model.eval() self.assertTrue(model.config.is_encoder_decoder) question_hidden_states = model.question_encoder(input_ids, attention_mask=attention_mask)[0] out = retriever( input_ids, question_hidden_states.cpu().detach().to(torch.float32).numpy(), prefix=config.generator.prefix, return_tensors="pt", n_docs=retriever_n_docs, ) context_input_ids, context_attention_mask, retrieved_doc_embeds = ( out["context_input_ids"], out["context_attention_mask"], out["retrieved_doc_embeds"], ) # cast retrieved_doc_embeds = retrieved_doc_embeds.to(question_hidden_states) context_input_ids = context_input_ids.to(input_ids) context_attention_mask = context_attention_mask.to(input_ids) # compute doc_scores doc_scores = torch.bmm(question_hidden_states.unsqueeze(1), retrieved_doc_embeds.transpose(1, 2)).squeeze( 1 ) self.assertRaises( AssertionError, model.__call__, context_input_ids=context_input_ids, context_attention_mask=context_attention_mask, doc_scores=doc_scores, decoder_input_ids=decoder_input_ids, decoder_attention_mask=decoder_attention_mask, n_docs=generator_n_docs, ) def check_model_with_encoder_outputs( self, config, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, **kwargs ): self.assertIsNotNone(config.question_encoder) self.assertIsNotNone(config.generator) for model_class in self.all_model_classes: model = model_class(config, retriever=self.get_retriever(config)).to(torch_device) model.eval() self.assertTrue(model.config.is_encoder_decoder) outputs = model( input_ids=input_ids, attention_mask=attention_mask, decoder_input_ids=decoder_input_ids, decoder_attention_mask=decoder_attention_mask, ) encoder_outputs = BaseModelOutput(outputs.generator_enc_last_hidden_state) # run only generator outputs = model( encoder_outputs=encoder_outputs, doc_scores=outputs.doc_scores, decoder_input_ids=decoder_input_ids, decoder_attention_mask=decoder_attention_mask, ) # logits self.assertEqual( outputs.logits.shape, (self.n_docs * decoder_input_ids.shape[0], decoder_input_ids.shape[1], config.generator.vocab_size), ) # generator encoder last hidden states self.assertEqual( outputs.generator_enc_last_hidden_state.shape, (self.n_docs * decoder_input_ids.shape[0], self.max_combined_length, config.generator.hidden_size), ) # doc scores self.assertEqual(outputs.doc_scores.shape, (input_ids.shape[0], self.n_docs)) def test_model_with_retriever(self): inputs_dict = self.config_and_inputs self.check_model_with_retriever(**inputs_dict) def test_model_with_end2end_retriever(self): inputs_dict = self.config_and_inputs self.check_model_with_end2end_retriever(**inputs_dict) def 
test_model_without_retriever(self): inputs_dict = self.config_and_inputs self.check_model_without_retriever(**inputs_dict) def test_model_with_encoder_outputs(self): inputs_dict = self.config_and_inputs self.check_model_with_encoder_outputs(**inputs_dict) def test_model_generate(self): inputs_dict = self.config_and_inputs self.check_model_generate(**inputs_dict) def test_model_with_custom_n_docs(self): inputs_dict = self.config_and_inputs inputs_dict["n_docs"] = 1 self.check_model_custom_n_docs(**inputs_dict) def test_model_with_mismatch_n_docs_value(self): inputs_dict = self.config_and_inputs inputs_dict["retriever_n_docs"] = 3 inputs_dict["generator_n_docs"] = 2 self.check_model_with_mismatch_n_docs_value(**inputs_dict)
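Several of the RAG checks above recompute document scores by hand before calling the model: a batched inner product between the question encoding and each retrieved document embedding. A small self-contained sketch of that step (illustrative shapes, plain `torch` only):

import torch

batch_size, n_docs, vector_size = 2, 3, 32

question_hidden_states = torch.randn(batch_size, vector_size)
retrieved_doc_embeds = torch.randn(batch_size, n_docs, vector_size)

# (batch, 1, dim) x (batch, dim, n_docs) -> (batch, 1, n_docs) -> (batch, n_docs)
doc_scores = torch.bmm(
    question_hidden_states.unsqueeze(1),
    retrieved_doc_embeds.transpose(1, 2),
).squeeze(1)

assert doc_scores.shape == (batch_size, n_docs)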
class MobileViTModelTest(ModelTesterMixin, unittest.TestCase): """ Here we also overwrite some of the tests of test_modeling_common.py, as MobileViT does not use input_ids, inputs_embeds, attention_mask and seq_length. """ all_model_classes = ((MobileViTModel, MobileViTForImageClassification, MobileViTForSemanticSegmentation) if is_torch_available() else ()) test_pruning = False test_resize_embeddings = False test_head_masking = False has_attentions = False def setUp(self): self.model_tester = MobileViTModelTester(self) self.config_tester = MobileViTConfigTester( self, config_class=MobileViTConfig, has_text_modality=False) def test_config(self): self.config_tester.run_common_tests() @unittest.skip(reason="MobileViT does not use inputs_embeds") def test_inputs_embeds(self): pass @unittest.skip( reason="MobileViT does not support input and output embeddings") def test_model_common_attributes(self): pass @unittest.skip(reason="MobileViT does not output attentions") def test_attention_outputs(self): pass def test_forward_signature(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) signature = inspect.signature(model.forward) # signature.parameters is an OrderedDict => so arg_names order is deterministic arg_names = [*signature.parameters.keys()] expected_arg_names = ["pixel_values"] self.assertListEqual(arg_names[:1], expected_arg_names) def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) def test_hidden_states_output(self): def check_hidden_states_output(inputs_dict, config, model_class): model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) hidden_states = outputs.hidden_states expected_num_stages = 5 self.assertEqual(len(hidden_states), expected_num_stages) # MobileViT's feature maps are of shape (batch_size, num_channels, height, width) # with the width and height being successively divided by 2. divisor = 2 for i in range(len(hidden_states)): self.assertListEqual( list(hidden_states[i].shape[-2:]), [ self.model_tester.image_size // divisor, self.model_tester.image_size // divisor ], ) divisor *= 2 self.assertEqual(self.model_tester.output_stride, divisor // 2) config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) for model_class in self.all_model_classes: inputs_dict["output_hidden_states"] = True check_hidden_states_output(inputs_dict, config, model_class) # check that output_hidden_states also work using config del inputs_dict["output_hidden_states"] config.output_hidden_states = True check_hidden_states_output(inputs_dict, config, model_class) def test_for_image_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_image_classification( *config_and_inputs) def test_for_semantic_segmentation(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_semantic_segmentation( *config_and_inputs) @slow def test_model_from_pretrained(self): for model_name in MOBILEVIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = MobileViTModel.from_pretrained(model_name) self.assertIsNotNone(model)
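A short standalone sketch of the arithmetic behind the hidden-state check above, assuming an illustrative image size of 256 and the five-stage, stride-32 layout the test expects:

image_size, num_stages, output_stride = 256, 5, 32

divisor = 2
expected_shapes = []
for _ in range(num_stages):
    expected_shapes.append((image_size // divisor, image_size // divisor))
    divisor *= 2

assert expected_shapes[0] == (128, 128)
assert expected_shapes[-1] == (8, 8)
# after the loop, divisor has been doubled once too often, hence the // 2 in the test
assert divisor // 2 == output_stride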
class OwlViTForObjectDetectionTest(ModelTesterMixin, unittest.TestCase): all_model_classes = ( OwlViTForObjectDetection, ) if is_torch_available() else () fx_compatible = False test_head_masking = False test_pruning = False test_resize_embeddings = False test_attention_outputs = False def setUp(self): self.model_tester = OwlViTForObjectDetectionTester(self) def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) @unittest.skip(reason="Hidden_states is tested in individual model tests") def test_hidden_states_output(self): pass @unittest.skip(reason="Inputs_embeds is tested in individual model tests") def test_inputs_embeds(self): pass @unittest.skip(reason="Retain_grad is tested in individual model tests") def test_retain_grad_hidden_states_attentions(self): pass @unittest.skip(reason="OwlViTModel does not have input/output embeddings") def test_model_common_attributes(self): pass @unittest.skip( reason="Test_initialization is tested in individual model tests") def test_initialization(self): pass @unittest.skip( reason="Test_forward_signature is tested in individual model tests") def test_forward_signature(self): pass @unittest.skip( reason= "Test_save_load_fast_init_from_base is tested in individual model tests" ) def test_save_load_fast_init_from_base(self): pass @unittest.skip(reason="OWL-ViT does not support training yet") def test_training(self): pass @unittest.skip(reason="OWL-ViT does not support training yet") def test_training_gradient_checkpointing(self): pass def _create_and_check_torchscript(self, config, inputs_dict): if not self.test_torchscript: return configs_no_init = _config_zero_init( config) # To be sure we have no Nan configs_no_init.torchscript = True configs_no_init.return_dict = False for model_class in self.all_model_classes: model = model_class(config=configs_no_init).to(torch_device) model.eval() try: input_ids = inputs_dict["input_ids"] pixel_values = inputs_dict[ "pixel_values"] # OWLVIT needs pixel_values traced_model = torch.jit.trace(model, (input_ids, pixel_values)) except RuntimeError: self.fail("Couldn't trace module.") with tempfile.TemporaryDirectory() as tmp_dir_name: pt_file_name = os.path.join(tmp_dir_name, "traced_model.pt") try: torch.jit.save(traced_model, pt_file_name) except Exception: self.fail("Couldn't save module.") try: loaded_model = torch.jit.load(pt_file_name) except Exception: self.fail("Couldn't load module.") loaded_model = loaded_model.to(torch_device) loaded_model.eval() model_state_dict = model.state_dict() loaded_model_state_dict = loaded_model.state_dict() self.assertEqual(set(model_state_dict.keys()), set(loaded_model_state_dict.keys())) models_equal = True for layer_name, p1 in model_state_dict.items(): p2 = loaded_model_state_dict[layer_name] if p1.data.ne(p2.data).sum() > 0: models_equal = False self.assertTrue(models_equal) def test_model_outputs_equivalence(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) def set_nan_tensor_to_zero(t): t[t != t] = 0 return t def check_equivalence(model, tuple_inputs, dict_inputs, additional_kwargs={}): with torch.no_grad(): tuple_output = model(**tuple_inputs, return_dict=False, **additional_kwargs) dict_output = model(**dict_inputs, return_dict=True, **additional_kwargs).to_tuple() def recursive_check(tuple_object, dict_object): if isinstance(tuple_object, (List, Tuple)): for tuple_iterable_value, dict_iterable_value in zip( tuple_object, dict_object): 
recursive_check(tuple_iterable_value, dict_iterable_value) elif isinstance(tuple_object, Dict): for tuple_iterable_value, dict_iterable_value in zip( tuple_object.values(), dict_object.values()): recursive_check(tuple_iterable_value, dict_iterable_value) elif tuple_object is None: return else: self.assertTrue( torch.allclose( set_nan_tensor_to_zero(tuple_object), set_nan_tensor_to_zero(dict_object), atol=1e-5), msg= ("Tuple and dict output are not equal. Difference:" f" {torch.max(torch.abs(tuple_object - dict_object))}. Tuple has `nan`:" f" {torch.isnan(tuple_object).any()} and `inf`: {torch.isinf(tuple_object).any()}. Dict has" f" `nan`: {torch.isnan(dict_object).any()} and `inf`: {torch.isinf(dict_object).any()}." ), ) recursive_check(tuple_output, dict_output) for model_class in self.all_model_classes: model = model_class(config).to(torch_device) model.eval() tuple_inputs = self._prepare_for_class(inputs_dict, model_class) dict_inputs = self._prepare_for_class(inputs_dict, model_class) check_equivalence(model, tuple_inputs, dict_inputs) @slow def test_model_from_pretrained(self): for model_name in OWLVIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = OwlViTForObjectDetection.from_pretrained(model_name) self.assertIsNotNone(model)
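The recursive tuple-vs-dict comparison above can be summarized by the following standalone sketch. `nested_allclose` is a hypothetical helper, not part of the test suite, and it uses `torch.nan_to_num` in place of the test's `set_nan_tensor_to_zero`:

import torch

def nested_allclose(a, b, atol=1e-5):
    if isinstance(a, (list, tuple)):
        return all(nested_allclose(x, y, atol) for x, y in zip(a, b))
    if isinstance(a, dict):
        return all(nested_allclose(x, y, atol) for x, y in zip(a.values(), b.values()))
    if a is None:
        return True
    # neutralize NaNs so they compare equal on both sides, roughly what the test does
    a = torch.nan_to_num(a)
    b = torch.nan_to_num(b)
    return torch.allclose(a, b, atol=atol)

t = torch.randn(2, 3)
assert nested_allclose((t, [t + 0.0]), (t, [t.clone()]))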
class LayoutLMv2ModelTest(ModelTesterMixin, unittest.TestCase): test_pruning = False test_torchscript = False test_mismatched_shapes = False all_model_classes = (( LayoutLMv2Model, LayoutLMv2ForSequenceClassification, LayoutLMv2ForTokenClassification, LayoutLMv2ForQuestionAnswering, ) if is_torch_available() else ()) def setUp(self): self.model_tester = LayoutLMv2ModelTester(self) self.config_tester = ConfigTester(self, config_class=LayoutLMv2Config, hidden_size=37) def test_config(self): self.config_tester.run_common_tests() def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) def test_model_various_embeddings(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() for type in ["absolute", "relative_key", "relative_key_query"]: config_and_inputs[0].position_embedding_type = type self.model_tester.create_and_check_model(*config_and_inputs) def test_for_sequence_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_sequence_classification( *config_and_inputs) def test_for_token_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_token_classification( *config_and_inputs) def test_for_question_answering(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_question_answering( *config_and_inputs) def test_save_load_fast_init_from_base(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) base_class = MODEL_MAPPING[config.__class__] if isinstance(base_class, tuple): base_class = base_class[0] for model_class in self.all_model_classes: if model_class == base_class: continue # make a copy of model class to not break future tests # from https://stackoverflow.com/questions/9541025/how-to-copy-a-python-class class CopyClass(model_class): pass model_class_copy = CopyClass # make sure that all keys are expected for test model_class_copy._keys_to_ignore_on_load_missing = [] # make init deterministic, but make sure that # non-initialized weights throw errors nevertheless model_class_copy._init_weights = self._mock_init_weights model = base_class(config) state_dict = model.state_dict() # this will often delete a single weight of a multi-weight module # to test an edge case random_key_to_del = random.choice(list(state_dict.keys())) del state_dict[random_key_to_del] # check that certain keys didn't get saved with the model with tempfile.TemporaryDirectory() as tmpdirname: model.save_pretrained(tmpdirname) torch.save(state_dict, os.path.join(tmpdirname, "pytorch_model.bin")) model_fast_init = model_class_copy.from_pretrained(tmpdirname) model_slow_init = model_class_copy.from_pretrained( tmpdirname, _fast_init=False) for key in model_fast_init.state_dict().keys(): if key == "layoutlmv2.visual_segment_embedding": # we skip the visual segment embedding as it has a custom initialization scheme continue max_diff = ( model_slow_init.state_dict()[key] - model_fast_init.state_dict()[key]).sum().item() self.assertLessEqual(max_diff, 1e-3, msg=f"{key} not identical") def test_attention_outputs(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) config.return_dict = True # LayoutLMv2 has a different expected sequence length expected_seq_len = (self.model_tester.seq_length + self.model_tester.image_feature_pool_shape[0] * 
self.model_tester.image_feature_pool_shape[1]) for model_class in self.all_model_classes: inputs_dict["output_attentions"] = True inputs_dict["output_hidden_states"] = False config.return_dict = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) attentions = outputs.attentions self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) # check that output_attentions also work using config del inputs_dict["output_attentions"] config.output_attentions = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) attentions = outputs.attentions self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) self.assertListEqual( list(attentions[0].shape[-3:]), [ self.model_tester.num_attention_heads, expected_seq_len, expected_seq_len ], ) out_len = len(outputs) # Check attention is always last and order is fine inputs_dict["output_attentions"] = True inputs_dict["output_hidden_states"] = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) if hasattr(self.model_tester, "num_hidden_states_types"): added_hidden_states = self.model_tester.num_hidden_states_types else: added_hidden_states = 1 self.assertEqual(out_len + added_hidden_states, len(outputs)) self_attentions = outputs.attentions self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers) self.assertListEqual( list(self_attentions[0].shape[-3:]), [ self.model_tester.num_attention_heads, expected_seq_len, expected_seq_len ], ) def test_hidden_states_output(self): def check_hidden_states_output(inputs_dict, config, model_class): model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) hidden_states = outputs.hidden_states expected_num_layers = getattr( self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1) self.assertEqual(len(hidden_states), expected_num_layers) # LayoutLMv2 has a different expected sequence length expected_seq_len = (self.model_tester.seq_length + self.model_tester.image_feature_pool_shape[0] * self.model_tester.image_feature_pool_shape[1]) self.assertListEqual( list(hidden_states[0].shape[-2:]), [expected_seq_len, self.model_tester.hidden_size], ) config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) for model_class in self.all_model_classes: inputs_dict["output_hidden_states"] = True check_hidden_states_output(inputs_dict, config, model_class) # check that output_hidden_states also work using config del inputs_dict["output_hidden_states"] config.output_hidden_states = True check_hidden_states_output(inputs_dict, config, model_class) @slow def test_model_from_pretrained(self): for model_name in LAYOUTLMV2_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = LayoutLMv2Model.from_pretrained(model_name) self.assertIsNotNone(model) def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) configs_no_init = _config_zero_init(config) for model_class in self.all_model_classes: model = model_class(config=configs_no_init) for name, param in model.named_parameters(): if "backbone" in name or "visual_segment_embedding" in name: continue if param.requires_grad: self.assertIn( 
((param.data.mean() * 1e9).round() / 1e9).item(), [0.0, 1.0], msg= f"Parameter {name} of model {model_class} seems not properly initialized", )
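For reference, a tiny sketch of the sequence-length bookkeeping used in the LayoutLMv2 attention and hidden-state checks above: the expected length is the text tokens plus the flattened visual feature map. The concrete numbers are made up, not taken from `LayoutLMv2ModelTester`.

seq_length = 11                      # text tokens
image_feature_pool_shape = (7, 7)    # pooled visual feature map (height, width)

expected_seq_len = seq_length + image_feature_pool_shape[0] * image_feature_pool_shape[1]
assert expected_seq_len == 60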
require_torch, require_torch_non_multi_gpu, slow, torch_device, ) from transformers.utils import cached_property, is_datasets_available, is_faiss_available, is_torch_available from ..bart.test_modeling_bart import BartModelTester from ..dpr.test_modeling_dpr import DPRModelTester from ..t5.test_modeling_t5 import T5ModelTester TOLERANCE = 1e-3 T5_SAMPLE_VOCAB = os.path.join(dirname(dirname(os.path.abspath(__file__))), "fixtures/test_sentencepiece.model") if is_torch_available() and is_datasets_available() and is_faiss_available(): import torch from datasets import Dataset import faiss from transformers import ( AutoConfig, AutoModel, AutoModelForSeq2SeqLM, DPRContextEncoder, RagConfig, RagModel, RagRetriever, RagSequenceForGeneration, RagTokenForGeneration, RagTokenizer,
class MaskFormerFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase): feature_extraction_class = MaskFormerFeatureExtractor if ( is_vision_available() and is_torch_available()) else None def setUp(self): self.feature_extract_tester = MaskFormerFeatureExtractionTester(self) @property def feat_extract_dict(self): return self.feature_extract_tester.prepare_feat_extract_dict() def test_feat_extract_properties(self): feature_extractor = self.feature_extraction_class( **self.feat_extract_dict) self.assertTrue(hasattr(feature_extractor, "image_mean")) self.assertTrue(hasattr(feature_extractor, "image_std")) self.assertTrue(hasattr(feature_extractor, "do_normalize")) self.assertTrue(hasattr(feature_extractor, "do_resize")) self.assertTrue(hasattr(feature_extractor, "size")) self.assertTrue(hasattr(feature_extractor, "max_size")) def test_batch_feature(self): pass def test_call_pil(self): # Initialize feature_extractor feature_extractor = self.feature_extraction_class( **self.feat_extract_dict) # create random PIL images image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False) for image in image_inputs: self.assertIsInstance(image, Image.Image) # Test not batched input encoded_images = feature_extractor(image_inputs[0], return_tensors="pt").pixel_values expected_height, expected_width = self.feature_extract_tester.get_expected_values( image_inputs) self.assertEqual( encoded_images.shape, (1, self.feature_extract_tester.num_channels, expected_height, expected_width), ) # Test batched expected_height, expected_width = self.feature_extract_tester.get_expected_values( image_inputs, batched=True) encoded_images = feature_extractor(image_inputs, return_tensors="pt").pixel_values self.assertEqual( encoded_images.shape, ( self.feature_extract_tester.batch_size, self.feature_extract_tester.num_channels, expected_height, expected_width, ), ) def test_call_numpy(self): # Initialize feature_extractor feature_extractor = self.feature_extraction_class( **self.feat_extract_dict) # create random numpy tensors image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, numpify=True) for image in image_inputs: self.assertIsInstance(image, np.ndarray) # Test not batched input encoded_images = feature_extractor(image_inputs[0], return_tensors="pt").pixel_values expected_height, expected_width = self.feature_extract_tester.get_expected_values( image_inputs) self.assertEqual( encoded_images.shape, (1, self.feature_extract_tester.num_channels, expected_height, expected_width), ) # Test batched encoded_images = feature_extractor(image_inputs, return_tensors="pt").pixel_values expected_height, expected_width = self.feature_extract_tester.get_expected_values( image_inputs, batched=True) self.assertEqual( encoded_images.shape, ( self.feature_extract_tester.batch_size, self.feature_extract_tester.num_channels, expected_height, expected_width, ), ) def test_call_pytorch(self): # Initialize feature_extractor feature_extractor = self.feature_extraction_class( **self.feat_extract_dict) # create random PyTorch tensors image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, torchify=True) for image in image_inputs: self.assertIsInstance(image, torch.Tensor) # Test not batched input encoded_images = feature_extractor(image_inputs[0], return_tensors="pt").pixel_values expected_height, expected_width = self.feature_extract_tester.get_expected_values( image_inputs) self.assertEqual( encoded_images.shape, (1, 
self.feature_extract_tester.num_channels, expected_height, expected_width), ) # Test batched encoded_images = feature_extractor(image_inputs, return_tensors="pt").pixel_values expected_height, expected_width = self.feature_extract_tester.get_expected_values( image_inputs, batched=True) self.assertEqual( encoded_images.shape, ( self.feature_extract_tester.batch_size, self.feature_extract_tester.num_channels, expected_height, expected_width, ), ) def test_equivalence_pad_and_create_pixel_mask(self): # Initialize feature_extractors feature_extractor_1 = self.feature_extraction_class( **self.feat_extract_dict) feature_extractor_2 = self.feature_extraction_class(do_resize=False, do_normalize=False) # create random PyTorch tensors image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False, torchify=True) for image in image_inputs: self.assertIsInstance(image, torch.Tensor) # Test whether the method "pad_and_return_pixel_mask" and calling the feature extractor return the same tensors encoded_images_with_method = feature_extractor_1.encode_inputs( image_inputs, return_tensors="pt") encoded_images = feature_extractor_2(image_inputs, return_tensors="pt") self.assertTrue( torch.allclose(encoded_images_with_method["pixel_values"], encoded_images["pixel_values"], atol=1e-4)) self.assertTrue( torch.allclose(encoded_images_with_method["pixel_mask"], encoded_images["pixel_mask"], atol=1e-4)) def comm_get_feature_extractor_inputs(self, with_annotations=False): feature_extractor = self.feature_extraction_class( **self.feat_extract_dict) # prepare image and target num_classes = 8 batch_size = self.feature_extract_tester.batch_size annotations = None if with_annotations: annotations = [{ "masks": np.random.rand(num_classes, 384, 384).astype(np.float32), "labels": (np.random.rand(num_classes) > 0.5).astype(np.int64), } for _ in range(batch_size)] image_inputs = prepare_image_inputs(self.feature_extract_tester, equal_resolution=False) inputs = feature_extractor(image_inputs, annotations, return_tensors="pt", pad_and_return_pixel_mask=True) return inputs def test_with_size_divisibility(self): size_divisibilities = [8, 16, 32] weird_input_sizes = [(407, 802), (582, 1094)] for size_divisibility in size_divisibilities: feat_extract_dict = { **self.feat_extract_dict, **{ "size_divisibility": size_divisibility } } feature_extractor = self.feature_extraction_class( **feat_extract_dict) for weird_input_size in weird_input_sizes: inputs = feature_extractor([np.ones((3, *weird_input_size))], return_tensors="pt") pixel_values = inputs["pixel_values"] # check if divisible self.assertTrue((pixel_values.shape[-1] % size_divisibility) == 0) self.assertTrue((pixel_values.shape[-2] % size_divisibility) == 0) def test_call_with_numpy_annotations(self): num_classes = 8 batch_size = self.feature_extract_tester.batch_size inputs = self.comm_get_feature_extractor_inputs(with_annotations=True) # check the batch_size for el in inputs.values(): self.assertEqual(el.shape[0], batch_size) pixel_values = inputs["pixel_values"] mask_labels = inputs["mask_labels"] class_labels = inputs["class_labels"] self.assertEqual(pixel_values.shape[-2], mask_labels.shape[-2]) self.assertEqual(pixel_values.shape[-1], mask_labels.shape[-1]) self.assertEqual(mask_labels.shape[1], class_labels.shape[1]) self.assertEqual(mask_labels.shape[1], num_classes) def test_post_process_segmentation(self): feature_extractor = self.feature_extraction_class() outputs = self.feature_extract_tester.get_fake_maskformer_outputs() segmentation = 
feature_extractor.post_process_segmentation(outputs) self.assertEqual( segmentation.shape, ( self.feature_extract_tester.batch_size, self.feature_extract_tester.num_classes, self.feature_extract_tester.height, self.feature_extract_tester.width, ), ) target_size = (1, 4) segmentation = feature_extractor.post_process_segmentation( outputs, target_size=target_size) self.assertEqual( segmentation.shape, (self.feature_extract_tester.batch_size, self.feature_extract_tester.num_classes, *target_size), ) def test_post_process_semantic_segmentation(self): feature_extractor = self.feature_extraction_class() outputs = self.feature_extract_tester.get_fake_maskformer_outputs() segmentation = feature_extractor.post_process_semantic_segmentation( outputs) self.assertEqual( segmentation.shape, ( self.feature_extract_tester.batch_size, self.feature_extract_tester.height, self.feature_extract_tester.width, ), ) target_size = (1, 4) segmentation = feature_extractor.post_process_semantic_segmentation( outputs, target_size=target_size) self.assertEqual( segmentation.shape, (self.feature_extract_tester.batch_size, *target_size)) def test_post_process_panoptic_segmentation(self): feature_extractor = self.feature_extraction_class() outputs = self.feature_extract_tester.get_fake_maskformer_outputs() segmentation = feature_extractor.post_process_panoptic_segmentation( outputs, object_mask_threshold=0) self.assertTrue( len(segmentation) == self.feature_extract_tester.batch_size) for el in segmentation: self.assertTrue("segmentation" in el) self.assertTrue("segments" in el) self.assertEqual(type(el["segments"]), list) self.assertEqual(el["segmentation"].shape, (self.feature_extract_tester.height, self.feature_extract_tester.width))
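`test_with_size_divisibility` above only asserts the divisibility property. The sketch below shows one way such padding can be computed; `pad_to_multiple` is a hypothetical helper for illustration, not the `MaskFormerFeatureExtractor` implementation.

import numpy as np

def pad_to_multiple(image: np.ndarray, size_divisibility: int) -> np.ndarray:
    # pad height and width on the bottom/right up to the next multiple
    channels, height, width = image.shape
    pad_h = (-height) % size_divisibility
    pad_w = (-width) % size_divisibility
    return np.pad(image, ((0, 0), (0, pad_h), (0, pad_w)))

padded = pad_to_multiple(np.ones((3, 407, 802)), 32)
assert padded.shape[-2] % 32 == 0 and padded.shape[-1] % 32 == 0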
class Data2VecVisionModelTest(ModelTesterMixin, unittest.TestCase): """ Here we also overwrite some of the tests of test_modeling_common.py, as Data2VecVision does not use input_ids, inputs_embeds, attention_mask and seq_length. """ all_model_classes = ((Data2VecVisionModel, Data2VecVisionForImageClassification, Data2VecVisionForSemanticSegmentation) if is_torch_available() else ()) test_pruning = False test_resize_embeddings = False test_head_masking = False def setUp(self): self.model_tester = Data2VecVisionModelTester(self) self.config_tester = ConfigTester(self, config_class=Data2VecVisionConfig, has_text_modality=False, hidden_size=37) def test_config(self): self.config_tester.run_common_tests() def test_inputs_embeds(self): # Data2VecVision does not use inputs_embeds pass def test_model_common_attributes(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) self.assertIsInstance(model.get_input_embeddings(), (nn.Module)) x = model.get_output_embeddings() self.assertTrue(x is None or isinstance(x, nn.Linear)) def test_forward_signature(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) signature = inspect.signature(model.forward) # signature.parameters is an OrderedDict => so arg_names order is deterministic arg_names = [*signature.parameters.keys()] expected_arg_names = ["pixel_values"] self.assertListEqual(arg_names[:1], expected_arg_names) def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) def test_for_image_segmentation(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_image_segmentation( *config_and_inputs) def test_training(self): if not self.model_tester.is_training: return config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) config.return_dict = True for model_class in self.all_model_classes: if model_class in [*get_values(MODEL_MAPPING)]: continue model = model_class(config) model.to(torch_device) model.train() inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) loss = model(**inputs).loss loss.backward() def test_training_gradient_checkpointing(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) if not self.model_tester.is_training: return config.use_cache = False config.return_dict = True for model_class in self.all_model_classes: if model_class in [ *get_values(MODEL_MAPPING) ] or not model_class.supports_gradient_checkpointing: continue # TODO: remove the following 3 lines once we have a MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING # this can then be incorporated into _prepare_for_class in test_modeling_common.py elif model_class.__name__ == "Data2VecVisionForSemanticSegmentation": batch_size, num_channels, height, width = inputs_dict[ "pixel_values"].shape inputs_dict["labels"] = torch.zeros( [self.model_tester.batch_size, height, width], device=torch_device).long() model = model_class(config) model.gradient_checkpointing_enable() model.to(torch_device) model.train() inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) loss = model(**inputs).loss loss.backward() def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) configs_no_init = _config_zero_init(config) for 
model_class in self.all_model_classes: model = model_class(config=configs_no_init) for name, param in model.named_parameters(): # we skip lambda parameters as these require special initial values # determined by config.layer_scale_init_value if "lambda" in name: continue if param.requires_grad: self.assertIn( ((param.data.mean() * 1e9).round() / 1e9).item(), [0.0, 1.0], msg= f"Parameter {name} of model {model_class} seems not properly initialized", ) def check_pt_tf_outputs(self, tf_outputs, pt_outputs, model_class, tol=2e-4, name="outputs", attributes=None): # We override with a slightly higher tol value, as semseg models tend to diverge a bit more super().check_pt_tf_outputs(tf_outputs, pt_outputs, model_class, tol, name, attributes) def test_for_image_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_image_classification( *config_and_inputs) @slow def test_model_from_pretrained(self): for model_name in DATA2VEC_VISION_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = Data2VecVisionModel.from_pretrained(model_name) self.assertIsNotNone(model)
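The initialization check above relies on `_config_zero_init` driving every init std to zero, so surviving parameter means are exactly 0 or 1 (e.g. LayerNorm weights). A toy illustration of the same assertion, with a hand-zeroed module standing in for a zero-init config:

import torch
from torch import nn

toy = nn.Sequential(nn.Linear(4, 4), nn.LayerNorm(4))
nn.init.zeros_(toy[0].weight)   # what a zero-std init amounts to
nn.init.zeros_(toy[0].bias)

for name, param in toy.named_parameters():
    if param.requires_grad:
        rounded_mean = ((param.data.mean() * 1e9).round() / 1e9).item()
        assert rounded_mean in [0.0, 1.0], f"{name} not properly initialized"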
class DonutSwinModelTest(ModelTesterMixin, unittest.TestCase): all_model_classes = (DonutSwinModel, ) if is_torch_available() else () fx_compatible = True test_pruning = False test_resize_embeddings = False test_head_masking = False def setUp(self): self.model_tester = DonutSwinModelTester(self) self.config_tester = ConfigTester(self, config_class=DonutSwinConfig, embed_dim=37) def test_config(self): self.create_and_test_config_common_properties() self.config_tester.create_and_test_config_to_json_string() self.config_tester.create_and_test_config_to_json_file() self.config_tester.create_and_test_config_from_and_save_pretrained() self.config_tester.create_and_test_config_with_num_labels() self.config_tester.check_config_can_be_init_without_params() self.config_tester.check_config_arguments_init() def create_and_test_config_common_properties(self): return def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) def test_inputs_embeds(self): # DonutSwin does not use inputs_embeds pass def test_model_common_attributes(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) self.assertIsInstance(model.get_input_embeddings(), (nn.Module)) x = model.get_output_embeddings() self.assertTrue(x is None or isinstance(x, nn.Linear)) def test_forward_signature(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) signature = inspect.signature(model.forward) # signature.parameters is an OrderedDict => so arg_names order is deterministic arg_names = [*signature.parameters.keys()] expected_arg_names = ["pixel_values"] self.assertListEqual(arg_names[:1], expected_arg_names) def test_attention_outputs(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) config.return_dict = True for model_class in self.all_model_classes: inputs_dict["output_attentions"] = True inputs_dict["output_hidden_states"] = False config.return_dict = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) attentions = outputs.attentions expected_num_attentions = len(self.model_tester.depths) self.assertEqual(len(attentions), expected_num_attentions) # check that output_attentions also work using config del inputs_dict["output_attentions"] config.output_attentions = True window_size_squared = config.window_size**2 model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) attentions = outputs.attentions self.assertEqual(len(attentions), expected_num_attentions) self.assertListEqual( list(attentions[0].shape[-3:]), [ self.model_tester.num_heads[0], window_size_squared, window_size_squared ], ) out_len = len(outputs) # Check attention is always last and order is fine inputs_dict["output_attentions"] = True inputs_dict["output_hidden_states"] = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) if hasattr(self.model_tester, "num_hidden_states_types"): added_hidden_states = self.model_tester.num_hidden_states_types else: # also another +1 for reshaped_hidden_states added_hidden_states = 2 self.assertEqual(out_len + 
added_hidden_states, len(outputs)) self_attentions = outputs.attentions self.assertEqual(len(self_attentions), expected_num_attentions) self.assertListEqual( list(self_attentions[0].shape[-3:]), [ self.model_tester.num_heads[0], window_size_squared, window_size_squared ], ) def check_hidden_states_output(self, inputs_dict, config, model_class, image_size): model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) hidden_states = outputs.hidden_states expected_num_layers = getattr(self.model_tester, "expected_num_hidden_layers", len(self.model_tester.depths) + 1) self.assertEqual(len(hidden_states), expected_num_layers) # DonutSwin has a different seq_length patch_size = (config.patch_size if isinstance( config.patch_size, collections.abc.Iterable) else (config.patch_size, config.patch_size)) num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0]) self.assertListEqual( list(hidden_states[0].shape[-2:]), [num_patches, self.model_tester.embed_dim], ) reshaped_hidden_states = outputs.reshaped_hidden_states self.assertEqual(len(reshaped_hidden_states), expected_num_layers) batch_size, num_channels, height, width = reshaped_hidden_states[ 0].shape reshaped_hidden_states = (reshaped_hidden_states[0].view( batch_size, num_channels, height * width).permute(0, 2, 1)) self.assertListEqual( list(reshaped_hidden_states.shape[-2:]), [num_patches, self.model_tester.embed_dim], ) def test_hidden_states_output(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) image_size = (self.model_tester.image_size if isinstance( self.model_tester.image_size, collections.abc.Iterable) else (self.model_tester.image_size, self.model_tester.image_size)) for model_class in self.all_model_classes: inputs_dict["output_hidden_states"] = True self.check_hidden_states_output(inputs_dict, config, model_class, image_size) # check that output_hidden_states also work using config del inputs_dict["output_hidden_states"] config.output_hidden_states = True self.check_hidden_states_output(inputs_dict, config, model_class, image_size) def test_hidden_states_output_with_padding(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) config.patch_size = 3 image_size = (self.model_tester.image_size if isinstance( self.model_tester.image_size, collections.abc.Iterable) else (self.model_tester.image_size, self.model_tester.image_size)) patch_size = (config.patch_size if isinstance( config.patch_size, collections.abc.Iterable) else (config.patch_size, config.patch_size)) padded_height = image_size[0] + patch_size[0] - (image_size[0] % patch_size[0]) padded_width = image_size[1] + patch_size[1] - (image_size[1] % patch_size[1]) for model_class in self.all_model_classes: inputs_dict["output_hidden_states"] = True self.check_hidden_states_output(inputs_dict, config, model_class, (padded_height, padded_width)) # check that output_hidden_states also work using config del inputs_dict["output_hidden_states"] config.output_hidden_states = True self.check_hidden_states_output(inputs_dict, config, model_class, (padded_height, padded_width)) @slow def test_model_from_pretrained(self): for model_name in DONUT_SWIN_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = DonutSwinModel.from_pretrained(model_name) self.assertIsNotNone(model) def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) configs_no_init = 
_config_zero_init(config) for model_class in self.all_model_classes: model = model_class(config=configs_no_init) for name, param in model.named_parameters(): if "embeddings" not in name and param.requires_grad: self.assertIn( ((param.data.mean() * 1e9).round() / 1e9).item(), [0.0, 1.0], msg= f"Parameter {name} of model {model_class} seems not properly initialized", ) def _create_and_check_torch_fx_tracing(self, config, inputs_dict, output_loss=False): if not is_torch_fx_available() or not self.fx_compatible: return configs_no_init = _config_zero_init( config) # To be sure we have no Nan configs_no_init.return_dict = False for model_class in self.all_model_classes: model = model_class(config=configs_no_init) model.to(torch_device) model.eval() inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=output_loss) try: if model.config.is_encoder_decoder: model.config.use_cache = False # FSTM still requires this hack -> FSTM should probably be refactored similar to BART afterward labels = inputs.get("labels", None) input_names = [ "input_ids", "attention_mask", "decoder_input_ids", "decoder_attention_mask" ] if labels is not None: input_names.append("labels") filtered_inputs = { k: v for (k, v) in inputs.items() if k in input_names } input_names = list(filtered_inputs.keys()) model_output = model(**filtered_inputs) traced_model = symbolic_trace(model, input_names) traced_output = traced_model(**filtered_inputs) else: input_names = [ "input_ids", "attention_mask", "token_type_ids", "pixel_values" ] labels = inputs.get("labels", None) start_positions = inputs.get("start_positions", None) end_positions = inputs.get("end_positions", None) if labels is not None: input_names.append("labels") if start_positions is not None: input_names.append("start_positions") if end_positions is not None: input_names.append("end_positions") filtered_inputs = { k: v for (k, v) in inputs.items() if k in input_names } input_names = list(filtered_inputs.keys()) model_output = model(**filtered_inputs) traced_model = symbolic_trace(model, input_names) traced_output = traced_model(**filtered_inputs) except RuntimeError as e: self.fail(f"Couldn't trace module: {e}") def flatten_output(output): flatten = [] for x in output: if isinstance(x, (tuple, list)): flatten += flatten_output(x) elif not isinstance(x, torch.Tensor): continue else: flatten.append(x) return flatten model_output = flatten_output(model_output) traced_output = flatten_output(traced_output) num_outputs = len(model_output) for i in range(num_outputs): self.assertTrue( torch.allclose(model_output[i], traced_output[i]), f"traced {i}th output doesn't match model {i}th output for {model_class}", ) # Test that the model can be serialized and restored properly with tempfile.TemporaryDirectory() as tmp_dir_name: pkl_file_name = os.path.join(tmp_dir_name, "model.pkl") try: with open(pkl_file_name, "wb") as f: pickle.dump(traced_model, f) with open(pkl_file_name, "rb") as f: loaded = pickle.load(f) except Exception as e: self.fail( f"Couldn't serialize / deserialize the traced model: {e}" ) loaded_output = loaded(**filtered_inputs) loaded_output = flatten_output(loaded_output) for i in range(num_outputs): self.assertTrue( torch.allclose(model_output[i], loaded_output[i]), f"serialized model {i}th output doesn't match model {i}th output for {model_class}", )
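A short sketch of the patch arithmetic that `check_hidden_states_output` above depends on: the first hidden state has one token per patch, and the "reshaped" variant is the same tensor laid out as (batch, channels, height, width). The image size, patch size and embedding dimension are illustrative, and the reshape mirrors (but is not) the model's own `reshaped_hidden_states` path.

import torch

image_size, patch_size, embed_dim, batch_size = (32, 32), (4, 4), 16, 2
num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0])

reshaped = torch.randn(batch_size, embed_dim, image_size[0] // patch_size[0],
                       image_size[1] // patch_size[1])
tokens = reshaped.flatten(2).permute(0, 2, 1)   # (batch, num_patches, embed_dim)

assert num_patches == 64
assert tokens.shape == (batch_size, num_patches, embed_dim)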
class VanModelTest(ModelTesterMixin, unittest.TestCase): """ Here we also overwrite some of the tests of test_modeling_common.py, as Van does not use input_ids, inputs_embeds, attention_mask and seq_length. """ all_model_classes = (VanModel, VanForImageClassification) if is_torch_available() else () test_pruning = False test_torchscript = False test_resize_embeddings = False test_head_masking = False has_attentions = False def setUp(self): self.model_tester = VanModelTester(self) self.config_tester = ConfigTester(self, config_class=VanConfig, has_text_modality=False, hidden_size=37) def test_config(self): self.create_and_test_config_common_properties() self.config_tester.create_and_test_config_to_json_string() self.config_tester.create_and_test_config_to_json_file() self.config_tester.create_and_test_config_from_and_save_pretrained() self.config_tester.create_and_test_config_with_num_labels() self.config_tester.check_config_can_be_init_without_params() self.config_tester.check_config_arguments_init() def create_and_test_config_common_properties(self): return @unittest.skip(reason="Van does not use inputs_embeds") def test_inputs_embeds(self): pass @unittest.skip(reason="Van does not support input and output embeddings") def test_model_common_attributes(self): pass def test_forward_signature(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) signature = inspect.signature(model.forward) # signature.parameters is an OrderedDict => so arg_names order is deterministic arg_names = [*signature.parameters.keys()] expected_arg_names = ["pixel_values"] self.assertListEqual(arg_names[:1], expected_arg_names) def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) @require_scipy def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() configs_no_init = _config_zero_init(config) for model_class in self.all_model_classes: model = model_class(config=configs_no_init) for name, module in model.named_modules(): if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm, nn.LayerNorm)): self.assertTrue( torch.all(module.weight == 1), msg=f"Parameter {name} of model {model_class} seems not properly initialized", ) self.assertTrue( torch.all(module.bias == 0), msg=f"Parameter {name} of model {model_class} seems not properly initialized", ) elif isinstance(module, nn.Conv2d): fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels fan_out //= module.groups std = math.sqrt(2.0 / fan_out) # divide by std -> mean = 0, std = 1 data = module.weight.data.cpu().flatten().numpy() / std test = stats.anderson(data) self.assertTrue(test.statistic > 0.05) def test_hidden_states_output(self): def check_hidden_states_output(inputs_dict, config, model_class): model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model(**self._prepare_for_class(inputs_dict, model_class)) hidden_states = outputs.encoder_hidden_states if config.is_encoder_decoder else outputs.hidden_states expected_num_stages = len(self.model_tester.hidden_sizes) # van has no embeddings self.assertEqual(len(hidden_states), expected_num_stages) # Van's feature maps are of shape (batch_size, num_channels, height, width) self.assertListEqual( list(hidden_states[0].shape[-2:]), [self.model_tester.image_size // 4, self.model_tester.image_size // 4], ) config, inputs_dict = 
self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: inputs_dict["output_hidden_states"] = True check_hidden_states_output(inputs_dict, config, model_class) # check that output_hidden_states also work using config del inputs_dict["output_hidden_states"] config.output_hidden_states = True check_hidden_states_output(inputs_dict, config, model_class) def test_for_image_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_image_classification(*config_and_inputs) @slow def test_model_from_pretrained(self): for model_name in VAN_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = VanModel.from_pretrained(model_name) self.assertIsNotNone(model)
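`test_initialization` above assumes VAN's conv weights are drawn with a fan-out based standard deviation, which is why dividing by that std should yield roughly unit-variance samples. A standalone sketch under that assumption, with illustrative layer sizes and a loose variance tolerance in place of the Anderson-Darling test:

import math
from torch import nn

conv = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, groups=1)
fan_out = conv.kernel_size[0] * conv.kernel_size[1] * conv.out_channels
fan_out //= conv.groups
std = math.sqrt(2.0 / fan_out)

nn.init.normal_(conv.weight, mean=0.0, std=std)
normalized = conv.weight.data.flatten() / std   # should be ~N(0, 1)
assert abs(normalized.std().item() - 1.0) < 0.2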
import inspect import math import tempfile import unittest import numpy as np from transformers import ViTMAEConfig from transformers.testing_utils import require_torch, require_vision, slow, torch_device from transformers.utils import cached_property, is_torch_available, is_vision_available from ...test_configuration_common import ConfigTester from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor if is_torch_available(): import torch from torch import nn from transformers import ViTMAEForPreTraining, ViTMAEModel from transformers.models.vit.modeling_vit import VIT_PRETRAINED_MODEL_ARCHIVE_LIST if is_vision_available(): from PIL import Image from transformers import ViTFeatureExtractor class ViTMAEModelTester: def __init__( self,
class PerceiverModelTest(ModelTesterMixin, unittest.TestCase): all_model_classes = (( PerceiverModel, PerceiverForMaskedLM, PerceiverForImageClassificationLearned, PerceiverForImageClassificationConvProcessing, PerceiverForImageClassificationFourier, PerceiverForOpticalFlow, PerceiverForMultimodalAutoencoding, PerceiverForSequenceClassification, ) if is_torch_available() else ()) test_pruning = False test_head_masking = False test_torchscript = False maxDiff = None def setUp(self): self.model_tester = PerceiverModelTester(self) self.config_tester = ConfigTester(self, config_class=PerceiverConfig, hidden_size=37) def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): inputs_dict = copy.deepcopy(inputs_dict) if model_class.__name__ == "PerceiverForMultimodalAutoencoding": inputs_dict[ "subsampled_output_points"] = self.model_tester.subsampling if return_labels: if model_class in [ *get_values(MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING), *get_values(MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING), ]: inputs_dict["labels"] = torch.zeros( self.model_tester.batch_size, dtype=torch.long, device=torch_device) elif model_class in [ *get_values(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING), *get_values(MODEL_FOR_MASKED_LM_MAPPING), ]: inputs_dict["labels"] = torch.zeros( (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device) return inputs_dict def test_config(self): # we don't test common_properties and arguments_init as these don't apply for Perceiver self.config_tester.create_and_test_config_to_json_string() self.config_tester.create_and_test_config_to_json_file() self.config_tester.create_and_test_config_from_and_save_pretrained() self.config_tester.create_and_test_config_with_num_labels() self.config_tester.check_config_can_be_init_without_params() def test_for_masked_lm(self): config_and_inputs = self.model_tester.prepare_config_and_inputs( model_class=PerceiverForMaskedLM) self.model_tester.create_and_check_for_masked_lm(*config_and_inputs) def test_for_sequence_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs( model_class=PerceiverForSequenceClassification) self.model_tester.create_and_check_for_sequence_classification( *config_and_inputs) def test_for_image_classification_learned(self): config_and_inputs = self.model_tester.prepare_config_and_inputs( model_class=PerceiverForImageClassificationLearned) self.model_tester.create_and_check_for_image_classification_learned( *config_and_inputs) def test_for_image_classification_fourier(self): config_and_inputs = self.model_tester.prepare_config_and_inputs( model_class=PerceiverForImageClassificationFourier) self.model_tester.create_and_check_for_image_classification_fourier( *config_and_inputs) def test_for_image_classification_conv(self): config_and_inputs = self.model_tester.prepare_config_and_inputs( model_class=PerceiverForImageClassificationConvProcessing) self.model_tester.create_and_check_for_image_classification_conv( *config_and_inputs) def test_model_common_attributes(self): for model_class in self.all_model_classes: config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class( model_class) model = model_class(config) # we overwrite this, as the embeddings of Perceiver are an instance of nn.Parameter # and Perceiver doesn't support get_output_embeddings self.assertIsInstance(model.get_input_embeddings(), (nn.Parameter)) def test_training(self): if not self.model_tester.is_training: return for model_class in 
self.all_model_classes: if model_class in [ *get_values(MODEL_MAPPING), PerceiverForOpticalFlow, PerceiverForMultimodalAutoencoding, ]: continue config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class( model_class) config.return_dict = True model = model_class(config) model.to(torch_device) model.train() inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) loss = model(**inputs).loss loss.backward() def test_forward_signature(self): for model_class in self.all_model_classes: config, _ = self.model_tester.prepare_config_and_inputs_for_model_class( model_class) model = model_class(config) signature = inspect.signature(model.forward) # signature.parameters is an OrderedDict => so arg_names order is deterministic arg_names = [*signature.parameters.keys()] expected_arg_names = ["inputs"] self.assertListEqual(arg_names[:1], expected_arg_names) def test_determinism(self): for model_class in self.all_model_classes: config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class( model_class) model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): inputs_dict = self._prepare_for_class(inputs_dict, model_class) first = model(**inputs_dict)[0] second = model(**inputs_dict)[0] if model_class.__name__ == "PerceiverForMultimodalAutoencoding": # model outputs a dictionary with logits per modality, let's verify each modality for modality in first.keys(): out_1 = first[modality].cpu().numpy() out_2 = second[modality].cpu().numpy() out_1 = out_1[~np.isnan(out_1)] out_2 = out_2[~np.isnan(out_2)] max_diff = np.amax(np.abs(out_1 - out_2)) self.assertLessEqual(max_diff, 1e-5) else: out_1 = first.cpu().numpy() out_2 = second.cpu().numpy() out_1 = out_1[~np.isnan(out_1)] out_2 = out_2[~np.isnan(out_2)] max_diff = np.amax(np.abs(out_1 - out_2)) self.assertLessEqual(max_diff, 1e-5) def test_attention_outputs(self): seq_len = getattr(self.model_tester, "num_latents", None) for model_class in self.all_model_classes: config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class( model_class) config.return_dict = True inputs_dict["output_attentions"] = True inputs_dict["output_hidden_states"] = False config.return_dict = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) self_attentions = outputs.attentions cross_attentions = outputs.cross_attentions # check expected number of attentions depending on model class expected_num_self_attentions = self.model_tester.num_blocks * self.model_tester.num_self_attends_per_block if model.__class__.__name__ == "PerceiverModel": # PerceiverModel is tested without a decoder, so we only expect the cross-attention of the PerceiverEncoder expected_num_cross_attentions = 1 else: # we expect to have 2 cross-attentions, namely one in the PerceiverEncoder, and one in PerceiverBasicDecoder expected_num_cross_attentions = 2 self.assertEqual(len(self_attentions), expected_num_self_attentions) self.assertEqual(len(cross_attentions), expected_num_cross_attentions) # check that output_attentions also work using config del inputs_dict["output_attentions"] config.output_attentions = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) self_attentions = outputs.attentions cross_attentions = outputs.cross_attentions self.assertEqual(len(self_attentions), 
expected_num_self_attentions) self.assertEqual(len(cross_attentions), expected_num_cross_attentions) self.assertListEqual( list(self_attentions[0].shape[-3:]), [self.model_tester.num_self_attention_heads, seq_len, seq_len], ) out_len = len(outputs) # Check attention is always last and order is fine inputs_dict["output_attentions"] = True inputs_dict["output_hidden_states"] = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) self.assertEqual(out_len + 1, len(outputs)) self_attentions = outputs.attentions self.assertEqual(len(self_attentions), expected_num_self_attentions) self.assertListEqual( list(self_attentions[0].shape[-3:]), [self.model_tester.num_self_attention_heads, seq_len, seq_len], ) def test_hidden_states_output(self): def check_hidden_states_output(inputs_dict, config, model_class): model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) hidden_states = outputs.hidden_states expected_num_layers = self.model_tester.num_blocks * self.model_tester.num_self_attends_per_block + 1 self.assertEqual(len(hidden_states), expected_num_layers) seq_length = self.model_tester.num_latents self.assertListEqual( list(hidden_states[0].shape[-2:]), [seq_length, self.model_tester.d_latents], ) for model_class in self.all_model_classes: config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class( model_class) inputs_dict["output_hidden_states"] = True check_hidden_states_output(inputs_dict, config, model_class) # check that output_hidden_states also work using config del inputs_dict["output_hidden_states"] config.output_hidden_states = True check_hidden_states_output(inputs_dict, config, model_class) def test_model_outputs_equivalence(self): def set_nan_tensor_to_zero(t): t[t != t] = 0 return t def check_equivalence(model, tuple_inputs, dict_inputs, additional_kwargs={}): with torch.no_grad(): tuple_output = model(**tuple_inputs, return_dict=False, **additional_kwargs) dict_output = model(**dict_inputs, return_dict=True, **additional_kwargs).to_tuple() def recursive_check(tuple_object, dict_object): if isinstance(tuple_object, (List, Tuple)): for tuple_iterable_value, dict_iterable_value in zip( tuple_object, dict_object): recursive_check(tuple_iterable_value, dict_iterable_value) elif isinstance(tuple_object, Dict): for tuple_iterable_value, dict_iterable_value in zip( tuple_object.values(), dict_object.values()): recursive_check(tuple_iterable_value, dict_iterable_value) elif tuple_object is None: return else: self.assertTrue( torch.allclose( set_nan_tensor_to_zero(tuple_object), set_nan_tensor_to_zero(dict_object), atol=1e-5), msg= f"Tuple and dict output are not equal. Difference: {torch.max(torch.abs(tuple_object - dict_object))}. " f"Tuple has `nan`: {torch.isnan(tuple_object).any()} and `inf`: {torch.isinf(tuple_object)}. 
" f"Dict has `nan`: {torch.isnan(dict_object).any()} and `inf`: {torch.isinf(dict_object)}.", ) recursive_check(tuple_output, dict_output) for model_class in self.all_model_classes: config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class( model_class) model = model_class(config) model.to(torch_device) model.eval() tuple_inputs = self._prepare_for_class(inputs_dict, model_class) dict_inputs = self._prepare_for_class(inputs_dict, model_class) check_equivalence(model, tuple_inputs, dict_inputs) if model_class.__name__ not in [ "PerceiverForOpticalFlow", "PerceiverForMultimodalAutoencoding" ]: # optical flow + multimodal models don't support training for now tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) check_equivalence(model, tuple_inputs, dict_inputs) tuple_inputs = self._prepare_for_class(inputs_dict, model_class) dict_inputs = self._prepare_for_class(inputs_dict, model_class) check_equivalence(model, tuple_inputs, dict_inputs, {"output_hidden_states": True}) tuple_inputs = self._prepare_for_class(inputs_dict, model_class) dict_inputs = self._prepare_for_class(inputs_dict, model_class) check_equivalence(model, tuple_inputs, dict_inputs, {"output_attentions": True}) if model_class.__name__ not in [ "PerceiverForOpticalFlow", "PerceiverForMultimodalAutoencoding" ]: # optical flow + multimodal models don't support training for now tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) check_equivalence(model, tuple_inputs, dict_inputs, {"output_hidden_states": True}) if model_class.__name__ not in [ "PerceiverForOpticalFlow", "PerceiverForMultimodalAutoencoding" ]: # optical flow + multimodal models don't support training for now tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) check_equivalence(model, tuple_inputs, dict_inputs, {"output_attentions": True}) if model_class.__name__ not in [ "PerceiverForOpticalFlow", "PerceiverForMultimodalAutoencoding" ]: # optical flow + multimodal models don't support training for now tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) check_equivalence(model, tuple_inputs, dict_inputs, { "output_hidden_states": True, "output_attentions": True }) def test_retain_grad_hidden_states_attentions(self): # no need to test all models as different heads yield the same functionality model_class = PerceiverForMaskedLM config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class( model_class) config.output_hidden_states = True config.output_attentions = True model = model_class(config) model.to(torch_device) inputs = self._prepare_for_class(inputs_dict, model_class) outputs = model(**inputs) output = outputs[0] # Encoder-only model hidden_states = outputs.hidden_states[0] attentions = outputs.attentions[0] hidden_states.retain_grad() attentions.retain_grad() output.flatten()[0].backward(retain_graph=True) self.assertIsNotNone(hidden_states.grad) self.assertIsNotNone(attentions.grad) def test_feed_forward_chunking(self): for model_class in self.all_model_classes: original_config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class( 
model_class) torch.manual_seed(0) config = copy.deepcopy(original_config) model = model_class(config) model.to(torch_device) model.eval() hidden_states_no_chunk = model( **self._prepare_for_class(inputs_dict, model_class))[0] torch.manual_seed(0) config.chunk_size_feed_forward = 1 model = model_class(config) model.to(torch_device) model.eval() hidden_states_with_chunk = model( **self._prepare_for_class(inputs_dict, model_class))[0] if model_class.__name__ == "PerceiverForMultimodalAutoencoding": # model outputs a dictionary with logits for each modality for modality in hidden_states_no_chunk.keys(): self.assertTrue( torch.allclose(hidden_states_no_chunk[modality], hidden_states_with_chunk[modality], atol=1e-3)) else: self.assertTrue( torch.allclose(hidden_states_no_chunk, hidden_states_with_chunk, atol=1e-3)) def test_save_load(self): for model_class in self.all_model_classes: config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class( model_class) model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) if model_class.__name__ == "PerceiverForMultimodalAutoencoding": for modality in outputs[0].keys(): out_2 = outputs[0][modality].cpu().numpy() out_2[np.isnan(out_2)] = 0 with tempfile.TemporaryDirectory() as tmpdirname: model.save_pretrained(tmpdirname) model = model_class.from_pretrained(tmpdirname) model.to(torch_device) with torch.no_grad(): after_outputs = model(**self._prepare_for_class( inputs_dict, model_class)) # Make sure we don't have nans out_1 = after_outputs[0][modality].cpu().numpy() out_1[np.isnan(out_1)] = 0 max_diff = np.amax(np.abs(out_1 - out_2)) self.assertLessEqual(max_diff, 1e-5) else: out_2 = outputs[0].cpu().numpy() out_2[np.isnan(out_2)] = 0 with tempfile.TemporaryDirectory() as tmpdirname: model.save_pretrained(tmpdirname) model = model_class.from_pretrained(tmpdirname) model.to(torch_device) with torch.no_grad(): after_outputs = model(**self._prepare_for_class( inputs_dict, model_class)) # Make sure we don't have nans out_1 = after_outputs[0].cpu().numpy() out_1[np.isnan(out_1)] = 0 max_diff = np.amax(np.abs(out_1 - out_2)) self.assertLessEqual(max_diff, 1e-5) def test_correct_missing_keys(self): if not self.test_missing_keys: return config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: # most Perceiver models don't have a typical head like is the case with BERT if model_class in [ PerceiverForOpticalFlow, PerceiverForMultimodalAutoencoding, *get_values(MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING), *get_values(MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING), ]: continue model = model_class(config) base_model_prefix = model.base_model_prefix if hasattr(model, base_model_prefix): with tempfile.TemporaryDirectory() as temp_dir_name: model.base_model.save_pretrained(temp_dir_name) model, loading_info = model_class.from_pretrained( temp_dir_name, output_loading_info=True) with self.subTest( msg=f"Missing keys for {model.__class__.__name__}" ): self.assertGreater(len(loading_info["missing_keys"]), 0) def test_problem_types(self): problem_types = [ { "title": "multi_label_classification", "num_labels": 2, "dtype": torch.float }, { "title": "single_label_classification", "num_labels": 1, "dtype": torch.long }, { "title": "regression", "num_labels": 1, "dtype": torch.float }, ] for model_class in self.all_model_classes: if model_class not in get_values( MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING): continue 
config, inputs, input_mask, _, _ = self.model_tester.prepare_config_and_inputs( model_class=model_class) inputs_dict = dict(inputs=inputs, attention_mask=input_mask) for problem_type in problem_types: with self.subTest( msg= f"Testing {model_class} with {problem_type['title']}"): config.problem_type = problem_type["title"] config.num_labels = problem_type["num_labels"] model = model_class(config) model.to(torch_device) model.train() inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) if problem_type["num_labels"] > 1: inputs["labels"] = inputs["labels"].unsqueeze( 1).repeat(1, problem_type["num_labels"]) inputs["labels"] = inputs["labels"].to( problem_type["dtype"]) # This tests that we do not trigger the warning from PyTorch "Using a target size that is different # to the input size. This will likely lead to incorrect results due to broadcasting. Please ensure # they have the same size." which is a symptom that something is wrong with the regression problem. # See https://github.com/huggingface/transformers/issues/11780 with warnings.catch_warnings(record=True) as warning_list: loss = model(**inputs).loss for w in warning_list: if "Using a target size that is different to the input size" in str( w.message): raise ValueError( f"Something is going wrong in the regression problem: intercepted {w.message}" ) loss.backward() @require_torch_multi_gpu @unittest.skip( reason= "Perceiver does not work with data parallel (DP) because of a bug in PyTorch: https://github.com/pytorch/pytorch/issues/36035" ) def test_multi_gpu_data_parallel_forward(self): pass @unittest.skip( reason= "Perceiver models don't have a typical head like is the case with BERT" ) def test_save_load_fast_init_from_base(self): pass @unittest.skip( reason= "Perceiver models don't have a typical head like is the case with BERT" ) def test_save_load_fast_init_to_base(self): pass @unittest.skip(reason="Perceiver doesn't support resize_token_embeddings") def test_resize_tokens_embeddings(self): pass @unittest.skip(reason="Perceiver doesn't support resize_token_embeddings") def test_resize_embeddings_untied(self): pass @unittest.skip(reason="Perceiver doesn't support inputs_embeds") def test_inputs_embeds(self): pass @unittest.skip(reason="Perceiver doesn't support the AutoModel API") def test_load_with_mismatched_shapes(self): pass @slow def test_model_from_pretrained(self): for model_name in PERCEIVER_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = PerceiverModel.from_pretrained(model_name) self.assertIsNotNone(model)
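# A minimal, standalone sketch of the warning-interception pattern used in `test_problem_types`
# above: run the loss computation inside `warnings.catch_warnings(record=True)` and fail if
# PyTorch's target-size broadcast warning shows up. The `compute_loss` callable and the
# `simplefilter` call are additions for this self-contained sketch, not part of the test suite.
import warnings


def assert_no_broadcast_warning(compute_loss):
    """Return the loss, raising if PyTorch's target-size warning was emitted."""
    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always")  # record warnings even if filters would hide them
        loss = compute_loss()
    for w in warning_list:
        if "Using a target size that is different to the input size" in str(w.message):
            raise ValueError(f"Intercepted unexpected broadcast warning: {w.message}")
    return loss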
class SwinModelTest(ModelTesterMixin, unittest.TestCase): all_model_classes = (( SwinModel, SwinForImageClassification, SwinForMaskedImageModeling, ) if is_torch_available() else ()) fx_compatible = True test_pruning = False test_resize_embeddings = False test_head_masking = False def setUp(self): self.model_tester = SwinModelTester(self) self.config_tester = ConfigTester(self, config_class=SwinConfig, embed_dim=37) def test_config(self): self.create_and_test_config_common_properties() self.config_tester.create_and_test_config_to_json_string() self.config_tester.create_and_test_config_to_json_file() self.config_tester.create_and_test_config_from_and_save_pretrained() self.config_tester.create_and_test_config_with_num_labels() self.config_tester.check_config_can_be_init_without_params() self.config_tester.check_config_arguments_init() def create_and_test_config_common_properties(self): return def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) def test_inputs_embeds(self): # Swin does not use inputs_embeds pass def test_model_common_attributes(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) self.assertIsInstance(model.get_input_embeddings(), (nn.Module)) x = model.get_output_embeddings() self.assertTrue(x is None or isinstance(x, nn.Linear)) def test_forward_signature(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) signature = inspect.signature(model.forward) # signature.parameters is an OrderedDict => so arg_names order is deterministic arg_names = [*signature.parameters.keys()] expected_arg_names = ["pixel_values"] self.assertListEqual(arg_names[:1], expected_arg_names) def test_attention_outputs(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) config.return_dict = True for model_class in self.all_model_classes: inputs_dict["output_attentions"] = True inputs_dict["output_hidden_states"] = False config.return_dict = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) attentions = outputs.attentions expected_num_attentions = len(self.model_tester.depths) self.assertEqual(len(attentions), expected_num_attentions) # check that output_attentions also work using config del inputs_dict["output_attentions"] config.output_attentions = True window_size_squared = config.window_size**2 model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) attentions = outputs.attentions self.assertEqual(len(attentions), expected_num_attentions) self.assertListEqual( list(attentions[0].shape[-3:]), [ self.model_tester.num_heads[0], window_size_squared, window_size_squared ], ) out_len = len(outputs) # Check attention is always last and order is fine inputs_dict["output_attentions"] = True inputs_dict["output_hidden_states"] = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) if hasattr(self.model_tester, "num_hidden_states_types"): added_hidden_states = self.model_tester.num_hidden_states_types else: # also another +1 for reshaped_hidden_states added_hidden_states = 2 
self.assertEqual(out_len + added_hidden_states, len(outputs)) self_attentions = outputs.attentions self.assertEqual(len(self_attentions), expected_num_attentions) self.assertListEqual( list(self_attentions[0].shape[-3:]), [ self.model_tester.num_heads[0], window_size_squared, window_size_squared ], ) def check_hidden_states_output(self, inputs_dict, config, model_class, image_size): model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) hidden_states = outputs.hidden_states expected_num_layers = getattr(self.model_tester, "expected_num_hidden_layers", len(self.model_tester.depths) + 1) self.assertEqual(len(hidden_states), expected_num_layers) # Swin has a different seq_length patch_size = to_2tuple(config.patch_size) num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0]) self.assertListEqual( list(hidden_states[0].shape[-2:]), [num_patches, self.model_tester.embed_dim], ) reshaped_hidden_states = outputs.reshaped_hidden_states self.assertEqual(len(reshaped_hidden_states), expected_num_layers) batch_size, num_channels, height, width = reshaped_hidden_states[ 0].shape reshaped_hidden_states = (reshaped_hidden_states[0].view( batch_size, num_channels, height * width).permute(0, 2, 1)) self.assertListEqual( list(reshaped_hidden_states.shape[-2:]), [num_patches, self.model_tester.embed_dim], ) def test_hidden_states_output(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) image_size = to_2tuple(self.model_tester.image_size) for model_class in self.all_model_classes: inputs_dict["output_hidden_states"] = True self.check_hidden_states_output(inputs_dict, config, model_class, image_size) # check that output_hidden_states also work using config del inputs_dict["output_hidden_states"] config.output_hidden_states = True self.check_hidden_states_output(inputs_dict, config, model_class, image_size) def test_hidden_states_output_with_padding(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) config.patch_size = 3 image_size = to_2tuple(self.model_tester.image_size) patch_size = to_2tuple(config.patch_size) padded_height = image_size[0] + patch_size[0] - (image_size[0] % patch_size[0]) padded_width = image_size[1] + patch_size[1] - (image_size[1] % patch_size[1]) for model_class in self.all_model_classes: inputs_dict["output_hidden_states"] = True self.check_hidden_states_output(inputs_dict, config, model_class, (padded_height, padded_width)) # check that output_hidden_states also work using config del inputs_dict["output_hidden_states"] config.output_hidden_states = True self.check_hidden_states_output(inputs_dict, config, model_class, (padded_height, padded_width)) def test_for_image_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_image_classification( *config_and_inputs) @slow def test_model_from_pretrained(self): for model_name in SWIN_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = SwinModel.from_pretrained(model_name) self.assertIsNotNone(model) def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) configs_no_init = _config_zero_init(config) for model_class in self.all_model_classes: model = model_class(config=configs_no_init) for name, param in model.named_parameters(): if "embeddings" not in name and param.requires_grad: self.assertIn( ((param.data.mean() * 1e9).round() 
/ 1e9).item(), [0.0, 1.0], msg= f"Parameter {name} of model {model_class} seems not properly initialized", )
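# A small standalone illustration of the padding arithmetic used in
# `test_hidden_states_output_with_padding` above: `size + patch - (size % patch)` pads the
# image up to a multiple of the patch size, and adds a full extra patch when the size is
# already divisible. The concrete numbers below are made up for illustration only.
def pad_to_patch_multiple(size: int, patch: int) -> int:
    # mirrors `image_size[0] + patch_size[0] - (image_size[0] % patch_size[0])` from the test
    return size + patch - (size % patch)


assert pad_to_patch_multiple(32, 3) == 33  # 32 % 3 == 2, so only 1 extra row/column is added
assert pad_to_patch_multiple(30, 3) == 33  # already divisible by 3: a full extra patch is added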
class ViTMAEModelTest(ModelTesterMixin, unittest.TestCase): """ Here we also overwrite some of the tests of test_modeling_common.py, as ViTMAE does not use input_ids, inputs_embeds, attention_mask and seq_length. """ all_model_classes = (ViTMAEModel, ViTMAEForPreTraining) if is_torch_available() else () test_pruning = False test_torchscript = False test_resize_embeddings = False test_head_masking = False def setUp(self): self.model_tester = ViTMAEModelTester(self) self.config_tester = ConfigTester(self, config_class=ViTMAEConfig, has_text_modality=False, hidden_size=37) def test_config(self): self.config_tester.run_common_tests() @unittest.skip(reason="ViTMAE does not use inputs_embeds") def test_inputs_embeds(self): pass def test_model_common_attributes(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) self.assertIsInstance(model.get_input_embeddings(), (nn.Module)) x = model.get_output_embeddings() self.assertTrue(x is None or isinstance(x, nn.Linear)) def test_forward_signature(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) signature = inspect.signature(model.forward) # signature.parameters is an OrderedDict => so arg_names order is deterministic arg_names = [*signature.parameters.keys()] expected_arg_names = ["pixel_values"] self.assertListEqual(arg_names[:1], expected_arg_names) def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) def test_for_pretraining(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_pretraining(*config_and_inputs) # overwrite from common since ViTMAEForPretraining has random masking, we need to fix the noise # to generate masks during test def check_pt_tf_models(self, tf_model, pt_model, pt_inputs_dict): # make masks reproducible np.random.seed(2) num_patches = int( (pt_model.config.image_size // pt_model.config.patch_size)**2) noise = np.random.uniform(size=(self.model_tester.batch_size, num_patches)) pt_noise = torch.from_numpy(noise) # Add `noise` argument. # PT inputs will be prepared in `super().check_pt_tf_models()` with this added `noise` argument pt_inputs_dict["noise"] = pt_noise super().check_pt_tf_models(tf_model, pt_model, pt_inputs_dict) def test_save_load(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) for model_class in self.all_model_classes: model = model_class(config) model.to(torch_device) model.eval() # make random mask reproducible torch.manual_seed(2) with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) out_2 = outputs[0].cpu().numpy() out_2[np.isnan(out_2)] = 0 with tempfile.TemporaryDirectory() as tmpdirname: model.save_pretrained(tmpdirname) model = model_class.from_pretrained(tmpdirname) model.to(torch_device) # make random mask reproducible torch.manual_seed(2) with torch.no_grad(): after_outputs = model( **self._prepare_for_class(inputs_dict, model_class)) # Make sure we don't have nans out_1 = after_outputs[0].cpu().numpy() out_1[np.isnan(out_1)] = 0 max_diff = np.amax(np.abs(out_1 - out_2)) self.assertLessEqual(max_diff, 1e-5) @unittest.skip( reason= """ViTMAE returns a random mask + ids_restore in each forward pass. 
See test_save_load to get deterministic results.""") def test_determinism(self): pass @unittest.skip( reason= """ViTMAE returns a random mask + ids_restore in each forward pass. See test_save_load to get deterministic results.""") def test_save_load_fast_init_from_base(self): pass @unittest.skip( reason= """ViTMAE returns a random mask + ids_restore in each forward pass. See test_save_load to get deterministic results.""") def test_save_load_fast_init_to_base(self): pass @unittest.skip( reason= """ViTMAE returns a random mask + ids_restore in each forward pass. See test_save_load""" ) def test_model_outputs_equivalence(self): pass @slow def test_model_from_pretrained(self): for model_name in VIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = ViTMAEModel.from_pretrained(model_name) self.assertIsNotNone(model)
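# A minimal sketch of the reseeding trick that `test_save_load` above relies on: because
# ViTMAE samples a random mask in every forward pass, the test calls `torch.manual_seed(2)`
# right before each forward so both passes draw identical noise. `model_fn` below is a
# hypothetical stand-in for any callable that consumes torch's global RNG.
import torch


def run_twice_with_same_mask(model_fn, seed: int = 2):
    torch.manual_seed(seed)
    first = model_fn()
    torch.manual_seed(seed)  # reseed so the second pass replays the same random draw
    second = model_fn()
    return first, second


# both outputs match exactly because the random sampling is replayed from the same seed
out_a, out_b = run_twice_with_same_mask(lambda: torch.rand(2, 3))
assert torch.equal(out_a, out_b)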
class BeitModelTest(ModelTesterMixin, unittest.TestCase): """ Here we also overwrite some of the tests of test_modeling_common.py, as BEiT does not use input_ids, inputs_embeds, attention_mask and seq_length. """ all_model_classes = ((BeitModel, BeitForImageClassification, BeitForMaskedImageModeling, BeitForSemanticSegmentation) if is_torch_available() else ()) test_pruning = False test_resize_embeddings = False test_head_masking = False def setUp(self): self.model_tester = BeitModelTester(self) self.config_tester = ConfigTester(self, config_class=BeitConfig, has_text_modality=False, hidden_size=37) def test_config(self): self.config_tester.run_common_tests() @unittest.skip(reason="BEiT does not use inputs_embeds") def test_inputs_embeds(self): pass def test_model_common_attributes(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) self.assertIsInstance(model.get_input_embeddings(), (nn.Module)) x = model.get_output_embeddings() self.assertTrue(x is None or isinstance(x, nn.Linear)) def test_forward_signature(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() for model_class in self.all_model_classes: model = model_class(config) signature = inspect.signature(model.forward) # signature.parameters is an OrderedDict => so arg_names order is deterministic arg_names = [*signature.parameters.keys()] expected_arg_names = ["pixel_values"] self.assertListEqual(arg_names[:1], expected_arg_names) def test_model(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_model(*config_and_inputs) def test_for_masked_lm(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_masked_lm(*config_and_inputs) def test_for_image_classification(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_image_classification( *config_and_inputs) def test_for_semantic_segmentation(self): config_and_inputs = self.model_tester.prepare_config_and_inputs() self.model_tester.create_and_check_for_semantic_segmentation( *config_and_inputs) def test_training(self): if not self.model_tester.is_training: return config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) config.return_dict = True for model_class in self.all_model_classes: # we don't test BeitForMaskedImageModeling if model_class in [ *get_values(MODEL_MAPPING), BeitForMaskedImageModeling ]: continue model = model_class(config) model.to(torch_device) model.train() inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) loss = model(**inputs).loss loss.backward() def test_training_gradient_checkpointing(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) if not self.model_tester.is_training: return config.use_cache = False config.return_dict = True for model_class in self.all_model_classes: # we don't test BeitForMaskedImageModeling if (model_class in [ *get_values(MODEL_MAPPING), BeitForMaskedImageModeling ] or not model_class.supports_gradient_checkpointing): continue model = model_class(config) model.gradient_checkpointing_enable() model.to(torch_device) model.train() inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) loss = model(**inputs).loss loss.backward() def test_initialization(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) 
configs_no_init = _config_zero_init(config) for model_class in self.all_model_classes: model = model_class(config=configs_no_init) for name, param in model.named_parameters(): # we skip lambda parameters as these require special initial values # determined by config.layer_scale_init_value if "lambda" in name: continue if param.requires_grad: self.assertIn( ((param.data.mean() * 1e9).round() / 1e9).item(), [0.0, 1.0], msg= f"Parameter {name} of model {model_class} seems not properly initialized", ) @slow def test_model_from_pretrained(self): for model_name in BEIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]: model = BeitModel.from_pretrained(model_name) self.assertIsNotNone(model)
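# A standalone sketch of the rounding arithmetic used by the `test_initialization` checks
# above: `((param.data.mean() * 1e9).round() / 1e9).item()` rounds the parameter mean to nine
# decimal places, so a mean that is zero (or one) up to tiny float noise still compares equal
# to 0.0 (or 1.0). The tensors below are made up for illustration.
import torch


def rounded_mean(t: torch.Tensor) -> float:
    # same arithmetic as the test assertion, isolated for clarity
    return ((t.mean() * 1e9).round() / 1e9).item()


assert rounded_mean(torch.full((4, 4), 1e-12)) == 0.0  # near-zero init rounds cleanly to 0.0
assert rounded_mean(torch.ones(4, 4)) == 1.0           # all-ones parameters also pass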