    def test_register_pipeline(self):
        PIPELINE_REGISTRY.register_pipeline(
            "custom-text-classification",
            pipeline_class=PairClassificationPipeline,
            pt_model=AutoModelForSequenceClassification if is_torch_available() else None,
            tf_model=TFAutoModelForSequenceClassification if is_tf_available() else None,
            default={"pt": "hf-internal-testing/tiny-random-distilbert"},
            type="text",
        )
        assert "custom-text-classification" in PIPELINE_REGISTRY.get_supported_tasks(
        )

        _, task_def, _ = PIPELINE_REGISTRY.check_task("custom-text-classification")
        self.assertEqual(task_def["pt"], (AutoModelForSequenceClassification,) if is_torch_available() else ())
        self.assertEqual(task_def["tf"], (TFAutoModelForSequenceClassification,) if is_tf_available() else ())
        self.assertEqual(task_def["type"], "text")
        self.assertEqual(task_def["impl"], PairClassificationPipeline)
        self.assertEqual(task_def["default"], {"model": {"pt": "hf-internal-testing/tiny-random-distilbert"}})

        # Clean registry for next tests.
        del PIPELINE_REGISTRY.supported_tasks["custom-text-classification"]
Example #2
    def test_find_labels(self):
        if is_torch_available():
            from transformers import BertForPreTraining, BertForQuestionAnswering, BertForSequenceClassification

            self.assertEqual(find_labels(BertForSequenceClassification),
                             ["labels"])
            self.assertEqual(find_labels(BertForPreTraining),
                             ["labels", "next_sentence_label"])
            self.assertEqual(find_labels(BertForQuestionAnswering),
                             ["start_positions", "end_positions"])

        if is_tf_available():
            from transformers import TFBertForPreTraining, TFBertForQuestionAnswering, TFBertForSequenceClassification

            self.assertEqual(find_labels(TFBertForSequenceClassification),
                             ["labels"])
            self.assertEqual(find_labels(TFBertForPreTraining),
                             ["labels", "next_sentence_label"])
            self.assertEqual(find_labels(TFBertForQuestionAnswering),
                             ["start_positions", "end_positions"])

        if is_flax_available():
            # Flax models don't have labels
            from transformers import (
                FlaxBertForPreTraining,
                FlaxBertForQuestionAnswering,
                FlaxBertForSequenceClassification,
            )

            self.assertEqual(find_labels(FlaxBertForSequenceClassification),
                             [])
            self.assertEqual(find_labels(FlaxBertForPreTraining), [])
            self.assertEqual(find_labels(FlaxBertForQuestionAnswering), [])
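
# The behaviour asserted above can be approximated by inspecting the signature of the
# model class's forward/call method. This is only a rough sketch consistent with the test,
# not necessarily the library's actual implementation of `find_labels`.
import inspect


def find_labels_sketch(model_class):
    name = model_class.__name__
    if name.startswith("Flax"):
        return []  # Flax models don't take label arguments
    # TF models implement call(), PyTorch models implement forward()
    fn = model_class.call if name.startswith("TF") else model_class.forward
    params = inspect.signature(fn).parameters
    if "QuestionAnswering" in name:
        return [p for p in params if "label" in p or p in ("start_positions", "end_positions")]
    return [p for p in params if "label" in p]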
def convert_pytorch(nlp: Pipeline, opset: int, output: Path,
                    use_external_format: bool):
    """
    Export a PyTorch backed pipeline to ONNX Intermediate Representation (IR

    Args:
        nlp: The pipeline to be exported
        opset: The actual version of the ONNX operator set to use
        output: Path where will be stored the generated ONNX model
        use_external_format: Split the model definition from its parameters to allow model bigger than 2GB

    Returns:

    """
    if not is_torch_available():
        raise Exception(
            "Cannot convert because PyTorch is not installed. Please install torch first."
        )

    import torch
    from torch.onnx import export

    from .pytorch_utils import is_torch_less_than_1_11

    print(f"Using framework PyTorch: {torch.__version__}")

    with torch.no_grad():
        input_names, output_names, dynamic_axes, tokens = infer_shapes(nlp, "pt")
        ordered_input_names, model_args = ensure_valid_input(nlp.model, tokens, input_names)

        # PyTorch deprecated the `enable_onnx_checker` and `use_external_data_format` arguments in v1.11,
        # so we check the torch version for backwards compatibility
        if is_torch_less_than_1_11:
            export(
                nlp.model,
                model_args,
                f=output.as_posix(),
                input_names=ordered_input_names,
                output_names=output_names,
                dynamic_axes=dynamic_axes,
                do_constant_folding=True,
                use_external_data_format=use_external_format,
                enable_onnx_checker=True,
                opset_version=opset,
            )
        else:
            export(
                nlp.model,
                model_args,
                f=output.as_posix(),
                input_names=ordered_input_names,
                output_names=output_names,
                dynamic_axes=dynamic_axes,
                do_constant_folding=True,
                opset_version=opset,
            )
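
# Usage sketch (assumes `convert_pytorch` is importable from this module; the model name
# and opset below are illustrative, not prescribed by the code above):
from pathlib import Path

from transformers import pipeline

nlp = pipeline("sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english", framework="pt")
onnx_path = Path("onnx/distilbert-sst2.onnx")
onnx_path.parent.mkdir(parents=True, exist_ok=True)  # the export writes to this path, so the directory must exist
convert_pytorch(nlp, opset=12, output=onnx_path, use_external_format=False)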
Example #4
def require_retrieval(test_case):
    """
    Decorator marking a test that requires the dependencies necessary to perform retrieval with
    [`RagRetriever`].

    These tests are skipped when respective libraries are not installed.

    """
    if not (is_torch_available() and is_datasets_available() and is_faiss_available()):
        test_case = unittest.skip("test requires PyTorch, datasets and faiss")(test_case)
    return test_case
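
# Usage sketch (the test class name below is hypothetical): applying the decorator to a
# test case skips it entirely unless PyTorch, datasets and faiss are all installed.
@require_retrieval
class RagRetrieverIntegrationTest(unittest.TestCase):
    def test_retrieve_documents(self):
        ...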
Example #5
class OwlViTTextModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = (OwlViTTextModel, ) if is_torch_available() else ()
    fx_compatible = False
    test_pruning = False
    test_head_masking = False

    def setUp(self):
        self.model_tester = OwlViTTextModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=OwlViTTextConfig,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    @unittest.skip(reason="OWL-ViT does not support training yet")
    def test_training(self):
        pass

    @unittest.skip(reason="OWL-ViT does not support training yet")
    def test_training_gradient_checkpointing(self):
        pass

    @unittest.skip(reason="OWLVIT does not use inputs_embeds")
    def test_inputs_embeds(self):
        pass

    @unittest.skip(reason="OwlViTTextModel has no base class and is not available in MODEL_MAPPING")
    def test_save_load_fast_init_from_base(self):
        pass

    @unittest.skip(reason="OwlViTTextModel has no base class and is not available in MODEL_MAPPING")
    def test_save_load_fast_init_to_base(self):
        pass

    @slow
    def test_model_from_pretrained(self):
        for model_name in OWLVIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = OwlViTTextModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
Example #6
class FlavaTextModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = (FlavaTextModel, ) if is_torch_available() else ()
    test_pruning = False
    test_head_masking = False
    test_torchscript = False

    def setUp(self):
        self.model_tester = FlavaTextModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=FlavaTextConfig,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_training(self):
        pass

    def test_training_gradient_checkpointing(self):
        pass

    def test_inputs_embeds(self):
        # FLAVA does not use inputs_embeds
        pass

    # skip this test as FlavaTextModel has no base class and is
    # not available in MODEL_MAPPING
    def test_save_load_fast_init_from_base(self):
        pass

    # skip this test as FlavaTextModel has no base class and is
    # not available in MODEL_MAPPING
    def test_save_load_fast_init_to_base(self):
        pass

    @slow
    def test_model_from_pretrained(self):
        for model_name in FLAVA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = FlavaTextModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
def load_graph_from_args(pipeline_name: str,
                         framework: str,
                         model: str,
                         tokenizer: Optional[str] = None,
                         **models_kwargs) -> Pipeline:
    """
    Convert the set of arguments provided through the CLI to an actual pipeline reference (tokenizer + model

    Args:
        pipeline_name: The kind of pipeline to use (ner, question-answering, etc.)
        framework: The actual model to convert the pipeline from ("pt" or "tf")
        model: The model name which will be loaded by the pipeline
        tokenizer: The tokenizer name which will be loaded by the pipeline, default to the model's value

    Returns: Pipeline object

    """
    # If no tokenizer provided
    if tokenizer is None:
        tokenizer = model

    # Check the wanted framework is available
    if framework == "pt" and not is_torch_available():
        raise Exception(
            "Cannot convert because PyTorch is not installed. Please install torch first."
        )
    if framework == "tf" and not is_tf_available():
        raise Exception(
            "Cannot convert because TF is not installed. Please install tensorflow first."
        )

    print(f"Loading pipeline (model: {model}, tokenizer: {tokenizer})")

    # Allocate tokenizer and model
    return pipeline(pipeline_name,
                    model=model,
                    tokenizer=tokenizer,
                    framework=framework,
                    model_kwargs=models_kwargs)
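
# Usage sketch (model name illustrative): resolve CLI-style arguments into a pipeline
# that can then be handed to the ONNX export helpers above.
nlp = load_graph_from_args(
    pipeline_name="question-answering",
    framework="pt",
    model="distilbert-base-cased-distilled-squad",
    tokenizer=None,  # falls back to the model name
)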
Example #8
class GroupViTModelTest(ModelTesterMixin, unittest.TestCase):
    all_model_classes = (GroupViTModel, ) if is_torch_available() else ()
    test_head_masking = False
    test_pruning = False
    test_resize_embeddings = False
    test_attention_outputs = False

    def setUp(self):
        self.model_tester = GroupViTModelTester(self)

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    @unittest.skip(reason="hidden_states are tested in individual model tests")
    def test_hidden_states_output(self):
        pass

    @unittest.skip(reason="input_embeds are tested in individual model tests")
    def test_inputs_embeds(self):
        pass

    @unittest.skip(reason="tested in individual model tests")
    def test_retain_grad_hidden_states_attentions(self):
        pass

    @unittest.skip(reason="GroupViTModel does not have input/output embeddings"
                   )
    def test_model_common_attributes(self):
        pass

    # override as the `logit_scale` parameter initialization is different for GroupViT
    def test_initialization(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        configs_no_init = _config_zero_init(config)
        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)
            for name, param in model.named_parameters():
                if param.requires_grad:
                    # check if `logit_scale` is initialized as per the original implementation
                    if name == "logit_scale":
                        self.assertAlmostEqual(
                            param.data.item(),
                            np.log(1 / 0.07),
                            delta=1e-3,
                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                        )
                    else:
                        self.assertIn(
                            ((param.data.mean() * 1e9).round() / 1e9).item(),
                            [0.0, 1.0],
                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                        )

    def _create_and_check_torchscript(self, config, inputs_dict):
        if not self.test_torchscript:
            return

        configs_no_init = _config_zero_init(config)  # To be sure we have no NaNs
        configs_no_init.torchscript = True
        configs_no_init.return_dict = False
        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)
            model.to(torch_device)
            model.eval()

            try:
                input_ids = inputs_dict["input_ids"]
                pixel_values = inputs_dict[
                    "pixel_values"]  # GROUPVIT needs pixel_values
                traced_model = torch.jit.trace(model,
                                               (input_ids, pixel_values))
            except RuntimeError:
                self.fail("Couldn't trace module.")

            with tempfile.TemporaryDirectory() as tmp_dir_name:
                pt_file_name = os.path.join(tmp_dir_name, "traced_model.pt")

                try:
                    torch.jit.save(traced_model, pt_file_name)
                except Exception:
                    self.fail("Couldn't save module.")

                try:
                    loaded_model = torch.jit.load(pt_file_name)
                except Exception:
                    self.fail("Couldn't load module.")

            model.to(torch_device)
            model.eval()

            loaded_model.to(torch_device)
            loaded_model.eval()

            model_state_dict = model.state_dict()
            loaded_model_state_dict = loaded_model.state_dict()

            self.assertEqual(set(model_state_dict.keys()),
                             set(loaded_model_state_dict.keys()))

            models_equal = True
            for layer_name, p1 in model_state_dict.items():
                p2 = loaded_model_state_dict[layer_name]
                if p1.data.ne(p2.data).sum() > 0:
                    models_equal = False

            self.assertTrue(models_equal)

    def test_load_vision_text_config(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        # Save GroupViTConfig and check if we can load GroupViTVisionConfig from it
        with tempfile.TemporaryDirectory() as tmp_dir_name:
            config.save_pretrained(tmp_dir_name)
            vision_config = GroupViTVisionConfig.from_pretrained(tmp_dir_name)
            self.assertDictEqual(config.vision_config.to_dict(),
                                 vision_config.to_dict())

        # Save GroupViTConfig and check if we can load GroupViTTextConfig from it
        with tempfile.TemporaryDirectory() as tmp_dir_name:
            config.save_pretrained(tmp_dir_name)
            text_config = GroupViTTextConfig.from_pretrained(tmp_dir_name)
            self.assertDictEqual(config.text_config.to_dict(),
                                 text_config.to_dict())

    @slow
    def test_model_from_pretrained(self):
        for model_name in GROUPVIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = GroupViTModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
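
# The trace / save / load round trip exercised by `_create_and_check_torchscript` above,
# shown in isolation on a toy module (plain PyTorch, not a transformers model):
import os
import tempfile

import torch
from torch import nn

toy = nn.Linear(4, 2).eval()
example_input = torch.randn(1, 4)
traced = torch.jit.trace(toy, example_input)  # record the graph for one example input
with tempfile.TemporaryDirectory() as tmp_dir:
    path = os.path.join(tmp_dir, "traced_toy.pt")
    torch.jit.save(traced, path)  # serialize the traced module
    reloaded = torch.jit.load(path)  # load it back and check it computes the same outputs
    assert torch.equal(traced(example_input), reloaded(example_input))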
class ResNetModelTest(ModelTesterMixin, unittest.TestCase):
    """
    Here we also overwrite some of the tests of test_modeling_common.py, as ResNet does not use input_ids, inputs_embeds,
    attention_mask and seq_length.
    """

    all_model_classes = (
        (ResNetModel, ResNetForImageClassification) if is_torch_available() else ()
    )

    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
    has_attentions = False

    def setUp(self):
        self.model_tester = ResNetModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=ResNetConfig,
                                          has_text_modality=False)

    def test_config(self):
        self.create_and_test_config_common_properties()
        self.config_tester.create_and_test_config_to_json_string()
        self.config_tester.create_and_test_config_to_json_file()
        self.config_tester.create_and_test_config_from_and_save_pretrained()
        self.config_tester.create_and_test_config_with_num_labels()
        self.config_tester.check_config_can_be_init_without_params()
        self.config_tester.check_config_arguments_init()

    def create_and_test_config_common_properties(self):
        return

    @unittest.skip(reason="ResNet does not output attentions")
    def test_attention_outputs(self):
        pass

    @unittest.skip(reason="ResNet does not use inputs_embeds")
    def test_inputs_embeds(self):
        pass

    @unittest.skip(reason="ResNet does not support input and output embeddings"
                   )
    def test_model_common_attributes(self):
        pass

    def test_forward_signature(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            signature = inspect.signature(model.forward)
            # signature.parameters is an OrderedDict => so arg_names order is deterministic
            arg_names = [*signature.parameters.keys()]

            expected_arg_names = ["pixel_values"]
            self.assertListEqual(arg_names[:1], expected_arg_names)

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_initialization(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        for model_class in self.all_model_classes:
            model = model_class(config=config)
            for name, module in model.named_modules():
                if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                    self.assertTrue(
                        torch.all(module.weight == 1),
                        msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                    )
                    self.assertTrue(
                        torch.all(module.bias == 0),
                        msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                    )

    def test_hidden_states_output(self):
        def check_hidden_states_output(inputs_dict, config, model_class):
            model = model_class(config)
            model.to(torch_device)
            model.eval()

            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))

            hidden_states = outputs.encoder_hidden_states if config.is_encoder_decoder else outputs.hidden_states

            expected_num_stages = self.model_tester.num_stages
            self.assertEqual(len(hidden_states), expected_num_stages + 1)

            # ResNet's feature maps are of shape (batch_size, num_channels, height, width)
            self.assertListEqual(
                list(hidden_states[0].shape[-2:]),
                [
                    self.model_tester.image_size // 4,
                    self.model_tester.image_size // 4
                ],
            )

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        layers_type = ["basic", "bottleneck"]
        for model_class in self.all_model_classes:
            for layer_type in layers_type:
                config.layer_type = layer_type
                inputs_dict["output_hidden_states"] = True
                check_hidden_states_output(inputs_dict, config, model_class)

                # check that output_hidden_states also work using config
                del inputs_dict["output_hidden_states"]
                config.output_hidden_states = True

                check_hidden_states_output(inputs_dict, config, model_class)

    def test_for_image_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_image_classification(
            *config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in RESNET_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = ResNetModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
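
# The initialization test above relies on PyTorch's default initialization of
# normalization layers: the affine weight (gamma) is all ones and the bias (beta)
# is all zeros. A quick standalone check of that assumption:
import torch
from torch import nn

bn = nn.BatchNorm2d(8)
assert torch.all(bn.weight == 1) and torch.all(bn.bias == 0)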
Example #10
class BeitModelTest(ModelTesterMixin, unittest.TestCase):
    """
    Here we also overwrite some of the tests of test_modeling_common.py, as BEiT does not use input_ids, inputs_embeds,
    attention_mask and seq_length.
    """

    all_model_classes = (
        (BeitModel, BeitForImageClassification, BeitForMaskedImageModeling, BeitForSemanticSegmentation)
        if is_torch_available()
        else ()
    )

    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False

    def setUp(self):
        self.model_tester = BeitModelTester(self)
        self.config_tester = ConfigTester(self, config_class=BeitConfig, has_text_modality=False, hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_inputs_embeds(self):
        # BEiT does not use inputs_embeds
        pass

    def test_model_common_attributes(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            self.assertIsInstance(model.get_input_embeddings(), (nn.Module))
            x = model.get_output_embeddings()
            self.assertTrue(x is None or isinstance(x, nn.Linear))

    def test_forward_signature(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            signature = inspect.signature(model.forward)
            # signature.parameters is an OrderedDict => so arg_names order is deterministic
            arg_names = [*signature.parameters.keys()]

            expected_arg_names = ["pixel_values"]
            self.assertListEqual(arg_names[:1], expected_arg_names)

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_for_image_segmentation(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_image_segmentation(*config_and_inputs)

    def test_training(self):
        if not self.model_tester.is_training:
            return

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.return_dict = True

        for model_class in self.all_model_classes:
            # we don't test BeitForMaskedImageModeling
            if model_class in [*get_values(MODEL_MAPPING), BeitForMaskedImageModeling]:
                continue

            model = model_class(config)
            model.to(torch_device)
            model.train()
            inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
            loss = model(**inputs).loss
            loss.backward()

    def test_training_gradient_checkpointing(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        if not self.model_tester.is_training:
            return

        config.use_cache = False
        config.return_dict = True

        for model_class in self.all_model_classes:
            # we don't test BeitForMaskedImageModeling
            if (
                model_class in [*get_values(MODEL_MAPPING), BeitForMaskedImageModeling]
                or not model_class.supports_gradient_checkpointing
            ):
                continue
            # TODO: remove the following 3 lines once we have a MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING
            # this can then be incorporated into _prepare_for_class in test_modeling_common.py
            elif model_class.__name__ == "BeitForSemanticSegmentation":
                batch_size, num_channels, height, width = inputs_dict["pixel_values"].shape
                inputs_dict["labels"] = torch.zeros(
                    [self.model_tester.batch_size, height, width], device=torch_device
                ).long()
            model = model_class(config)
            model.gradient_checkpointing_enable()
            model.to(torch_device)
            model.train()
            inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
            loss = model(**inputs).loss
            loss.backward()

    def test_initialization(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        configs_no_init = _config_zero_init(config)
        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)
            for name, param in model.named_parameters():
                # we skip lambda parameters as these require special initial values
                # determined by config.layer_scale_init_value
                if "lambda" in name:
                    continue
                if param.requires_grad:
                    self.assertIn(
                        ((param.data.mean() * 1e9).round() / 1e9).item(),
                        [0.0, 1.0],
                        msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                    )

    def test_attention_outputs(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.return_dict = True

        # BEiT has a different seq_length
        seq_len = self.model_tester.expected_seq_length

        for model_class in self.all_model_classes:
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = False
            config.return_dict = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.attentions
            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)

            # check that output_attentions also work using config
            del inputs_dict["output_attentions"]
            config.output_attentions = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(**self._prepare_for_class(inputs_dict, model_class))

            attentions = outputs.attentions
            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)

            self.assertListEqual(
                list(attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads, seq_len, seq_len],
            )
            out_len = len(outputs)

            # Check attention is always last and order is fine
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(**self._prepare_for_class(inputs_dict, model_class))

            self.assertEqual(out_len + 1, len(outputs))

            self_attentions = outputs.attentions

            self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)
            self.assertListEqual(
                list(self_attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads, seq_len, seq_len],
            )

    def test_hidden_states_output(self):
        def check_hidden_states_output(inputs_dict, config, model_class):
            model = model_class(config)
            model.to(torch_device)
            model.eval()

            with torch.no_grad():
                outputs = model(**self._prepare_for_class(inputs_dict, model_class))

            hidden_states = outputs.hidden_states

            expected_num_layers = getattr(
                self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
            )
            self.assertEqual(len(hidden_states), expected_num_layers)

            # BEiT has a different seq_length
            seq_length = self.model_tester.expected_seq_length

            self.assertListEqual(
                list(hidden_states[0].shape[-2:]),
                [seq_length, self.model_tester.hidden_size],
            )

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            inputs_dict["output_hidden_states"] = True
            check_hidden_states_output(inputs_dict, config, model_class)

            # check that output_hidden_states also work using config
            del inputs_dict["output_hidden_states"]
            config.output_hidden_states = True

            check_hidden_states_output(inputs_dict, config, model_class)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)

    def test_for_image_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_image_classification(*config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in BEIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = BeitModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
Example #11
class GroupViTVisionModelTest(ModelTesterMixin, unittest.TestCase):
    """
    Here we also overwrite some of the tests of test_modeling_common.py, as GROUPVIT does not use input_ids, inputs_embeds,
    attention_mask and seq_length.
    """

    all_model_classes = (GroupViTVisionModel, ) if is_torch_available() else ()

    test_pruning = False
    test_torchscript = False
    test_resize_embeddings = False
    test_head_masking = False

    def setUp(self):
        self.model_tester = GroupViTVisionModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=GroupViTVisionConfig,
                                          has_text_modality=False,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    @unittest.skip(reason="GroupViT does not use inputs_embeds")
    def test_inputs_embeds(self):
        pass

    def test_model_common_attributes(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            self.assertIsInstance(model.get_input_embeddings(), (nn.Module))
            x = model.get_output_embeddings()
            self.assertTrue(x is None or isinstance(x, nn.Linear))

    def test_forward_signature(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            signature = inspect.signature(model.forward)
            # signature.parameters is an OrderedDict => so arg_names order is deterministic
            arg_names = [*signature.parameters.keys()]

            expected_arg_names = ["pixel_values"]
            self.assertListEqual(arg_names[:1], expected_arg_names)

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_attention_outputs(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        config.return_dict = True

        seq_len = getattr(self.model_tester, "seq_length", None)

        expected_num_attention_outputs = sum(
            g > 0 for g in self.model_tester.num_group_tokens)

        for model_class in self.all_model_classes:
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = False
            config.return_dict = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.attentions
            # GroupViT returns attention grouping of each stage
            self.assertEqual(len(attentions), expected_num_attention_outputs)

            # check that output_attentions also work using config
            del inputs_dict["output_attentions"]
            config.output_attentions = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.attentions
            # GroupViT returns attention grouping of each stage
            self.assertEqual(len(attentions), expected_num_attention_outputs)

            out_len = len(outputs)

            # Check attention is always last and order is fine
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))

            added_hidden_states = 1
            self.assertEqual(out_len + added_hidden_states, len(outputs))

            self_attentions = outputs.attentions

            # GroupViT returns attention grouping of each stage
            self.assertEqual(len(self_attentions),
                             expected_num_attention_outputs)
            for i, self_attn in enumerate(self_attentions):
                if self_attn is None:
                    continue

                self.assertListEqual(
                    list(self_attentions[i].shape[-2:]),
                    [
                        self.model_tester.num_output_groups[i],
                        self.model_tester.num_output_groups[i - 1]
                        if i > 0 else seq_len,
                    ],
                )

    def test_training(self):
        pass

    def test_training_gradient_checkpointing(self):
        pass

    @unittest.skip(reason="GroupViTVisionModel has no base class and is not available in MODEL_MAPPING")
    def test_save_load_fast_init_from_base(self):
        pass

    @unittest.skip(reason="GroupViTVisionModel has no base class and is not available in MODEL_MAPPING")
    def test_save_load_fast_init_to_base(self):
        pass

    # override since the attention mask from GroupViT is not used to compute loss, thus no grad
    def test_retain_grad_hidden_states_attentions(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        config.output_hidden_states = True
        config.output_attentions = self.has_attentions

        # no need to test all models as different heads yield the same functionality
        model_class = self.all_model_classes[0]
        model = model_class(config)
        model.to(torch_device)

        inputs = self._prepare_for_class(inputs_dict, model_class)

        outputs = model(**inputs)

        output = outputs[0]

        if config.is_encoder_decoder:
            # Seq2Seq models
            encoder_hidden_states = outputs.encoder_hidden_states[0]
            encoder_hidden_states.retain_grad()

            decoder_hidden_states = outputs.decoder_hidden_states[0]
            decoder_hidden_states.retain_grad()

            if self.has_attentions:
                encoder_attentions = outputs.encoder_attentions[0]
                encoder_attentions.retain_grad()

                decoder_attentions = outputs.decoder_attentions[0]
                decoder_attentions.retain_grad()

                cross_attentions = outputs.cross_attentions[0]
                cross_attentions.retain_grad()

            output.flatten()[0].backward(retain_graph=True)

            self.assertIsNotNone(encoder_hidden_states.grad)
            self.assertIsNotNone(decoder_hidden_states.grad)

            if self.has_attentions:
                self.assertIsNotNone(encoder_attentions.grad)
                self.assertIsNotNone(decoder_attentions.grad)
                self.assertIsNotNone(cross_attentions.grad)
        else:
            # Encoder-/Decoder-only models
            hidden_states = outputs.hidden_states[0]
            hidden_states.retain_grad()

            if self.has_attentions:
                attentions = outputs.attentions[0]
                attentions.retain_grad()

            output.flatten()[0].backward(retain_graph=True)

            self.assertIsNotNone(hidden_states.grad)

            if self.has_attentions:
                self.assertIsNone(attentions.grad)

    @slow
    def test_model_from_pretrained(self):
        for model_name in GROUPVIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = GroupViTVisionModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
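
# The retain_grad pattern used in `test_retain_grad_hidden_states_attentions` above, in
# isolation: intermediate (non-leaf) tensors do not keep their gradients after backward()
# unless .retain_grad() is called on them first.
import torch

x = torch.randn(3, requires_grad=True)
hidden = x * 2          # non-leaf tensor
hidden.retain_grad()    # ask autograd to keep its gradient
out = hidden.sum()
out.backward()
assert hidden.grad is not None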
Example #12
class CLIPVisionModelTest(ModelTesterMixin, unittest.TestCase):
    """
    Here we also overwrite some of the tests of test_modeling_common.py, as CLIP does not use input_ids, inputs_embeds,
    attention_mask and seq_length.
    """

    all_model_classes = (CLIPVisionModel, ) if is_torch_available() else ()

    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False

    def setUp(self):
        self.model_tester = CLIPVisionModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=CLIPVisionConfig,
                                          has_text_modality=False,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_inputs_embeds(self):
        # CLIP does not use inputs_embeds
        pass

    def test_model_common_attributes(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            self.assertIsInstance(model.get_input_embeddings(), (nn.Module))
            x = model.get_output_embeddings()
            self.assertTrue(x is None or isinstance(x, nn.Linear))

    def test_forward_signature(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            signature = inspect.signature(model.forward)
            # signature.parameters is an OrderedDict => so arg_names order is deterministic
            arg_names = [*signature.parameters.keys()]

            expected_arg_names = ["pixel_values"]
            self.assertListEqual(arg_names[:1], expected_arg_names)

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_attention_outputs(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        config.return_dict = True

        # in CLIP, the seq_len equals the number of patches + 1 (we add 1 for the [CLS] token)
        image_size = (self.model_tester.image_size,
                      self.model_tester.image_size)
        patch_size = (self.model_tester.patch_size,
                      self.model_tester.patch_size)
        num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0])
        seq_len = num_patches + 1

        for model_class in self.all_model_classes:
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = False
            config.return_dict = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.attentions
            self.assertEqual(len(attentions),
                             self.model_tester.num_hidden_layers)

            # check that output_attentions also work using config
            del inputs_dict["output_attentions"]
            config.output_attentions = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.attentions
            self.assertEqual(len(attentions),
                             self.model_tester.num_hidden_layers)

            out_len = len(outputs)

            # Check attention is always last and order is fine
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))

            added_hidden_states = 1
            self.assertEqual(out_len + added_hidden_states, len(outputs))

            self_attentions = outputs.attentions

            self.assertEqual(len(self_attentions),
                             self.model_tester.num_hidden_layers)

            self.assertListEqual(
                list(self_attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads, seq_len, seq_len],
            )

    def test_hidden_states_output(self):
        def check_hidden_states_output(inputs_dict, config, model_class):
            model = model_class(config)
            model.to(torch_device)
            model.eval()

            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))

            hidden_states = outputs.encoder_hidden_states if config.is_encoder_decoder else outputs.hidden_states

            expected_num_layers = getattr(
                self.model_tester, "expected_num_hidden_layers",
                self.model_tester.num_hidden_layers + 1)
            self.assertEqual(len(hidden_states), expected_num_layers)

            # CLIP has a different seq_length
            image_size = (self.model_tester.image_size,
                          self.model_tester.image_size)
            patch_size = (self.model_tester.patch_size,
                          self.model_tester.patch_size)
            num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0])
            seq_length = num_patches + 1

            self.assertListEqual(
                list(hidden_states[0].shape[-2:]),
                [seq_length, self.model_tester.hidden_size],
            )

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        for model_class in self.all_model_classes:
            inputs_dict["output_hidden_states"] = True
            check_hidden_states_output(inputs_dict, config, model_class)

            # check that output_hidden_states also work using config
            del inputs_dict["output_hidden_states"]
            config.output_hidden_states = True

            check_hidden_states_output(inputs_dict, config, model_class)

    def test_training(self):
        pass

    def test_training_gradient_checkpointing(self):
        pass

    # skip this test as CLIPVisionModel has no base class and is
    # not available in MODEL_MAPPING
    def test_save_load_fast_init_from_base(self):
        pass

    # skip this test as CLIPVisionModel has no base class and is
    # not available in MODEL_MAPPING
    def test_save_load_fast_init_to_base(self):
        pass

    @slow
    def test_model_from_pretrained(self):
        for model_name in CLIP_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = CLIPVisionModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
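
# Worked example of the sequence length used in the CLIP vision tests above
# (the concrete sizes are illustrative, not taken from the tester):
# with image_size=224 and patch_size=32, (224 // 32) * (224 // 32) = 49 patches,
# plus 1 for the [CLS] token, giving seq_len = 50.
image_size, patch_size = 224, 32
num_patches = (image_size // patch_size) ** 2
seq_len = num_patches + 1
assert (num_patches, seq_len) == (49, 50)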
Example #13
class CLIPModelTest(ModelTesterMixin, unittest.TestCase):
    all_model_classes = (CLIPModel, ) if is_torch_available() else ()
    test_head_masking = False
    test_pruning = False
    test_resize_embeddings = False
    test_attention_outputs = False

    def setUp(self):
        self.model_tester = CLIPModelTester(self)

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    # hidden_states are tested in individual model tests
    def test_hidden_states_output(self):
        pass

    # input_embeds are tested in individual model tests
    def test_inputs_embeds(self):
        pass

    # tested in individual model tests
    def test_retain_grad_hidden_states_attentions(self):
        pass

    # CLIPModel does not have input/output embeddings
    def test_model_common_attributes(self):
        pass

    # override as the `logit_scale` parameter initialization is different for CLIP
    def test_initialization(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        configs_no_init = _config_zero_init(config)
        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)
            for name, param in model.named_parameters():
                if param.requires_grad:
                    # check if `logit_scale` is initialized as per the original implementation
                    if name == "logit_scale":
                        self.assertAlmostEqual(
                            param.data.item(),
                            np.log(1 / 0.07),
                            delta=1e-3,
                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                        )
                    else:
                        self.assertIn(
                            ((param.data.mean() * 1e9).round() / 1e9).item(),
                            [0.0, 1.0],
                            msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                        )

    def _create_and_check_torchscript(self, config, inputs_dict):
        if not self.test_torchscript:
            return

        configs_no_init = _config_zero_init(config)  # To be sure we have no NaNs
        configs_no_init.torchscript = True
        configs_no_init.return_dict = False
        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)
            model.to(torch_device)
            model.eval()

            try:
                input_ids = inputs_dict["input_ids"]
                pixel_values = inputs_dict[
                    "pixel_values"]  # CLIP needs pixel_values
                traced_model = torch.jit.trace(model,
                                               (input_ids, pixel_values))
            except RuntimeError:
                self.fail("Couldn't trace module.")

            with tempfile.TemporaryDirectory() as tmp_dir_name:
                pt_file_name = os.path.join(tmp_dir_name, "traced_model.pt")

                try:
                    torch.jit.save(traced_model, pt_file_name)
                except Exception:
                    self.fail("Couldn't save module.")

                try:
                    loaded_model = torch.jit.load(pt_file_name)
                except Exception:
                    self.fail("Couldn't load module.")

            model.to(torch_device)
            model.eval()

            loaded_model.to(torch_device)
            loaded_model.eval()

            model_state_dict = model.state_dict()
            loaded_model_state_dict = loaded_model.state_dict()

            self.assertEqual(set(model_state_dict.keys()),
                             set(loaded_model_state_dict.keys()))

            models_equal = True
            for layer_name, p1 in model_state_dict.items():
                p2 = loaded_model_state_dict[layer_name]
                if p1.data.ne(p2.data).sum() > 0:
                    models_equal = False

            self.assertTrue(models_equal)

    def test_load_vision_text_config(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        # Save CLIPConfig and check if we can load CLIPVisionConfig from it
        with tempfile.TemporaryDirectory() as tmp_dir_name:
            config.save_pretrained(tmp_dir_name)
            vision_config = CLIPVisionConfig.from_pretrained(tmp_dir_name)
            self.assertDictEqual(config.vision_config.to_dict(),
                                 vision_config.to_dict())

        # Save CLIPConfig and check if we can load CLIPTextConfig from it
        with tempfile.TemporaryDirectory() as tmp_dir_name:
            config.save_pretrained(tmp_dir_name)
            text_config = CLIPTextConfig.from_pretrained(tmp_dir_name)
            self.assertDictEqual(config.text_config.to_dict(),
                                 text_config.to_dict())

    # overwrite from common since FlaxCLIPModel returns nested output
    # which is not supported in the common test
    @is_pt_flax_cross_test
    def test_equivalence_pt_to_flax(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        for model_class in self.all_model_classes:
            with self.subTest(model_class.__name__):

                # load PyTorch class
                pt_model = model_class(config).eval()
                # Flax models don't use the `use_cache` option and cache is not returned as a default.
                # So we disable `use_cache` here for PyTorch model.
                pt_model.config.use_cache = False

                fx_model_class_name = "Flax" + model_class.__name__

                if not hasattr(transformers, fx_model_class_name):
                    return

                fx_model_class = getattr(transformers, fx_model_class_name)

                # load Flax class
                fx_model = fx_model_class(config, dtype=jnp.float32)
                # make sure we only forward Flax inputs that actually exist in the function args
                fx_input_keys = inspect.signature(
                    fx_model.__call__).parameters.keys()

                # prepare inputs
                pt_inputs = self._prepare_for_class(inputs_dict, model_class)

                # remove function args that don't exist in Flax
                pt_inputs = {
                    k: v
                    for k, v in pt_inputs.items() if k in fx_input_keys
                }

                fx_state = convert_pytorch_state_dict_to_flax(
                    pt_model.state_dict(), fx_model)
                fx_model.params = fx_state

                with torch.no_grad():
                    pt_outputs = pt_model(**pt_inputs).to_tuple()

                # convert inputs to Flax
                fx_inputs = {
                    k: np.array(v)
                    for k, v in pt_inputs.items() if torch.is_tensor(v)
                }
                fx_outputs = fx_model(**fx_inputs).to_tuple()
                self.assertEqual(
                    len(fx_outputs), len(pt_outputs),
                    "Output lengths differ between Flax and PyTorch")
                for fx_output, pt_output in zip(fx_outputs[:4],
                                                pt_outputs[:4]):
                    self.assert_almost_equals(fx_output, pt_output.numpy(),
                                              4e-2)

                with tempfile.TemporaryDirectory() as tmpdirname:
                    pt_model.save_pretrained(tmpdirname)
                    fx_model_loaded = fx_model_class.from_pretrained(
                        tmpdirname, from_pt=True)

                fx_outputs_loaded = fx_model_loaded(**fx_inputs).to_tuple()
                self.assertEqual(
                    len(fx_outputs_loaded), len(pt_outputs),
                    "Output lengths differ between Flax and PyTorch")
                for fx_output_loaded, pt_output in zip(fx_outputs_loaded[:4],
                                                       pt_outputs[:4]):
                    self.assert_almost_equals(fx_output_loaded,
                                              pt_output.numpy(), 4e-2)

    # overwrite from common since FlaxCLIPModel returns nested output
    # which is not supported in the common test
    @is_pt_flax_cross_test
    def test_equivalence_flax_to_pt(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        for model_class in self.all_model_classes:
            with self.subTest(model_class.__name__):
                # load corresponding PyTorch class
                pt_model = model_class(config).eval()

                # Flax models don't use the `use_cache` option, so we disable it for the PyTorch model.
                pt_model.config.use_cache = False

                fx_model_class_name = "Flax" + model_class.__name__

                if not hasattr(transformers, fx_model_class_name):
                    # no flax model exists for this class
                    return

                fx_model_class = getattr(transformers, fx_model_class_name)

                # load Flax class
                fx_model = fx_model_class(config, dtype=jnp.float32)
                # make sure we only forward Flax inputs that actually exist in the function args
                fx_input_keys = inspect.signature(
                    fx_model.__call__).parameters.keys()

                pt_model = load_flax_weights_in_pytorch_model(
                    pt_model, fx_model.params)

                # make sure weights are tied in PyTorch
                pt_model.tie_weights()

                # prepare inputs
                pt_inputs = self._prepare_for_class(inputs_dict, model_class)

                # remove function args that don't exist in Flax
                pt_inputs = {
                    k: v
                    for k, v in pt_inputs.items() if k in fx_input_keys
                }

                with torch.no_grad():
                    pt_outputs = pt_model(**pt_inputs).to_tuple()

                fx_inputs = {
                    k: np.array(v)
                    for k, v in pt_inputs.items() if torch.is_tensor(v)
                }

                fx_outputs = fx_model(**fx_inputs).to_tuple()
                self.assertEqual(
                    len(fx_outputs), len(pt_outputs),
                    "Output lengths differ between Flax and PyTorch")

                for fx_output, pt_output in zip(fx_outputs[:4],
                                                pt_outputs[:4]):
                    self.assert_almost_equals(fx_output, pt_output.numpy(),
                                              4e-2)

                with tempfile.TemporaryDirectory() as tmpdirname:
                    fx_model.save_pretrained(tmpdirname)
                    pt_model_loaded = model_class.from_pretrained(
                        tmpdirname, from_flax=True)

                with torch.no_grad():
                    pt_outputs_loaded = pt_model_loaded(**pt_inputs).to_tuple()

                self.assertEqual(
                    len(fx_outputs), len(pt_outputs_loaded),
                    "Output lengths differ between Flax and PyTorch")
                for fx_output, pt_output in zip(fx_outputs[:4],
                                                pt_outputs_loaded[:4]):
                    self.assert_almost_equals(fx_output, pt_output.numpy(),
                                              4e-2)

    @slow
    def test_model_from_pretrained(self):
        for model_name in CLIP_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = CLIPModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
class DeiTModelTest(ModelTesterMixin, unittest.TestCase):
    """
    Here we also overwrite some of the tests of test_modeling_common.py, as DeiT does not use input_ids, inputs_embeds,
    attention_mask and seq_length.
    """

    all_model_classes = ((
        DeiTModel,
        DeiTForImageClassification,
        DeiTForImageClassificationWithTeacher,
        DeiTForMaskedImageModeling,
    ) if is_torch_available() else ())

    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False

    def setUp(self):
        self.model_tester = DeiTModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=DeiTConfig,
                                          has_text_modality=False,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    @unittest.skip(reason="DeiT does not use inputs_embeds")
    def test_inputs_embeds(self):
        pass

    def test_model_common_attributes(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            self.assertIsInstance(model.get_input_embeddings(), (nn.Module))
            x = model.get_output_embeddings()
            self.assertTrue(x is None or isinstance(x, nn.Linear))

    def test_forward_signature(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            signature = inspect.signature(model.forward)
            # signature.parameters is an OrderedDict => so arg_names order is deterministic
            arg_names = [*signature.parameters.keys()]

            expected_arg_names = ["pixel_values"]
            self.assertListEqual(arg_names[:1], expected_arg_names)

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_for_image_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_image_classification(
            *config_and_inputs)

    # special case for DeiTForImageClassificationWithTeacher model
    def _prepare_for_class(self,
                           inputs_dict,
                           model_class,
                           return_labels=False):
        inputs_dict = super()._prepare_for_class(inputs_dict,
                                                 model_class,
                                                 return_labels=return_labels)

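        # DeiTForImageClassificationWithTeacher is inference-only, so it takes no labels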
        if return_labels:
            if model_class.__name__ == "DeiTForImageClassificationWithTeacher":
                del inputs_dict["labels"]

        return inputs_dict

    def test_training(self):
        if not self.model_tester.is_training:
            return

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        config.return_dict = True

        for model_class in self.all_model_classes:
            # DeiTForImageClassificationWithTeacher supports inference-only
            if (model_class in get_values(MODEL_MAPPING)
                    or model_class.__name__
                    == "DeiTForImageClassificationWithTeacher"):
                continue
            model = model_class(config)
            model.to(torch_device)
            model.train()
            inputs = self._prepare_for_class(inputs_dict,
                                             model_class,
                                             return_labels=True)
            loss = model(**inputs).loss
            loss.backward()

    def test_training_gradient_checkpointing(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        if not self.model_tester.is_training:
            return

        config.use_cache = False
        config.return_dict = True

        for model_class in self.all_model_classes:
            if model_class in get_values(
                    MODEL_MAPPING
            ) or not model_class.supports_gradient_checkpointing:
                continue
            # DeiTForImageClassificationWithTeacher supports inference-only
            if model_class.__name__ == "DeiTForImageClassificationWithTeacher":
                continue
            model = model_class(config)
            model.gradient_checkpointing_enable()
            model.to(torch_device)
            model.train()
            inputs = self._prepare_for_class(inputs_dict,
                                             model_class,
                                             return_labels=True)
            loss = model(**inputs).loss
            loss.backward()

    def test_problem_types(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        problem_types = [
            {
                "title": "multi_label_classification",
                "num_labels": 2,
                "dtype": torch.float
            },
            {
                "title": "single_label_classification",
                "num_labels": 1,
                "dtype": torch.long
            },
            {
                "title": "regression",
                "num_labels": 1,
                "dtype": torch.float
            },
        ]

        for model_class in self.all_model_classes:
            if (model_class not in [
                    *get_values(MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING),
                    *get_values(MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING),
            ] or model_class.__name__
                    == "DeiTForImageClassificationWithTeacher"):
                continue

            for problem_type in problem_types:
                with self.subTest(
                        msg=
                        f"Testing {model_class} with {problem_type['title']}"):

                    config.problem_type = problem_type["title"]
                    config.num_labels = problem_type["num_labels"]

                    model = model_class(config)
                    model.to(torch_device)
                    model.train()

                    inputs = self._prepare_for_class(inputs_dict,
                                                     model_class,
                                                     return_labels=True)

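                    # expand labels to (batch_size, num_labels) for the multi-label case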
                    if problem_type["num_labels"] > 1:
                        inputs["labels"] = inputs["labels"].unsqueeze(
                            1).repeat(1, problem_type["num_labels"])

                    inputs["labels"] = inputs["labels"].to(
                        problem_type["dtype"])

                    # This tests that we do not trigger the PyTorch warning "Using a target size that is different
                    # to the input size. This will likely lead to incorrect results due to broadcasting. Please ensure
                    # they have the same size.", which is a symptom that something is wrong for the regression problem.
                    # See https://github.com/huggingface/transformers/issues/11780
                    with warnings.catch_warnings(record=True) as warning_list:
                        loss = model(**inputs).loss
                    for w in warning_list:
                        if "Using a target size that is different to the input size" in str(
                                w.message):
                            raise ValueError(
                                f"Something is going wrong in the regression problem: intercepted {w.message}"
                            )

                    loss.backward()

    @slow
    def test_model_from_pretrained(self):
        for model_name in DEIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = DeiTModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
class OwlViTVisionModelTest(ModelTesterMixin, unittest.TestCase):
    """
    Here we also overwrite some of the tests of test_modeling_common.py, as OWLVIT does not use input_ids, inputs_embeds,
    attention_mask and seq_length.
    """

    all_model_classes = (OwlViTVisionModel, ) if is_torch_available() else ()
    fx_compatible = False
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False

    def setUp(self):
        self.model_tester = OwlViTVisionModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=OwlViTVisionConfig,
                                          has_text_modality=False,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    @unittest.skip(reason="OWLVIT does not use inputs_embeds")
    def test_inputs_embeds(self):
        pass

    def test_model_common_attributes(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            self.assertIsInstance(model.get_input_embeddings(), (nn.Module))
            x = model.get_output_embeddings()
            self.assertTrue(x is None or isinstance(x, nn.Linear))

    def test_forward_signature(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            signature = inspect.signature(model.forward)
            # signature.parameters is an OrderedDict => so arg_names order is deterministic
            arg_names = [*signature.parameters.keys()]

            expected_arg_names = ["pixel_values"]
            self.assertListEqual(arg_names[:1], expected_arg_names)

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    @unittest.skip(reason="OWL-ViT does not support training yet")
    def test_training(self):
        pass

    @unittest.skip(reason="OWL-ViT does not support training yet")
    def test_training_gradient_checkpointing(self):
        pass

    @unittest.skip(
        reason=
        "OwlViTVisionModel has no base class and is not available in MODEL_MAPPING"
    )
    def test_save_load_fast_init_from_base(self):
        pass

    @unittest.skip(
        reason=
        "OwlViTVisionModel has no base class and is not available in MODEL_MAPPING"
    )
    def test_save_load_fast_init_to_base(self):
        pass

    @slow
    def test_model_from_pretrained(self):
        for model_name in OWLVIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = OwlViTVisionModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
class LayoutLMv3ModelTest(ModelTesterMixin, unittest.TestCase):

    test_pruning = False
    test_torchscript = False
    test_mismatched_shapes = False

    all_model_classes = ((
        LayoutLMv3Model,
        LayoutLMv3ForSequenceClassification,
        LayoutLMv3ForTokenClassification,
        LayoutLMv3ForQuestionAnswering,
    ) if is_torch_available() else ())

    def setUp(self):
        self.model_tester = LayoutLMv3ModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=LayoutLMv3Config,
                                          hidden_size=37)

    def _prepare_for_class(self,
                           inputs_dict,
                           model_class,
                           return_labels=False):
        inputs_dict = copy.deepcopy(inputs_dict)
        if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
            inputs_dict = {
                k: v.unsqueeze(1).expand(-1, self.model_tester.num_choices,
                                         -1).contiguous()
                if isinstance(v, torch.Tensor) and v.ndim > 1 else v
                for k, v in inputs_dict.items()
            }
        if return_labels:
            if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
                inputs_dict["labels"] = torch.ones(
                    self.model_tester.batch_size,
                    dtype=torch.long,
                    device=torch_device)
            elif model_class in get_values(
                    MODEL_FOR_QUESTION_ANSWERING_MAPPING):
                inputs_dict["start_positions"] = torch.zeros(
                    self.model_tester.batch_size,
                    dtype=torch.long,
                    device=torch_device)
                inputs_dict["end_positions"] = torch.zeros(
                    self.model_tester.batch_size,
                    dtype=torch.long,
                    device=torch_device)
            elif model_class in [
                    *get_values(MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING),
            ]:
                inputs_dict["labels"] = torch.zeros(
                    self.model_tester.batch_size,
                    dtype=torch.long,
                    device=torch_device)
            elif model_class in [
                    *get_values(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING),
            ]:
                inputs_dict["labels"] = torch.zeros(
                    (self.model_tester.batch_size,
                     self.model_tester.text_seq_length),
                    dtype=torch.long,
                    device=torch_device,
                )

        return inputs_dict

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_model_various_embeddings(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        for type in ["absolute", "relative_key", "relative_key_query"]:
            config_and_inputs[0].position_embedding_type = type
            self.model_tester.create_and_check_model(*config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_sequence_classification(
            *config_and_inputs)

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_token_classification(
            *config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_question_answering(
            *config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in LAYOUTLMV3_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = LayoutLMv3Model.from_pretrained(model_name)
            self.assertIsNotNone(model)
class RagTestMixin:

    all_model_classes = (
        (RagModel, RagTokenForGeneration, RagSequenceForGeneration)
        if is_torch_available() and is_datasets_available() and is_faiss_available()
        else ()
    )

    retrieval_vector_size = 32
    n_docs = 3
    max_combined_length = 16

    def setUp(self):
        self.tmpdirname = tempfile.mkdtemp()

        # DPR tok
        vocab_tokens = [
            "[UNK]",
            "[CLS]",
            "[SEP]",
            "[PAD]",
            "[MASK]",
            "want",
            "##want",
            "##ed",
            "wa",
            "un",
            "runn",
            "##ing",
            ",",
            "low",
            "lowest",
        ]
        dpr_tokenizer_path = os.path.join(self.tmpdirname, "dpr_tokenizer")
        os.makedirs(dpr_tokenizer_path, exist_ok=True)
        self.vocab_file = os.path.join(dpr_tokenizer_path, DPR_VOCAB_FILES_NAMES["vocab_file"])
        with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer:
            vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))

        # BART tok
        vocab = [
            "l",
            "o",
            "w",
            "e",
            "r",
            "s",
            "t",
            "i",
            "d",
            "n",
            "\u0120",
            "\u0120l",
            "\u0120n",
            "\u0120lo",
            "\u0120low",
            "er",
            "\u0120lowest",
            "\u0120newer",
            "\u0120wider",
            "<unk>",
        ]
        vocab_tokens = dict(zip(vocab, range(len(vocab))))
        merges = ["#version: 0.2", "\u0120 l", "\u0120l o", "\u0120lo w", "e r", ""]
        self.special_tokens_map = {"unk_token": "<unk>"}

        bart_tokenizer_path = os.path.join(self.tmpdirname, "bart_tokenizer")
        os.makedirs(bart_tokenizer_path, exist_ok=True)
        self.vocab_file = os.path.join(bart_tokenizer_path, BART_VOCAB_FILES_NAMES["vocab_file"])
        self.merges_file = os.path.join(bart_tokenizer_path, BART_VOCAB_FILES_NAMES["merges_file"])
        with open(self.vocab_file, "w", encoding="utf-8") as fp:
            fp.write(json.dumps(vocab_tokens) + "\n")
        with open(self.merges_file, "w", encoding="utf-8") as fp:
            fp.write("\n".join(merges))

        t5_tokenizer = T5Tokenizer(T5_SAMPLE_VOCAB)
        t5_tokenizer_path = os.path.join(self.tmpdirname, "t5_tokenizer")
        t5_tokenizer.save_pretrained(t5_tokenizer_path)

    @cached_property
    def dpr_tokenizer(self) -> DPRQuestionEncoderTokenizer:
        return DPRQuestionEncoderTokenizer.from_pretrained(os.path.join(self.tmpdirname, "dpr_tokenizer"))

    @cached_property
    def dpr_ctx_encoder_tokenizer(self) -> DPRContextEncoderTokenizer:
        return DPRContextEncoderTokenizer.from_pretrained(os.path.join(self.tmpdirname, "dpr_tokenizer"))

    @cached_property
    def bart_tokenizer(self) -> BartTokenizer:
        return BartTokenizer.from_pretrained(os.path.join(self.tmpdirname, "bart_tokenizer"))

    @cached_property
    def t5_tokenizer(self) -> BartTokenizer:
        return T5Tokenizer.from_pretrained(os.path.join(self.tmpdirname, "t5_tokenizer"))

    def tearDown(self):
        shutil.rmtree(self.tmpdirname)

    def get_retriever(self, config):
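        # build a tiny in-memory dataset of three documents with constant embeddings to serve as the index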
        dataset = Dataset.from_dict(
            {
                "id": ["0", "1", "3"],
                "text": ["foo", "bar", "qux"],
                "title": ["Foo", "Bar", "Qux"],
                "embeddings": [
                    np.ones(self.retrieval_vector_size),
                    2 * np.ones(self.retrieval_vector_size),
                    3 * np.ones(self.retrieval_vector_size),
                ],
            }
        )
        dataset.add_faiss_index("embeddings", string_factory="Flat", metric_type=faiss.METRIC_INNER_PRODUCT)
        tokenizer = self.bart_tokenizer if config.generator.model_type == "bart" else self.t5_tokenizer
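        # patch load_dataset so RagRetriever reads the in-memory dummy dataset instead of downloading one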
        with patch("transformers.models.rag.retrieval_rag.load_dataset") as mock_load_dataset:
            mock_load_dataset.return_value = dataset
            retriever = RagRetriever(
                config,
                question_encoder_tokenizer=self.dpr_tokenizer,
                generator_tokenizer=tokenizer,
            )
        return retriever

    def check_model_with_retriever(
        self, config, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, **kwargs
    ):
        self.assertIsNotNone(config.question_encoder)
        self.assertIsNotNone(config.generator)

        for model_class in self.all_model_classes:
            model = model_class(config, retriever=self.get_retriever(config)).to(torch_device)
            model.eval()

            self.assertTrue(model.config.is_encoder_decoder)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                decoder_input_ids=decoder_input_ids,
                decoder_attention_mask=decoder_attention_mask,
            )

            # logits
            self.assertEqual(
                outputs.logits.shape,
                (self.n_docs * decoder_input_ids.shape[0], decoder_input_ids.shape[1], config.generator.vocab_size),
            )
            # generator encoder last hidden states
            self.assertEqual(
                outputs.generator_enc_last_hidden_state.shape,
                (self.n_docs * decoder_input_ids.shape[0], self.max_combined_length, config.generator.hidden_size),
            )
            # doc scores
            self.assertEqual(outputs.doc_scores.shape, (input_ids.shape[0], self.n_docs))

    def check_model_with_end2end_retriever(
        self, config, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, **kwargs
    ):
        self.assertIsNotNone(config.question_encoder)
        self.assertIsNotNone(config.generator)

        context_encoder_tokenizer = self.dpr_ctx_encoder_tokenizer
        dpr_context_encoder = DPRContextEncoder(config.question_encoder)  # DPR is a twin-tower model, so the question-encoder config also fits the context encoder

        retriever = self.get_retriever(config)
        retriever.set_ctx_encoder_tokenizer(context_encoder_tokenizer)  # setting the ctx_encoder_tokenizer.

        for model_class in [RagTokenForGeneration, RagSequenceForGeneration]:
            model = model_class(config, retriever=retriever)
            model.set_context_encoder_for_training(dpr_context_encoder)  # set the context_encoder for training
            model.to(torch_device)
            model.eval()

            self.assertTrue(model.config.is_encoder_decoder)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                decoder_input_ids=decoder_input_ids,
                decoder_attention_mask=decoder_attention_mask,
            )

            # logits
            self.assertEqual(
                outputs.logits.shape,
                (self.n_docs * decoder_input_ids.shape[0], decoder_input_ids.shape[1], config.generator.vocab_size),
            )
            # generator encoder last hidden states
            self.assertEqual(
                outputs.generator_enc_last_hidden_state.shape,
                (self.n_docs * decoder_input_ids.shape[0], self.max_combined_length, config.generator.hidden_size),
            )
            # doc scores
            self.assertEqual(outputs.doc_scores.shape, (input_ids.shape[0], self.n_docs))

    def check_model_generate_from_context_input_ids(
        self, config, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, **kwargs
    ):
        self.assertIsNotNone(config.question_encoder)
        self.assertIsNotNone(config.generator)

        retriever = self.get_retriever(config)

        for model_class in self.all_model_classes:
            model = model_class(config).to(torch_device)
            model.eval()
            self.assertTrue(model.config.is_encoder_decoder)

            question_hidden_states = model.question_encoder(input_ids, attention_mask=attention_mask)[0]

            out = retriever(
                input_ids,
                question_hidden_states.cpu().detach().to(torch.float32).numpy(),
                prefix=config.generator.prefix,
                return_tensors="pt",
            )

            context_input_ids, context_attention_mask, retrieved_doc_embeds = (
                out["context_input_ids"],
                out["context_attention_mask"],
                out["retrieved_doc_embeds"],
            )

            # cast
            retrieved_doc_embeds = retrieved_doc_embeds.to(question_hidden_states)
            context_input_ids = context_input_ids.to(input_ids)
            context_attention_mask = context_attention_mask.to(input_ids)

            # compute doc_scores
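            # (batch, 1, hidden) @ (batch, hidden, n_docs) -> (batch, 1, n_docs), squeezed to (batch, n_docs)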
            doc_scores = torch.bmm(question_hidden_states.unsqueeze(1), retrieved_doc_embeds.transpose(1, 2)).squeeze(
                1
            )

            outputs = model.generate(
                context_input_ids=context_input_ids,
                context_attention_mask=context_attention_mask,
                doc_scores=doc_scores,
                do_deduplication=True,
            )

            self.assertIsNotNone(outputs)

    def check_model_generate(
        self, config, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, **kwargs
    ):
        self.assertIsNotNone(config.question_encoder)
        self.assertIsNotNone(config.generator)

        for model_class in self.all_model_classes[1:]:
            model = model_class(config, retriever=self.get_retriever(config)).to(torch_device)
            model.eval()

            self.assertTrue(model.config.is_encoder_decoder)

            outputs = model.generate(
                input_ids=input_ids,
                num_beams=2,
                num_return_sequences=2,
                decoder_start_token_id=config.generator.eos_token_id,
            )

            self.assertIsNotNone(outputs)

    def check_model_without_retriever(
        self, config, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, **kwargs
    ):
        self.assertIsNotNone(config.question_encoder)
        self.assertIsNotNone(config.generator)

        retriever = self.get_retriever(config)

        for model_class in self.all_model_classes:
            model = model_class(config).to(torch_device)
            model.eval()
            self.assertTrue(model.config.is_encoder_decoder)

            question_hidden_states = model.question_encoder(input_ids, attention_mask=attention_mask)[0]

            out = retriever(
                input_ids,
                question_hidden_states.cpu().detach().to(torch.float32).numpy(),
                prefix=config.generator.prefix,
                return_tensors="pt",
            )

            context_input_ids, context_attention_mask, retrieved_doc_embeds = (
                out["context_input_ids"],
                out["context_attention_mask"],
                out["retrieved_doc_embeds"],
            )

            # cast
            retrieved_doc_embeds = retrieved_doc_embeds.to(question_hidden_states)
            context_input_ids = context_input_ids.to(input_ids)
            context_attention_mask = context_attention_mask.to(input_ids)

            # compute doc_scores
            doc_scores = torch.bmm(question_hidden_states.unsqueeze(1), retrieved_doc_embeds.transpose(1, 2)).squeeze(
                1
            )

            outputs = model(
                context_input_ids=context_input_ids,
                context_attention_mask=context_attention_mask,
                doc_scores=doc_scores,
                decoder_input_ids=decoder_input_ids,
                decoder_attention_mask=decoder_attention_mask,
            )

            # logits
            self.assertEqual(
                outputs.logits.shape,
                (self.n_docs * decoder_input_ids.shape[0], decoder_input_ids.shape[1], config.generator.vocab_size),
            )
            # generator encoder last hidden states
            self.assertEqual(
                outputs.generator_enc_last_hidden_state.shape,
                (self.n_docs * decoder_input_ids.shape[0], self.max_combined_length, config.generator.hidden_size),
            )
            # doc scores
            self.assertEqual(outputs.doc_scores.shape, (input_ids.shape[0], self.n_docs))

    def check_model_custom_n_docs(
        self, config, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, n_docs, **kwargs
    ):
        self.assertIsNotNone(config.question_encoder)
        self.assertIsNotNone(config.generator)

        retriever = self.get_retriever(config)

        for model_class in self.all_model_classes:
            model = model_class(config).to(torch_device)
            model.eval()
            self.assertTrue(model.config.is_encoder_decoder)

            question_hidden_states = model.question_encoder(input_ids, attention_mask=attention_mask)[0]

            out = retriever(
                input_ids,
                question_hidden_states.cpu().detach().to(torch.float32).numpy(),
                prefix=config.generator.prefix,
                return_tensors="pt",
                n_docs=n_docs,
            )

            context_input_ids, context_attention_mask, retrieved_doc_embeds = (
                out["context_input_ids"],
                out["context_attention_mask"],
                out["retrieved_doc_embeds"],
            )

            # cast
            retrieved_doc_embeds = retrieved_doc_embeds.to(question_hidden_states)
            context_input_ids = context_input_ids.to(input_ids)
            context_attention_mask = context_attention_mask.to(input_ids)

            # compute doc_scores
            doc_scores = torch.bmm(question_hidden_states.unsqueeze(1), retrieved_doc_embeds.transpose(1, 2)).squeeze(
                1
            )

            outputs = model(
                context_input_ids=context_input_ids,
                context_attention_mask=context_attention_mask,
                doc_scores=doc_scores,
                decoder_input_ids=decoder_input_ids,
                decoder_attention_mask=decoder_attention_mask,
                n_docs=n_docs,
            )

            # logits
            self.assertEqual(
                outputs.logits.shape,
                (n_docs * decoder_input_ids.shape[0], decoder_input_ids.shape[1], config.generator.vocab_size),
            )
            # generator encoder last hidden states
            self.assertEqual(
                outputs.generator_enc_last_hidden_state.shape,
                (n_docs * decoder_input_ids.shape[0], self.max_combined_length, config.generator.hidden_size),
            )
            # doc scores
            self.assertEqual(outputs.doc_scores.shape, (input_ids.shape[0], n_docs))

    def check_model_with_mismatch_n_docs_value(
        self,
        config,
        input_ids,
        attention_mask,
        decoder_input_ids,
        decoder_attention_mask,
        retriever_n_docs,
        generator_n_docs,
        **kwargs
    ):
        self.assertIsNotNone(config.question_encoder)
        self.assertIsNotNone(config.generator)

        retriever = self.get_retriever(config)

        for model_class in self.all_model_classes:
            model = model_class(config).to(torch_device)
            model.eval()
            self.assertTrue(model.config.is_encoder_decoder)

            question_hidden_states = model.question_encoder(input_ids, attention_mask=attention_mask)[0]

            out = retriever(
                input_ids,
                question_hidden_states.cpu().detach().to(torch.float32).numpy(),
                prefix=config.generator.prefix,
                return_tensors="pt",
                n_docs=retriever_n_docs,
            )

            context_input_ids, context_attention_mask, retrieved_doc_embeds = (
                out["context_input_ids"],
                out["context_attention_mask"],
                out["retrieved_doc_embeds"],
            )

            # cast
            retrieved_doc_embeds = retrieved_doc_embeds.to(question_hidden_states)
            context_input_ids = context_input_ids.to(input_ids)
            context_attention_mask = context_attention_mask.to(input_ids)

            # compute doc_scores
            doc_scores = torch.bmm(question_hidden_states.unsqueeze(1), retrieved_doc_embeds.transpose(1, 2)).squeeze(
                1
            )

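            # forwarding with a generator n_docs that differs from the retriever's n_docs should raise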
            self.assertRaises(
                AssertionError,
                model.__call__,
                context_input_ids=context_input_ids,
                context_attention_mask=context_attention_mask,
                doc_scores=doc_scores,
                decoder_input_ids=decoder_input_ids,
                decoder_attention_mask=decoder_attention_mask,
                n_docs=generator_n_docs,
            )

    def check_model_with_encoder_outputs(
        self, config, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, **kwargs
    ):
        self.assertIsNotNone(config.question_encoder)
        self.assertIsNotNone(config.generator)

        for model_class in self.all_model_classes:
            model = model_class(config, retriever=self.get_retriever(config)).to(torch_device)
            model.eval()

            self.assertTrue(model.config.is_encoder_decoder)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                decoder_input_ids=decoder_input_ids,
                decoder_attention_mask=decoder_attention_mask,
            )

            encoder_outputs = BaseModelOutput(outputs.generator_enc_last_hidden_state)

            # run only generator
            outputs = model(
                encoder_outputs=encoder_outputs,
                doc_scores=outputs.doc_scores,
                decoder_input_ids=decoder_input_ids,
                decoder_attention_mask=decoder_attention_mask,
            )

            # logits
            self.assertEqual(
                outputs.logits.shape,
                (self.n_docs * decoder_input_ids.shape[0], decoder_input_ids.shape[1], config.generator.vocab_size),
            )
            # generator encoder last hidden states
            self.assertEqual(
                outputs.generator_enc_last_hidden_state.shape,
                (self.n_docs * decoder_input_ids.shape[0], self.max_combined_length, config.generator.hidden_size),
            )
            # doc scores
            self.assertEqual(outputs.doc_scores.shape, (input_ids.shape[0], self.n_docs))

    def test_model_with_retriever(self):
        inputs_dict = self.config_and_inputs
        self.check_model_with_retriever(**inputs_dict)

    def test_model_with_end2end_retriever(self):
        inputs_dict = self.config_and_inputs
        self.check_model_with_end2end_retriever(**inputs_dict)

    def test_model_without_retriever(self):
        inputs_dict = self.config_and_inputs
        self.check_model_without_retriever(**inputs_dict)

    def test_model_with_encoder_outputs(self):
        inputs_dict = self.config_and_inputs
        self.check_model_with_encoder_outputs(**inputs_dict)

    def test_model_generate(self):
        inputs_dict = self.config_and_inputs
        self.check_model_generate(**inputs_dict)

    def test_model_with_custom_n_docs(self):
        inputs_dict = self.config_and_inputs
        inputs_dict["n_docs"] = 1
        self.check_model_custom_n_docs(**inputs_dict)

    def test_model_with_mismatch_n_docs_value(self):
        inputs_dict = self.config_and_inputs
        inputs_dict["retriever_n_docs"] = 3
        inputs_dict["generator_n_docs"] = 2
        self.check_model_with_mismatch_n_docs_value(**inputs_dict)
class MobileViTModelTest(ModelTesterMixin, unittest.TestCase):
    """
    Here we also overwrite some of the tests of test_modeling_common.py, as MobileViT does not use input_ids, inputs_embeds,
    attention_mask and seq_length.
    """

    all_model_classes = ((MobileViTModel, MobileViTForImageClassification,
                          MobileViTForSemanticSegmentation)
                         if is_torch_available() else ())

    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
    has_attentions = False

    def setUp(self):
        self.model_tester = MobileViTModelTester(self)
        self.config_tester = MobileViTConfigTester(
            self, config_class=MobileViTConfig, has_text_modality=False)

    def test_config(self):
        self.config_tester.run_common_tests()

    @unittest.skip(reason="MobileViT does not use inputs_embeds")
    def test_inputs_embeds(self):
        pass

    @unittest.skip(
        reason="MobileViT does not support input and output embeddings")
    def test_model_common_attributes(self):
        pass

    @unittest.skip(reason="MobileViT does not output attentions")
    def test_attention_outputs(self):
        pass

    def test_forward_signature(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            signature = inspect.signature(model.forward)
            # signature.parameters is an OrderedDict => so arg_names order is deterministic
            arg_names = [*signature.parameters.keys()]

            expected_arg_names = ["pixel_values"]
            self.assertListEqual(arg_names[:1], expected_arg_names)

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_hidden_states_output(self):
        def check_hidden_states_output(inputs_dict, config, model_class):
            model = model_class(config)
            model.to(torch_device)
            model.eval()

            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))

            hidden_states = outputs.hidden_states

            expected_num_stages = 5
            self.assertEqual(len(hidden_states), expected_num_stages)

            # MobileViT's feature maps are of shape (batch_size, num_channels, height, width)
            # with the width and height being successively divided by 2.
            divisor = 2
            for i in range(len(hidden_states)):
                self.assertListEqual(
                    list(hidden_states[i].shape[-2:]),
                    [
                        self.model_tester.image_size // divisor,
                        self.model_tester.image_size // divisor
                    ],
                )
                divisor *= 2

            self.assertEqual(self.model_tester.output_stride, divisor // 2)

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        for model_class in self.all_model_classes:
            inputs_dict["output_hidden_states"] = True
            check_hidden_states_output(inputs_dict, config, model_class)

            # check that output_hidden_states also work using config
            del inputs_dict["output_hidden_states"]
            config.output_hidden_states = True

            check_hidden_states_output(inputs_dict, config, model_class)

    def test_for_image_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_image_classification(
            *config_and_inputs)

    def test_for_semantic_segmentation(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_semantic_segmentation(
            *config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in MOBILEVIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = MobileViTModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
class OwlViTForObjectDetectionTest(ModelTesterMixin, unittest.TestCase):
    all_model_classes = (
        OwlViTForObjectDetection, ) if is_torch_available() else ()
    fx_compatible = False
    test_head_masking = False
    test_pruning = False
    test_resize_embeddings = False
    test_attention_outputs = False

    def setUp(self):
        self.model_tester = OwlViTForObjectDetectionTester(self)

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    @unittest.skip(reason="Hidden_states is tested in individual model tests")
    def test_hidden_states_output(self):
        pass

    @unittest.skip(reason="Inputs_embeds is tested in individual model tests")
    def test_inputs_embeds(self):
        pass

    @unittest.skip(reason="Retain_grad is tested in individual model tests")
    def test_retain_grad_hidden_states_attentions(self):
        pass

    @unittest.skip(reason="OwlViTModel does not have input/output embeddings")
    def test_model_common_attributes(self):
        pass

    @unittest.skip(
        reason="Test_initialization is tested in individual model tests")
    def test_initialization(self):
        pass

    @unittest.skip(
        reason="Test_forward_signature is tested in individual model tests")
    def test_forward_signature(self):
        pass

    @unittest.skip(
        reason=
        "Test_save_load_fast_init_from_base is tested in individual model tests"
    )
    def test_save_load_fast_init_from_base(self):
        pass

    @unittest.skip(reason="OWL-ViT does not support training yet")
    def test_training(self):
        pass

    @unittest.skip(reason="OWL-ViT does not support training yet")
    def test_training_gradient_checkpointing(self):
        pass

    def _create_and_check_torchscript(self, config, inputs_dict):
        if not self.test_torchscript:
            return

        configs_no_init = _config_zero_init(
            config)  # To be sure we have no NaN
        configs_no_init.torchscript = True
        configs_no_init.return_dict = False
        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init).to(torch_device)
            model.eval()

            try:
                input_ids = inputs_dict["input_ids"]
                pixel_values = inputs_dict[
                    "pixel_values"]  # OWLVIT needs pixel_values
                traced_model = torch.jit.trace(model,
                                               (input_ids, pixel_values))
            except RuntimeError:
                self.fail("Couldn't trace module.")

            with tempfile.TemporaryDirectory() as tmp_dir_name:
                pt_file_name = os.path.join(tmp_dir_name, "traced_model.pt")

                try:
                    torch.jit.save(traced_model, pt_file_name)
                except Exception:
                    self.fail("Couldn't save module.")

                try:
                    loaded_model = torch.jit.load(pt_file_name)
                except Exception:
                    self.fail("Couldn't load module.")

            loaded_model = loaded_model.to(torch_device)
            loaded_model.eval()

            model_state_dict = model.state_dict()
            loaded_model_state_dict = loaded_model.state_dict()

            self.assertEqual(set(model_state_dict.keys()),
                             set(loaded_model_state_dict.keys()))

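            # compare parameters element-wise to check that the traced model's weights survive the save/load round trip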
            models_equal = True
            for layer_name, p1 in model_state_dict.items():
                p2 = loaded_model_state_dict[layer_name]
                if p1.data.ne(p2.data).sum() > 0:
                    models_equal = False

            self.assertTrue(models_equal)

    def test_model_outputs_equivalence(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        def set_nan_tensor_to_zero(t):
            t[t != t] = 0
            return t

        def check_equivalence(model,
                              tuple_inputs,
                              dict_inputs,
                              additional_kwargs={}):
            with torch.no_grad():
                tuple_output = model(**tuple_inputs,
                                     return_dict=False,
                                     **additional_kwargs)
                dict_output = model(**dict_inputs,
                                    return_dict=True,
                                    **additional_kwargs).to_tuple()

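                # recursively compare nested outputs; NaNs are zeroed first so NaN != NaN does not cause false failures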
                def recursive_check(tuple_object, dict_object):
                    if isinstance(tuple_object, (List, Tuple)):
                        for tuple_iterable_value, dict_iterable_value in zip(
                                tuple_object, dict_object):
                            recursive_check(tuple_iterable_value,
                                            dict_iterable_value)
                    elif isinstance(tuple_object, Dict):
                        for tuple_iterable_value, dict_iterable_value in zip(
                                tuple_object.values(), dict_object.values()):
                            recursive_check(tuple_iterable_value,
                                            dict_iterable_value)
                    elif tuple_object is None:
                        return
                    else:
                        self.assertTrue(
                            torch.allclose(
                                set_nan_tensor_to_zero(tuple_object),
                                set_nan_tensor_to_zero(dict_object),
                                atol=1e-5),
                            msg=
                            ("Tuple and dict output are not equal. Difference:"
                             f" {torch.max(torch.abs(tuple_object - dict_object))}. Tuple has `nan`:"
                             f" {torch.isnan(tuple_object).any()} and `inf`: {torch.isinf(tuple_object)}. Dict has"
                             f" `nan`: {torch.isnan(dict_object).any()} and `inf`: {torch.isinf(dict_object)}."
                             ),
                        )

                recursive_check(tuple_output, dict_output)

        for model_class in self.all_model_classes:
            model = model_class(config).to(torch_device)
            model.eval()

            tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
            dict_inputs = self._prepare_for_class(inputs_dict, model_class)
            check_equivalence(model, tuple_inputs, dict_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in OWLVIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = OwlViTForObjectDetection.from_pretrained(model_name)
            self.assertIsNotNone(model)
class LayoutLMv2ModelTest(ModelTesterMixin, unittest.TestCase):

    test_pruning = False
    test_torchscript = False
    test_mismatched_shapes = False

    all_model_classes = ((
        LayoutLMv2Model,
        LayoutLMv2ForSequenceClassification,
        LayoutLMv2ForTokenClassification,
        LayoutLMv2ForQuestionAnswering,
    ) if is_torch_available() else ())

    def setUp(self):
        self.model_tester = LayoutLMv2ModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=LayoutLMv2Config,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_model_various_embeddings(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        for type in ["absolute", "relative_key", "relative_key_query"]:
            config_and_inputs[0].position_embedding_type = type
            self.model_tester.create_and_check_model(*config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_sequence_classification(
            *config_and_inputs)

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_token_classification(
            *config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_question_answering(
            *config_and_inputs)

    def test_save_load_fast_init_from_base(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        base_class = MODEL_MAPPING[config.__class__]

        if isinstance(base_class, tuple):
            base_class = base_class[0]

        for model_class in self.all_model_classes:
            if model_class == base_class:
                continue

            # make a copy of model class to not break future tests
            # from https://stackoverflow.com/questions/9541025/how-to-copy-a-python-class
            class CopyClass(model_class):
                pass

            model_class_copy = CopyClass

            # make sure that all keys are expected for test
            model_class_copy._keys_to_ignore_on_load_missing = []

            # make init deterministic, but make sure that
            # non-initialized weights throw errors nevertheless
            model_class_copy._init_weights = self._mock_init_weights

            model = base_class(config)
            state_dict = model.state_dict()

            # this will often delete a single weight of a multi-weight module
            # to test an edge case
            random_key_to_del = random.choice(list(state_dict.keys()))
            del state_dict[random_key_to_del]

            # check that certain keys didn't get saved with the model
            with tempfile.TemporaryDirectory() as tmpdirname:
                model.save_pretrained(tmpdirname)
                torch.save(state_dict,
                           os.path.join(tmpdirname, "pytorch_model.bin"))

                model_fast_init = model_class_copy.from_pretrained(tmpdirname)
                model_slow_init = model_class_copy.from_pretrained(
                    tmpdirname, _fast_init=False)

                for key in model_fast_init.state_dict().keys():
                    if key == "layoutlmv2.visual_segment_embedding":
                        # we skip the visual segment embedding as it has a custom initialization scheme
                        continue
                    max_diff = (
                        model_slow_init.state_dict()[key] -
                        model_fast_init.state_dict()[key]).sum().item()
                    self.assertLessEqual(max_diff,
                                         1e-3,
                                         msg=f"{key} not identical")

    def test_attention_outputs(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        config.return_dict = True

        # LayoutLMv2 has a different expected sequence length
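        # (text tokens plus the flattened visual feature map: pool height * pool width)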
        expected_seq_len = (self.model_tester.seq_length +
                            self.model_tester.image_feature_pool_shape[0] *
                            self.model_tester.image_feature_pool_shape[1])

        for model_class in self.all_model_classes:
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = False
            config.return_dict = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.attentions
            self.assertEqual(len(attentions),
                             self.model_tester.num_hidden_layers)

            # check that output_attentions also work using config
            del inputs_dict["output_attentions"]
            config.output_attentions = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.attentions
            self.assertEqual(len(attentions),
                             self.model_tester.num_hidden_layers)

            self.assertListEqual(
                list(attentions[0].shape[-3:]),
                [
                    self.model_tester.num_attention_heads, expected_seq_len,
                    expected_seq_len
                ],
            )
            out_len = len(outputs)

            # Check attention is always last and order is fine
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))

            if hasattr(self.model_tester, "num_hidden_states_types"):
                added_hidden_states = self.model_tester.num_hidden_states_types
            else:
                added_hidden_states = 1
            self.assertEqual(out_len + added_hidden_states, len(outputs))

            self_attentions = outputs.attentions

            self.assertEqual(len(self_attentions),
                             self.model_tester.num_hidden_layers)
            self.assertListEqual(
                list(self_attentions[0].shape[-3:]),
                [
                    self.model_tester.num_attention_heads, expected_seq_len,
                    expected_seq_len
                ],
            )

    def test_hidden_states_output(self):
        def check_hidden_states_output(inputs_dict, config, model_class):
            model = model_class(config)
            model.to(torch_device)
            model.eval()

            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))

            hidden_states = outputs.hidden_states

            expected_num_layers = getattr(
                self.model_tester, "expected_num_hidden_layers",
                self.model_tester.num_hidden_layers + 1)
            self.assertEqual(len(hidden_states), expected_num_layers)

            # LayoutLMv2 has a different expected sequence length
            expected_seq_len = (self.model_tester.seq_length +
                                self.model_tester.image_feature_pool_shape[0] *
                                self.model_tester.image_feature_pool_shape[1])

            self.assertListEqual(
                list(hidden_states[0].shape[-2:]),
                [expected_seq_len, self.model_tester.hidden_size],
            )

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        for model_class in self.all_model_classes:
            inputs_dict["output_hidden_states"] = True
            check_hidden_states_output(inputs_dict, config, model_class)

            # check that output_hidden_states also work using config
            del inputs_dict["output_hidden_states"]
            config.output_hidden_states = True

            check_hidden_states_output(inputs_dict, config, model_class)

    @slow
    def test_model_from_pretrained(self):
        for model_name in LAYOUTLMV2_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = LayoutLMv2Model.from_pretrained(model_name)
            self.assertIsNotNone(model)

    def test_initialization(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        configs_no_init = _config_zero_init(config)
        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)
            for name, param in model.named_parameters():
                if "backbone" in name or "visual_segment_embedding" in name:
                    continue

                if param.requires_grad:
                    self.assertIn(
                        ((param.data.mean() * 1e9).round() / 1e9).item(),
                        [0.0, 1.0],
                        msg=
                        f"Parameter {name} of model {model_class} seems not properly initialized",
                    )
from transformers.testing_utils import (
    require_torch,
    require_torch_non_multi_gpu,
    slow,
    torch_device,
)
from transformers.utils import cached_property, is_datasets_available, is_faiss_available, is_torch_available

from ..bart.test_modeling_bart import BartModelTester
from ..dpr.test_modeling_dpr import DPRModelTester
from ..t5.test_modeling_t5 import T5ModelTester


TOLERANCE = 1e-3

T5_SAMPLE_VOCAB = os.path.join(dirname(dirname(os.path.abspath(__file__))), "fixtures/test_sentencepiece.model")
if is_torch_available() and is_datasets_available() and is_faiss_available():
    import torch
    from datasets import Dataset

    import faiss
    from transformers import (
        AutoConfig,
        AutoModel,
        AutoModelForSeq2SeqLM,
        DPRContextEncoder,
        RagConfig,
        RagModel,
        RagRetriever,
        RagSequenceForGeneration,
        RagTokenForGeneration,
        RagTokenizer,
class MaskFormerFeatureExtractionTest(FeatureExtractionSavingTestMixin,
                                      unittest.TestCase):

    feature_extraction_class = MaskFormerFeatureExtractor if (
        is_vision_available() and is_torch_available()) else None

    def setUp(self):
        self.feature_extract_tester = MaskFormerFeatureExtractionTester(self)

    @property
    def feat_extract_dict(self):
        return self.feature_extract_tester.prepare_feat_extract_dict()

    def test_feat_extract_properties(self):
        feature_extractor = self.feature_extraction_class(
            **self.feat_extract_dict)
        self.assertTrue(hasattr(feature_extractor, "image_mean"))
        self.assertTrue(hasattr(feature_extractor, "image_std"))
        self.assertTrue(hasattr(feature_extractor, "do_normalize"))
        self.assertTrue(hasattr(feature_extractor, "do_resize"))
        self.assertTrue(hasattr(feature_extractor, "size"))
        self.assertTrue(hasattr(feature_extractor, "max_size"))

    def test_batch_feature(self):
        pass

    def test_call_pil(self):
        # Initialize feature_extractor
        feature_extractor = self.feature_extraction_class(
            **self.feat_extract_dict)
        # create random PIL images
        image_inputs = prepare_image_inputs(self.feature_extract_tester,
                                            equal_resolution=False)
        for image in image_inputs:
            self.assertIsInstance(image, Image.Image)

        # Test not batched input
        encoded_images = feature_extractor(image_inputs[0],
                                           return_tensors="pt").pixel_values

        expected_height, expected_width = self.feature_extract_tester.get_expected_values(
            image_inputs)

        self.assertEqual(
            encoded_images.shape,
            (1, self.feature_extract_tester.num_channels, expected_height,
             expected_width),
        )

        # Test batched
        expected_height, expected_width = self.feature_extract_tester.get_expected_values(
            image_inputs, batched=True)

        encoded_images = feature_extractor(image_inputs,
                                           return_tensors="pt").pixel_values
        self.assertEqual(
            encoded_images.shape,
            (
                self.feature_extract_tester.batch_size,
                self.feature_extract_tester.num_channels,
                expected_height,
                expected_width,
            ),
        )

    def test_call_numpy(self):
        # Initialize feature_extractor
        feature_extractor = self.feature_extraction_class(
            **self.feat_extract_dict)
        # create random numpy tensors
        image_inputs = prepare_image_inputs(self.feature_extract_tester,
                                            equal_resolution=False,
                                            numpify=True)
        for image in image_inputs:
            self.assertIsInstance(image, np.ndarray)

        # Test not batched input
        encoded_images = feature_extractor(image_inputs[0],
                                           return_tensors="pt").pixel_values

        expected_height, expected_width = self.feature_extract_tester.get_expected_values(
            image_inputs)

        self.assertEqual(
            encoded_images.shape,
            (1, self.feature_extract_tester.num_channels, expected_height,
             expected_width),
        )

        # Test batched
        encoded_images = feature_extractor(image_inputs,
                                           return_tensors="pt").pixel_values

        expected_height, expected_width = self.feature_extract_tester.get_expected_values(
            image_inputs, batched=True)

        self.assertEqual(
            encoded_images.shape,
            (
                self.feature_extract_tester.batch_size,
                self.feature_extract_tester.num_channels,
                expected_height,
                expected_width,
            ),
        )

    def test_call_pytorch(self):
        # Initialize feature_extractor
        feature_extractor = self.feature_extraction_class(
            **self.feat_extract_dict)
        # create random PyTorch tensors
        image_inputs = prepare_image_inputs(self.feature_extract_tester,
                                            equal_resolution=False,
                                            torchify=True)
        for image in image_inputs:
            self.assertIsInstance(image, torch.Tensor)

        # Test not batched input
        encoded_images = feature_extractor(image_inputs[0],
                                           return_tensors="pt").pixel_values

        expected_height, expected_width = self.feature_extract_tester.get_expected_values(
            image_inputs)

        self.assertEqual(
            encoded_images.shape,
            (1, self.feature_extract_tester.num_channels, expected_height,
             expected_width),
        )

        # Test batched
        encoded_images = feature_extractor(image_inputs,
                                           return_tensors="pt").pixel_values

        expected_height, expected_width = self.feature_extract_tester.get_expected_values(
            image_inputs, batched=True)

        self.assertEqual(
            encoded_images.shape,
            (
                self.feature_extract_tester.batch_size,
                self.feature_extract_tester.num_channels,
                expected_height,
                expected_width,
            ),
        )

    def test_equivalence_pad_and_create_pixel_mask(self):
        # Initialize feature_extractors
        feature_extractor_1 = self.feature_extraction_class(
            **self.feat_extract_dict)
        feature_extractor_2 = self.feature_extraction_class(do_resize=False,
                                                            do_normalize=False)
        # create random PyTorch tensors
        image_inputs = prepare_image_inputs(self.feature_extract_tester,
                                            equal_resolution=False,
                                            torchify=True)
        for image in image_inputs:
            self.assertIsInstance(image, torch.Tensor)

        # Test whether calling `encode_inputs` and calling the feature extractor directly return the same tensors
        encoded_images_with_method = feature_extractor_1.encode_inputs(
            image_inputs, return_tensors="pt")
        encoded_images = feature_extractor_2(image_inputs, return_tensors="pt")

        self.assertTrue(
            torch.allclose(encoded_images_with_method["pixel_values"],
                           encoded_images["pixel_values"],
                           atol=1e-4))
        self.assertTrue(
            torch.allclose(encoded_images_with_method["pixel_mask"],
                           encoded_images["pixel_mask"],
                           atol=1e-4))

    def comm_get_feature_extractor_inputs(self, with_annotations=False):
        feature_extractor = self.feature_extraction_class(
            **self.feat_extract_dict)
        # prepare image and target
        num_classes = 8
        batch_size = self.feature_extract_tester.batch_size
        annotations = None

        if with_annotations:
            annotations = [{
                "masks":
                np.random.rand(num_classes, 384, 384).astype(np.float32),
                "labels": (np.random.rand(num_classes) > 0.5).astype(np.int64),
            } for _ in range(batch_size)]

        image_inputs = prepare_image_inputs(self.feature_extract_tester,
                                            equal_resolution=False)

        inputs = feature_extractor(image_inputs,
                                   annotations,
                                   return_tensors="pt",
                                   pad_and_return_pixel_mask=True)

        return inputs

    def test_with_size_divisibility(self):
        size_divisibilities = [8, 16, 32]
        weird_input_sizes = [(407, 802), (582, 1094)]
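        # e.g., a 407-pixel side with size_divisibility=8 must come out padded
        # to some multiple of 8 (such as 408); the assertions below only check
        # divisibility, not the exact padded size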
        for size_divisibility in size_divisibilities:
            feat_extract_dict = {
                **self.feat_extract_dict,
                **{
                    "size_divisibility": size_divisibility
                }
            }
            feature_extractor = self.feature_extraction_class(
                **feat_extract_dict)
            for weird_input_size in weird_input_sizes:
                inputs = feature_extractor([np.ones((3, *weird_input_size))],
                                           return_tensors="pt")
                pixel_values = inputs["pixel_values"]
                # check if divisible
                self.assertTrue((pixel_values.shape[-1] %
                                 size_divisibility) == 0)
                self.assertTrue((pixel_values.shape[-2] %
                                 size_divisibility) == 0)

    def test_call_with_numpy_annotations(self):
        num_classes = 8
        batch_size = self.feature_extract_tester.batch_size

        inputs = self.comm_get_feature_extractor_inputs(with_annotations=True)

        # check the batch_size
        for el in inputs.values():
            self.assertEqual(el.shape[0], batch_size)

        pixel_values = inputs["pixel_values"]
        mask_labels = inputs["mask_labels"]
        class_labels = inputs["class_labels"]

        self.assertEqual(pixel_values.shape[-2], mask_labels.shape[-2])
        self.assertEqual(pixel_values.shape[-1], mask_labels.shape[-1])
        self.assertEqual(mask_labels.shape[1], class_labels.shape[1])
        self.assertEqual(mask_labels.shape[1], num_classes)

    def test_post_process_segmentation(self):
        feature_extractor = self.feature_extraction_class()
        outputs = self.feature_extract_tester.get_fake_maskformer_outputs()
        segmentation = feature_extractor.post_process_segmentation(outputs)

        self.assertEqual(
            segmentation.shape,
            (
                self.feature_extract_tester.batch_size,
                self.feature_extract_tester.num_classes,
                self.feature_extract_tester.height,
                self.feature_extract_tester.width,
            ),
        )

        target_size = (1, 4)
        segmentation = feature_extractor.post_process_segmentation(
            outputs, target_size=target_size)

        self.assertEqual(
            segmentation.shape,
            (self.feature_extract_tester.batch_size,
             self.feature_extract_tester.num_classes, *target_size),
        )

    def test_post_process_semantic_segmentation(self):
        feature_extractor = self.feature_extraction_class()
        outputs = self.feature_extract_tester.get_fake_maskformer_outputs()

        segmentation = feature_extractor.post_process_semantic_segmentation(
            outputs)

        self.assertEqual(
            segmentation.shape,
            (
                self.feature_extract_tester.batch_size,
                self.feature_extract_tester.height,
                self.feature_extract_tester.width,
            ),
        )

        target_size = (1, 4)

        segmentation = feature_extractor.post_process_semantic_segmentation(
            outputs, target_size=target_size)

        self.assertEqual(
            segmentation.shape,
            (self.feature_extract_tester.batch_size, *target_size))

    def test_post_process_panoptic_segmentation(self):
        feature_extractor = self.feature_extraction_class()
        outputs = self.feature_extract_tester.get_fake_maskformer_outputs()
        segmentation = feature_extractor.post_process_panoptic_segmentation(
            outputs, object_mask_threshold=0)

        self.assertTrue(
            len(segmentation) == self.feature_extract_tester.batch_size)
        for el in segmentation:
            self.assertTrue("segmentation" in el)
            self.assertTrue("segments" in el)
            self.assertEqual(type(el["segments"]), list)
            self.assertEqual(el["segmentation"].shape,
                             (self.feature_extract_tester.height,
                              self.feature_extract_tester.width))
class Data2VecVisionModelTest(ModelTesterMixin, unittest.TestCase):
    """
    Here we also overwrite some of the tests of test_modeling_common.py, as Data2VecVision does not use input_ids, inputs_embeds,
    attention_mask and seq_length.
    """

    all_model_classes = ((Data2VecVisionModel,
                          Data2VecVisionForImageClassification,
                          Data2VecVisionForSemanticSegmentation)
                         if is_torch_available() else ())

    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False

    def setUp(self):
        self.model_tester = Data2VecVisionModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=Data2VecVisionConfig,
                                          has_text_modality=False,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_inputs_embeds(self):
        # Data2VecVision does not use inputs_embeds
        pass

    def test_model_common_attributes(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            self.assertIsInstance(model.get_input_embeddings(), (nn.Module))
            x = model.get_output_embeddings()
            self.assertTrue(x is None or isinstance(x, nn.Linear))

    def test_forward_signature(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            signature = inspect.signature(model.forward)
            # signature.parameters is an OrderedDict => so arg_names order is deterministic
            arg_names = [*signature.parameters.keys()]

            expected_arg_names = ["pixel_values"]
            self.assertListEqual(arg_names[:1], expected_arg_names)

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_for_image_segmentation(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_image_segmentation(
            *config_and_inputs)

    def test_training(self):
        if not self.model_tester.is_training:
            return

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        config.return_dict = True

        for model_class in self.all_model_classes:
            if model_class in [*get_values(MODEL_MAPPING)]:
                continue

            model = model_class(config)
            model.to(torch_device)
            model.train()
            inputs = self._prepare_for_class(inputs_dict,
                                             model_class,
                                             return_labels=True)
            loss = model(**inputs).loss
            loss.backward()

    def test_training_gradient_checkpointing(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        if not self.model_tester.is_training:
            return

        config.use_cache = False
        config.return_dict = True

        for model_class in self.all_model_classes:
            if model_class in [
                    *get_values(MODEL_MAPPING)
            ] or not model_class.supports_gradient_checkpointing:
                continue
            # TODO: remove the following 3 lines once we have a MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING
            # this can then be incorporated into _prepare_for_class in test_modeling_common.py
            elif model_class.__name__ == "Data2VecVisionForSemanticSegmentation":
                batch_size, num_channels, height, width = inputs_dict[
                    "pixel_values"].shape
                inputs_dict["labels"] = torch.zeros(
                    [self.model_tester.batch_size, height, width],
                    device=torch_device).long()
            model = model_class(config)
            model.gradient_checkpointing_enable()
            model.to(torch_device)
            model.train()
            inputs = self._prepare_for_class(inputs_dict,
                                             model_class,
                                             return_labels=True)
            loss = model(**inputs).loss
            loss.backward()

    def test_initialization(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        configs_no_init = _config_zero_init(config)
        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)
            for name, param in model.named_parameters():
                # we skip lambda parameters as these require special initial values
                # determined by config.layer_scale_init_value
                if "lambda" in name:
                    continue
                if param.requires_grad:
                    self.assertIn(
                        ((param.data.mean() * 1e9).round() / 1e9).item(),
                        [0.0, 1.0],
                        msg=
                        f"Parameter {name} of model {model_class} seems not properly initialized",
                    )

    def check_pt_tf_outputs(self,
                            tf_outputs,
                            pt_outputs,
                            model_class,
                            tol=2e-4,
                            name="outputs",
                            attributes=None):
        # We override with a slightly higher tol value, as semseg models tend to diverge a bit more
        super().check_pt_tf_outputs(tf_outputs, pt_outputs, model_class, tol,
                                    name, attributes)

    def test_for_image_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_image_classification(
            *config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in DATA2VEC_VISION_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = Data2VecVisionModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
Example #24
class DonutSwinModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = (DonutSwinModel, ) if is_torch_available() else ()
    fx_compatible = True

    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False

    def setUp(self):
        self.model_tester = DonutSwinModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=DonutSwinConfig,
                                          embed_dim=37)

    def test_config(self):
        self.create_and_test_config_common_properties()
        self.config_tester.create_and_test_config_to_json_string()
        self.config_tester.create_and_test_config_to_json_file()
        self.config_tester.create_and_test_config_from_and_save_pretrained()
        self.config_tester.create_and_test_config_with_num_labels()
        self.config_tester.check_config_can_be_init_without_params()
        self.config_tester.check_config_arguments_init()

    def create_and_test_config_common_properties(self):
        return

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_inputs_embeds(self):
        # DonutSwin does not use inputs_embeds
        pass

    def test_model_common_attributes(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            self.assertIsInstance(model.get_input_embeddings(), (nn.Module))
            x = model.get_output_embeddings()
            self.assertTrue(x is None or isinstance(x, nn.Linear))

    def test_forward_signature(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            signature = inspect.signature(model.forward)
            # signature.parameters is an OrderedDict => so arg_names order is deterministic
            arg_names = [*signature.parameters.keys()]

            expected_arg_names = ["pixel_values"]
            self.assertListEqual(arg_names[:1], expected_arg_names)

    def test_attention_outputs(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        config.return_dict = True

        for model_class in self.all_model_classes:
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = False
            config.return_dict = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.attentions
            expected_num_attentions = len(self.model_tester.depths)
            self.assertEqual(len(attentions), expected_num_attentions)

            # check that output_attentions also work using config
            del inputs_dict["output_attentions"]
            config.output_attentions = True
            window_size_squared = config.window_size**2
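            # Swin-style self-attention is computed within local windows, so each
            # attention map spans window_size**2 tokens along both axes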
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.attentions
            self.assertEqual(len(attentions), expected_num_attentions)

            self.assertListEqual(
                list(attentions[0].shape[-3:]),
                [
                    self.model_tester.num_heads[0], window_size_squared,
                    window_size_squared
                ],
            )
            out_len = len(outputs)

            # Check attention is always last and order is fine
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))

            if hasattr(self.model_tester, "num_hidden_states_types"):
                added_hidden_states = self.model_tester.num_hidden_states_types
            else:
                # also another +1 for reshaped_hidden_states
                added_hidden_states = 2
            self.assertEqual(out_len + added_hidden_states, len(outputs))

            self_attentions = outputs.attentions

            self.assertEqual(len(self_attentions), expected_num_attentions)

            self.assertListEqual(
                list(self_attentions[0].shape[-3:]),
                [
                    self.model_tester.num_heads[0], window_size_squared,
                    window_size_squared
                ],
            )

    def check_hidden_states_output(self, inputs_dict, config, model_class,
                                   image_size):
        model = model_class(config)
        model.to(torch_device)
        model.eval()

        with torch.no_grad():
            outputs = model(
                **self._prepare_for_class(inputs_dict, model_class))

        hidden_states = outputs.hidden_states

        expected_num_layers = getattr(self.model_tester,
                                      "expected_num_hidden_layers",
                                      len(self.model_tester.depths) + 1)
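        # one hidden state per stage (len(depths)) plus the initial patch embeddings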
        self.assertEqual(len(hidden_states), expected_num_layers)

        # DonutSwin has a different seq_length
        patch_size = (config.patch_size if isinstance(
            config.patch_size, collections.abc.Iterable) else
                      (config.patch_size, config.patch_size))

        num_patches = (image_size[1] // patch_size[1]) * (image_size[0] //
                                                          patch_size[0])
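        # e.g., a (hypothetical) 32x32 image with patch_size 4 yields
        # (32 // 4) * (32 // 4) = 64 patch tokens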

        self.assertListEqual(
            list(hidden_states[0].shape[-2:]),
            [num_patches, self.model_tester.embed_dim],
        )

        reshaped_hidden_states = outputs.reshaped_hidden_states
        self.assertEqual(len(reshaped_hidden_states), expected_num_layers)

        batch_size, num_channels, height, width = reshaped_hidden_states[
            0].shape
        reshaped_hidden_states = (reshaped_hidden_states[0].view(
            batch_size, num_channels, height * width).permute(0, 2, 1))
        self.assertListEqual(
            list(reshaped_hidden_states.shape[-2:]),
            [num_patches, self.model_tester.embed_dim],
        )

    def test_hidden_states_output(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        image_size = (self.model_tester.image_size if isinstance(
            self.model_tester.image_size, collections.abc.Iterable) else
                      (self.model_tester.image_size,
                       self.model_tester.image_size))

        for model_class in self.all_model_classes:
            inputs_dict["output_hidden_states"] = True
            self.check_hidden_states_output(inputs_dict, config, model_class,
                                            image_size)

            # check that output_hidden_states also work using config
            del inputs_dict["output_hidden_states"]
            config.output_hidden_states = True

            self.check_hidden_states_output(inputs_dict, config, model_class,
                                            image_size)

    def test_hidden_states_output_with_padding(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        config.patch_size = 3

        image_size = (self.model_tester.image_size if isinstance(
            self.model_tester.image_size, collections.abc.Iterable) else
                      (self.model_tester.image_size,
                       self.model_tester.image_size))
        patch_size = (config.patch_size if isinstance(
            config.patch_size, collections.abc.Iterable) else
                      (config.patch_size, config.patch_size))

        padded_height = image_size[0] + patch_size[0] - (image_size[0] %
                                                         patch_size[0])
        padded_width = image_size[1] + patch_size[1] - (image_size[1] %
                                                        patch_size[1])
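        # e.g., a (hypothetical) image side of 32 with patch_size 3 is padded to
        # 32 + 3 - (32 % 3) = 33, the next size divisible by the patch size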

        for model_class in self.all_model_classes:
            inputs_dict["output_hidden_states"] = True
            self.check_hidden_states_output(inputs_dict, config, model_class,
                                            (padded_height, padded_width))

            # check that output_hidden_states also work using config
            del inputs_dict["output_hidden_states"]
            config.output_hidden_states = True
            self.check_hidden_states_output(inputs_dict, config, model_class,
                                            (padded_height, padded_width))

    @slow
    def test_model_from_pretrained(self):
        for model_name in DONUT_SWIN_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = DonutSwinModel.from_pretrained(model_name)
            self.assertIsNotNone(model)

    def test_initialization(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        configs_no_init = _config_zero_init(config)
        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)
            for name, param in model.named_parameters():
                if "embeddings" not in name and param.requires_grad:
                    self.assertIn(
                        ((param.data.mean() * 1e9).round() / 1e9).item(),
                        [0.0, 1.0],
                        msg=
                        f"Parameter {name} of model {model_class} seems not properly initialized",
                    )

    def _create_and_check_torch_fx_tracing(self,
                                           config,
                                           inputs_dict,
                                           output_loss=False):
        if not is_torch_fx_available() or not self.fx_compatible:
            return

        configs_no_init = _config_zero_init(
            config)  # To be sure we have no Nan
        configs_no_init.return_dict = False

        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)
            model.to(torch_device)
            model.eval()
            inputs = self._prepare_for_class(inputs_dict,
                                             model_class,
                                             return_labels=output_loss)

            try:
                if model.config.is_encoder_decoder:
                    model.config.use_cache = False  # FSMT still requires this hack -> FSMT should probably be refactored similar to BART afterward
                    labels = inputs.get("labels", None)
                    input_names = [
                        "input_ids", "attention_mask", "decoder_input_ids",
                        "decoder_attention_mask"
                    ]
                    if labels is not None:
                        input_names.append("labels")

                    filtered_inputs = {
                        k: v
                        for (k, v) in inputs.items() if k in input_names
                    }
                    input_names = list(filtered_inputs.keys())

                    model_output = model(**filtered_inputs)

                    traced_model = symbolic_trace(model, input_names)
                    traced_output = traced_model(**filtered_inputs)
                else:
                    input_names = [
                        "input_ids", "attention_mask", "token_type_ids",
                        "pixel_values"
                    ]

                    labels = inputs.get("labels", None)
                    start_positions = inputs.get("start_positions", None)
                    end_positions = inputs.get("end_positions", None)
                    if labels is not None:
                        input_names.append("labels")
                    if start_positions is not None:
                        input_names.append("start_positions")
                    if end_positions is not None:
                        input_names.append("end_positions")

                    filtered_inputs = {
                        k: v
                        for (k, v) in inputs.items() if k in input_names
                    }
                    input_names = list(filtered_inputs.keys())

                    model_output = model(**filtered_inputs)

                    traced_model = symbolic_trace(model, input_names)
                    traced_output = traced_model(**filtered_inputs)

            except RuntimeError as e:
                self.fail(f"Couldn't trace module: {e}")

            def flatten_output(output):
                flatten = []
                for x in output:
                    if isinstance(x, (tuple, list)):
                        flatten += flatten_output(x)
                    elif not isinstance(x, torch.Tensor):
                        continue
                    else:
                        flatten.append(x)
                return flatten

            model_output = flatten_output(model_output)
            traced_output = flatten_output(traced_output)
            num_outputs = len(model_output)

            for i in range(num_outputs):
                self.assertTrue(
                    torch.allclose(model_output[i], traced_output[i]),
                    f"traced {i}th output doesn't match model {i}th output for {model_class}",
                )

            # Test that the model can be serialized and restored properly
            with tempfile.TemporaryDirectory() as tmp_dir_name:
                pkl_file_name = os.path.join(tmp_dir_name, "model.pkl")
                try:
                    with open(pkl_file_name, "wb") as f:
                        pickle.dump(traced_model, f)
                    with open(pkl_file_name, "rb") as f:
                        loaded = pickle.load(f)
                except Exception as e:
                    self.fail(
                        f"Couldn't serialize / deserialize the traced model: {e}"
                    )

                loaded_output = loaded(**filtered_inputs)
                loaded_output = flatten_output(loaded_output)

                for i in range(num_outputs):
                    self.assertTrue(
                        torch.allclose(model_output[i], loaded_output[i]),
                        f"serialized model {i}th output doesn't match model {i}th output for {model_class}",
                    )
Example #25
class VanModelTest(ModelTesterMixin, unittest.TestCase):
    """
    Here we also overwrite some of the tests of test_modeling_common.py, as Van does not use input_ids, inputs_embeds,
    attention_mask and seq_length.
    """

    all_model_classes = (VanModel, VanForImageClassification) if is_torch_available() else ()

    test_pruning = False
    test_torchscript = False
    test_resize_embeddings = False
    test_head_masking = False
    has_attentions = False

    def setUp(self):
        self.model_tester = VanModelTester(self)
        self.config_tester = ConfigTester(self, config_class=VanConfig, has_text_modality=False, hidden_size=37)

    def test_config(self):
        self.create_and_test_config_common_properties()
        self.config_tester.create_and_test_config_to_json_string()
        self.config_tester.create_and_test_config_to_json_file()
        self.config_tester.create_and_test_config_from_and_save_pretrained()
        self.config_tester.create_and_test_config_with_num_labels()
        self.config_tester.check_config_can_be_init_without_params()
        self.config_tester.check_config_arguments_init()

    def create_and_test_config_common_properties(self):
        return

    @unittest.skip(reason="Van does not use inputs_embeds")
    def test_inputs_embeds(self):
        pass

    @unittest.skip(reason="Van does not support input and output embeddings")
    def test_model_common_attributes(self):
        pass

    def test_forward_signature(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            signature = inspect.signature(model.forward)
            # signature.parameters is an OrderedDict => so arg_names order is deterministic
            arg_names = [*signature.parameters.keys()]

            expected_arg_names = ["pixel_values"]
            self.assertListEqual(arg_names[:1], expected_arg_names)

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    @require_scipy
    def test_initialization(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        configs_no_init = _config_zero_init(config)

        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)
            for name, module in model.named_modules():
                if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm, nn.LayerNorm)):
                    self.assertTrue(
                        torch.all(module.weight == 1),
                        msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                    )
                    self.assertTrue(
                        torch.all(module.bias == 0),
                        msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                    )
                elif isinstance(module, nn.Conv2d):
                    fan_out = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
                    fan_out //= module.groups
                    std = math.sqrt(2.0 / fan_out)
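                    # Kaiming (He) fan-out initialization draws weights from
                    # N(0, 2 / fan_out), so dividing by this std should leave
                    # roughly standard-normal samples for the normality test below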
                    # divide by std -> mean = 0, std = 1
                    data = module.weight.data.cpu().flatten().numpy() / std
                    test = stats.anderson(data)
                    self.assertTrue(test.statistic > 0.05)

    def test_hidden_states_output(self):
        def check_hidden_states_output(inputs_dict, config, model_class):
            model = model_class(config)
            model.to(torch_device)
            model.eval()

            with torch.no_grad():
                outputs = model(**self._prepare_for_class(inputs_dict, model_class))

            hidden_states = outputs.encoder_hidden_states if config.is_encoder_decoder else outputs.hidden_states

            expected_num_stages = len(self.model_tester.hidden_sizes)
            # Van has no embeddings
            self.assertEqual(len(hidden_states), expected_num_stages)

            # Van's feature maps are of shape (batch_size, num_channels, height, width)
            self.assertListEqual(
                list(hidden_states[0].shape[-2:]),
                [self.model_tester.image_size // 4, self.model_tester.image_size // 4],
            )

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            inputs_dict["output_hidden_states"] = True
            check_hidden_states_output(inputs_dict, config, model_class)

            # check that output_hidden_states also work using config
            del inputs_dict["output_hidden_states"]
            config.output_hidden_states = True

            check_hidden_states_output(inputs_dict, config, model_class)

    def test_for_image_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_image_classification(*config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in VAN_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = VanModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
import inspect
import math
import tempfile
import unittest

import numpy as np

from transformers import ViTMAEConfig
from transformers.testing_utils import require_torch, require_vision, slow, torch_device
from transformers.utils import cached_property, is_torch_available, is_vision_available

from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor

if is_torch_available():
    import torch
    from torch import nn

    from transformers import ViTMAEForPreTraining, ViTMAEModel
    from transformers.models.vit.modeling_vit import VIT_PRETRAINED_MODEL_ARCHIVE_LIST

if is_vision_available():
    from PIL import Image

    from transformers import ViTFeatureExtractor


class ViTMAEModelTester:
    def __init__(
        self,
Example #27
class PerceiverModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = ((
        PerceiverModel,
        PerceiverForMaskedLM,
        PerceiverForImageClassificationLearned,
        PerceiverForImageClassificationConvProcessing,
        PerceiverForImageClassificationFourier,
        PerceiverForOpticalFlow,
        PerceiverForMultimodalAutoencoding,
        PerceiverForSequenceClassification,
    ) if is_torch_available() else ())
    test_pruning = False
    test_head_masking = False
    test_torchscript = False

    maxDiff = None

    def setUp(self):
        self.model_tester = PerceiverModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=PerceiverConfig,
                                          hidden_size=37)

    def _prepare_for_class(self,
                           inputs_dict,
                           model_class,
                           return_labels=False):
        inputs_dict = copy.deepcopy(inputs_dict)

        if model_class.__name__ == "PerceiverForMultimodalAutoencoding":
            inputs_dict[
                "subsampled_output_points"] = self.model_tester.subsampling

        if return_labels:
            if model_class in [
                    *get_values(MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING),
                    *get_values(MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING),
            ]:
                inputs_dict["labels"] = torch.zeros(
                    self.model_tester.batch_size,
                    dtype=torch.long,
                    device=torch_device)
            elif model_class in [
                    *get_values(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING),
                    *get_values(MODEL_FOR_MASKED_LM_MAPPING),
            ]:
                inputs_dict["labels"] = torch.zeros(
                    (self.model_tester.batch_size,
                     self.model_tester.seq_length),
                    dtype=torch.long,
                    device=torch_device)
        return inputs_dict

    def test_config(self):
        # we don't test common_properties and arguments_init as these don't apply for Perceiver
        self.config_tester.create_and_test_config_to_json_string()
        self.config_tester.create_and_test_config_to_json_file()
        self.config_tester.create_and_test_config_from_and_save_pretrained()
        self.config_tester.create_and_test_config_with_num_labels()
        self.config_tester.check_config_can_be_init_without_params()

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs(
            model_class=PerceiverForMaskedLM)
        self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs(
            model_class=PerceiverForSequenceClassification)
        self.model_tester.create_and_check_for_sequence_classification(
            *config_and_inputs)

    def test_for_image_classification_learned(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs(
            model_class=PerceiverForImageClassificationLearned)
        self.model_tester.create_and_check_for_image_classification_learned(
            *config_and_inputs)

    def test_for_image_classification_fourier(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs(
            model_class=PerceiverForImageClassificationFourier)
        self.model_tester.create_and_check_for_image_classification_fourier(
            *config_and_inputs)

    def test_for_image_classification_conv(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs(
            model_class=PerceiverForImageClassificationConvProcessing)
        self.model_tester.create_and_check_for_image_classification_conv(
            *config_and_inputs)

    def test_model_common_attributes(self):
        for model_class in self.all_model_classes:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class(
                model_class)
            model = model_class(config)
            # we overwrite this, as the embeddings of Perceiver are an instance of nn.Parameter
            # and Perceiver doesn't support get_output_embeddings
            self.assertIsInstance(model.get_input_embeddings(), (nn.Parameter))

    def test_training(self):
        if not self.model_tester.is_training:
            return

        for model_class in self.all_model_classes:
            if model_class in [
                    *get_values(MODEL_MAPPING),
                    PerceiverForOpticalFlow,
                    PerceiverForMultimodalAutoencoding,
            ]:
                continue

            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class(
                model_class)
            config.return_dict = True

            model = model_class(config)
            model.to(torch_device)
            model.train()
            inputs = self._prepare_for_class(inputs_dict,
                                             model_class,
                                             return_labels=True)
            loss = model(**inputs).loss
            loss.backward()

    def test_forward_signature(self):
        for model_class in self.all_model_classes:
            config, _ = self.model_tester.prepare_config_and_inputs_for_model_class(
                model_class)

            model = model_class(config)
            signature = inspect.signature(model.forward)
            # signature.parameters is an OrderedDict => so arg_names order is deterministic
            arg_names = [*signature.parameters.keys()]

            expected_arg_names = ["inputs"]
            self.assertListEqual(arg_names[:1], expected_arg_names)

    def test_determinism(self):
        for model_class in self.all_model_classes:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class(
                model_class)

            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                inputs_dict = self._prepare_for_class(inputs_dict, model_class)
                first = model(**inputs_dict)[0]
                second = model(**inputs_dict)[0]

            if model_class.__name__ == "PerceiverForMultimodalAutoencoding":
                # model outputs a dictionary with logits per modality, let's verify each modality
                for modality in first.keys():
                    out_1 = first[modality].cpu().numpy()
                    out_2 = second[modality].cpu().numpy()
                    out_1 = out_1[~np.isnan(out_1)]
                    out_2 = out_2[~np.isnan(out_2)]
                    max_diff = np.amax(np.abs(out_1 - out_2))
                    self.assertLessEqual(max_diff, 1e-5)
            else:
                out_1 = first.cpu().numpy()
                out_2 = second.cpu().numpy()
                out_1 = out_1[~np.isnan(out_1)]
                out_2 = out_2[~np.isnan(out_2)]
                max_diff = np.amax(np.abs(out_1 - out_2))
                self.assertLessEqual(max_diff, 1e-5)

    def test_attention_outputs(self):
        seq_len = getattr(self.model_tester, "num_latents", None)

        for model_class in self.all_model_classes:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class(
                model_class)
            config.return_dict = True

            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = False
            config.return_dict = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))
            self_attentions = outputs.attentions
            cross_attentions = outputs.cross_attentions

            # check expected number of attentions depending on model class
            expected_num_self_attentions = self.model_tester.num_blocks * self.model_tester.num_self_attends_per_block
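            # e.g., with (hypothetical) num_blocks=1 and num_self_attends_per_block=2,
            # two self-attention maps are expected in total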
            if model.__class__.__name__ == "PerceiverModel":
                # we expect to have 1 cross-attention, namely the one in the PerceiverEncoder
                expected_num_cross_attentions = 1
            else:
                # we expect to have 2 cross-attentions, namely one in the PerceiverEncoder, and one in PerceiverBasicDecoder
                expected_num_cross_attentions = 2
            self.assertEqual(len(self_attentions),
                             expected_num_self_attentions)
            self.assertEqual(len(cross_attentions),
                             expected_num_cross_attentions)

            # check that output_attentions also work using config
            del inputs_dict["output_attentions"]
            config.output_attentions = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))
            self_attentions = outputs.attentions
            cross_attentions = outputs.cross_attentions
            self.assertEqual(len(self_attentions),
                             expected_num_self_attentions)
            self.assertEqual(len(cross_attentions),
                             expected_num_cross_attentions)

            self.assertListEqual(
                list(self_attentions[0].shape[-3:]),
                [self.model_tester.num_self_attention_heads, seq_len, seq_len],
            )
            out_len = len(outputs)

            # Check attention is always last and order is fine
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))

            self.assertEqual(out_len + 1, len(outputs))

            self_attentions = outputs.attentions

            self.assertEqual(len(self_attentions),
                             expected_num_self_attentions)
            self.assertListEqual(
                list(self_attentions[0].shape[-3:]),
                [self.model_tester.num_self_attention_heads, seq_len, seq_len],
            )

    def test_hidden_states_output(self):
        def check_hidden_states_output(inputs_dict, config, model_class):
            model = model_class(config)
            model.to(torch_device)
            model.eval()

            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))

            hidden_states = outputs.hidden_states

            expected_num_layers = self.model_tester.num_blocks * self.model_tester.num_self_attends_per_block + 1
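            # one hidden state per self-attention layer; the +1 presumably
            # accounts for the initial latent array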
            self.assertEqual(len(hidden_states), expected_num_layers)

            seq_length = self.model_tester.num_latents

            self.assertListEqual(
                list(hidden_states[0].shape[-2:]),
                [seq_length, self.model_tester.d_latents],
            )

        for model_class in self.all_model_classes:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class(
                model_class)

            inputs_dict["output_hidden_states"] = True
            check_hidden_states_output(inputs_dict, config, model_class)

            # check that output_hidden_states also work using config
            del inputs_dict["output_hidden_states"]
            config.output_hidden_states = True

            check_hidden_states_output(inputs_dict, config, model_class)

    def test_model_outputs_equivalence(self):
        def set_nan_tensor_to_zero(t):
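            # NaN is the only float that compares unequal to itself, so `t != t`
            # selects exactly the NaN entries and zeroes them in place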
            t[t != t] = 0
            return t

        def check_equivalence(model,
                              tuple_inputs,
                              dict_inputs,
                              additional_kwargs={}):
            with torch.no_grad():
                tuple_output = model(**tuple_inputs,
                                     return_dict=False,
                                     **additional_kwargs)
                dict_output = model(**dict_inputs,
                                    return_dict=True,
                                    **additional_kwargs).to_tuple()

                def recursive_check(tuple_object, dict_object):
                    if isinstance(tuple_object, (List, Tuple)):
                        for tuple_iterable_value, dict_iterable_value in zip(
                                tuple_object, dict_object):
                            recursive_check(tuple_iterable_value,
                                            dict_iterable_value)
                    elif isinstance(tuple_object, Dict):
                        for tuple_iterable_value, dict_iterable_value in zip(
                                tuple_object.values(), dict_object.values()):
                            recursive_check(tuple_iterable_value,
                                            dict_iterable_value)
                    elif tuple_object is None:
                        return
                    else:
                        self.assertTrue(
                            torch.allclose(
                                set_nan_tensor_to_zero(tuple_object),
                                set_nan_tensor_to_zero(dict_object),
                                atol=1e-5),
                            msg=
                            f"Tuple and dict output are not equal. Difference: {torch.max(torch.abs(tuple_object - dict_object))}. "
                            f"Tuple has `nan`: {torch.isnan(tuple_object).any()} and `inf`: {torch.isinf(tuple_object)}. "
                            f"Dict has `nan`: {torch.isnan(dict_object).any()} and `inf`: {torch.isinf(dict_object)}.",
                        )

                recursive_check(tuple_output, dict_output)

        for model_class in self.all_model_classes:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class(
                model_class)

            model = model_class(config)
            model.to(torch_device)
            model.eval()

            tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
            dict_inputs = self._prepare_for_class(inputs_dict, model_class)
            check_equivalence(model, tuple_inputs, dict_inputs)

            if model_class.__name__ not in [
                    "PerceiverForOpticalFlow",
                    "PerceiverForMultimodalAutoencoding"
            ]:
                # optical flow + multimodal models don't support training for now
                tuple_inputs = self._prepare_for_class(inputs_dict,
                                                       model_class,
                                                       return_labels=True)
                dict_inputs = self._prepare_for_class(inputs_dict,
                                                      model_class,
                                                      return_labels=True)
                check_equivalence(model, tuple_inputs, dict_inputs)

            tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
            dict_inputs = self._prepare_for_class(inputs_dict, model_class)

            check_equivalence(model, tuple_inputs, dict_inputs,
                              {"output_hidden_states": True})

            tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
            dict_inputs = self._prepare_for_class(inputs_dict, model_class)
            check_equivalence(model, tuple_inputs, dict_inputs,
                              {"output_attentions": True})

            if model_class.__name__ not in [
                    "PerceiverForOpticalFlow",
                    "PerceiverForMultimodalAutoencoding"
            ]:
                # optical flow + multimodal models don't support training for now
                tuple_inputs = self._prepare_for_class(inputs_dict,
                                                       model_class,
                                                       return_labels=True)
                dict_inputs = self._prepare_for_class(inputs_dict,
                                                      model_class,
                                                      return_labels=True)
                check_equivalence(model, tuple_inputs, dict_inputs,
                                  {"output_hidden_states": True})

            if model_class.__name__ not in [
                    "PerceiverForOpticalFlow",
                    "PerceiverForMultimodalAutoencoding"
            ]:
                # optical flow + multimodal models don't support training for now
                tuple_inputs = self._prepare_for_class(inputs_dict,
                                                       model_class,
                                                       return_labels=True)
                dict_inputs = self._prepare_for_class(inputs_dict,
                                                      model_class,
                                                      return_labels=True)
                check_equivalence(model, tuple_inputs, dict_inputs,
                                  {"output_attentions": True})

            if model_class.__name__ not in [
                    "PerceiverForOpticalFlow",
                    "PerceiverForMultimodalAutoencoding"
            ]:
                # optical flow + multimodal models don't support training for now
                tuple_inputs = self._prepare_for_class(inputs_dict,
                                                       model_class,
                                                       return_labels=True)
                dict_inputs = self._prepare_for_class(inputs_dict,
                                                      model_class,
                                                      return_labels=True)
                check_equivalence(model, tuple_inputs, dict_inputs, {
                    "output_hidden_states": True,
                    "output_attentions": True
                })

    def test_retain_grad_hidden_states_attentions(self):
        # no need to test all models as different heads yield the same functionality
        model_class = PerceiverForMaskedLM
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class(
            model_class)
        config.output_hidden_states = True
        config.output_attentions = True

        model = model_class(config)
        model.to(torch_device)

        inputs = self._prepare_for_class(inputs_dict, model_class)

        outputs = model(**inputs)

        output = outputs[0]

        # Encoder-only model
        hidden_states = outputs.hidden_states[0]
        attentions = outputs.attentions[0]

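        # hidden_states and attentions are non-leaf tensors, so retain_grad() is needed
        # for their .grad attributes to be populated by the backward pass below.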
        hidden_states.retain_grad()
        attentions.retain_grad()

        output.flatten()[0].backward(retain_graph=True)

        self.assertIsNotNone(hidden_states.grad)
        self.assertIsNotNone(attentions.grad)

    def test_feed_forward_chunking(self):
        for model_class in self.all_model_classes:
            original_config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class(
                model_class)
            torch.manual_seed(0)
            config = copy.deepcopy(original_config)
            model = model_class(config)
            model.to(torch_device)
            model.eval()

            hidden_states_no_chunk = model(
                **self._prepare_for_class(inputs_dict, model_class))[0]

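            # Re-run with chunk_size_feed_forward=1 so the feed-forward layers are applied
            # in chunks along the sequence dimension; outputs should match within tolerance.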
            torch.manual_seed(0)
            config.chunk_size_feed_forward = 1
            model = model_class(config)
            model.to(torch_device)
            model.eval()

            hidden_states_with_chunk = model(
                **self._prepare_for_class(inputs_dict, model_class))[0]
            if model_class.__name__ == "PerceiverForMultimodalAutoencoding":
                # model outputs a dictionary with logits for each modality
                for modality in hidden_states_no_chunk.keys():
                    self.assertTrue(
                        torch.allclose(hidden_states_no_chunk[modality],
                                       hidden_states_with_chunk[modality],
                                       atol=1e-3))
            else:
                self.assertTrue(
                    torch.allclose(hidden_states_no_chunk,
                                   hidden_states_with_chunk,
                                   atol=1e-3))

    def test_save_load(self):
        for model_class in self.all_model_classes:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class(
                model_class)

            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))

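            # PerceiverForMultimodalAutoencoding returns a dict of logits keyed by modality,
            # so compare the save/reload outputs per modality.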
            if model_class.__name__ == "PerceiverForMultimodalAutoencoding":
                for modality in outputs[0].keys():
                    out_2 = outputs[0][modality].cpu().numpy()
                    out_2[np.isnan(out_2)] = 0

                    with tempfile.TemporaryDirectory() as tmpdirname:
                        model.save_pretrained(tmpdirname)
                        model = model_class.from_pretrained(tmpdirname)
                        model.to(torch_device)
                        with torch.no_grad():
                            after_outputs = model(**self._prepare_for_class(
                                inputs_dict, model_class))

                        # Make sure we don't have nans
                        out_1 = after_outputs[0][modality].cpu().numpy()
                        out_1[np.isnan(out_1)] = 0
                        max_diff = np.amax(np.abs(out_1 - out_2))
                        self.assertLessEqual(max_diff, 1e-5)

            else:
                out_2 = outputs[0].cpu().numpy()
                out_2[np.isnan(out_2)] = 0

                with tempfile.TemporaryDirectory() as tmpdirname:
                    model.save_pretrained(tmpdirname)
                    model = model_class.from_pretrained(tmpdirname)
                    model.to(torch_device)
                    with torch.no_grad():
                        after_outputs = model(**self._prepare_for_class(
                            inputs_dict, model_class))

                    # Make sure we don't have nans
                    out_1 = after_outputs[0].cpu().numpy()
                    out_1[np.isnan(out_1)] = 0
                    max_diff = np.amax(np.abs(out_1 - out_2))
                    self.assertLessEqual(max_diff, 1e-5)

    def test_correct_missing_keys(self):
        if not self.test_missing_keys:
            return
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            # unlike BERT, most Perceiver models don't have a typical head
            if model_class in [
                    PerceiverForOpticalFlow,
                    PerceiverForMultimodalAutoencoding,
                    *get_values(MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING),
                    *get_values(MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING),
            ]:
                continue

            model = model_class(config)
            base_model_prefix = model.base_model_prefix

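            # Save only the base model, then reload it into the full model with a head:
            # the head weights should be reported as missing keys.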
            if hasattr(model, base_model_prefix):
                with tempfile.TemporaryDirectory() as temp_dir_name:
                    model.base_model.save_pretrained(temp_dir_name)
                    model, loading_info = model_class.from_pretrained(
                        temp_dir_name, output_loading_info=True)
                    with self.subTest(
                            msg=f"Missing keys for {model.__class__.__name__}"
                    ):
                        self.assertGreater(len(loading_info["missing_keys"]),
                                           0)

    def test_problem_types(self):
        problem_types = [
            {
                "title": "multi_label_classification",
                "num_labels": 2,
                "dtype": torch.float
            },
            {
                "title": "single_label_classification",
                "num_labels": 1,
                "dtype": torch.long
            },
            {
                "title": "regression",
                "num_labels": 1,
                "dtype": torch.float
            },
        ]

        for model_class in self.all_model_classes:
            if model_class not in get_values(
                    MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING):
                continue

            config, inputs, input_mask, _, _ = self.model_tester.prepare_config_and_inputs(
                model_class=model_class)
            inputs_dict = dict(inputs=inputs, attention_mask=input_mask)

            for problem_type in problem_types:
                with self.subTest(
                        msg=
                        f"Testing {model_class} with {problem_type['title']}"):

                    config.problem_type = problem_type["title"]
                    config.num_labels = problem_type["num_labels"]

                    model = model_class(config)
                    model.to(torch_device)
                    model.train()

                    inputs = self._prepare_for_class(inputs_dict,
                                                     model_class,
                                                     return_labels=True)

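                    # Multi-label classification expects num_labels targets per example,
                    # so tile the single label across the label dimension.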
                    if problem_type["num_labels"] > 1:
                        inputs["labels"] = inputs["labels"].unsqueeze(
                            1).repeat(1, problem_type["num_labels"])

                    inputs["labels"] = inputs["labels"].to(
                        problem_type["dtype"])

                    # This tests that we do not trigger the PyTorch warning "Using a target size that is different
                    # to the input size. This will likely lead to incorrect results due to broadcasting. Please ensure
                    # they have the same size.", which is a symptom that something is wrong with the regression problem.
                    # See https://github.com/huggingface/transformers/issues/11780
                    with warnings.catch_warnings(record=True) as warning_list:
                        loss = model(**inputs).loss
                    for w in warning_list:
                        if "Using a target size that is different to the input size" in str(
                                w.message):
                            raise ValueError(
                                f"Something is going wrong in the regression problem: intercepted {w.message}"
                            )

                    loss.backward()

    @require_torch_multi_gpu
    @unittest.skip(
        reason=
        "Perceiver does not work with data parallel (DP) because of a bug in PyTorch: https://github.com/pytorch/pytorch/issues/36035"
    )
    def test_multi_gpu_data_parallel_forward(self):
        pass

    @unittest.skip(
        reason=
        "Perceiver models don't have a typical head like is the case with BERT"
    )
    def test_save_load_fast_init_from_base(self):
        pass

    @unittest.skip(
        reason=
        "Perceiver models don't have a typical head like is the case with BERT"
    )
    def test_save_load_fast_init_to_base(self):
        pass

    @unittest.skip(reason="Perceiver doesn't support resize_token_embeddings")
    def test_resize_tokens_embeddings(self):
        pass

    @unittest.skip(reason="Perceiver doesn't support resize_token_embeddings")
    def test_resize_embeddings_untied(self):
        pass

    @unittest.skip(reason="Perceiver doesn't support inputs_embeds")
    def test_inputs_embeds(self):
        pass

    @unittest.skip(reason="Perceiver doesn't support the AutoModel API")
    def test_load_with_mismatched_shapes(self):
        pass

    @slow
    def test_model_from_pretrained(self):
        for model_name in PERCEIVER_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = PerceiverModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
Example #28
class SwinModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = ((
        SwinModel,
        SwinForImageClassification,
        SwinForMaskedImageModeling,
    ) if is_torch_available() else ())
    fx_compatible = True

    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False

    def setUp(self):
        self.model_tester = SwinModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=SwinConfig,
                                          embed_dim=37)

    def test_config(self):
        self.create_and_test_config_common_properties()
        self.config_tester.create_and_test_config_to_json_string()
        self.config_tester.create_and_test_config_to_json_file()
        self.config_tester.create_and_test_config_from_and_save_pretrained()
        self.config_tester.create_and_test_config_with_num_labels()
        self.config_tester.check_config_can_be_init_without_params()
        self.config_tester.check_config_arguments_init()

    def create_and_test_config_common_properties(self):
        return

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_inputs_embeds(self):
        # Swin does not use inputs_embeds
        pass

    def test_model_common_attributes(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            self.assertIsInstance(model.get_input_embeddings(), (nn.Module))
            x = model.get_output_embeddings()
            self.assertTrue(x is None or isinstance(x, nn.Linear))

    def test_forward_signature(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            signature = inspect.signature(model.forward)
            # signature.parameters is an OrderedDict => so arg_names order is deterministic
            arg_names = [*signature.parameters.keys()]

            expected_arg_names = ["pixel_values"]
            self.assertListEqual(arg_names[:1], expected_arg_names)

    def test_attention_outputs(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        config.return_dict = True

        for model_class in self.all_model_classes:
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = False
            config.return_dict = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.attentions
            expected_num_attentions = len(self.model_tester.depths)
            self.assertEqual(len(attentions), expected_num_attentions)

            # check that output_attentions also work using config
            del inputs_dict["output_attentions"]
            config.output_attentions = True
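            # Swin computes self-attention within local windows, so each attention map has
            # shape (num_heads, window_size**2, window_size**2).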
            window_size_squared = config.window_size**2
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.attentions
            self.assertEqual(len(attentions), expected_num_attentions)

            self.assertListEqual(
                list(attentions[0].shape[-3:]),
                [
                    self.model_tester.num_heads[0], window_size_squared,
                    window_size_squared
                ],
            )
            out_len = len(outputs)

            # Check attention is always last and order is fine
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))

            if hasattr(self.model_tester, "num_hidden_states_types"):
                added_hidden_states = self.model_tester.num_hidden_states_types
            else:
                # +1 for hidden_states and another +1 for reshaped_hidden_states
                added_hidden_states = 2
            self.assertEqual(out_len + added_hidden_states, len(outputs))

            self_attentions = outputs.attentions

            self.assertEqual(len(self_attentions), expected_num_attentions)

            self.assertListEqual(
                list(self_attentions[0].shape[-3:]),
                [
                    self.model_tester.num_heads[0], window_size_squared,
                    window_size_squared
                ],
            )

    def check_hidden_states_output(self, inputs_dict, config, model_class,
                                   image_size):
        model = model_class(config)
        model.to(torch_device)
        model.eval()

        with torch.no_grad():
            outputs = model(
                **self._prepare_for_class(inputs_dict, model_class))

        hidden_states = outputs.hidden_states

        expected_num_layers = getattr(self.model_tester,
                                      "expected_num_hidden_layers",
                                      len(self.model_tester.depths) + 1)
        self.assertEqual(len(hidden_states), expected_num_layers)

        # Swin has a different seq_length
        patch_size = to_2tuple(config.patch_size)

        num_patches = (image_size[1] // patch_size[1]) * (image_size[0] //
                                                          patch_size[0])

        self.assertListEqual(
            list(hidden_states[0].shape[-2:]),
            [num_patches, self.model_tester.embed_dim],
        )

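        # reshaped_hidden_states come back as (batch, channels, height, width); flatten the
        # spatial dimensions to (batch, seq_len, channels) before comparing shapes.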
        reshaped_hidden_states = outputs.reshaped_hidden_states
        self.assertEqual(len(reshaped_hidden_states), expected_num_layers)

        batch_size, num_channels, height, width = reshaped_hidden_states[
            0].shape
        reshaped_hidden_states = (reshaped_hidden_states[0].view(
            batch_size, num_channels, height * width).permute(0, 2, 1))
        self.assertListEqual(
            list(reshaped_hidden_states.shape[-2:]),
            [num_patches, self.model_tester.embed_dim],
        )

    def test_hidden_states_output(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        image_size = to_2tuple(self.model_tester.image_size)

        for model_class in self.all_model_classes:
            inputs_dict["output_hidden_states"] = True
            self.check_hidden_states_output(inputs_dict, config, model_class,
                                            image_size)

            # check that output_hidden_states also work using config
            del inputs_dict["output_hidden_states"]
            config.output_hidden_states = True

            self.check_hidden_states_output(inputs_dict, config, model_class,
                                            image_size)

    def test_hidden_states_output_with_padding(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        config.patch_size = 3

        image_size = to_2tuple(self.model_tester.image_size)
        patch_size = to_2tuple(config.patch_size)

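        # patch_size=3 typically does not divide the test image size evenly, so Swin pads
        # the input; hidden states should correspond to the padded resolution.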
        padded_height = image_size[0] + patch_size[0] - (image_size[0] %
                                                         patch_size[0])
        padded_width = image_size[1] + patch_size[1] - (image_size[1] %
                                                        patch_size[1])

        for model_class in self.all_model_classes:
            inputs_dict["output_hidden_states"] = True
            self.check_hidden_states_output(inputs_dict, config, model_class,
                                            (padded_height, padded_width))

            # check that output_hidden_states also work using config
            del inputs_dict["output_hidden_states"]
            config.output_hidden_states = True
            self.check_hidden_states_output(inputs_dict, config, model_class,
                                            (padded_height, padded_width))

    def test_for_image_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_image_classification(
            *config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in SWIN_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = SwinModel.from_pretrained(model_name)
            self.assertIsNotNone(model)

    def test_initialization(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

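        # _config_zero_init zeroes out the initializer ranges, so properly initialized
        # (non-embedding) weights should have a mean of ~0.0, or 1.0 for scale parameters.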
        configs_no_init = _config_zero_init(config)
        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)
            for name, param in model.named_parameters():
                if "embeddings" not in name and param.requires_grad:
                    self.assertIn(
                        ((param.data.mean() * 1e9).round() / 1e9).item(),
                        [0.0, 1.0],
                        msg=
                        f"Parameter {name} of model {model_class} seems not properly initialized",
                    )

class ViTMAEModelTest(ModelTesterMixin, unittest.TestCase):
    """
    Here we also overwrite some of the tests of test_modeling_common.py, as ViTMAE does not use input_ids, inputs_embeds,
    attention_mask and seq_length.
    """

    all_model_classes = (ViTMAEModel,
                         ViTMAEForPreTraining) if is_torch_available() else ()

    test_pruning = False
    test_torchscript = False
    test_resize_embeddings = False
    test_head_masking = False

    def setUp(self):
        self.model_tester = ViTMAEModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=ViTMAEConfig,
                                          has_text_modality=False,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    @unittest.skip(reason="ViTMAE does not use inputs_embeds")
    def test_inputs_embeds(self):
        pass

    def test_model_common_attributes(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            self.assertIsInstance(model.get_input_embeddings(), (nn.Module))
            x = model.get_output_embeddings()
            self.assertTrue(x is None or isinstance(x, nn.Linear))

    def test_forward_signature(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            signature = inspect.signature(model.forward)
            # signature.parameters is an OrderedDict => so arg_names order is deterministic
            arg_names = [*signature.parameters.keys()]

            expected_arg_names = ["pixel_values"]
            self.assertListEqual(arg_names[:1], expected_arg_names)

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_for_pretraining(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_pretraining(*config_and_inputs)

    # overwritten from common since ViTMAEForPreTraining uses random masking; we need to fix
    # the noise used to generate the masks during the test
    def check_pt_tf_models(self, tf_model, pt_model, pt_inputs_dict):

        # make masks reproducible
        np.random.seed(2)

        num_patches = int(
            (pt_model.config.image_size // pt_model.config.patch_size)**2)
        noise = np.random.uniform(size=(self.model_tester.batch_size,
                                        num_patches))
        pt_noise = torch.from_numpy(noise)

        # Add `noise` argument.
        # PT inputs will be prepared in `super().check_pt_tf_models()` with this added `noise` argument
        pt_inputs_dict["noise"] = pt_noise

        super().check_pt_tf_models(tf_model, pt_model, pt_inputs_dict)

    def test_save_load(self):

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        for model_class in self.all_model_classes:
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            # make random mask reproducible
            torch.manual_seed(2)
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))

            out_2 = outputs[0].cpu().numpy()
            out_2[np.isnan(out_2)] = 0

            with tempfile.TemporaryDirectory() as tmpdirname:
                model.save_pretrained(tmpdirname)
                model = model_class.from_pretrained(tmpdirname)
                model.to(torch_device)
                # make random mask reproducible
                torch.manual_seed(2)
                with torch.no_grad():
                    after_outputs = model(
                        **self._prepare_for_class(inputs_dict, model_class))

                # Make sure we don't have nans
                out_1 = after_outputs[0].cpu().numpy()
                out_1[np.isnan(out_1)] = 0
                max_diff = np.amax(np.abs(out_1 - out_2))
                self.assertLessEqual(max_diff, 1e-5)

    @unittest.skip(
        reason=
        """ViTMAE returns a random mask + ids_restore in each forward pass. See test_save_load
    to get deterministic results.""")
    def test_determinism(self):
        pass

    @unittest.skip(
        reason=
        """ViTMAE returns a random mask + ids_restore in each forward pass. See test_save_load
    to get deterministic results.""")
    def test_save_load_fast_init_from_base(self):
        pass

    @unittest.skip(
        reason=
        """ViTMAE returns a random mask + ids_restore in each forward pass. See test_save_load
    to get deterministic results.""")
    def test_save_load_fast_init_to_base(self):
        pass

    @unittest.skip(
        reason=
        """ViTMAE returns a random mask + ids_restore in each forward pass. See test_save_load"""
    )
    def test_model_outputs_equivalence(self):
        pass

    @slow
    def test_model_from_pretrained(self):
        for model_name in VIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = ViTMAEModel.from_pretrained(model_name)
            self.assertIsNotNone(model)

class BeitModelTest(ModelTesterMixin, unittest.TestCase):
    """
    Here we also overwrite some of the tests of test_modeling_common.py, as BEiT does not use input_ids, inputs_embeds,
    attention_mask and seq_length.
    """

    all_model_classes = ((BeitModel, BeitForImageClassification,
                          BeitForMaskedImageModeling,
                          BeitForSemanticSegmentation)
                         if is_torch_available() else ())

    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False

    def setUp(self):
        self.model_tester = BeitModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=BeitConfig,
                                          has_text_modality=False,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    @unittest.skip(reason="BEiT does not use inputs_embeds")
    def test_inputs_embeds(self):
        pass

    def test_model_common_attributes(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            self.assertIsInstance(model.get_input_embeddings(), (nn.Module))
            x = model.get_output_embeddings()
            self.assertTrue(x is None or isinstance(x, nn.Linear))

    def test_forward_signature(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            signature = inspect.signature(model.forward)
            # signature.parameters is an OrderedDict => so arg_names order is deterministic
            arg_names = [*signature.parameters.keys()]

            expected_arg_names = ["pixel_values"]
            self.assertListEqual(arg_names[:1], expected_arg_names)

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)

    def test_for_image_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_image_classification(
            *config_and_inputs)

    def test_for_semantic_segmentation(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_semantic_segmentation(
            *config_and_inputs)

    def test_training(self):
        if not self.model_tester.is_training:
            return

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        config.return_dict = True

        for model_class in self.all_model_classes:
            # we don't test BeitForMaskedImageModeling
            if model_class in [
                    *get_values(MODEL_MAPPING), BeitForMaskedImageModeling
            ]:
                continue

            model = model_class(config)
            model.to(torch_device)
            model.train()
            inputs = self._prepare_for_class(inputs_dict,
                                             model_class,
                                             return_labels=True)
            loss = model(**inputs).loss
            loss.backward()

    def test_training_gradient_checkpointing(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        if not self.model_tester.is_training:
            return

        config.use_cache = False
        config.return_dict = True

        for model_class in self.all_model_classes:
            # we don't test BeitForMaskedImageModeling
            if (model_class in [
                    *get_values(MODEL_MAPPING), BeitForMaskedImageModeling
            ] or not model_class.supports_gradient_checkpointing):
                continue

            model = model_class(config)
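            # Gradient checkpointing trades compute for memory by recomputing activations
            # during the backward pass; training should still run end to end.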
            model.gradient_checkpointing_enable()
            model.to(torch_device)
            model.train()
            inputs = self._prepare_for_class(inputs_dict,
                                             model_class,
                                             return_labels=True)
            loss = model(**inputs).loss
            loss.backward()

    def test_initialization(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        configs_no_init = _config_zero_init(config)
        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)
            for name, param in model.named_parameters():
                # we skip lambda parameters as these require special initial values
                # determined by config.layer_scale_init_value
                if "lambda" in name:
                    continue
                if param.requires_grad:
                    self.assertIn(
                        ((param.data.mean() * 1e9).round() / 1e9).item(),
                        [0.0, 1.0],
                        msg=
                        f"Parameter {name} of model {model_class} seems not properly initialized",
                    )

    @slow
    def test_model_from_pretrained(self):
        for model_name in BEIT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = BeitModel.from_pretrained(model_name)
            self.assertIsNotNone(model)