Example #1
    def test_training(self):
        if not self.model_tester.is_training:
            return

        for model_class in self.all_model_classes:
            if model_class in [
                    *get_values(MODEL_MAPPING),
                    PerceiverForOpticalFlow,
                    PerceiverForMultimodalAutoencoding,
            ]:
                continue

            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class(
                model_class)
            config.return_dict = True

            model = model_class(config)
            model.to(torch_device)
            model.train()
            inputs = self._prepare_for_class(inputs_dict,
                                             model_class,
                                             return_labels=True)
            loss = model(**inputs).loss
            loss.backward()
Example #2
    def test_training(self):
        if not self.model_tester.is_training:
            return

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.return_dict = True

        for model_class in self.all_model_classes:
            # we don't test BeitForMaskedImageModeling
            if model_class in [*get_values(MODEL_MAPPING), BeitForMaskedImageModeling]:
                continue
            # TODO: remove the following 3 lines once we have a MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING
            # this can then be incorporated into _prepare_for_class in test_modeling_common.py
            elif model_class.__name__ == "BeitForSemanticSegmentation":
                batch_size, num_channels, height, width = inputs_dict["pixel_values"].shape
                inputs_dict["labels"] = torch.zeros(
                    [self.model_tester.batch_size, height, width], device=torch_device
                ).long()
            model = model_class(config)
            model.to(torch_device)
            model.train()
            inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
            loss = model(**inputs).loss
            loss.backward()
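
The two variants above are the same smoke test: put the model in training mode, run one forward pass with labels so a loss comes back, and call backward() once to confirm the whole graph is differentiable. A minimal self-contained sketch of that pattern, using a hypothetical TinyClassifier in place of a real transformers model class:

import torch
import torch.nn as nn

# Hypothetical stand-in for a model class under test: any nn.Module that
# returns a loss when labels are supplied fits the pattern.
class TinyClassifier(nn.Module):
    def __init__(self, in_features=16, hidden_size=8, num_labels=2):
        super().__init__()
        self.body = nn.Linear(in_features, hidden_size)
        self.head = nn.Linear(hidden_size, num_labels)
        self.loss_fct = nn.CrossEntropyLoss()

    def forward(self, inputs, labels=None):
        logits = self.head(torch.relu(self.body(inputs)))
        loss = self.loss_fct(logits, labels) if labels is not None else None
        return logits, loss

model = TinyClassifier()
model.train()  # same step the tests take before the forward pass

inputs = torch.randn(4, 16)                # dummy batch, like inputs_dict
labels = torch.zeros(4, dtype=torch.long)  # dummy labels, like return_labels=True

_, loss = model(inputs, labels=labels)
loss.backward()  # the test passes if this completes without raising
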
Example #3
    def test_problem_types(self):
        problem_types = [
            {
                "title": "multi_label_classification",
                "num_labels": 2,
                "dtype": torch.float
            },
            {
                "title": "single_label_classification",
                "num_labels": 1,
                "dtype": torch.long
            },
            {
                "title": "regression",
                "num_labels": 1,
                "dtype": torch.float
            },
        ]

        for model_class in self.all_model_classes:
            if model_class not in get_values(
                    MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING):
                continue

            config, inputs, input_mask, _, _ = self.model_tester.prepare_config_and_inputs(
                model_class=model_class)
            inputs_dict = dict(inputs=inputs, attention_mask=input_mask)

            for problem_type in problem_types:
                with self.subTest(msg=f"Testing {model_class} with {problem_type['title']}"):

                    config.problem_type = problem_type["title"]
                    config.num_labels = problem_type["num_labels"]

                    model = model_class(config)
                    model.to(torch_device)
                    model.train()

                    inputs = self._prepare_for_class(inputs_dict,
                                                     model_class,
                                                     return_labels=True)

                    if problem_type["num_labels"] > 1:
                        inputs["labels"] = inputs["labels"].unsqueeze(1).repeat(1, problem_type["num_labels"])

                    inputs["labels"] = inputs["labels"].to(problem_type["dtype"])

                    # This tests that we do not trigger the warning from PyTorch "Using a target size that is different
                    # to the input size. This will likely lead to incorrect results due to broadcasting. Please ensure
                    # they have the same size.", which is a symptom that something is wrong for the regression problem.
                    # See https://github.com/huggingface/transformers/issues/11780
                    with warnings.catch_warnings(record=True) as warning_list:
                        loss = model(**inputs).loss
                    for w in warning_list:
                        if "Using a target size that is different to the input size" in str(w.message):
                            raise ValueError(
                                f"Something is going wrong in the regression problem: intercepted {w.message}"
                            )

                    loss.backward()
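
What config.problem_type switches is the loss function inside the sequence-classification head: "regression" uses MSELoss, "single_label_classification" uses CrossEntropyLoss, and "multi_label_classification" uses BCEWithLogitsLoss, which is why the test casts labels to float for the first and last cases and to long for the middle one. A sketch of that dispatch, written as a free function rather than the actual in-library implementation:

import torch
import torch.nn as nn

def classification_loss(logits, labels, problem_type, num_labels):
    # Mirrors the convention the test exercises; details may differ
    # from the real transformers heads.
    if problem_type == "regression":
        return nn.MSELoss()(logits.squeeze(), labels.squeeze())
    if problem_type == "single_label_classification":
        return nn.CrossEntropyLoss()(logits.view(-1, num_labels), labels.view(-1))
    if problem_type == "multi_label_classification":
        return nn.BCEWithLogitsLoss()(logits, labels)
    raise ValueError(f"unknown problem_type: {problem_type}")

logits = torch.randn(4, 2)
labels = torch.zeros(4, 2, dtype=torch.float)  # same shape as logits: no broadcast warning
print(classification_loss(logits, labels, "multi_label_classification", num_labels=2))

The label reshaping in the test (the unsqueeze(1).repeat(...) and the dtype cast) exists precisely so each branch receives labels of the shape and dtype its loss function expects.
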
Example #4
    def _prepare_for_class(self,
                           inputs_dict,
                           model_class,
                           return_labels=False):
        inputs_dict = copy.deepcopy(inputs_dict)
        if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
            inputs_dict = {
                k: v.unsqueeze(1).expand(-1, self.model_tester.num_choices,
                                         -1).contiguous()
                if isinstance(v, torch.Tensor) and v.ndim > 1 else v
                for k, v in inputs_dict.items()
            }

        if return_labels:
            if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
                inputs_dict["labels"] = torch.ones(
                    self.model_tester.batch_size,
                    dtype=torch.long,
                    device=torch_device)
            elif model_class in get_values(
                    MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING):
                inputs_dict["labels"] = torch.zeros(
                    (self.model_tester.batch_size,
                     self.model_tester.seq_length),
                    dtype=torch.long,
                    device=torch_device)
                inputs_dict["aggregation_labels"] = torch.zeros(
                    self.model_tester.batch_size,
                    dtype=torch.long,
                    device=torch_device)
                inputs_dict["numeric_values"] = torch.zeros(
                    (self.model_tester.batch_size,
                     self.model_tester.seq_length),
                    dtype=torch.float,
                    device=torch_device,
                )
                inputs_dict["numeric_values_scale"] = torch.zeros(
                    (self.model_tester.batch_size,
                     self.model_tester.seq_length),
                    dtype=torch.float,
                    device=torch_device,
                )
                inputs_dict["float_answer"] = torch.zeros(
                    self.model_tester.batch_size,
                    dtype=torch.float,
                    device=torch_device)
            elif model_class in [
                    *get_values(MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING),
                    *get_values(MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING),
            ]:
                inputs_dict["labels"] = torch.zeros(
                    self.model_tester.batch_size,
                    dtype=torch.long,
                    device=torch_device)
            elif model_class in [
                    *get_values(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING),
                    *get_values(MODEL_FOR_CAUSAL_LM_MAPPING),
                    *get_values(MODEL_FOR_MASKED_LM_MAPPING),
                    *get_values(MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING),
            ]:
                inputs_dict["labels"] = torch.zeros(
                    (self.model_tester.batch_size,
                     self.model_tester.seq_length),
                    dtype=torch.long,
                    device=torch_device)
        return inputs_dict
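
The helper above implements one idea: every task family gets all-zeros (or, for multiple choice, all-ones) dummy labels of whatever shape its head expects, so any model can be pushed through a forward-plus-loss pass without real data. A condensed sketch of the shape conventions, using plain task-name strings in place of the mapping lookups:

import torch

def dummy_labels(task, batch_size, seq_length, device="cpu"):
    # Per-token tasks label every position; per-sequence tasks label the batch.
    # The shapes mirror the branches of _prepare_for_class above.
    if task in ("token_classification", "causal_lm", "masked_lm", "seq2seq_lm"):
        return torch.zeros((batch_size, seq_length), dtype=torch.long, device=device)
    if task in ("sequence_classification", "next_sentence_prediction"):
        return torch.zeros(batch_size, dtype=torch.long, device=device)
    if task == "multiple_choice":
        return torch.ones(batch_size, dtype=torch.long, device=device)
    raise ValueError(f"unknown task: {task}")

print(dummy_labels("masked_lm", batch_size=2, seq_length=7).shape)  # torch.Size([2, 7])
print(dummy_labels("multiple_choice", batch_size=2, seq_length=7))  # tensor([1, 1])
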
Example #5
    def test_attention_outputs(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.return_dict = True

        seq_length = getattr(self.model_tester, "seq_length", None)
        decoder_seq_length = getattr(self.model_tester, "decoder_seq_length", seq_length)
        encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", seq_length)
        decoder_key_length = getattr(self.model_tester, "decoder_key_length", decoder_seq_length)
        encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length)

        for model_class in self.all_model_classes:
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = False
            model = model_class(config)
            outputs = model(**self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions
            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)

            # check that output_attentions also work using config
            del inputs_dict["output_attentions"]
            config.output_attentions = True
            model = model_class(config)
            outputs = model(**self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions
            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)

            self.assertListEqual(
                list(attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
            )
            out_len = len(outputs)

            if self.is_encoder_decoder:
                correct_outlen = 5

                # Question Answering model returns start_logits and end_logits
                if model_class in get_values(FLAX_MODEL_FOR_QUESTION_ANSWERING_MAPPING):
                    correct_outlen += 1  # start_logits and end_logits instead of only 1 output

                self.assertEqual(out_len, correct_outlen)

                # decoder attentions
                decoder_attentions = outputs.decoder_attentions
                self.assertIsInstance(decoder_attentions, (list, tuple))
                self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers)
                self.assertListEqual(
                    list(decoder_attentions[0].shape[-3:]),
                    [self.model_tester.num_attention_heads, decoder_seq_length, decoder_key_length],
                )

                # cross attentions
                cross_attentions = outputs.cross_attentions
                self.assertIsInstance(cross_attentions, (list, tuple))
                self.assertEqual(len(cross_attentions), self.model_tester.num_hidden_layers)
                self.assertListEqual(
                    list(cross_attentions[0].shape[-3:]),
                    [
                        self.model_tester.num_attention_heads,
                        decoder_seq_length,
                        encoder_key_length,
                    ],
                )

            # Check attention is always last and order is fine
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = True
            model = model_class(config)
            outputs = model(**self._prepare_for_class(inputs_dict, model_class))

            if hasattr(self.model_tester, "num_hidden_states_types"):
                added_hidden_states = self.model_tester.num_hidden_states_types
            elif self.is_encoder_decoder:
                added_hidden_states = 2
            else:
                added_hidden_states = 1
            self.assertEqual(out_len + added_hidden_states, len(outputs))

            self_attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions
            self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers)

            self.assertListEqual(
                list(self_attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
            )
Example #6
    def test_attention_outputs(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.return_dict = True

        seq_len = getattr(self.model_tester, "seq_length", None)
        decoder_seq_length = getattr(self.model_tester, "decoder_seq_length",
                                     seq_len)
        encoder_seq_length = getattr(self.model_tester, "encoder_seq_length",
                                     seq_len)
        decoder_key_length = getattr(self.model_tester, "decoder_key_length",
                                     decoder_seq_length)
        encoder_key_length = getattr(self.model_tester, "key_length",
                                     encoder_seq_length)
        chunk_length = getattr(self.model_tester, "chunk_length", None)
        if chunk_length is not None and hasattr(self.model_tester,
                                                "num_hashes"):
            encoder_seq_length = encoder_seq_length * self.model_tester.num_hashes

        for model_class in self.all_model_classes:
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = False
            config.return_dict = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions
            self.assertEqual(len(attentions),
                             self.model_tester.num_hidden_layers)

            # check that output_attentions also work using config
            del inputs_dict["output_attentions"]
            config.output_attentions = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions
            self.assertEqual(len(attentions),
                             self.model_tester.num_hidden_layers)

            if chunk_length is not None:
                self.assertListEqual(
                    list(attentions[0].shape[-4:]),
                    [
                        self.model_tester.num_attention_heads,
                        encoder_seq_length, chunk_length, encoder_key_length
                    ],
                )
            else:
                self.assertListEqual(
                    list(attentions[0].shape[-3:]),
                    [
                        self.model_tester.num_attention_heads,
                        encoder_seq_length, encoder_key_length
                    ],
                )
            out_len = len(outputs)

            if self.is_encoder_decoder:
                correct_outlen = 5

                # loss is at first position
                if "labels" in inputs_dict:
                    correct_outlen += 1  # loss is added to beginning
                # Question Answering model returns start_logits and end_logits
                if model_class in get_values(
                        MODEL_FOR_QUESTION_ANSWERING_MAPPING):
                    correct_outlen += 1  # start_logits and end_logits instead of only 1 output
                if "past_key_values" in outputs:
                    correct_outlen += 1  # past_key_values have been returned

                self.assertEqual(out_len, correct_outlen)

                # decoder attentions
                decoder_attentions = outputs.decoder_attentions
                self.assertIsInstance(decoder_attentions, (list, tuple))
                self.assertEqual(len(decoder_attentions),
                                 self.model_tester.num_hidden_layers)
                self.assertListEqual(
                    list(decoder_attentions[0].shape[-3:]),
                    [
                        self.model_tester.num_attention_heads,
                        decoder_seq_length, decoder_key_length
                    ],
                )

                # cross attentions
                cross_attentions = outputs.cross_attentions
                self.assertIsInstance(cross_attentions, (list, tuple))
                self.assertEqual(len(cross_attentions),
                                 self.model_tester.num_hidden_layers)
                self.assertListEqual(
                    list(cross_attentions[0].shape[-3:]),
                    [
                        self.model_tester.num_attention_heads,
                        decoder_seq_length,
                        encoder_key_length,
                    ],
                )

            # Check attention is always last and order is fine
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))

            if hasattr(self.model_tester, "num_hidden_states_types"):
                added_hidden_states = self.model_tester.num_hidden_states_types
            elif self.is_encoder_decoder:
                added_hidden_states = 2
            else:
                added_hidden_states = 1
            self.assertEqual(out_len + added_hidden_states, len(outputs))

            self_attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions

            self.assertEqual(len(self_attentions),
                             self.model_tester.num_hidden_layers)
            if chunk_length is not None:
                self.assertListEqual(
                    list(self_attentions[0].shape[-4:]),
                    [
                        self.model_tester.num_attention_heads,
                        encoder_seq_length, chunk_length, encoder_key_length
                    ],
                )
            else:
                self.assertListEqual(
                    list(self_attentions[0].shape[-3:]),
                    [
                        self.model_tester.num_attention_heads,
                        encoder_seq_length, encoder_key_length
                    ],
                )
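
Both versions of the test pin down the same invariant: the model returns one attention tensor per hidden layer, and each tensor's trailing dimensions are [num_attention_heads, query_length, key_length]. A self-contained sketch of that check against a stack of plain nn.MultiheadAttention layers (a stand-in, not the transformers implementation):

import torch
import torch.nn as nn

batch, seq_len, hidden, heads, layers = 2, 5, 16, 4, 3
x = torch.randn(batch, seq_len, hidden)

attentions = []
for _ in range(layers):
    attn = nn.MultiheadAttention(hidden, heads, batch_first=True)
    # average_attn_weights=False keeps the per-head dimension
    x, weights = attn(x, x, x, need_weights=True, average_attn_weights=False)
    attentions.append(weights)

assert len(attentions) == layers  # one attention tensor per layer
assert list(attentions[0].shape[-3:]) == [heads, seq_len, seq_len]
print([tuple(a.shape) for a in attentions])  # [(2, 4, 5, 5), (2, 4, 5, 5), (2, 4, 5, 5)]
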
Example #7
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False) -> dict:
        inputs_dict = copy.deepcopy(inputs_dict)

        if model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
            inputs_dict = {
                k: tf.tile(tf.expand_dims(v, 1),
                           (1, self.model_tester.num_choices) + (1, ) *
                           (v.ndim - 1))
                if isinstance(v, tf.Tensor) and v.ndim > 0 else v
                for k, v in inputs_dict.items()
            }

        if return_labels:
            if model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
                inputs_dict["labels"] = tf.ones(self.model_tester.batch_size,
                                                dtype=tf.int32)
            elif model_class in get_values(
                    TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING):
                inputs_dict["start_positions"] = tf.zeros(
                    self.model_tester.batch_size, dtype=tf.int32)
                inputs_dict["end_positions"] = tf.zeros(
                    self.model_tester.batch_size, dtype=tf.int32)
            elif model_class in [
                    *get_values(TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING),
                    *get_values(TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING),
            ]:
                inputs_dict["labels"] = tf.zeros(self.model_tester.batch_size,
                                                 dtype=tf.int32)
            elif model_class in get_values(
                    TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING):
                inputs_dict["next_sentence_label"] = tf.zeros(
                    self.model_tester.batch_size, dtype=tf.int32)
            elif model_class in [
                    *get_values(TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING),
                    *get_values(TF_MODEL_FOR_CAUSAL_LM_MAPPING),
                    *get_values(TF_MODEL_FOR_MASKED_LM_MAPPING),
                    *get_values(TF_MODEL_FOR_PRETRAINING_MAPPING),
                    *get_values(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING),
            ]:
                inputs_dict["labels"] = tf.zeros(
                    (self.model_tester.batch_size,
                     self.model_tester.seq_length),
                    dtype=tf.int32)
        return inputs_dict
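
The multiple-choice branch is the only one that reshapes inputs rather than just attaching labels: every input tensor gains a num_choices axis so each choice is scored independently. A minimal sketch of that expansion for a single tensor, assuming num_choices=3:

import tensorflow as tf

num_choices = 3
input_ids = tf.constant([[101, 7592, 102], [101, 2088, 102]])  # (batch=2, seq=3)

# Insert a choices axis, then repeat each example once per choice,
# exactly as the dict comprehension above does for every tensor input.
expanded = tf.tile(tf.expand_dims(input_ids, 1), (1, num_choices) + (1,) * (input_ids.ndim - 1))
print(expanded.shape)  # (2, 3, 3): batch x num_choices x seq

labels = tf.ones(2, dtype=tf.int32)  # dummy index of the "correct" choice per example
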