def test_training(self):
        # NOTE(review): this definition is shadowed by the second `test_training`
        # defined immediately below it in this class — Python keeps only the last
        # binding of a name, so this version never runs. Consider deleting it.
        if not self.model_tester.is_training:
            return

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.return_dict = True

        for model_class in self.all_model_classes:
            # Base models (those in MODEL_MAPPING) are skipped here.
            if model_class in MODEL_MAPPING.values():
                continue
            model = model_class(config)
            model.to(torch_device)
            model.train()
            inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
            loss = model(**inputs).loss
            loss.backward()
    def test_training(self):
        """Smoke-test that each trainable model class can compute a loss and backprop."""
        if not self.model_tester.is_training:
            return

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.return_dict = True

        for model_class in self.all_model_classes:
            # Skip base models (no loss head) and the DeiT distillation head,
            # which supports inference only.
            is_base_model = model_class in MODEL_MAPPING.values()
            is_inference_only = model_class.__name__ == "DeiTForImageClassificationWithTeacher"
            if is_base_model or is_inference_only:
                continue

            model = model_class(config)
            model.to(torch_device)
            model.train()
            prepared_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
            loss = model(**prepared_inputs).loss
            loss.backward()
    def test_training_gradient_checkpointing(self):
        """Smoke-test the backward pass with gradient checkpointing enabled.

        Skipped entirely when the tester is not in training mode or when the
        model config does not expose a ``gradient_checkpointing`` attribute.
        """
        # Check the cheap training-mode flag first, before doing any config/input
        # preparation work (consistent with test_training above).
        if not self.model_tester.is_training:
            return

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        if not hasattr(config, "gradient_checkpointing"):
            return

        config.gradient_checkpointing = True
        config.return_dict = True

        for model_class in self.all_model_classes:
            # Base models and with-heads base mappings produce no loss to backprop.
            if model_class in MODEL_MAPPING.values() or model_class in MODEL_WITH_HEADS_MAPPING.values():
                continue
            model = model_class(config)
            model.to(torch_device)
            model.train()
            inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
            loss = model(**inputs).loss
            loss.backward()