def test_training(self): if not self.model_tester.is_training: return for model_class in self.all_model_classes: if model_class in [ *get_values(MODEL_MAPPING), PerceiverForOpticalFlow, PerceiverForMultimodalAutoencoding, ]: continue config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_model_class( model_class) config.return_dict = True model = model_class(config) model.to(torch_device) model.train() inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) loss = model(**inputs).loss loss.backward()
def test_training(self): if not self.model_tester.is_training: return config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config.return_dict = True for model_class in self.all_model_classes: # we don't test BeitForMaskedImageModeling if model_class in [*get_values(MODEL_MAPPING), BeitForMaskedImageModeling]: continue # TODO: remove the following 3 lines once we have a MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING # this can then be incorporated into _prepare_for_class in test_modeling_common.py elif model_class.__name__ == "BeitForSemanticSegmentation": batch_size, num_channels, height, width = inputs_dict["pixel_values"].shape inputs_dict["labels"] = torch.zeros( [self.model_tester.batch_size, height, width], device=torch_device ).long() model = model_class(config) model.to(torch_device) model.train() inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) loss = model(**inputs).loss loss.backward()
def test_problem_types(self): problem_types = [ { "title": "multi_label_classification", "num_labels": 2, "dtype": torch.float }, { "title": "single_label_classification", "num_labels": 1, "dtype": torch.long }, { "title": "regression", "num_labels": 1, "dtype": torch.float }, ] for model_class in self.all_model_classes: if model_class not in get_values( MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING): continue config, inputs, input_mask, _, _ = self.model_tester.prepare_config_and_inputs( model_class=model_class) inputs_dict = dict(inputs=inputs, attention_mask=input_mask) for problem_type in problem_types: with self.subTest( msg= f"Testing {model_class} with {problem_type['title']}"): config.problem_type = problem_type["title"] config.num_labels = problem_type["num_labels"] model = model_class(config) model.to(torch_device) model.train() inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True) if problem_type["num_labels"] > 1: inputs["labels"] = inputs["labels"].unsqueeze( 1).repeat(1, problem_type["num_labels"]) inputs["labels"] = inputs["labels"].to( problem_type["dtype"]) # This tests that we do not trigger the warning form PyTorch "Using a target size that is different # to the input size. This will likely lead to incorrect results due to broadcasting. Please ensure # they have the same size." which is a symptom something in wrong for the regression problem. # See https://github.com/huggingface/transformers/issues/11780 with warnings.catch_warnings(record=True) as warning_list: loss = model(**inputs).loss for w in warning_list: if "Using a target size that is different to the input size" in str( w.message): raise ValueError( f"Something is going wrong in the regression problem: intercepted {w.message}" ) loss.backward()
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False): inputs_dict = copy.deepcopy(inputs_dict) if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING): inputs_dict = { k: v.unsqueeze(1).expand(-1, self.model_tester.num_choices, -1).contiguous() if isinstance(v, torch.Tensor) and v.ndim > 1 else v for k, v in inputs_dict.items() } if return_labels: if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING): inputs_dict["labels"] = torch.ones( self.model_tester.batch_size, dtype=torch.long, device=torch_device) elif model_class in get_values( MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING): inputs_dict["labels"] = torch.zeros( (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device) inputs_dict["aggregation_labels"] = torch.zeros( self.model_tester.batch_size, dtype=torch.long, device=torch_device) inputs_dict["numeric_values"] = torch.zeros( (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.float, device=torch_device, ) inputs_dict["numeric_values_scale"] = torch.zeros( (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.float, device=torch_device, ) inputs_dict["float_answer"] = torch.zeros( self.model_tester.batch_size, dtype=torch.float, device=torch_device) elif model_class in [ *get_values(MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING), *get_values(MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING), ]: inputs_dict["labels"] = torch.zeros( self.model_tester.batch_size, dtype=torch.long, device=torch_device) elif model_class in [ *get_values(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING), *get_values(MODEL_FOR_CAUSAL_LM_MAPPING), *get_values(MODEL_FOR_MASKED_LM_MAPPING), *get_values(MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING), ]: inputs_dict["labels"] = torch.zeros( (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device) return inputs_dict
def test_attention_outputs(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() config.return_dict = True seq_length = getattr(self.model_tester, "seq_length", None) decoder_seq_length = getattr(self.model_tester, "decoder_seq_length", seq_length) encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", seq_length) decoder_key_length = getattr(self.model_tester, "decoder_key_length", decoder_seq_length) encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length) for model_class in self.all_model_classes: inputs_dict["output_attentions"] = True inputs_dict["output_hidden_states"] = False model = model_class(config) outputs = model(**self._prepare_for_class(inputs_dict, model_class)) attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) # check that output_attentions also work using config del inputs_dict["output_attentions"] config.output_attentions = True model = model_class(config) outputs = model(**self._prepare_for_class(inputs_dict, model_class)) attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) self.assertListEqual( list(attentions[0].shape[-3:]), [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length], ) out_len = len(outputs) if self.is_encoder_decoder: correct_outlen = 5 # Question Answering model returns start_logits and end_logits if model_class in get_values(FLAX_MODEL_FOR_QUESTION_ANSWERING_MAPPING): correct_outlen += 1 # start_logits and end_logits instead of only 1 output self.assertEqual(out_len, correct_outlen) # decoder attentions decoder_attentions = outputs.decoder_attentions self.assertIsInstance(decoder_attentions, (list, tuple)) self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers) self.assertListEqual( list(decoder_attentions[0].shape[-3:]), [self.model_tester.num_attention_heads, decoder_seq_length, decoder_key_length], ) # cross attentions cross_attentions = outputs.cross_attentions self.assertIsInstance(cross_attentions, (list, tuple)) self.assertEqual(len(cross_attentions), self.model_tester.num_hidden_layers) self.assertListEqual( list(cross_attentions[0].shape[-3:]), [ self.model_tester.num_attention_heads, decoder_seq_length, encoder_key_length, ], ) # Check attention is always last and order is fine inputs_dict["output_attentions"] = True inputs_dict["output_hidden_states"] = True model = model_class(config) outputs = model(**self._prepare_for_class(inputs_dict, model_class)) if hasattr(self.model_tester, "num_hidden_states_types"): added_hidden_states = self.model_tester.num_hidden_states_types elif self.is_encoder_decoder: added_hidden_states = 2 else: added_hidden_states = 1 self.assertEqual(out_len + added_hidden_states, len(outputs)) self_attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers) self.assertListEqual( list(self_attentions[0].shape[-3:]), [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length], )
def test_attention_outputs(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common( ) config.return_dict = True seq_len = getattr(self.model_tester, "seq_length", None) decoder_seq_length = getattr(self.model_tester, "decoder_seq_length", seq_len) encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", seq_len) decoder_key_length = getattr(self.model_tester, "decoder_key_length", decoder_seq_length) encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length) chunk_length = getattr(self.model_tester, "chunk_length", None) if chunk_length is not None and hasattr(self.model_tester, "num_hashes"): encoder_seq_length = encoder_seq_length * self.model_tester.num_hashes for model_class in self.all_model_classes: inputs_dict["output_attentions"] = True inputs_dict["output_hidden_states"] = False config.return_dict = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) # check that output_attentions also work using config del inputs_dict["output_attentions"] config.output_attentions = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions self.assertEqual(len(attentions), self.model_tester.num_hidden_layers) if chunk_length is not None: self.assertListEqual( list(attentions[0].shape[-4:]), [ self.model_tester.num_attention_heads / 2, encoder_seq_length, chunk_length, encoder_key_length ], ) else: self.assertListEqual( list(attentions[0].shape[-3:]), [ self.model_tester.num_attention_heads / 2, encoder_seq_length, encoder_key_length ], ) out_len = len(outputs) if self.is_encoder_decoder: correct_outlen = 5 # loss is at first position if "labels" in inputs_dict: correct_outlen += 1 # loss is added to beginning # Question Answering model returns start_logits and end_logits if model_class in get_values( MODEL_FOR_QUESTION_ANSWERING_MAPPING): correct_outlen += 1 # start_logits and end_logits instead of only 1 output if "past_key_values" in outputs: correct_outlen += 1 # past_key_values have been returned self.assertEqual(out_len, correct_outlen) # decoder attentions decoder_attentions = outputs.decoder_attentions self.assertIsInstance(decoder_attentions, (list, tuple)) self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers) self.assertListEqual( list(decoder_attentions[0].shape[-3:]), [ self.model_tester.num_attention_heads, decoder_seq_length, decoder_key_length ], ) # cross attentions cross_attentions = outputs.cross_attentions self.assertIsInstance(cross_attentions, (list, tuple)) self.assertEqual(len(cross_attentions), self.model_tester.num_hidden_layers) self.assertListEqual( list(cross_attentions[0].shape[-3:]), [ self.model_tester.num_attention_heads, decoder_seq_length, encoder_key_length, ], ) # Check attention is always last and order is fine inputs_dict["output_attentions"] = True inputs_dict["output_hidden_states"] = True model = model_class(config) model.to(torch_device) model.eval() with torch.no_grad(): outputs = model( **self._prepare_for_class(inputs_dict, model_class)) if hasattr(self.model_tester, "num_hidden_states_types"): added_hidden_states = self.model_tester.num_hidden_states_types elif self.is_encoder_decoder: added_hidden_states = 2 else: added_hidden_states = 1 self.assertEqual(out_len + added_hidden_states, len(outputs)) self_attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers) if chunk_length is not None: self.assertListEqual( list(self_attentions[0].shape[-4:]), [ self.model_tester.num_attention_heads / 2, encoder_seq_length, chunk_length, encoder_key_length ], ) else: self.assertListEqual( list(self_attentions[0].shape[-3:]), [ self.model_tester.num_attention_heads / 2, encoder_seq_length, encoder_key_length ], )
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False) -> dict: inputs_dict = copy.deepcopy(inputs_dict) if model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING): inputs_dict = { k: tf.tile(tf.expand_dims(v, 1), (1, self.model_tester.num_choices) + (1, ) * (v.ndim - 1)) if isinstance(v, tf.Tensor) and v.ndim > 0 else v for k, v in inputs_dict.items() } if return_labels: if model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING): inputs_dict["labels"] = tf.ones(self.model_tester.batch_size, dtype=tf.int32) elif model_class in get_values( TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING): inputs_dict["start_positions"] = tf.zeros( self.model_tester.batch_size, dtype=tf.int32) inputs_dict["end_positions"] = tf.zeros( self.model_tester.batch_size, dtype=tf.int32) elif model_class in [ *get_values(TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING), *get_values(TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING), ]: inputs_dict["labels"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32) elif model_class in get_values( TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING): inputs_dict["next_sentence_label"] = tf.zeros( self.model_tester.batch_size, dtype=tf.int32) elif model_class in [ *get_values(TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING), *get_values(TF_MODEL_FOR_CAUSAL_LM_MAPPING), *get_values(TF_MODEL_FOR_MASKED_LM_MAPPING), *get_values(TF_MODEL_FOR_PRETRAINING_MAPPING), *get_values(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING), ]: inputs_dict["labels"] = tf.zeros( (self.model_tester.batch_size, self.model_tester.seq_length), dtype=tf.int32) return inputs_dict