def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
    """Return a deep copy of ``inputs_dict`` adapted to ``model_class``.

    For classes registered in ``MODEL_FOR_MULTIPLE_CHOICE_MAPPING`` every tensor
    with more than one dimension gets a new axis inserted at position 1 and is
    expanded along it to ``self.model_tester.num_choices``.

    When ``return_labels`` is true, dummy ``torch.long`` targets on
    ``torch_device`` are added under the key(s) matching the mapping the class
    is registered in:

    * multiple choice: ``labels`` of ones, shape ``(batch_size,)``
    * question answering: ``start_positions`` / ``end_positions`` of zeros,
      shape ``(batch_size,)``
    * sequence classification: ``labels`` of zeros, shape ``(batch_size,)``
    * token classification / causal LM / masked LM / seq2seq LM: ``labels`` of
      zeros, shape ``(batch_size, seq_length)``

    Args:
        inputs_dict: Keyword arguments for the model's forward pass; never mutated.
        model_class: The model class the inputs are prepared for.
        return_labels: Whether to add dummy label tensors.

    Returns:
        The adapted copy of ``inputs_dict``.
    """
    prepared = copy.deepcopy(inputs_dict)
    is_multiple_choice = model_class in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values()

    if is_multiple_choice:
        expanded = {}
        for name, value in prepared.items():
            if isinstance(value, torch.Tensor) and value.ndim > 1:
                # Insert a "choices" axis and repeat the tensor along it.
                value = value.unsqueeze(1).expand(-1, self.model_tester.num_choices, -1).contiguous()
            expanded[name] = value
        prepared = expanded

    if not return_labels:
        return prepared

    def long_zeros(shape):
        # All dummy targets share dtype and device; only the shape varies.
        return torch.zeros(shape, dtype=torch.long, device=torch_device)

    if is_multiple_choice:
        prepared["labels"] = torch.ones(self.model_tester.batch_size, dtype=torch.long, device=torch_device)
        return prepared

    if model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values():
        for key in ("start_positions", "end_positions"):
            prepared[key] = long_zeros(self.model_tester.batch_size)
        return prepared

    if model_class in MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.values():
        prepared["labels"] = long_zeros(self.model_tester.batch_size)
        return prepared

    # Heads that expect one label per token.
    per_token_mappings = (
        MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
        MODEL_FOR_CAUSAL_LM_MAPPING,
        MODEL_FOR_MASKED_LM_MAPPING,
        MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
    )
    if any(model_class in mapping.values() for mapping in per_token_mappings):
        prepared["labels"] = long_zeros((self.model_tester.batch_size, self.model_tester.seq_length))

    return prepared
def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
    """Return a deep copy of ``inputs_dict``, optionally with dummy ``labels``.

    When ``return_labels`` is true, an all-zero ``torch.long`` tensor on
    ``torch_device`` is stored under ``"labels"``:

    * shape ``(batch_size,)`` for classes in ``MODEL_FOR_QUESTION_ANSWERING_MAPPING``
    * shape ``(batch_size, seq_length)`` for classes in ``MODEL_FOR_PRETRAINING_MAPPING``

    Classes in neither mapping get the plain copy.

    Args:
        inputs_dict: Keyword arguments for the model's forward pass; never mutated.
        model_class: The model class the inputs are prepared for.
        return_labels: Whether to add the dummy label tensor.

    Returns:
        The (possibly extended) copy of ``inputs_dict``.
    """
    prepared = copy.deepcopy(inputs_dict)
    if not return_labels:
        return prepared

    if model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values():
        # NOTE(review): question-answering heads usually take start/end
        # positions rather than "labels" -- confirm this key is intended.
        label_shape = self.model_tester.batch_size
    elif model_class in MODEL_FOR_PRETRAINING_MAPPING.values():
        # special case for models like BERT that use multi-loss training for PreTraining
        label_shape = (self.model_tester.batch_size, self.model_tester.seq_length)
    else:
        return prepared

    prepared["labels"] = torch.zeros(label_shape, dtype=torch.long, device=torch_device)
    return prepared
)
from transformers.data.metrics.squad_metrics import (
    compute_predictions_log_probs,
    compute_predictions_logits,
    squad_evaluate,
)
from transformers.data.processors.squad import SquadResult, SquadV1Processor, SquadV2Processor


# Use the SummaryWriter that ships with PyTorch when it can be imported;
# otherwise fall back to the one provided by tensorboardX.
try:
    from torch.utils.tensorboard import SummaryWriter
except ImportError:
    from tensorboardX import SummaryWriter


logger = logging.getLogger(__name__)

# Keys of the question-answering mapping and the `model_type` of each of them.
MODEL_CONFIG_CLASSES = list(MODEL_FOR_QUESTION_ANSWERING_MAPPING.keys())
MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)


def set_seed(args):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        args: Object exposing ``seed`` (the seed value) and ``n_gpu``; when
            ``n_gpu`` is positive the CUDA generators are seeded as well.
    """
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)


def to_list(tensor):
    """Detach ``tensor``, move it to the CPU and return it as a (nested) Python list."""
    return tensor.detach().cpu().tolist()
def test_attention_outputs(self):
    """Check count, position and shape of the attention outputs of every model class.

    For each class in ``self.all_model_classes`` the model is run three times:

    1. with ``output_attentions=True`` passed as an input,
    2. with ``output_attentions`` enabled through the config instead,
    3. with both attentions and hidden states requested.

    After each run the last output must hold one attention tensor per hidden
    layer; runs 2 and 3 also check its trailing dimensions.  For
    encoder-decoder testers the total number of outputs and the decoder
    attentions are verified after run 2.
    """
    config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
    tester = self.model_tester

    default_length = getattr(tester, "seq_length", None)
    decoder_seq_length = getattr(tester, "decoder_seq_length", default_length)
    encoder_seq_length = getattr(tester, "encoder_seq_length", default_length)
    decoder_key_length = getattr(tester, "key_length", decoder_seq_length)
    encoder_key_length = getattr(tester, "key_length", encoder_seq_length)
    chunk_length = getattr(tester, "chunk_length", None)
    if chunk_length is not None and hasattr(tester, "num_hashes"):
        encoder_seq_length = encoder_seq_length * tester.num_hashes

    def run_model(model_class):
        # Fresh model from the current config, evaluated without gradients.
        model = model_class(config)
        model.to(torch_device)
        model.eval()
        with torch.no_grad():
            return model(**self._prepare_for_class(inputs_dict, model_class))

    def check_encoder_attention_shape(layer_attentions):
        # Testers with a chunk length compare one extra trailing dimension.
        if chunk_length is None:
            actual = list(layer_attentions[0].shape[-3:])
            expected = [tester.num_attention_heads, encoder_seq_length, encoder_key_length]
        else:
            actual = list(layer_attentions[0].shape[-4:])
            expected = [
                tester.num_attention_heads,
                encoder_seq_length,
                chunk_length,
                encoder_key_length,
            ]
        self.assertListEqual(actual, expected)

    for model_class in self.all_model_classes:
        inputs_dict["output_attentions"] = True
        inputs_dict["output_hidden_states"] = False
        outputs = run_model(model_class)
        attentions = outputs[-1]
        self.assertEqual(len(attentions), tester.num_hidden_layers)

        # check that output_attentions also work using config
        del inputs_dict["output_attentions"]
        config.output_attentions = True
        outputs = run_model(model_class)
        attentions = outputs[-1]
        self.assertEqual(len(attentions), tester.num_hidden_layers)
        check_encoder_attention_shape(attentions)
        out_len = len(outputs)

        if self.is_encoder_decoder:
            # Outputs placed in front of the decoder attentions shift their
            # index and enlarge the expected output count by the same amount.
            leading_extras = 0
            if "labels" in inputs_dict:
                # loss is added to beginning
                leading_extras += 1
            if model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values():
                # start_logits and end_logits instead of only 1 output
                leading_extras += 1
            self.assertEqual(out_len, 4 + leading_extras)

            decoder_attentions = outputs[1 + leading_extras]
            self.assertIsInstance(decoder_attentions, (list, tuple))
            self.assertEqual(len(decoder_attentions), tester.num_hidden_layers)
            self.assertListEqual(
                list(decoder_attentions[0].shape[-3:]),
                [tester.num_attention_heads, decoder_seq_length, decoder_key_length],
            )

        # Check attention is always last and order is fine
        inputs_dict["output_attentions"] = True
        inputs_dict["output_hidden_states"] = True
        outputs = run_model(model_class)

        hidden_state_outputs = 2 if self.is_encoder_decoder else 1
        self.assertEqual(out_len + hidden_state_outputs, len(outputs))

        self_attentions = outputs[-1]
        self.assertEqual(len(self_attentions), tester.num_hidden_layers)
        check_encoder_attention_shape(self_attentions)
def register_bert_model(bert_cls):
    """
    Wrap a class inheriting from BertModel and automatically:
        1. Create an associated BertConfig
        2. Create an associated BertForMaskedLM
        3. Create an associated BertForSequenceClassification
        4. Create an associated BertForQuestionAnswering
        5. Register these classes with the Transformers model mappings

    The last step ensures that the resulting config and models may be used by
    AutoConfig, AutoModelForMaskedLM, AutoModelForSequenceClassification and
    AutoModelForQuestionAnswering.

    Assumptions are made to auto-name these classes and the corresponding model
    type. For instance, SparseBertModel will have model_type="sparse_bert" and
    associated classes like SparseBertConfig.

    To customize the inputs to the model's config, include the dataclass
    `bert_cls.ConfigKWargs`. This is, in fact, required. Upon initialization of
    the config, the fields of that dataclass will be used to extract extra
    keyword arguments and assign them as attributes to the config.

    Example
    ```
    @register_bert_model
    class SparseBertModel(BertModel):

        @dataclass
        class ConfigKWargs:
            # Keyword arguments to configure sparsity.
            sparsity: float = 0.9

        # Define __init__, etc.
        ...

    # Model is ready to auto load.
    config = AutoConfig.for_model("sparse_bert", sparsity=0.5)
    model = AutoModelForMaskedLM.from_config(config)

    config.sparsity
    >>> 0.5

    type(model)
    >>> SparseBertModelForMaskedLM
    ```

    :param bert_cls: class whose name ends with "BertModel"
    :return: `bert_cls` itself, with `config_class` set, so that this function
             can be applied as a class decorator without rebinding the
             decorated name to None
    """
    suffix = "BertModel"
    assert bert_cls.__name__.endswith(suffix), (
        f"Expected a class name ending in '{suffix}', got '{bert_cls.__name__}'"
    )

    # Get first part of name e.g. StaticSparseBertModel -> StaticSparse.
    # Only the trailing suffix is stripped; earlier occurrences of "BertModel"
    # inside the name are left untouched.
    name_prefix = bert_cls.__name__[: -len(suffix)]

    # Create new bert config and models based off of `bert_cls`.
    config_cls = create_config_class(bert_cls, name_prefix)
    masked_lm_cls = create_masked_lm_class(bert_cls, name_prefix)
    seq_classification_cls = create_sequence_classification_class(bert_cls, name_prefix)
    question_answering_cls = create_question_answering_class(bert_cls, name_prefix)

    # Specify the correct config class
    bert_cls.config_class = config_cls
    masked_lm_cls.config_class = config_cls
    seq_classification_cls.config_class = config_cls
    question_answering_cls.config_class = config_cls

    # Update Transformers mappings to auto-load these new models.
    CONFIG_MAPPING.update({
        config_cls.model_type: config_cls
    })
    TOKENIZER_MAPPING.update({
        config_cls: (BertTokenizer, BertTokenizerFast),
    })
    MODEL_FOR_MASKED_LM_MAPPING.update({
        config_cls: masked_lm_cls,
    })
    MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.update({
        config_cls: seq_classification_cls
    })
    MODEL_FOR_QUESTION_ANSWERING_MAPPING.update({
        config_cls: question_answering_cls
    })

    # Update the `models` modules so that these classes may be imported.
    __models_dict__.update({
        config_cls.__name__: config_cls,
        masked_lm_cls.__name__: masked_lm_cls,
        seq_classification_cls.__name__: seq_classification_cls,
        question_answering_cls.__name__: question_answering_cls,
    })

    # A decorator's return value replaces the decorated name, so hand the
    # class back to the caller.
    return bert_cls
MarkupLMTokenizer, MarkupLMForQuestionAnswering, MarkupLMForTokenClassification, MarkupLMTokenizerFast, )

# Register the "markuplm" model type with its config class.
CONFIG_MAPPING.update(
    [
        ("markuplm", MarkupLMConfig),
    ]
)
# Display name associated with the "markuplm" model type.
MODEL_NAMES_MAPPING.update([("markuplm", "MarkupLM")])
# Slow and fast tokenizer classes for the MarkupLM config.
TOKENIZER_MAPPING.update(
    [
        (MarkupLMConfig, (MarkupLMTokenizer, MarkupLMTokenizerFast)),
    ]
)
# The slow MarkupLM tokenizer is converted to its fast form with RobertaConverter.
SLOW_TO_FAST_CONVERTERS.update(
    {"MarkupLMTokenizer": RobertaConverter}
)
# Task-specific heads registered for the MarkupLM config.
MODEL_FOR_QUESTION_ANSWERING_MAPPING.update(
    [(MarkupLMConfig, MarkupLMForQuestionAnswering)]
)
MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.update(
    [(MarkupLMConfig, MarkupLMForTokenClassification)]
)
from transformers import (
    MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
    MODEL_FOR_QUESTION_ANSWERING_MAPPING,
    BertConfig,
    DistilBertConfig,
)

from .modeling_bert import (
    BertForSequenceClassification,
    BertForQuestionAnswering,
)
from .modeling_distilbert import (
    DistilBertForSequenceClassification,
    DistilBertForQuestionAnswering,
)

# Point the BERT and DistilBERT configs at this package's own
# sequence-classification and question-answering model classes.
MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.update([
    (BertConfig, BertForSequenceClassification),
    (DistilBertConfig, DistilBertForSequenceClassification),
])
MODEL_FOR_QUESTION_ANSWERING_MAPPING.update([
    (BertConfig, BertForQuestionAnswering),
    (DistilBertConfig, DistilBertForQuestionAnswering),
])

# NOTE(review): these imports come after the mapping updates above; presumably
# the order is deliberate so the updated mappings are in place first -- confirm
# before moving them to the top of the module.
from .training_args import TrainingArguments
from .drop_and_restore_utils import LengthDropArguments, SearchArguments
from .trainer import LengthDropTrainer