def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
    """Deep-copy ``inputs_dict`` and adapt it for ``model_class``.

    For multiple-choice models, every multi-dimensional tensor input is tiled
    along a new ``num_choices`` axis. When ``return_labels`` is True, dummy
    label tensors of the shape/dtype expected by the model head family
    (multiple choice, QA, sequence classification, or token-level LM heads)
    are added so the forward pass can compute a loss.

    Args:
        inputs_dict: mapping of input names to (mostly tensor) values; left unmodified.
        model_class: the concrete model class the inputs are being prepared for.
        return_labels: when True, inject dummy label tensors for the head type.

    Returns:
        A new dict ready to be fed to ``model_class``.
    """
    prepared = copy.deepcopy(inputs_dict)
    tester = self.model_tester

    is_multiple_choice = model_class in MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values()
    if is_multiple_choice:
        # Tile each (batch, seq)-shaped tensor to (batch, num_choices, seq);
        # scalars / 1-D values pass through untouched.
        def _tile(value):
            if isinstance(value, torch.Tensor) and value.ndim > 1:
                return value.unsqueeze(1).expand(-1, tester.num_choices, -1).contiguous()
            return value

        prepared = {name: _tile(value) for name, value in prepared.items()}

    if not return_labels:
        return prepared

    # All dummy labels share dtype long and live on the test device.
    long_kwargs = {"dtype": torch.long, "device": torch_device}

    if is_multiple_choice:
        # One label per example (choice index 1, matching the original torch.ones).
        prepared["labels"] = torch.ones(tester.batch_size, **long_kwargs)
    elif model_class in MODEL_FOR_QUESTION_ANSWERING_MAPPING.values():
        # Span heads need per-example start/end positions.
        prepared["start_positions"] = torch.zeros(tester.batch_size, **long_kwargs)
        prepared["end_positions"] = torch.zeros(tester.batch_size, **long_kwargs)
    elif model_class in MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.values():
        prepared["labels"] = torch.zeros(tester.batch_size, **long_kwargs)
    else:
        # Token-level heads (token classification and the LM families) take
        # one label per token: shape (batch_size, seq_length).
        token_level_models = [
            *MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.values(),
            *MODEL_FOR_CAUSAL_LM_MAPPING.values(),
            *MODEL_FOR_MASKED_LM_MAPPING.values(),
            *MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING.values(),
        ]
        if model_class in token_level_models:
            prepared["labels"] = torch.zeros(
                (tester.batch_size, tester.seq_length), **long_kwargs
            )
    return prepared
AutoModelForTokenClassification,
    AutoTokenizer,
    get_linear_schedule_with_warmup,
)
from utils_ner import convert_examples_to_features, get_labels, read_examples_from_file

# Prefer the SummaryWriter that ships with PyTorch; fall back to the standalone
# tensorboardX package when torch.utils.tensorboard is unavailable.
try:
    from torch.utils.tensorboard import SummaryWriter
except ImportError:
    from tensorboardX import SummaryWriter

logger = logging.getLogger(__name__)

# Config classes registered with a token-classification head (note: these are
# config classes, despite the "MODEL_CONFIG_CLASSES" name).
MODEL_CONFIG_CLASSES = list(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.keys())
# Short model-type identifiers (e.g. "bert") for argparse choices.
MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES)
# Flattened tuple of every known pretrained checkpoint name across all config classes.
ALL_MODELS = sum((tuple(conf.pretrained_config_archive_map.keys()) for conf in MODEL_CONFIG_CLASSES), ())

# argparse attribute names forwarded as keyword arguments to the tokenizer constructor.
TOKENIZER_ARGS = ["do_lower_case", "strip_accents", "keep_accents", "use_fast"]


def set_seed(args):
    """Seed Python's `random`, NumPy and torch (CPU and, if present, all GPUs).

    Args:
        args: parsed arguments providing `seed` (int) and `n_gpu` (int) attributes.
    """
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.n_gpu > 0:
        # Seed every visible CUDA device, not just the current one.
        torch.cuda.manual_seed_all(args.seed)
from transformers import CONFIG_MAPPING, MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, MODEL_NAMES_MAPPING, TOKENIZER_MAPPING
from transformers.convert_slow_tokenizer import SLOW_TO_FAST_CONVERTERS, BertConverter
from transformers.models.auto.modeling_auto import auto_class_factory

from .modeling.layoutlmv2 import (
    LayoutLMv2Config,
    LayoutLMv2ForTokenClassification,
    LayoutLMv2Tokenizer,
    LayoutLMv2TokenizerFast,
)

# Register LayoutLMv2 with the transformers Auto* machinery by mutating the
# library's global registries in place. This must run before any
# AutoModel/AutoTokenizer.from_pretrained call that should resolve "layoutlmv2".
CONFIG_MAPPING.update([("layoutlmv2", LayoutLMv2Config)])
MODEL_NAMES_MAPPING.update([("layoutlmv2", "LayoutLMv2")])
# (slow tokenizer, fast tokenizer) pair for this config class.
TOKENIZER_MAPPING.update([(LayoutLMv2Config, (LayoutLMv2Tokenizer, LayoutLMv2TokenizerFast))])
# Reuse the BERT slow->fast converter — presumably LayoutLMv2's slow tokenizer
# is BERT-compatible (WordPiece); verify against the tokenizer implementation.
SLOW_TO_FAST_CONVERTERS.update({"LayoutLMv2Tokenizer": BertConverter})
MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.update([
    (LayoutLMv2Config, LayoutLMv2ForTokenClassification)
])

# Rebuild the Auto class so it sees the mapping entry added above; this shadows
# the AutoModelForTokenClassification imported from transformers for this module.
AutoModelForTokenClassification = auto_class_factory(
    "AutoModelForTokenClassification", MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, head_doc="token classification"
)
MarkupLMTokenizer,
    MarkupLMForQuestionAnswering,
    MarkupLMForTokenClassification,
    MarkupLMTokenizerFast,
)

# Register MarkupLM with the transformers Auto* machinery by mutating the
# library's global registries in place. This must run before any Auto class
# lookup that should resolve MarkupLMConfig.
CONFIG_MAPPING.update(
    [
        ("markuplm", MarkupLMConfig),
    ]
)
MODEL_NAMES_MAPPING.update([("markuplm", "MarkupLM")])
# (slow tokenizer, fast tokenizer) pair for this config class.
TOKENIZER_MAPPING.update(
    [
        (MarkupLMConfig, (MarkupLMTokenizer, MarkupLMTokenizerFast)),
    ]
)
# Reuse the RoBERTa slow->fast converter — presumably MarkupLM's slow tokenizer
# is RoBERTa-compatible (byte-level BPE); verify against the tokenizer implementation.
SLOW_TO_FAST_CONVERTERS.update(
    {"MarkupLMTokenizer": RobertaConverter}
)
# Hook MarkupLM's task heads into the QA and token-classification Auto mappings.
MODEL_FOR_QUESTION_ANSWERING_MAPPING.update(
    [(MarkupLMConfig, MarkupLMForQuestionAnswering)]
)
MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING.update(
    [(MarkupLMConfig, MarkupLMForTokenClassification)]
)