@cached_property
def big_model(self):
    """Cached property means this code will only be executed once."""
    checkpoint_path = "microsoft/layoutlm-large-uncased"
    model = LayoutLMForMaskedLM.from_pretrained(checkpoint_path).to(
        torch_device
    )  # test whether AutoModel can determine your model_class from checkpoint name

    if torch_device == "cuda":
        model.half()

    return model
def create_and_check_for_masked_lm(
    self, config, input_ids, bbox, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
):
    model = LayoutLMForMaskedLM(config=config)
    model.to(torch_device)
    model.eval()
    result = model(
        input_ids,
        bbox,
        attention_mask=input_mask,
        token_type_ids=token_type_ids,
        labels=token_labels,
    )
    self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
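# A minimal standalone sketch (not part of the test suite) of the same masked-LM
# forward pass that create_and_check_for_masked_lm verifies, assuming the public
# "microsoft/layoutlm-base-uncased" checkpoint is available; the words and boxes
# below are illustrative only.
import torch
from transformers import LayoutLMForMaskedLM, LayoutLMTokenizer

tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
model = LayoutLMForMaskedLM.from_pretrained("microsoft/layoutlm-base-uncased")
model.eval()

words = ["Hello", "[MASK]"]
boxes = [[637, 773, 693, 782], [698, 773, 733, 782]]  # word boxes, normalized to 0-1000

encoding = tokenizer(" ".join(words), return_tensors="pt")

# expand word-level boxes to token-level boxes, with special boxes for [CLS] and [SEP]
token_boxes = [[0, 0, 0, 0]]
for word, box in zip(words, boxes):
    token_boxes.extend([box] * len(tokenizer.tokenize(word)))
token_boxes.append([1000, 1000, 1000, 1000])
bbox = torch.tensor([token_boxes])

with torch.no_grad():
    outputs = model(
        input_ids=encoding["input_ids"],
        bbox=bbox,
        attention_mask=encoding["attention_mask"],
        token_type_ids=encoding["token_type_ids"],
    )
print(outputs.logits.shape)  # (batch_size, seq_length, vocab_size)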
args = Args(args)
Path(args.output_dir).mkdir(parents=True, exist_ok=True)

# log to a file in the output directory and mirror the messages to the console
logging.basicConfig(
    filename=os.path.join(args.output_dir, "train.log"),
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.INFO,
)
logger.addHandler(logging.StreamHandler())

if not args.test_only:
    if args.load_pretrain:
        # start from a pre-trained LayoutLM checkpoint and its matching tokenizer
        model = LayoutLMForMaskedLM.from_pretrained(args.layoutlm_model, return_dict=True)
        tokenizer = LayoutLMTokenizer.from_pretrained(args.layoutlm_model)
        print('Loading pre-trained model from', args.layoutlm_model)
    else:
        # build the model from a config, optionally reusing a BERT tokenizer and its vocab size
        config = LayoutLMConfig.from_pretrained(args.model_name_or_path, return_dict=True)
        if args.bert_model is not None:
            tokenizer = AutoTokenizer.from_pretrained(args.bert_model)
            config.vocab_size = tokenizer.vocab_size
        model = LayoutLMForMaskedLM(config)
        if args.bert_model is None:
            tokenizer = LayoutLMTokenizer.from_pretrained(args.layoutlm_model, do_lower_case=True)
        else:
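# The training snippet above assumes an Args wrapper that exposes the option
# dictionary via attribute access (args.output_dir, args.test_only, ...); the
# real project may define it differently. A hypothetical minimal version, with
# illustrative option values only:
class Args:
    def __init__(self, options):
        # copy each key/value pair onto the instance: options["output_dir"] -> args.output_dir
        self.__dict__.update(options)


args = Args(
    {
        "output_dir": "out",
        "test_only": False,
        "load_pretrain": True,
        "layoutlm_model": "microsoft/layoutlm-base-uncased",
        "model_name_or_path": "microsoft/layoutlm-base-uncased",
        "bert_model": None,
    }
)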