def prepare_config_and_inputs(self):
    """Build a LayoutLMConfig plus a full set of random model inputs.

    Returns:
        Tuple ``(config, input_ids, bbox, token_type_ids, input_mask,
        sequence_labels, token_labels, choice_labels)``.  The optional
        entries are ``None`` when the corresponding ``use_*`` flag on the
        tester is off.
    """
    import numpy as np  # local: only needed for the vectorized bbox fix-up

    input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

    # convert bbox to numpy since TF does not support item assignment
    bbox = ids_tensor([self.batch_size, self.seq_length, 4], self.range_bbox).numpy()
    # Ensure that each bbox is legal (coord at index 1 <= coord at index 3,
    # and index 0 <= index 2).  Sorting each coordinate pair along the last
    # axis is equivalent to the per-element swap the loop version performed,
    # but runs vectorized instead of O(batch*seq) Python iterations.
    bbox[..., [1, 3]] = np.sort(bbox[..., [1, 3]], axis=-1)
    bbox[..., [0, 2]] = np.sort(bbox[..., [0, 2]], axis=-1)
    bbox = tf.convert_to_tensor(bbox)

    input_mask = None
    if self.use_input_mask:
        input_mask = random_attention_mask([self.batch_size, self.seq_length])

    token_type_ids = None
    if self.use_token_type_ids:
        token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

    sequence_labels = None
    token_labels = None
    choice_labels = None
    if self.use_labels:
        sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
        token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
        choice_labels = ids_tensor([self.batch_size], self.num_choices)

    # Delegate to get_config() instead of duplicating the field-for-field
    # identical LayoutLMConfig construction it already performs.
    config = self.get_config()

    return config, input_ids, bbox, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
def get_config(self):
    """Assemble a LayoutLMConfig mirroring this tester's hyperparameters."""
    config_kwargs = {
        "vocab_size": self.vocab_size,
        "hidden_size": self.hidden_size,
        "num_hidden_layers": self.num_hidden_layers,
        "num_attention_heads": self.num_attention_heads,
        "intermediate_size": self.intermediate_size,
        "hidden_act": self.hidden_act,
        "hidden_dropout_prob": self.hidden_dropout_prob,
        "attention_probs_dropout_prob": self.attention_probs_dropout_prob,
        "max_position_embeddings": self.max_position_embeddings,
        "type_vocab_size": self.type_vocab_size,
        "initializer_range": self.initializer_range,
    }
    return LayoutLMConfig(**config_kwargs)
def __init__(self, image_path, model_path, config_path, num_labels=13, args=None):
    """Load the image, tokenizer, and a fine-tuned LayoutLM token classifier.

    Args:
        image_path: Path of the document image to open.
        model_path: Path/checkpoint for LayoutLMForTokenClassification weights.
        config_path: Path of the LayoutLMConfig to load.
        num_labels: Kept for backward compatibility; not read by this
            constructor (the label count comes from the loaded config).
        args: Optional extra options object, stored as-is on the instance.
    """
    # Modern zero-arg super() — equivalent to super(LayoutLM, self).__init__()
    # but not tied to the class name.
    super().__init__()
    self.image = openImage(image_path)
    self.args = args
    self.tokenizer = LayoutLMTokenizer.from_pretrained(
        "microsoft/layoutlm-base-uncased")
    config = LayoutLMConfig.from_pretrained(config_path)
    self.model = LayoutLMForTokenClassification.from_pretrained(
        model_path, config=config)
    # NOTE(review): `device` is a module-level global — confirm it is defined
    # before this class is instantiated.
    self.model.to(device)
    # Tokenized-input state, populated later by the preprocessing step.
    self.input_ids = None
    self.attention_mask = None
    self.token_type_ids = None
    self.bboxes = None
    self.token_actual_boxes = None
def train(dataset_path, loader_type='combined_loader', batch_size=4,
          num_workers=16, dataset_rand_seq=True, dataset_rand_seq_prob=0.5,
          dataset_exlude_image_mask=True, state_dict_path=None,
          weight_path='weights/extract/', max_epoch=5, lr=0.001,
          valcheck_interval=2000, num_gpus=1, log_freq=100,
          resume_checkpoint_path=None, checkpoint_saved_path="checkpoints/v3/",
          logs_path="logs/v3/", prefix_name='layoutlm-v2', manual_seed=1261):
    """Fine-tune LayoutLM for token classification with pytorch-lightning.

    Loads an Indonesian BERT tokenizer, builds train/valid loaders from
    ``dataset_path``, fine-tunes ``microsoft/layoutlm-base-uncased``, and
    saves the final ``state_dict`` (named from the validation metrics) under
    ``weight_path``.

    Args:
        dataset_path: Root directory of the dataset.
        loader_type: 'combined_loader' to use ``loader.get_loader``; any
            other value falls back to ``loader.get_base_loader``.
        state_dict_path: Optional warm-start weights to load before training.
        resume_checkpoint_path: Optional lightning checkpoint to resume from.
        (Remaining parameters are forwarded to the loaders / Trainer /
        callbacks as named.)
    """
    logging.basicConfig(level=logging.INFO)

    # Load tokenizer.
    logging.info("Load BertTokenizer with indobenchmark/indobert-base-p2")
    tokenizer = BertTokenizer.from_pretrained(
        "indobenchmark/indobert-base-p2",
        do_lower_case=True,
        cache_dir=None,
    )

    # Both loader factories share the same signature — pick one, call once,
    # instead of duplicating the whole keyword list in each branch.
    if loader_type == 'combined_loader':
        logging.info("Load Combined Loader with path %s", dataset_path)
        loader_factory = loader.get_loader
    else:
        logging.info("Load Base Loader with path %s", dataset_path)
        loader_factory = loader.get_base_loader
    train_loader, valid_loader = loader_factory(
        dataset_path,
        tokenizer=tokenizer,
        batch_size=batch_size,
        num_workers=num_workers,
        rand_seq=dataset_rand_seq,
        rand_seq_prob=dataset_rand_seq_prob,
        # NOTE(review): 'excluce_image_mask' is the loader's own (misspelled)
        # keyword — renaming it here would break the call; fix it in the
        # loader module first.
        excluce_image_mask=dataset_exlude_image_mask)

    logging.info("Load LayoutLMConfig for LayoutLMForTokenClassification")
    config = LayoutLMConfig.from_pretrained("microsoft/layoutlm-base-uncased",
                                            num_labels=label_cfg.num_labels,
                                            cache_dir=None)

    logging.info(
        "Load LayoutLMForTokenClassification from_pretrained microsoft/layoutlm-base-uncased"
    )
    model = LayoutLMForTokenClassification.from_pretrained(
        'microsoft/layoutlm-base-uncased',
        config=config,
        # return_dict=True
    )
    # The Indonesian tokenizer's vocab differs from the pretrained one.
    model.resize_token_embeddings(len(tokenizer))

    if state_dict_path:
        logging.info("Load state_dict from path %s", state_dict_path)
        state_dict = torch.load(state_dict_path,
                                map_location=torch.device("cpu"))
        model.load_state_dict(state_dict)

    model = model.to(device)

    # Prepare the lightning task wrapper.
    task = TaskLayoutLM(model, tokenizer)

    # DEFAULTS used by the Trainer.
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        dirpath=checkpoint_saved_path,
        save_top_k=1,
        verbose=True,
        monitor='val_loss',
        mode='min',
        prefix=prefix_name)
    tb_logger = pl_loggers.TensorBoardLogger(logs_path)
    pl.trainer.seed_everything(manual_seed)
    trainer = pl.Trainer(
        weights_summary="top",
        max_epochs=max_epoch,
        val_check_interval=valcheck_interval,
        gpus=num_gpus,
        log_every_n_steps=log_freq,
        deterministic=True,
        benchmark=True,
        logger=tb_logger,
        checkpoint_callback=checkpoint_callback,
        resume_from_checkpoint=resume_checkpoint_path,
    )

    trainer.fit(task, train_loader, valid_loader)

    # metrics = trainer.test(task, valid_loader)
    metrics = trainer.logged_metrics

    # Name the saved weights after the final validation metrics.
    vacc, vloss = metrics['val_acc'], metrics['val_loss']
    last_epoch = metrics['epoch']
    dirname = Path(dataset_path).name
    filename = f'layoutlm_v2_ktp_{dirname}_vacc{vacc:.4}_vloss{vloss:.4}_epoch{last_epoch}_cli.pth'
    saved_filename = str(Path(weight_path).joinpath(filename))

    logging.info("Prepare to save training results to path %s", saved_filename)
    torch.save(model.state_dict(), saved_filename)
# NOTE(review): this chunk starts mid-call — the lines below are the trailing
# keyword arguments of a logging.basicConfig(...) call whose opening
# parenthesis is outside this view.
filename=os.path.join(args.output_dir, "train.log"),
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
datefmt="%m/%d/%Y %H:%M:%S",
level=logging.INFO,
)
# Echo log records to the console in addition to the file handler above.
logger.addHandler(logging.StreamHandler())
if not args.test_only:
    if args.load_pretrain:
        # Warm start: load both model and tokenizer from a LayoutLM checkpoint.
        model = LayoutLMForMaskedLM.from_pretrained(args.layoutlm_model, return_dict=True)
        tokenizer = LayoutLMTokenizer.from_pretrained(args.layoutlm_model)
        print('Loading pre-trained model from', args.layoutlm_model)
    else:
        # Cold start: build the model from a config only.
        config = LayoutLMConfig.from_pretrained(args.model_name_or_path, return_dict=True)
        if args.bert_model is not None:
            # Size the vocab to match the external BERT tokenizer before
            # constructing the model.
            tokenizer = AutoTokenizer.from_pretrained(args.bert_model)
            config.vocab_size = tokenizer.vocab_size
        model = LayoutLMForMaskedLM(config)
        if args.bert_model is None:
            tokenizer = LayoutLMTokenizer.from_pretrained(args.layoutlm_model, do_lower_case=True)
        else:
            # Seed LayoutLM's word embeddings with the BERT model's weights,
            # then drop the BERT model to free memory.
            bert = BertModel.from_pretrained(args.bert_model)
            model.layoutlm.embeddings.word_embeddings = \
                copy.deepcopy(bert.embeddings.word_embeddings)
            del bert
def _load_config(self):
    """Fetch the base LayoutLM configuration, sized for our label set."""
    config_kwargs = {
        "num_labels": label_cfg.num_labels,
        "cache_dir": None,
    }
    self.config = LayoutLMConfig.from_pretrained(
        "microsoft/layoutlm-base-uncased", **config_kwargs)