Example #1
    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length],
                               self.vocab_size)

        # convert bbox to numpy since TF does not support item assignment
        bbox = ids_tensor([self.batch_size, self.seq_length, 4],
                          self.range_bbox).numpy()
        # Ensure that bbox is legal
        for i in range(bbox.shape[0]):
            for j in range(bbox.shape[1]):
                if bbox[i, j, 3] < bbox[i, j, 1]:
                    t = bbox[i, j, 3]
                    bbox[i, j, 3] = bbox[i, j, 1]
                    bbox[i, j, 1] = t
                if bbox[i, j, 2] < bbox[i, j, 0]:
                    t = bbox[i, j, 2]
                    bbox[i, j, 2] = bbox[i, j, 0]
                    bbox[i, j, 0] = t
        bbox = tf.convert_to_tensor(bbox)

        input_mask = None
        if self.use_input_mask:
            input_mask = random_attention_mask(
                [self.batch_size, self.seq_length])

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length],
                                        self.type_vocab_size)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size],
                                         self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length],
                                      self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = LayoutLMConfig(
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            intermediate_size=self.intermediate_size,
            hidden_act=self.hidden_act,
            hidden_dropout_prob=self.hidden_dropout_prob,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings,
            type_vocab_size=self.type_vocab_size,
            initializer_range=self.initializer_range,
        )

        return config, input_ids, bbox, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
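
The element-wise swap loop in prepare_config_and_inputs can also be vectorized. A minimal sketch, assuming bbox is a NumPy array of shape (batch, seq, 4) laid out as (x0, y0, x1, y1); make_bbox_legal is a hypothetical helper, not part of the original test:

import numpy as np

def make_bbox_legal(bbox):
    # Sort each (x0, x1) and each (y0, y1) coordinate pair so that
    # bbox[..., 0] <= bbox[..., 2] and bbox[..., 1] <= bbox[..., 3],
    # mirroring the two conditional swaps in the loop above.
    legal = bbox.copy()
    legal[..., 0::2] = np.sort(bbox[..., 0::2], axis=-1)  # x0, x1
    legal[..., 1::2] = np.sort(bbox[..., 1::2], axis=-1)  # y0, y1
    return legal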
Example #2
    def get_config(self):
        return LayoutLMConfig(
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            intermediate_size=self.intermediate_size,
            hidden_act=self.hidden_act,
            hidden_dropout_prob=self.hidden_dropout_prob,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings,
            type_vocab_size=self.type_vocab_size,
            initializer_range=self.initializer_range,
        )
Example #3
    def __init__(self,
                 image_path,
                 model_path,
                 config_path,
                 num_labels=13,
                 args=None):
        super(LayoutLM, self).__init__()
        self.image = openImage(image_path)
        self.args = args
        self.tokenizer = LayoutLMTokenizer.from_pretrained(
            "microsoft/layoutlm-base-uncased")

        config = LayoutLMConfig.from_pretrained(config_path)
        self.model = LayoutLMForTokenClassification.from_pretrained(
            model_path, config=config)
        self.model.to(device)

        self.input_ids = None
        self.attention_mask = None
        self.token_type_ids = None
        self.bboxes = None
        self.token_actual_boxes = None
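
A minimal sketch of how such a wrapper might expose inference; predict is a hypothetical method name, torch is assumed imported, and the preprocessing that fills input_ids, bboxes, and the masks is assumed to have run:

    def predict(self):
        # Hypothetical method; assumes the attributes initialized to
        # None above have been populated by a preprocessing step.
        with torch.no_grad():
            outputs = self.model(input_ids=self.input_ids,
                                 bbox=self.bboxes,
                                 attention_mask=self.attention_mask,
                                 token_type_ids=self.token_type_ids)
        # LayoutLMForTokenClassification returns logits of shape
        # (batch, seq_len, num_labels); take the argmax per token.
        return outputs.logits.argmax(-1)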
Example #4
def train(dataset_path,
          loader_type='combined_loader',
          batch_size=4,
          num_workers=16,
          dataset_rand_seq=True,
          dataset_rand_seq_prob=0.5,
          dataset_exlude_image_mask=True,
          state_dict_path=None,
          weight_path='weights/extract/',
          max_epoch=5,
          lr=0.001,
          valcheck_interval=2000,
          num_gpus=1,
          log_freq=100,
          resume_checkpoint_path=None,
          checkpoint_saved_path="checkpoints/v3/",
          logs_path="logs/v3/",
          prefix_name='layoutlm-v2',
          manual_seed=1261):

    logging.basicConfig(level=logging.INFO)

    # load tokenizer
    logging.info("Load BertTokenizer with indobenchmark/indobert-base-p2")
    tokenizer = BertTokenizer.from_pretrained(
        "indobenchmark/indobert-base-p2",
        do_lower_case=True,
        cache_dir=None,
    )

    path = dataset_path
    if loader_type == 'combined_loader':
        logging.info(f"Load Combined Loader with path {dataset_path}")
        train_loader, valid_loader = loader.get_loader(
            path,
            tokenizer=tokenizer,
            batch_size=batch_size,
            num_workers=num_workers,
            rand_seq=dataset_rand_seq,
            rand_seq_prob=dataset_rand_seq_prob,
            excluce_image_mask=dataset_exlude_image_mask)
    else:
        logging.info(f"Load Base Loader with path {dataset_path}")
        train_loader, valid_loader = loader.get_base_loader(
            path,
            tokenizer=tokenizer,
            batch_size=batch_size,
            num_workers=num_workers,
            rand_seq=dataset_rand_seq,
            rand_seq_prob=dataset_rand_seq_prob,
            excluce_image_mask=dataset_exlude_image_mask)

    logging.info(f"Load LayoutLMConfig for LayoutLMForTokenClassification")
    config = LayoutLMConfig.from_pretrained("microsoft/layoutlm-base-uncased",
                                            num_labels=label_cfg.num_labels,
                                            cache_dir=None)

    logging.info(
        "Load LayoutLMForTokenClassification from_pretrained microsoft/layoutlm-base-uncased"
    )
    model = LayoutLMForTokenClassification.from_pretrained(
        'microsoft/layoutlm-base-uncased',
        config=config,
        #     return_dict=True
    )
    model.resize_token_embeddings(len(tokenizer))

    if state_dict_path:
        logging.info(f"Load state_dict from path {state_dict_path}")
        state_dict = torch.load(state_dict_path,
                                map_location=torch.device("cpu"))
        model.load_state_dict(state_dict)
        # model = model.to(device)

    # prepare the task
    task = TaskLayoutLM(model, tokenizer)

    # DEFAULTS used by the Trainer
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        dirpath=checkpoint_saved_path,
        save_top_k=1,
        verbose=True,
        monitor='val_loss',
        mode='min',
        prefix=prefix_name)

    tb_logger = pl_loggers.TensorBoardLogger(logs_path)
    pl.trainer.seed_everything(manual_seed)

    trainer = pl.Trainer(
        weights_summary="top",
        max_epochs=max_epoch,
        val_check_interval=valcheck_interval,
        gpus=num_gpus,
        log_every_n_steps=log_freq,
        deterministic=True,
        benchmark=True,
        logger=tb_logger,
        checkpoint_callback=checkpoint_callback,
        resume_from_checkpoint=resume_checkpoint_path,
    )
    trainer.fit(task, train_loader, valid_loader)
    # metrics = trainer.test(task, valid_loader)
    metrics = trainer.logged_metrics

    # prepare to save the result
    # print(task._results.keys())
    vacc, vloss = metrics['val_acc'], metrics['val_loss']
    # tacc, tloss = metrics['trn_acc'], metrics['trn_loss']
    last_epoch = metrics['epoch']

    dirname = Path(dataset_path).name
    filename = f'layoutlm_v2_ktp_{dirname}_vacc{vacc:.4}_vloss{vloss:.4}_epoch{last_epoch}_cli.pth'
    saved_filename = str(Path(weight_path).joinpath(filename))

    logging.info(f"Prepare to save training results to path {saved_filename}")
    torch.save(model.state_dict(), saved_filename)
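
The ModelCheckpoint monitor and the metrics['val_acc'] / metrics['val_loss'] lookups above only work if TaskLayoutLM logs those exact keys. A minimal sketch of that contract (TaskLayoutLM's real body is not shown here; _shared_step is a hypothetical helper):

import pytorch_lightning as pl

class TaskLayoutLM(pl.LightningModule):
    def validation_step(self, batch, batch_idx):
        # The keys logged here must match monitor='val_loss' in the
        # checkpoint callback and the names read back from
        # trainer.logged_metrics after fit().
        loss, acc = self._shared_step(batch)  # hypothetical helper
        self.log("val_loss", loss)
        self.log("val_acc", acc)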
Example #5
logging.basicConfig(
    filename=os.path.join(args.output_dir, "train.log"),
    format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.INFO,
)

logger.addHandler(logging.StreamHandler())

if not args.test_only:
    if args.load_pretrain:
        model = LayoutLMForMaskedLM.from_pretrained(args.layoutlm_model,
                                                    return_dict=True)
        tokenizer = LayoutLMTokenizer.from_pretrained(args.layoutlm_model)
        print('Loading pre-trained model from', args.layoutlm_model)
    else:
        config = LayoutLMConfig.from_pretrained(args.model_name_or_path,
                                                return_dict=True)
        if args.bert_model is not None:
            tokenizer = AutoTokenizer.from_pretrained(args.bert_model)
            config.vocab_size = tokenizer.vocab_size

        model = LayoutLMForMaskedLM(config)

    if args.bert_model is None:
        tokenizer = LayoutLMTokenizer.from_pretrained(args.layoutlm_model,
                                                      do_lower_case=True)
    else:
        bert = BertModel.from_pretrained(args.bert_model)
        model.layoutlm.embeddings.word_embeddings = \
            copy.deepcopy(bert.embeddings.word_embeddings)
        del bert
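
Since the BERT word embeddings are copied in verbatim, the embedding matrix must line up with the tokenizer that will feed the model. A small sanity check one could add after the copy (a sketch, not part of the original):

# The copied embedding table must cover the tokenizer's vocabulary.
emb = model.layoutlm.embeddings.word_embeddings
assert emb.num_embeddings == tokenizer.vocab_size, (
    emb.num_embeddings, tokenizer.vocab_size)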
Example #6
    def _load_config(self):
        self.config = LayoutLMConfig.from_pretrained(
            "microsoft/layoutlm-base-uncased",
            num_labels=label_cfg.num_labels,
            cache_dir=None)
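
A hypothetical companion method showing how the config loaded above is typically consumed; the method name _load_model and the checkpoint choice are assumptions, mirroring the earlier examples:

    def _load_model(self):
        # Build the token classifier from the same checkpoint and the
        # config prepared by _load_config above.
        self.model = LayoutLMForTokenClassification.from_pretrained(
            "microsoft/layoutlm-base-uncased", config=self.config)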