Code example #1
    def __init__(self, hparams):
        super().__init__()
        self.hparams = hparams

        # Optional hyperparameters default to False when absent;
        # tgt_seq_len falls back to seq_len.
        self.use_radam = getattr(self.hparams, "use_radam", False)
        self.cnnt5_only = getattr(self.hparams, "cnnt5_only", False)
        self.hparams.tgt_seq_len = getattr(self.hparams, "tgt_seq_len",
                                           self.hparams.seq_len)

        if not self.cnnt5_only:
            if not self.hparams.t5_only:
                print("Initializing LayoutLM...")
                self.encoder = LayoutLMModel.from_pretrained(
                    self.hparams.layoutlm_str)
                if self.hparams.freeze_layoutlm:
                    for param in tqdm(self.encoder.parameters(),
                                      desc="Freezing LayoutLM...",
                                      leave=True):
                        param.requires_grad = False

            print("Initializing T5...")
            self.t5 = T5ForConditionalGeneration.from_pretrained(
                self.hparams.t5_str)
            self.use_llm_emb = getattr(self.hparams, "llm_emb", False)
            if self.use_llm_emb:
                print("Initializing layoutlm embeddings")
                self.llm_emb = LayoutLMEmbeddings(
                    LayoutLMModel.from_pretrained(
                        self.hparams.layoutlm_str).config)

        if not self.hparams.no_image:
            print("Using images, CNNT5 small initialized.")
            self.cnnt5 = CNNT5({
                "t5": "t5-small",
                "pre_train": False,
                "initial_ckpt":
                "models/wikipedia_pre_train_continue-epoch=1-val_exact_match=0.58-val_f1=0.98.ckpt",
                "seq_len": self.hparams.seq_len,
                "tgt_seq_len": self.hparams.tgt_seq_len
            })
            if self.cnnt5_only:
                print("Fine-tuning CNNT5.")
            else:
                for param in tqdm(self.cnnt5.parameters(),
                                  desc="Freezing CNNT5 as an image embedding extractor...",
                                  leave=True):
                    param.requires_grad = False
                self.adapt_cnnt5_features = nn.Linear(512, 768)

        if self.hparams.t5_only:
            self.tokenizer = T5Tokenizer.from_pretrained(self.hparams.t5_str)
        elif self.cnnt5_only:
            self.tokenizer = self.cnnt5.tokenizer
        else:
            self.tokenizer = LayoutLMTokenizer.from_pretrained(
                self.hparams.layoutlm_str)
        self.detokenizer = T5Tokenizer.from_pretrained(self.hparams.t5_str)
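For orientation, a minimal sketch of how such a module might be constructed. The class name LayoutLMT5 and the use of argparse.Namespace for the hyperparameters are assumptions for illustration, not taken from the source:

from argparse import Namespace

# Hypothetical hyperparameters; only attributes the __init__ above reads are set.
hparams = Namespace(
    t5_str="t5-base",
    layoutlm_str="microsoft/layoutlm-base-uncased",
    t5_only=False,
    cnnt5_only=False,
    freeze_layoutlm=True,
    no_image=True,  # skip the CNNT5 image branch
    seq_len=512,
)
model = LayoutLMT5(hparams)  # hypothetical class name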
Code example #2
 def _test_TFLayoutLM(self, size, large=False):
     from transformers import LayoutLMTokenizer, TFLayoutLMModel
     tokenizer = LayoutLMTokenizer.from_pretrained(size)
     model = TFLayoutLMModel.from_pretrained(size)
     words = ["Hello", "world"]
     normalized_word_boxes = [[637, 773, 693, 782], [698, 773, 733, 782]]
     token_boxes = []
     for word, box in zip(words, normalized_word_boxes):
         word_tokens = tokenizer.tokenize(word)
         token_boxes.extend([box] * len(word_tokens))
     # add bounding boxes of cls + sep tokens
     token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]
     input_dict = tokenizer(' '.join(words), return_tensors="tf")
     spec, input_dict = self.spec_and_pad(input_dict)
     outputs = ["last_hidden_state"]
     self.run_test(model, input_dict, input_signature=spec, outputs=outputs, large=large)
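Note that token_boxes is built above but never fed to the model; input_dict carries only the tokenizer's text inputs. A hedged sketch of how the boxes could be attached before the spec/pad step, assuming the unpadded sequence length equals len(token_boxes):

import tensorflow as tf

input_dict["bbox"] = tf.constant([token_boxes])  # shape: (1, num_tokens, 4)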
Code example #3
    def __init__(self,
                 image_path,
                 model_path,
                 config_path,
                 num_labels=13,
                 args=None):
        super(LayoutLM, self).__init__()
        self.image = openImage(image_path)
        self.args = args
        self.tokenizer = LayoutLMTokenizer.from_pretrained(
            "microsoft/layoutlm-base-uncased")

        config = LayoutLMConfig.from_pretrained(config_path)
        self.model = LayoutLMForTokenClassification.from_pretrained(
            model_path, config=config)
        self.model.to(device)

        # Encoded inputs, populated later during preprocessing.
        self.input_ids = None
        self.attention_mask = None
        self.token_type_ids = None
        self.bboxes = None
        self.token_actual_boxes = None
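A minimal usage sketch for this wrapper; the paths are placeholders and device is assumed to be defined at module scope:

import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

layoutlm = LayoutLM(image_path="page.png",
                    model_path="path/to/finetuned_layoutlm",
                    config_path="path/to/config.json",
                    num_labels=13)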
Code example #4
 def get_tokenizer(self, **kwargs):
     return LayoutLMTokenizer.from_pretrained(self.tmpdirname, **kwargs)
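Within the same test class, this helper rebuilds the tokenizer from the temporary checkpoint directory, e.g.:

tokenizer = self.get_tokenizer(do_lower_case=True)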
Code example #5
File: run_mvlm.py  Project: thanhhau097/unilm
Path(args.output_dir).mkdir(parents=True, exist_ok=True)

logging.basicConfig(
    filename=os.path.join(args.output_dir, "train.log"),
    format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    level=logging.INFO,
)

logger.addHandler(logging.StreamHandler())

if not args.test_only:
    if args.load_pretrain:
        model = LayoutLMForMaskedLM.from_pretrained(args.layoutlm_model,
                                                    return_dict=True)
        tokenizer = LayoutLMTokenizer.from_pretrained(args.layoutlm_model)
        print('Loading pre-trained model from', args.layoutlm_model)
    else:
        config = LayoutLMConfig.from_pretrained(args.model_name_or_path,
                                                return_dict=True)
        if args.bert_model is not None:
            tokenizer = AutoTokenizer.from_pretrained(args.bert_model)
            config.vocab_size = tokenizer.vocab_size

        model = LayoutLMForMaskedLM(config)

    if args.bert_model is None:
        tokenizer = LayoutLMTokenizer.from_pretrained(args.layoutlm_model,
                                                      do_lower_case=True)
    else:
        bert = BertModel.from_pretrained(args.bert_model)
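After this setup, a masked-LM training step would look roughly as follows; the input tensors are illustrative, and labels uses the standard convention of -100 at positions excluded from the loss:

outputs = model(input_ids=input_ids,
                bbox=bbox,
                attention_mask=attention_mask,
                labels=labels)
loss = outputs.loss  # available since the config was created with return_dict=True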
Code example #6
            x, y, w, h = tuple(row)  # the row comes in (left, top, width, height) format
            actual_box = [x, y, x+w, y+h]  # we turn it into (left, top, left+width, top+height) to get the actual box
            actual_boxes.append(actual_box)

        # normalize the bounding boxes
        boxes = []
        for box in actual_boxes:
            boxes.append(normalize_box(box, width, height))
        
        # add as extra columns 
        assert len(words) == len(boxes)
        example['words'] = words
        example['bbox'] = boxes
        return example
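The normalize_box helper used above is not shown in this fragment; the definition commonly paired with LayoutLM scales pixel coordinates into the 0-1000 range the model expects:

def normalize_box(box, width, height):
    # Scale (left, top, right, bottom) pixel coordinates to 0-1000.
    return [
        int(1000 * (box[0] / width)),
        int(1000 * (box[1] / height)),
        int(1000 * (box[2] / width)),
        int(1000 * (box[3] / height)),
    ]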

tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")

def encode_example(example, max_seq_length=512, pad_token_box=[0, 0, 0, 0]):
    words = example['words']
    normalized_word_boxes = example['bbox']

    assert len(words) == len(normalized_word_boxes)

    # Repeat each word-level box once per subword token.
    token_boxes = []
    for word, box in zip(words, normalized_word_boxes):
        word_tokens = tokenizer.tokenize(word)
        token_boxes.extend([box] * len(word_tokens))

    # Truncate token_boxes, leaving room for the [CLS] and [SEP] boxes.
    special_tokens_count = 2
    if len(token_boxes) > max_seq_length - special_tokens_count:
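        # The original fragment cuts off here; the following continuation is a
        # hedged sketch based on the standard LayoutLM preprocessing recipe,
        # not verbatim source.
        token_boxes = token_boxes[:(max_seq_length - special_tokens_count)]
    # add the bounding boxes of the [CLS] and [SEP] tokens
    token_boxes = [[0, 0, 0, 0]] + token_boxes + [[1000, 1000, 1000, 1000]]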