Example #1
0
 def read_txt(self, file: str, number: int = -1) -> List[Instance]:
     """Read a whitespace-separated column file into a list of instances.

     Every non-blank line is split on whitespace; the first column is taken
     as the token and the last column as its label. A blank line (or a line
     holding only whitespace) ends the current sentence. The labels of each
     sentence are passed through ``convert_iobes`` before the ``Instance``
     is built.

     Two versions of each token are kept: ``ori_words`` holds the token as
     it appears in the file, ``words`` holds the token with every digit
     character replaced by ``'0'``.

     Args:
         file: Path of the file to read (opened as UTF-8).
         number: Stop as soon as this many sentences have been collected.
             The default ``-1`` never matches the sentence count, so the
             whole file is read.

     Returns:
         The sentences in file order, one ``Instance`` per sentence.
     """
     print(
         f"[Data Info] Reading file: {file}, labels will be converted to IOBES encoding"
     )
     print(
         "[Data Info] Modify src/data/ner_dataset.read_txt function if you have other requirements"
     )
     insts = []
     words = []
     ori_words = []
     labels = []
     with open(file, 'r', encoding='utf-8') as f:
         for line in tqdm(f.readlines()):
             line = line.rstrip()
             if line == "":
                 if not words:
                     # Leading or repeated blank lines: there is no pending
                     # sentence, so do not emit an empty instance.
                     continue
                 insts.append(
                     Instance(words=words,
                              ori_words=ori_words,
                              labels=convert_iobes(labels)))
                 words = []
                 ori_words = []
                 labels = []
                 if len(insts) == number:
                     break
                 continue
             ls = line.split()
             word, label = ls[0], ls[-1]
             ori_words.append(word)
             # Raw string: '\d' in a plain literal is an invalid escape sequence.
             words.append(re.sub(r'\d', '0', word))
             labels.append(label)
     # A file that does not end with a blank line still has a pending
     # sentence here. After an early ``break`` the buffers are already
     # empty, so the ``number`` limit is never exceeded.
     if words:
         insts.append(
             Instance(words=words,
                      ori_words=ori_words,
                      labels=convert_iobes(labels)))
     print("number of sentences: {}".format(len(insts)))
     return insts
Example #2
0
 def read_from_sentences(self, sents: List[List[str]]):
     """
     Build one ``Instance`` per pre-tokenized sentence.

     Example input:
     sents = [['word_a', 'word_b'], ['word_aaa', 'word_bccc', 'word_ccc']]

     Each sentence list is handed to ``Instance`` as both ``words`` and
     ``ori_words`` (the same list object); no ``labels`` argument is passed.
     The resulting instances are returned in input order.
     """
     return [Instance(words=tokens, ori_words=tokens) for tokens in sents]