def run_cli_loop(ner):
    """Read text interactively and print the named entities found in it.

    Prompts for one line of text at a time. The line is stripped and every
    double quote is replaced with a single quote before tokenization. The
    loop ends on an empty line or when standard input reaches end-of-file.

    For each annotation the output line has the form
    ``[begin:end] <annotation> <covered text>``, where ``begin``/``end`` are
    the offsets returned by ``align_tokens_to_text`` for the first and last
    token of the annotation.

    Args:
        ner: object exposing ``process_sentence(tokens)``; its result is
            passed (wrapped in a one-element list) to ``wrap_annotations``.
    """
    # Project-local helpers are imported at call time.
    from process_poleval import align_tokens_to_text
    from utils import wrap_annotations

    while True:
        try:
            raw = input("Enter text to process: ")
        except EOFError:
            # stdin was closed (Ctrl-D or an exhausted pipe): finish the
            # prompt line and fall through to the normal "empty input" exit
            # instead of crashing with a traceback.
            print()
            raw = ""

        text = raw.strip().replace("\"", "'")
        if not text:
            print("closing...")
            break

        # A failure on one input must not end the interactive session, so
        # any processing error is reported and the loop continues.
        try:
            tokens = word_tokenize(text)
            labels = ner.process_sentence(tokens)
            offsets = align_tokens_to_text([tokens], text)
            for an in wrap_annotations([labels]):
                begin = offsets[an.token_ids[0]][0]
                end = offsets[an.token_ids[-1]][1]
                orth = text[begin:end]
                print("[%3s:%3s] %-20s %s" % (begin, end, an.annotation, orth))
        except Exception as e:
            print("Failed to process the text due the following error: %s" % e)
def get_poleval_dict(doc_id, text, sentences, labels):
    """Build the PolEval result dictionary for a single document.

    The labels are grouped into annotations with ``wrap_annotations`` and
    the sentences are aligned to ``text`` with ``align_tokens_to_text``.
    Each annotation becomes one answer line of the form
    ``<annotation> <begin> <end>\\t<covered text>``, with every ``-`` in the
    annotation name replaced by ``_``.

    Args:
        doc_id: value stored under the ``'id'`` key.
        text: document text; answer offsets index into this string.
        sentences: passed to ``align_tokens_to_text`` together with ``text``.
        labels: passed to ``wrap_annotations``.

    Returns:
        dict with keys ``'text'``, ``'id'`` and ``'answers'``; ``'answers'``
        is the answer lines joined with newlines.
    """
    entities = wrap_annotations(labels)
    spans = align_tokens_to_text(sentences, text)

    def render(entity):
        # Start offset of the first token, end offset of the last token.
        start = spans[entity.token_ids[0]][0]
        stop = spans[entity.token_ids[-1]][1]
        covered = text[start:stop]
        name = entity.annotation.replace("-", "_")
        return "%s %d %d\t%s" % (name, start, stop, covered)

    result = {}
    result['text'] = text
    result['id'] = doc_id
    result['answers'] = "\n".join(map(render, entities))
    return result
# Command-line entry point: read a text file, run tokenization and NER on it,
# and print every recognised annotation with its character offsets.
parser = argparse.ArgumentParser(
    description='Process IOB file, recognize NE and save the output to another IOB file.')
parser.add_argument('-i', required=True, metavar='PATH', help='path to a plain text')
parser.add_argument('-m', required=False, metavar='NAME', help='name of a model pack')
args = parser.parse_args()

try:
    print("Loading the tokenization model ...")
    nltk.download('punkt')

    print("Loading the NER model ...")
    # args.m is None when -m is omitted; it is passed through unchanged.
    model = load_pretrained_model(args.m)
    ner = PolDeepNer(model)
    print("ready.")

    # Close the input file deterministically instead of leaking the handle.
    with codecs.open(args.i, "r", "utf8") as source:
        # Lines keep their line endings, so each line break is followed by
        # the joining space; the printed offsets refer to this joined text.
        text = " ".join(source.readlines())

    tokens = word_tokenize(text)
    labels = ner.process_sentence(tokens)
    offsets = align_tokens_to_text([tokens], text)

    for an in wrap_annotations([labels]):
        # Span runs from the start of the first token to the end of the last.
        begin = offsets[an.token_ids[0]][0]
        end = offsets[an.token_ids[-1]][1]
        orth = text[begin:end]
        print("[%3s:%3s] %-20s %s" % (begin, end, an.annotation, orth))

except Exception as e:
    # Top-level boundary of the script: report the failure as a single line.
    print("[ERROR] %s" % str(e))