def main(args):
    """Evaluate a NER model against an annotated IOB file and print a nested report.

    Args:
        args: parsed CLI arguments; reads args.model, args.embeddings and
            args.input (path to the IOB evaluation file).
    """
    from poldeepner import PolDeepNer
    from pretrained import load_pretrained_model
    from utils import NestedReport
    from wrapper import Sequence
    try:
        # With explicit embeddings, load a single Sequence model wrapped in a
        # list; otherwise fall back to a named pretrained model pack.
        if args.embeddings:
            model = [Sequence.load(args.model, args.embeddings)]
        else:
            model = load_pretrained_model(args.model)
        ner = PolDeepNer(model)
        label_true, label_pred = [], []
        # NOTE(review): `iob` is not imported in this function — presumably a
        # module-level import elsewhere in the file; confirm.
        x_test, y_test, _ = iob.load_data_and_labels(args.input)
        total = len(y_test)
        # enumerate replaces the original hand-maintained counter; progress is
        # reported every 1000 sentences, starting at 0, exactly as before.
        for n, (x, y) in enumerate(zip(x_test, y_test)):
            label_true.append(y)
            label_pred.append(ner.process_sentence(x))
            if n % 1000 == 0:
                print("Sentences processed: %d / %d" % (n, total))
        # len(label_true) is the exact number of sentences processed, which is
        # what the original post-increment counter held here (also correct for
        # an empty test set).
        print("Sentences processed: %d / %d" % (len(label_true), total))
        report = NestedReport(label_true, label_pred)
        print(str(report))
    except Exception as e:
        # Top-level CLI boundary: report the failure instead of tracebacking.
        print("[ERROR] %s" % str(e))
def main(argv=None):
    """Load the pretrained NER model and annotate every file in the index.

    Args:
        argv: optional argument vector, forwarded to get_args; reads the
            parsed options' .m (model name) and .fileindex attributes.
    """
    opts = get_args(argv)
    print("\nLoading the NER model ...")
    tagger = PolDeepNer(load_pretrained_model(opts.m))
    print("NER model loaded.")
    print("Annotating ...")
    annotate(opts.fileindex, tagger)
    print("Annotation finished.")
def main(args):
    """Start an interactive NER command-line session.

    Downloads the NLTK 'punkt' tokenizer, loads the pretrained model named
    by args.model, then hands control to run_cli_loop. Any failure is
    reported as an [ERROR] line rather than a traceback.
    """
    from poldeepner import PolDeepNer
    from pretrained import load_pretrained_model
    try:
        print("Loading the tokenization model ...")
        nltk.download('punkt')
        print("Loading the NER model ...")
        tagger = PolDeepNer(load_pretrained_model(args.model))
        print("ready.")
        run_cli_loop(tagger)
    except Exception as err:
        print("[ERROR] %s" % str(err))
description='Evaluate given model against annotated document in IOB format.' ) parser.add_argument('-m', required=True, metavar='name', help='model name', default='n82') args = parser.parse_args() root = os.path.dirname(os.path.abspath(__file__)) path_data = os.path.join(root, "..", "data") path_eval = os.path.join(path_data, "kpwr-ner-n82-test.iob") try: model = load_pretrained_model(args.m) ner = PolDeepNer(model) label_true, label_pred = [], [] x_test, y_test = iob.load_data_and_labels(path_eval) for x, y in zip(x_test, y_test): pred = ner.process_sentence(x) label_true.append(y) label_pred.append(pred) report = NestedReport(label_true, label_pred) print(str(report)) #score = f1_score(label_true, label_pred) #print(score) except Exception as e:
tokens = word_tokenize(text) labels = ner.process_sentence(tokens) offsets = align_tokens_to_text([tokens], text) for an in wrap_annotations([labels]): begin = offsets[an.token_ids[0]][0] end = offsets[an.token_ids[-1]][1] orth = text[begin:end] print("[%3s:%3s] %-20s %s" % (begin, end, an.annotation, orth)) except Exception as e: print("Failed to process the text due the following error: %s" % e) try: print("Loading the tokenization model ...") nltk.download('punkt') print("Loading the NER model ...") model = load_pretrained_model(args.m) ner = PolDeepNer(model) print("ready.") run_cli_loop(ner) except Exception as e: print("[ERROR] %s" % str(e))
help='path to a file with a list of files') parser.add_argument('-o', required=True, metavar='PATH', help='path to a json output file') parser.add_argument('-m', required=True, metavar='PATH', help='path to the model') args = parser.parse_args() path = args.i parent = os.path.dirname(path) ner = PolDeepNer(args.m) dict_list = [] paths = codecs.open(path, "r", "utf8").readlines() paths_count = len(paths) for n, rel_path in enumerate(paths): abs_path = os.path.abspath(os.path.join(parent, rel_path.strip())) namext = os.path.basename(abs_path) name = os.path.splitext(namext)[0] path = os.path.dirname(abs_path) text = codecs.open(os.path.join(path, name + ".txt"), "r", "utf8").read() doc_id = get_id(os.path.join(path, name + ".ini")) print("%d from %d: %s" % (n, paths_count, doc_id))
required=True, metavar='PATH', help='path to a file with a list of files') parser.add_argument('-o', required=True, metavar='PATH', help='path to a json output file') parser.add_argument('-m', required=True, metavar='PATH', help='model name') args = parser.parse_args() parent = os.path.dirname(args.i) try: print("Loading the NER model ...") model = load_pretrained_model(args.m) ner = PolDeepNer(model) dict_list = [] paths = codecs.open(args.i, "r", "utf8").readlines() paths_count = len(paths) for n, rel_path in enumerate(paths): abs_path = os.path.abspath(os.path.join(parent, rel_path.strip())) namext = os.path.basename(abs_path) name = os.path.splitext(namext)[0] path = os.path.dirname(abs_path) text = codecs.open(os.path.join(path, name + ".txt"), "r", "utf8").read() doc_id = get_id(os.path.join(path, name + ".ini")) print("%d from %d: %s" % (n, paths_count, doc_id))
if __name__ == '__main__': parser = argparse.ArgumentParser( description='Process IOB file, recognize NE and save the output to another IOB file.') parser.add_argument('-i', required=True, metavar='PATH', help='path to a plain text') parser.add_argument('-m', required=False, metavar='NAME', help='name of a model pack') args = parser.parse_args() try: print("Loading the tokenization model ...") nltk.download('punkt') print("Loading the NER model ...") model = load_pretrained_model(args.m) ner = PolDeepNer(model) print("ready.") text = " ".join(codecs.open(args.i, "r", "utf8").readlines()) tokens = word_tokenize(text) labels = ner.process_sentence(tokens) offsets = align_tokens_to_text([tokens], text) for an in wrap_annotations([labels]): begin = offsets[an.token_ids[0]][0] end = offsets[an.token_ids[-1]][1] orth = text[begin:end] print("[%3s:%3s] %-20s %s" % (begin, end, an.annotation, orth))
parser = argparse.ArgumentParser( description= 'Process IOB file, recognize NE and save the output to another IOB file.') parser.add_argument('-i', required=True, metavar='PATH', help='input IOB file') parser.add_argument('-m', required=True, metavar='PATH', help='path to the model') parser.add_argument('-o', required=True, metavar='PATH', help='output IOB file') args = parser.parse_args() ner = PolDeepNer(args.m) def process_file(input, output, ner): with codecs.open(input, "r", "utf8") as f: fo = codecs.open(output, "w", "utf8") lines, words = [], [] for line in f: line = line.rstrip() if "-DOCSTART " in line: fo.write(line + "\n") pass elif line: cols = line.split('\t') words.append(cols[0]) lines.append("\t".join(cols[:-1]))