# --- Example 1 ---
def main(args):
    """Evaluate a NER model on an IOB-formatted file and print a nested report.

    When ``args.embeddings`` is set, a single Sequence model is loaded from
    ``args.model``; otherwise ``args.model`` names a pretrained model pack.
    Every sentence in ``args.input`` is tagged and the gold/predicted label
    sequences are fed to NestedReport for per-category evaluation.
    """
    from poldeepner import PolDeepNer
    from pretrained import load_pretrained_model
    from utils import NestedReport
    from wrapper import Sequence

    try:
        if args.embeddings:
            model = [Sequence.load(args.model, args.embeddings)]
        else:
            model = load_pretrained_model(args.model)
        ner = PolDeepNer(model)

        # NOTE(review): this loader variant returns a third value (ignored);
        # confirm against the iob module version in use.
        sentences, expected, _ = iob.load_data_and_labels(args.input)
        gold, predicted = [], []
        for idx, (tokens, tags) in enumerate(zip(sentences, expected)):
            gold.append(tags)
            predicted.append(ner.process_sentence(tokens))
            # Progress heartbeat every 1000 sentences (including the first).
            if idx % 1000 == 0:
                print("Sentences processed: %d / %d" % (idx, len(expected)))
        # Final tally: len(gold) equals the number of sentences processed.
        print("Sentences processed: %d / %d" % (len(gold), len(expected)))

        report = NestedReport(gold, predicted)
        print(str(report))

    except Exception as e:
        # Top-level boundary: report the failure instead of dumping a trace.
        print("[ERROR] %s" % str(e))
# --- Example 2 ---
# CLI: evaluate a pretrained model pack against the bundled KPWr n82 test set.
# NOTE(review): the original used required=True together with default='n82',
# which made the default dead code — the parser always demanded -m. The flag
# is now optional so the documented default actually applies; explicit
# `-m NAME` invocations behave exactly as before.
parser.add_argument('-m',
                    required=False,
                    metavar='name',
                    help='model name',
                    default='n82')
args = parser.parse_args()

# Resolve the evaluation corpus relative to this script's own location.
root = os.path.dirname(os.path.abspath(__file__))
path_data = os.path.join(root, "..", "data")
path_eval = os.path.join(path_data, "kpwr-ner-n82-test.iob")

try:
    model = load_pretrained_model(args.m)
    ner = PolDeepNer(model)

    label_true, label_pred = [], []
    # presumably this loader variant returns (sentences, labels) — confirm
    # against the iob module version in use.
    x_test, y_test = iob.load_data_and_labels(path_eval)
    for x, y in zip(x_test, y_test):
        label_true.append(y)
        label_pred.append(ner.process_sentence(x))

    # Per-category nested-NER evaluation over the collected sequences.
    report = NestedReport(label_true, label_pred)
    print(str(report))

except Exception as e:
    # Top-level boundary: report the failure instead of dumping a trace.
    print("[ERROR] %s" % str(e))
# --- Example 3 ---
    # CLI: tokenize a plain-text file, run NER over it, and print every
    # recognized annotation with its character span and surface form.
    parser = argparse.ArgumentParser(
        description='Process IOB file, recognize NE and save the output to another IOB file.')
    parser.add_argument('-i', required=True, metavar='PATH', help='path to a plain text')
    parser.add_argument('-m', required=False, metavar='NAME', help='name of a model pack')
    args = parser.parse_args()

    try:
        print("Loading the tokenization model ...")
        nltk.download('punkt')  # tokenizer data required by word_tokenize

        print("Loading the NER model ...")
        model = load_pretrained_model(args.m)
        ner = PolDeepNer(model)

        print("ready.")

        # FIX: close the input file deterministically — the original left the
        # codecs.open handle dangling until garbage collection.
        with codecs.open(args.i, "r", "utf8") as source:
            text = " ".join(source.readlines())
        tokens = word_tokenize(text)
        labels = ner.process_sentence(tokens)
        # Map token indices back to character offsets in the raw text.
        offsets = align_tokens_to_text([tokens], text)

        # Merge IOB labels into span annotations and print each one.
        for an in wrap_annotations([labels]):
            begin = offsets[an.token_ids[0]][0]
            end = offsets[an.token_ids[-1]][1]
            orth = text[begin:end]

            print("[%3s:%3s] %-20s %s" % (begin, end, an.annotation, orth))

    except Exception as e:
        # Top-level boundary: report the failure instead of dumping a trace.
        print("[ERROR] %s" % str(e))