Example 1
def main(args):
    import iob  # missing from the original snippet; provides load_data_and_labels used below
    from poldeepner import PolDeepNer
    from pretrained import load_pretrained_model
    from utils import NestedReport
    from wrapper import Sequence

    try:
        model = ([Sequence.load(args.model, args.embeddings)]
                 if args.embeddings else load_pretrained_model(args.model))
        ner = PolDeepNer(model)

        label_true, label_pred = [], []
        x_test, y_test, _ = iob.load_data_and_labels(args.input)
        for n, (x, y) in enumerate(zip(x_test, y_test)):
            pred = ner.process_sentence(x)
            label_true.append(y)
            label_pred.append(pred)
            if n % 1000 == 0:
                print("Sentences processed: %d / %d" % (n, len(y_test)))
        print("Sentences processed: %d / %d" % (len(y_test), len(y_test)))

        report = NestedReport(label_true, label_pred)
        print(str(report))

    except Exception as e:
        print("[ERROR] %s" % str(e))
Example 2
def main(argv=None):
    from poldeepner import PolDeepNer
    from pretrained import load_pretrained_model

    args = get_args(argv)

    print("\nLoading the NER model ...")
    model = load_pretrained_model(args.m)
    ner = PolDeepNer(model)
    print("NER model loaded.")

    print("Annotating ...")
    annotate(args.fileindex, ner)
    print("Annotation finished.")
Example 3
def main(args):
    import nltk  # missing from the original snippet; needed for nltk.download below
    from poldeepner import PolDeepNer
    from pretrained import load_pretrained_model

    try:
        print("Loading the tokenization model ...")
        nltk.download('punkt')

        print("Loading the NER model ...")
        model = load_pretrained_model(args.model)
        ner = PolDeepNer(model)

        print("ready.")
        run_cli_loop(ner)

    except Exception as e:
        print("[ERROR] %s" % str(e))
Example 4
import argparse
import os

import iob
from poldeepner import PolDeepNer
from pretrained import load_pretrained_model
from utils import NestedReport

parser = argparse.ArgumentParser(  # reconstructed: the snippet starts mid-call
    description='Evaluate given model against annotated document in IOB format.'
)
parser.add_argument('-m',
                    metavar='name',
                    help='model name',
                    default='n82')  # original also had required=True, which made this default unreachable
args = parser.parse_args()

root = os.path.dirname(os.path.abspath(__file__))
path_data = os.path.join(root, "..", "data")
path_eval = os.path.join(path_data, "kpwr-ner-n82-test.iob")

try:
    model = load_pretrained_model(args.m)
    ner = PolDeepNer(model)

    label_true, label_pred = [], []
    x_test, y_test = iob.load_data_and_labels(path_eval)
    for x, y in zip(x_test, y_test):
        pred = ner.process_sentence(x)
        label_true.append(y)
        label_pred.append(pred)

    report = NestedReport(label_true, label_pred)
    print(str(report))

    #score = f1_score(label_true, label_pred)
    #print(score)
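    # The commented lines above match the call shape of seqeval's span-level
    # f1_score (lists of label sequences). Assuming that is the intended
    # metric, the live version would be:
    #     from seqeval.metrics import f1_score
    #     print(f1_score(label_true, label_pred))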

except Exception as e:
    print("[ERROR] %s" % str(e))
Example 5
def run_cli_loop(ner):
    # Reconstructed opening: the original snippet starts mid-function. The
    # prompt wording is a guess; the loop shape follows the indentation and
    # the except clause below.
    while True:
        text = input("Enter text to annotate: ")
        try:
            tokens = word_tokenize(text)
            labels = ner.process_sentence(tokens)
            offsets = align_tokens_to_text([tokens], text)

            for an in wrap_annotations([labels]):
                begin = offsets[an.token_ids[0]][0]
                end = offsets[an.token_ids[-1]][1]
                orth = text[begin:end]

                print("[%3s:%3s] %-20s %s" % (begin, end, an.annotation, orth))

        except Exception as e:
            print("Failed to process the text due the following error: %s" % e)


try:
    print("Loading the tokenization model ...")
    nltk.download('punkt')

    print("Loading the NER model ...")
    model = load_pretrained_model(args.m)
    ner = PolDeepNer(model)

    print("ready.")
    run_cli_loop(ner)

except Exception as e:
    print("[ERROR] %s" % str(e))


Example 6
    parser = argparse.ArgumentParser()  # reconstructed: the snippet starts mid-call
    parser.add_argument('-i',
                        required=True,
                        metavar='PATH',
                        help='path to a file with a list of files')
    parser.add_argument('-o',
                        required=True,
                        metavar='PATH',
                        help='path to a json output file')
    parser.add_argument('-m',
                        required=True,
                        metavar='PATH',
                        help='path to the model')

    args = parser.parse_args()
    index_path = args.i  # renamed from "path", which the loop below reuses

    parent = os.path.dirname(index_path)

    ner = PolDeepNer(args.m)

    dict_list = []
    paths = codecs.open(index_path, "r", "utf8").readlines()
    paths_count = len(paths)
    for n, rel_path in enumerate(paths):
        abs_path = os.path.abspath(os.path.join(parent, rel_path.strip()))
        namext = os.path.basename(abs_path)
        name = os.path.splitext(namext)[0]
        path = os.path.dirname(abs_path)

        text = codecs.open(os.path.join(path, name + ".txt"), "r",
                           "utf8").read()
        doc_id = get_id(os.path.join(path, name + ".ini"))
        print("%d from %d: %s" % (n, paths_count, doc_id))
Example 7
    parser = argparse.ArgumentParser()  # reconstructed: the snippet starts mid-call
    parser.add_argument('-i',
                        required=True,
                        metavar='PATH',
                        help='path to a file with a list of files')
    parser.add_argument('-o',
                        required=True,
                        metavar='PATH',
                        help='path to a json output file')
    parser.add_argument('-m', required=True, metavar='PATH', help='model name')
    args = parser.parse_args()

    parent = os.path.dirname(args.i)

    try:
        print("Loading the NER model ...")
        model = load_pretrained_model(args.m)
        ner = PolDeepNer(model)

        dict_list = []
        paths = codecs.open(args.i, "r", "utf8").readlines()
        paths_count = len(paths)
        for n, rel_path in enumerate(paths):
            abs_path = os.path.abspath(os.path.join(parent, rel_path.strip()))
            namext = os.path.basename(abs_path)
            name = os.path.splitext(namext)[0]
            path = os.path.dirname(abs_path)

            text = codecs.open(os.path.join(path, name + ".txt"), "r",
                               "utf8").read()
            doc_id = get_id(os.path.join(path, name + ".ini"))
            print("%d from %d: %s" % (n, paths_count, doc_id))
Example 8

import argparse
import codecs

import nltk
from nltk.tokenize import word_tokenize

from poldeepner import PolDeepNer
from pretrained import load_pretrained_model
# Assumed module for these two helpers, following the utils imports in the other snippets:
from utils import align_tokens_to_text, wrap_annotations

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Annotate a plain text file and print the recognized named entities.')
    parser.add_argument('-i', required=True, metavar='PATH', help='path to a plain text')
    # Was required=False in the original, but args.m is used unconditionally below.
    parser.add_argument('-m', required=True, metavar='NAME', help='name of a model pack')
    args = parser.parse_args()

    try:
        print("Loading the tokenization model ...")
        nltk.download('punkt')

        print("Loading the NER model ...")
        model = load_pretrained_model(args.m)
        ner = PolDeepNer(model)

        print("ready.")

        text = " ".join(codecs.open(args.i, "r", "utf8").readlines())
        tokens = word_tokenize(text)
        labels = ner.process_sentence(tokens)
        offsets = align_tokens_to_text([tokens], text)

        for an in wrap_annotations([labels]):
            begin = offsets[an.token_ids[0]][0]
            end = offsets[an.token_ids[-1]][1]
            orth = text[begin:end]

            print("[%3s:%3s] %-20s %s" % (begin, end, an.annotation, orth))
Example 9
import argparse
import codecs

from poldeepner import PolDeepNer

parser = argparse.ArgumentParser(
    description='Process IOB file, recognize NE and save the output to another IOB file.')
parser.add_argument('-i', required=True, metavar='PATH', help='input IOB file')
parser.add_argument('-m',
                    required=True,
                    metavar='PATH',
                    help='path to the model')
parser.add_argument('-o',
                    required=True,
                    metavar='PATH',
                    help='output IOB file')

args = parser.parse_args()

ner = PolDeepNer(args.m)


def process_file(input_path, output_path, ner):
    # Parameters renamed from input/output to avoid shadowing builtins.
    with codecs.open(input_path, "r", "utf8") as f, \
            codecs.open(output_path, "w", "utf8") as fo:
        lines, words = [], []
        for line in f:
            line = line.rstrip()
            if "-DOCSTART " in line:
                # Pass document markers through unchanged.
                fo.write(line + "\n")
            elif line:
                cols = line.split('\t')
                words.append(cols[0])
                # Keep every column except the last one (the gold label).
                lines.append("\t".join(cols[:-1]))