コード例 #1
0
def run_cli_loop(ner):
    """Interactively read text from stdin and print NER annotations.

    Args:
        ner: model object exposing ``process_sentence(tokens) -> labels``.

    The loop terminates when the user submits an empty line. Double quotes
    in the input are replaced with single quotes before processing
    (presumably to protect downstream tooling — TODO confirm).
    """
    from process_poleval import align_tokens_to_text
    from utils import wrap_annotations

    while True:
        text = input("Enter text to process: ").strip().replace("\"", "'")

        # An empty line ends the session.
        if not text:
            print("closing...")
            break

        try:
            tokens = word_tokenize(text)
            labels = ner.process_sentence(tokens)
            # Map token indices back to character offsets in the raw text.
            offsets = align_tokens_to_text([tokens], text)

            for an in wrap_annotations([labels]):
                begin = offsets[an.token_ids[0]][0]
                end = offsets[an.token_ids[-1]][1]
                orth = text[begin:end]

                print("[%3s:%3s] %-20s %s" % (begin, end, an.annotation, orth))

        except Exception as e:
            # Broad catch is deliberate: keep the REPL alive on any error.
            # Fixed grammar of the message ("due the" -> "due to the").
            print("Failed to process the text due to the following error: %s" % e)
コード例 #2
0
ファイル: process_poleval.py プロジェクト: ptomaszewska/ner
def get_poleval_dict(doc_id, text, sentences, labels):
    """Build a PolEval-format result dict for a single document.

    Args:
        doc_id: document identifier, copied into the result unchanged.
        text: raw document text; annotation offsets index into it.
        sentences: tokenized sentences aligned against ``text``.
        labels: per-sentence label sequences, turned into annotations.

    Returns:
        dict with keys ``text``, ``id`` and ``answers``, where ``answers``
        is a newline-joined list of ``"TYPE begin end\\torth"`` entries.
    """
    annotations = wrap_annotations(labels)
    # Character offsets of every token in the original text.
    offsets = align_tokens_to_text(sentences, text)
    answers = []
    for an in annotations:
        begin = offsets[an.token_ids[0]][0]
        end = offsets[an.token_ids[-1]][1]
        orth = text[begin:end]
        # PolEval answer format uses underscores instead of dashes in types.
        answers.append("%s %d %d\t%s" %
                       (an.annotation.replace("-", "_"), begin, end, orth))
    return {'text': text, 'id': doc_id, 'answers': "\n".join(answers)}
コード例 #3
0
    # CLI entry: read a plain-text file, run NER, print annotated spans.
    parser = argparse.ArgumentParser(
        description='Process IOB file, recognize NE and save the output to another IOB file.')
    parser.add_argument('-i', required=True, metavar='PATH', help='path to a plain text')
    parser.add_argument('-m', required=False, metavar='NAME', help='name of a model pack')
    args = parser.parse_args()

    try:
        print("Loading the tokenization model ...")
        nltk.download('punkt')

        print("Loading the NER model ...")
        model = load_pretrained_model(args.m)
        ner = PolDeepNer(model)

        print("ready.")

        # Context manager ensures the handle is closed even on error;
        # the original leaked the file object returned by codecs.open.
        with codecs.open(args.i, "r", "utf8") as source:
            text = " ".join(source.readlines())
        tokens = word_tokenize(text)
        labels = ner.process_sentence(tokens)
        # Map token indices back to character offsets in the raw text.
        offsets = align_tokens_to_text([tokens], text)

        for an in wrap_annotations([labels]):
            begin = offsets[an.token_ids[0]][0]
            end = offsets[an.token_ids[-1]][1]
            orth = text[begin:end]

            print("[%3s:%3s] %-20s %s" % (begin, end, an.annotation, orth))

    except Exception as e:
        # Top-level boundary: report the failure instead of a traceback.
        print("[ERROR] %s" % str(e))