Example #1
0
def getSentences(text):
    """Split *text* into sentences with spaCy's rule-based sentencizer.

    Builds a fresh Spanish pipeline containing only the sentencizer
    component, runs it over *text*, and returns the sentences as plain
    strings with surrounding whitespace stripped.
    """
    pipeline = Spanish()
    pipeline.add_pipe(pipeline.create_pipe('sentencizer'))
    parsed = pipeline(text)
    sentences = []
    for span in parsed.sents:
        sentences.append(span.string.strip())
    return sentences
Example #2
0
    # Write the accumulated annotation text for this document to
    # ./evaluation/NER/<doc>.ann (presumably brat standoff format — the
    # .ann extension suggests it, but the writer is not visible here).
    # NOTE(review): doc[:-3] assumes every filename has a 3-character
    # extension (e.g. "foo.txt" -> "foo.ann") — confirm against the files
    # actually present in the test directory.
    annotation_filepath = './evaluation/NER/' + doc[:-3] + 'ann'

    with open(annotation_filepath, 'w', encoding='utf-8') as annotation_file:
        annotation_file.write(output)
        # NOTE(review): close() is redundant here — the `with` context
        # manager already closes the file on exit.
        annotation_file.close()


if __name__ == "__main__":
    # load the model you trained
    model = SequenceTagger.load(
        'resources/taggers/medium_updated/final-model.pt')

    ## Create spanish sentence segmenter with Spacy
    # the sentence segmentation by spacy is non-destructive, i.e., the empty lines are considered when getting a span of a given word/entity
    nlp = Spanish()
    sentencizer = nlp.create_pipe("sentencizer")
    nlp.add_pipe(sentencizer)

    test_dir = "./data/datasets/test-background-set-to-publish/"

    if not os.path.exists("./evaluation/NER/"):
        os.makedirs("./evaluation/NER/")

    for doc in os.listdir(
            test_dir
    ):  #For each document in test_dir build the respective annotation file with predicted entities
        doc_filepath = test_dir + doc
        build_annotation_file(doc, doc_filepath, model)