Example #1
from spacy.lang.pt import Portuguese


def chunkstring_spacy(text):
    chunk_sentences = []
    nlp = Portuguese()
    # Rule-based sentence segmentation (spaCy v3 API).
    nlp.add_pipe('sentencizer')
    doc = nlp(text)
    for sent in doc.sents:
        # Prefix each sentence chunk with the '>>en<<' target-language marker.
        chunk_sentences.append('>>en<<' + ' ' + sent.text)

    return chunk_sentences
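
A minimal usage sketch for the function above; the Portuguese sample text is illustrative, and the '>>en<<' prefix is assumed to be the target-language marker expected by a downstream multilingual translation model:

sample = "O spaCy divide o texto em sentenças. Cada sentença vira um bloco."
for chunk in chunkstring_spacy(sample):
    print(chunk)  # each sentence printed with its '>>en<<' prefix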
Example #2
import json
from pathlib import Path

from spacy.lang.pt import Portuguese

# Blank Portuguese pipeline (tokenizer only).
nlp = Portuguese()

# Add an EntityRuler component and load the labelled patterns from disk.
ruler = nlp.add_pipe("entity_ruler")
with open("data/states/states_label.json") as f:
    patterns = json.load(f)

ruler.add_patterns(patterns)

# Serialize the pipeline so it can be reloaded later with spacy.load().
Path("models/").mkdir(parents=True, exist_ok=True)
nlp.to_disk("models/pt_core_news_sm_addresses")
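
A small follow-up sketch, assuming the serialized pipeline is later reloaded for inference; the address string below is purely illustrative:

import spacy

# Reload the saved pipeline and let the EntityRuler tag a sample sentence.
nlp_addresses = spacy.load("models/pt_core_news_sm_addresses")
doc = nlp_addresses("Moro na Avenida Paulista, São Paulo - SP.")
print([(ent.text, ent.label_) for ent in doc.ents])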