def chunkstring_spacy(text: str) -> list:
    """Split *text* into sentences and prefix each one with ``'>>en<< '``.

    A blank Portuguese pipeline containing only spaCy's rule-based
    ``sentencizer`` is built on every call and used to find the sentence
    boundaries.

    Args:
        text: The raw text to segment.

    Returns:
        A list with one string per detected sentence, each of the form
        ``'>>en<< <sentence text>'``. Empty input yields an empty list.
    """
    nlp = Portuguese()
    # spaCy v3 registers built-in components by their string name; the v2
    # form ``add_pipe(nlp.create_pipe(...))`` raises under v3, which is the
    # API the rest of this file uses (``nlp.add_pipe("entity_ruler")``).
    nlp.add_pipe("sentencizer")
    doc = nlp(text)
    # NOTE(review): '>>en<<' looks like a target-language tag for a
    # multilingual translation model -- confirm with the caller.
    return ['>>en<<' + ' ' + sent.text for sent in doc.sents]
import json
from pathlib import Path

from spacy.lang.pt import Portuguese

# Build a blank Portuguese pipeline whose only component is an entity ruler.
# Despite the output directory name, no pretrained "pt_core_news_sm" weights
# are loaded here: ``Portuguese()`` creates an empty pipeline.
nlp = Portuguese()
ruler = nlp.add_pipe("entity_ruler")

# Read the patterns inside a context manager so the file handle is closed
# deterministically, and decode explicitly as UTF-8 so accented characters
# are not mangled on platforms with a non-UTF-8 default encoding.
# NOTE(review): the file is expected to hold a list of pattern dicts in the
# format accepted by ``EntityRuler.add_patterns`` -- confirm against the data.
with open("data/states/states_label.json", encoding="utf-8") as patterns_file:
    patterns = json.load(patterns_file)
ruler.add_patterns(patterns)

# Make sure the output directory exists, then serialize the pipeline to disk.
Path("models/").mkdir(parents=True, exist_ok=True)
nlp.to_disk("models/pt_core_news_sm_addresses")