def main(path, name="bert-base-uncased", lang="en"):
    msg = Printer()
    msg.info(f"Creating model for '{name}' ({lang})")
    with msg.loading(f"Setting up the pipeline..."):
        nlp = PyTT_Language(pytt_name=name, meta={"lang": lang})
        nlp.add_pipe(nlp.create_pipe("sentencizer"))
        nlp.add_pipe(PyTT_WordPiecer.from_pretrained(nlp.vocab, name))
        nlp.add_pipe(PyTT_TokenVectorEncoder.from_pretrained(nlp.vocab, name))
    msg.good("Initialized the model pipeline")
    nlp.to_disk(path)
    msg.good(f"Saved '{name}' ({lang})")
    msg.text(f"Pipeline: {nlp.pipe_names}")
    msg.text(f"Location: {path}")
Esempio n. 2
0
from spacy_pytorch_transformers import PyTT_Language, PyTT_WordPiecer, PyTT_TokenVectorEncoder
from pathlib import Path

pytorch_path = str(Path.home() / "pytorch-rubert")
spacy_path = str(Path.home() / "spacy-rubert")
name = "ru_pytt_rubert_cased"

nlp = PyTT_Language(pytt_name=name, meta={"lang": "ru"})
nlp.add_pipe(nlp.create_pipe("sentencizer"))
nlp.add_pipe(PyTT_WordPiecer.from_pretrained(nlp.vocab, pytorch_path))
nlp.add_pipe(PyTT_TokenVectorEncoder.from_pretrained(nlp.vocab, pytorch_path))
print(nlp.pipe_names)
nlp.to_disk(spacy_path)