Example #1
0
def test_language_to_from_disk(nlp, name):
    doc = nlp("hello world")
    assert is_valid_tensor(doc.tensor)
    with make_tempdir() as tempdir:
        nlp.to_disk(tempdir)
        new_nlp = PyTT_Language()
        new_nlp.add_pipe(new_nlp.create_pipe("sentencizer"))
        wordpiecer = PyTT_WordPiecer(new_nlp.vocab, pytt_name=name)
        tok2vec = PyTT_TokenVectorEncoder(new_nlp.vocab, pytt_name=name)
        new_nlp.add_pipe(wordpiecer)
        new_nlp.add_pipe(tok2vec)
        new_nlp.from_disk(tempdir)
    assert new_nlp.pipe_names == nlp.pipe_names
    new_doc = new_nlp("hello world")
    assert is_valid_tensor(new_doc.tensor)
    assert_equal(doc.tensor, new_doc.tensor)
Example #2
0
def main(path, name="bert-base-uncased", lang="en"):
    msg = Printer()
    msg.info(f"Creating model for '{name}' ({lang})")
    with msg.loading(f"Setting up the pipeline..."):
        nlp = PyTT_Language(pytt_name=name, meta={"lang": lang})
        nlp.add_pipe(nlp.create_pipe("sentencizer"))
        nlp.add_pipe(PyTT_WordPiecer.from_pretrained(nlp.vocab, name))
        nlp.add_pipe(PyTT_TokenVectorEncoder.from_pretrained(nlp.vocab, name))
    msg.good("Initialized the model pipeline")
    nlp.to_disk(path)
    msg.good(f"Saved '{name}' ({lang})")
    msg.text(f"Pipeline: {nlp.pipe_names}")
    msg.text(f"Location: {path}")
    with msg.loading("Verifying model loads..."):
        nlp.from_disk(path)
    msg.good("Model loads!")