def test_serialize_language_exclude(meta_data):
    """Check that ``exclude=["meta"]`` keeps the meta out of a bytes round trip.

    meta_data: meta dict handed to ``Language(meta=...)``; the test expects
        its "name" entry to be "name-in-fixture" (presumably supplied by a
        pytest fixture -- confirm against the fixture definition).
    """
    expected_name = "name-in-fixture"
    source = Language(meta=meta_data)
    assert source.meta["name"] == expected_name

    # Plain round trip: the restored pipeline carries the same name.
    restored = Language()
    restored = restored.from_bytes(source.to_bytes())
    assert restored.meta["name"] == expected_name

    # Meta excluded while loading.
    restored = Language()
    restored = restored.from_bytes(source.to_bytes(), exclude=["meta"])
    assert restored.meta["name"] != expected_name

    # Meta excluded while dumping.
    restored = Language()
    restored = restored.from_bytes(source.to_bytes(exclude=["meta"]))
    assert restored.meta["name"] != expected_name
def test_serialize_language_exclude(meta_data):
    """Check how ``exclude=["meta"]`` affects a ``Language`` bytes round trip.

    Covers four expectations:
    * a plain ``to_bytes``/``from_bytes`` round trip keeps ``meta["name"]``;
    * excluding "meta" on load leaves the new pipeline with a different name;
    * excluding "meta" on dump leaves the new pipeline with a different name;
    * passing ``meta=False`` as a keyword to either method raises ValueError.

    meta_data: meta dict handed to ``Language(meta=...)``; the test expects
        its "name" entry to be "name-in-fixture" (presumably supplied by a
        pytest fixture -- confirm against the fixture definition).
    """
    name = "name-in-fixture"
    nlp = Language(meta=meta_data)
    assert nlp.meta["name"] == name

    # Check the *restored* pipeline here: asserting on ``nlp`` again would
    # only repeat the check above and never exercise the round trip.
    new_nlp = Language().from_bytes(nlp.to_bytes())
    assert new_nlp.meta["name"] == name

    # Meta excluded while loading.
    new_nlp = Language().from_bytes(nlp.to_bytes(), exclude=["meta"])
    assert new_nlp.meta["name"] != name

    # Meta excluded while dumping.
    new_nlp = Language().from_bytes(nlp.to_bytes(exclude=["meta"]))
    assert new_nlp.meta["name"] != name

    # The keyword form ``meta=False`` is expected to be rejected.
    with pytest.raises(ValueError):
        nlp.to_bytes(meta=False)
    with pytest.raises(ValueError):
        Language().from_bytes(nlp.to_bytes(), meta=False)
def test_serialize_language_exclude(meta_data):
    """Check how ``exclude=["meta"]`` affects a ``Language`` bytes round trip.

    Covers four expectations:
    * a plain ``to_bytes``/``from_bytes`` round trip keeps ``meta["name"]``;
    * excluding "meta" on load leaves the new pipeline with a different name;
    * excluding "meta" on dump leaves the new pipeline with a different name;
    * passing ``meta=False`` as a keyword to either method raises ValueError.

    meta_data: meta dict handed to ``Language(meta=...)``; the test expects
        its "name" entry to be "name-in-fixture" (presumably supplied by a
        pytest fixture -- confirm against the fixture definition).
    """
    name = "name-in-fixture"
    nlp = Language(meta=meta_data)
    assert nlp.meta["name"] == name

    # Check the *restored* pipeline here: asserting on ``nlp`` again would
    # only repeat the check above and never exercise the round trip.
    new_nlp = Language().from_bytes(nlp.to_bytes())
    assert new_nlp.meta["name"] == name

    # Meta excluded while loading.
    new_nlp = Language().from_bytes(nlp.to_bytes(), exclude=["meta"])
    assert new_nlp.meta["name"] != name

    # Meta excluded while dumping.
    new_nlp = Language().from_bytes(nlp.to_bytes(exclude=["meta"]))
    assert new_nlp.meta["name"] != name

    # The keyword form ``meta=False`` is expected to be rejected.
    with pytest.raises(ValueError):
        nlp.to_bytes(meta=False)
    with pytest.raises(ValueError):
        Language().from_bytes(nlp.to_bytes(), meta=False)
def test_lemmatizer_reflects_lookups_changes():
    """Check that lemmas follow the "lemma_lookup" table, both after in-place
    edits and after a ``to_bytes``/``from_bytes`` round trip.

    Regression test for an issue that would cause lookups available in a
    model loaded from disk to not be reflected in the lemmatizer.
    """

    def lemma_of(vocab, word):
        # Lemma of the only token in a one-word Doc built on ``vocab``.
        return Doc(vocab, words=[word])[0].lemma_

    source = Language()
    # Before any table exists the lemma is expected to equal the word.
    assert lemma_of(source.vocab, "foo") == "foo"

    lookup = source.vocab.lookups.add_table("lemma_lookup")
    lookup["foo"] = "bar"
    assert lemma_of(source.vocab, "foo") == "bar"

    # Fetch the table again and extend it; the new entry must be picked up.
    lookup = source.vocab.lookups.get_table("lemma_lookup")
    lookup["hello"] = "world"
    assert lemma_of(source.vocab, "hello") == "world"

    # A second pipeline starts out with its own, different entry.
    target = Language()
    lookup = target.vocab.lookups.add_table("lemma_lookup")
    lookup["hello"] = "hi"
    assert lemma_of(target.vocab, "hello") == "hi"

    # Loading the first pipeline's bytes should bring in its saved table.
    target.from_bytes(source.to_bytes())
    assert len(target.vocab.lookups) == 1
    restored_table = target.vocab.lookups.get_table("lemma_lookup")
    assert len(restored_table) == 2
    assert restored_table["hello"] == "world"
    assert lemma_of(target.vocab, "foo") == "bar"
    assert lemma_of(target.vocab, "hello") == "world"