def test_kb_to_bytes():
    """Check that a KB restored with from_bytes matches the KB that produced the bytes."""
    nlp = English()
    original = KnowledgeBase(nlp.vocab, entity_vector_length=3)
    original.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
    original.add_entity(entity="Q66", freq=9, entity_vector=[1, 2, 3])
    original.add_alias(alias="Russ Cochran", entities=["Q2146908"], probabilities=[0.8])
    original.add_alias(alias="Boeing", entities=["Q66"], probabilities=[0.5])
    original.add_alias(
        alias="Randomness", entities=["Q66", "Q2146908"], probabilities=[0.1, 0.2]
    )
    assert original.contains_alias("Russ Cochran")
    serialized = original.to_bytes()

    # The second KB starts out without the alias; it only appears after loading.
    restored = KnowledgeBase(nlp.vocab, entity_vector_length=3)
    assert not restored.contains_alias("Russ Cochran")
    restored = restored.from_bytes(serialized)

    # Entity-side comparison of the two KBs.
    assert original.get_size_entities() == restored.get_size_entities()
    assert original.entity_vector_length == restored.entity_vector_length
    assert original.get_entity_strings() == restored.get_entity_strings()
    for entity_id in ("Q2146908", "Q66"):
        assert original.get_vector(entity_id) == restored.get_vector(entity_id)

    # Alias-side comparison; candidates are compared by count only.
    assert restored.contains_alias("Russ Cochran")
    assert original.get_size_aliases() == restored.get_size_aliases()
    assert original.get_alias_strings() == restored.get_alias_strings()
    for alias in ("Russ Cochran", "Randomness"):
        expected_candidates = original.get_alias_candidates(alias)
        actual_candidates = restored.get_alias_candidates(alias)
        assert len(expected_candidates) == len(actual_candidates)
def test_kb_pickle():
    """Check that an alias added to a KB is still present after a pickle round trip."""
    alias = "Russ Cochran"
    nlp = English()
    source_kb = KnowledgeBase(nlp.vocab, entity_vector_length=3)
    source_kb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])

    # The alias is unknown until it is explicitly added.
    assert not source_kb.contains_alias(alias)
    source_kb.add_alias(alias=alias, entities=["Q2146908"], probabilities=[0.8])
    assert source_kb.contains_alias(alias)

    pickled = pickle.dumps(source_kb)
    unpickled_kb = pickle.loads(pickled)
    assert unpickled_kb.contains_alias(alias)
def test_issue6730(en_vocab):
    """Ensure that the KB does not accept empty strings, but otherwise IO works fine.

    The KB is written to disk and then read back twice: once into the KB that
    wrote it, and once into a newly created KB. The second load matters because
    the writing KB already holds both aliases in memory, so checking it alone
    could pass even if nothing was actually written or read.
    """
    from spacy.kb import KnowledgeBase

    kb = KnowledgeBase(en_vocab, entity_vector_length=3)
    kb.add_entity(entity="1", freq=148, entity_vector=[1, 2, 3])

    # An empty alias must be rejected and must not end up in the KB.
    with pytest.raises(ValueError):
        kb.add_alias(alias="", entities=["1"], probabilities=[0.4])
    assert kb.contains_alias("") is False

    kb.add_alias(alias="x", entities=["1"], probabilities=[0.2])
    kb.add_alias(alias="y", entities=["1"], probabilities=[0.1])

    kb_fresh = KnowledgeBase(en_vocab, entity_vector_length=3)
    with make_tempdir() as tmp_dir:
        kb.to_disk(tmp_dir)
        # Reload in place (the original regression scenario) ...
        kb.from_disk(tmp_dir)
        # ... and into a new KB whose aliases can only come from disk.
        kb_fresh.from_disk(tmp_dir)

    for loaded_kb in (kb, kb_fresh):
        assert loaded_kb.get_size_aliases() == 2
        assert set(loaded_kb.get_alias_strings()) == {"x", "y"}