Esempi in Python per KnowledgeBase.add_entity, esempi in Python per spacy.kb.KnowledgeBase.add_entity

Esempio n. 1

0

Mostra file

File: test_entity_linker.py Progetto: paolodedios/spaCy

def test_kb_to_bytes():
    # Test that the KB's to_bytes method works correctly
    nlp = English()
    kb_1 = KnowledgeBase(nlp.vocab, entity_vector_length=3)
    kb_1.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
    kb_1.add_entity(entity="Q66", freq=9, entity_vector=[1, 2, 3])
    kb_1.add_alias(alias="Russ Cochran",
                   entities=["Q2146908"],
                   probabilities=[0.8])
    kb_1.add_alias(alias="Boeing", entities=["Q66"], probabilities=[0.5])
    kb_1.add_alias(alias="Randomness",
                   entities=["Q66", "Q2146908"],
                   probabilities=[0.1, 0.2])
    assert kb_1.contains_alias("Russ Cochran")
    kb_bytes = kb_1.to_bytes()
    kb_2 = KnowledgeBase(nlp.vocab, entity_vector_length=3)
    assert not kb_2.contains_alias("Russ Cochran")
    kb_2 = kb_2.from_bytes(kb_bytes)
    # check that both KBs are exactly the same
    assert kb_1.get_size_entities() == kb_2.get_size_entities()
    assert kb_1.entity_vector_length == kb_2.entity_vector_length
    assert kb_1.get_entity_strings() == kb_2.get_entity_strings()
    assert kb_1.get_vector("Q2146908") == kb_2.get_vector("Q2146908")
    assert kb_1.get_vector("Q66") == kb_2.get_vector("Q66")
    assert kb_2.contains_alias("Russ Cochran")
    assert kb_1.get_size_aliases() == kb_2.get_size_aliases()
    assert kb_1.get_alias_strings() == kb_2.get_alias_strings()
    assert len(kb_1.get_alias_candidates("Russ Cochran")) == len(
        kb_2.get_alias_candidates("Russ Cochran"))
    assert len(kb_1.get_alias_candidates("Randomness")) == len(
        kb_2.get_alias_candidates("Randomness"))

Esempio n. 2

0

Mostra file

File: test_entity_linker.py Progetto: paolodedios/spaCy

 def create_kb(vocab):
     kb = KnowledgeBase(vocab, entity_vector_length=3)
     kb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
     kb.add_alias(alias="Russ Cochran",
                  entities=["Q2146908"],
                  probabilities=[0.8])
     return kb

Esempio n. 3

0

Mostra file

def test_candidate_generation(nlp):
    """Test correct candidate generation"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)

    # adding entities
    mykb.add_entity(entity="Q1", freq=27, entity_vector=[1])
    mykb.add_entity(entity="Q2", freq=12, entity_vector=[2])
    mykb.add_entity(entity="Q3", freq=5, entity_vector=[3])

    # adding aliases
    mykb.add_alias(alias="douglas",
                   entities=["Q2", "Q3"],
                   probabilities=[0.8, 0.1])
    mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9])

    # test the size of the relevant candidates
    assert len(mykb.get_candidates("douglas")) == 2
    assert len(mykb.get_candidates("adam")) == 1
    assert len(mykb.get_candidates("shrubbery")) == 0

    # test the content of the candidates
    assert mykb.get_candidates("adam")[0].entity_ == "Q2"
    assert mykb.get_candidates("adam")[0].alias_ == "adam"
    assert_almost_equal(mykb.get_candidates("adam")[0].entity_freq, 12)
    assert_almost_equal(mykb.get_candidates("adam")[0].prior_prob, 0.9)

Esempio n. 4

0

Mostra file

def test_append_alias(nlp):
    """Test that we can append additional alias-entity pairs"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)

    # adding entities
    mykb.add_entity(entity="Q1", freq=27, entity_vector=[1])
    mykb.add_entity(entity="Q2", freq=12, entity_vector=[2])
    mykb.add_entity(entity="Q3", freq=5, entity_vector=[3])

    # adding aliases
    mykb.add_alias(alias="douglas",
                   entities=["Q2", "Q3"],
                   probabilities=[0.4, 0.1])
    mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9])

    # test the size of the relevant candidates
    assert len(mykb.get_alias_candidates("douglas")) == 2

    # append an alias
    mykb.append_alias(alias="douglas", entity="Q1", prior_prob=0.2)

    # test the size of the relevant candidates has been incremented
    assert len(mykb.get_alias_candidates("douglas")) == 3

    # append the same alias-entity pair again should not work (will throw a warning)
    with pytest.warns(UserWarning):
        mykb.append_alias(alias="douglas", entity="Q1", prior_prob=0.3)

    # test the size of the relevant candidates remained unchanged
    assert len(mykb.get_alias_candidates("douglas")) == 3

Esempio n. 5

0

Mostra file

def test_candidate_generation(nlp):
    """Test correct candidate generation"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
    doc = nlp("douglas adam Adam shrubbery")

    douglas_ent = doc[0:1]
    adam_ent = doc[1:2]
    Adam_ent = doc[2:3]
    shrubbery_ent = doc[3:4]

    # adding entities
    mykb.add_entity(entity="Q1", freq=27, entity_vector=[1])
    mykb.add_entity(entity="Q2", freq=12, entity_vector=[2])
    mykb.add_entity(entity="Q3", freq=5, entity_vector=[3])

    # adding aliases
    mykb.add_alias(alias="douglas",
                   entities=["Q2", "Q3"],
                   probabilities=[0.8, 0.1])
    mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9])

    # test the size of the relevant candidates
    assert len(get_candidates(mykb, douglas_ent)) == 2
    assert len(get_candidates(mykb, adam_ent)) == 1
    assert len(get_candidates(mykb, Adam_ent)) == 0  # default case sensitive
    assert len(get_candidates(mykb, shrubbery_ent)) == 0

    # test the content of the candidates
    assert get_candidates(mykb, adam_ent)[0].entity_ == "Q2"
    assert get_candidates(mykb, adam_ent)[0].alias_ == "adam"
    assert_almost_equal(get_candidates(mykb, adam_ent)[0].entity_freq, 12)
    assert_almost_equal(get_candidates(mykb, adam_ent)[0].prior_prob, 0.9)

Esempio n. 6

0

Mostra file

def test_vocab_serialization(nlp):
    """Test that string information is retained across storage"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)

    # adding entities
    mykb.add_entity(entity="Q1", freq=27, entity_vector=[1])
    q2_hash = mykb.add_entity(entity="Q2", freq=12, entity_vector=[2])
    mykb.add_entity(entity="Q3", freq=5, entity_vector=[3])

    # adding aliases
    mykb.add_alias(alias="douglas",
                   entities=["Q2", "Q3"],
                   probabilities=[0.4, 0.1])
    adam_hash = mykb.add_alias(alias="adam",
                               entities=["Q2"],
                               probabilities=[0.9])

    candidates = mykb.get_alias_candidates("adam")
    assert len(candidates) == 1
    assert candidates[0].entity == q2_hash
    assert candidates[0].entity_ == "Q2"
    assert candidates[0].alias == adam_hash
    assert candidates[0].alias_ == "adam"

    with make_tempdir() as d:
        mykb.to_disk(d / "kb")
        kb_new_vocab = KnowledgeBase(Vocab(), entity_vector_length=1)
        kb_new_vocab.from_disk(d / "kb")

        candidates = kb_new_vocab.get_alias_candidates("adam")
        assert len(candidates) == 1
        assert candidates[0].entity == q2_hash
        assert candidates[0].entity_ == "Q2"
        assert candidates[0].alias == adam_hash
        assert candidates[0].alias_ == "adam"

Esempio n. 7

0

Mostra file

def test_kb_serialization():
    # Test that the KB can be used in a pipeline with a different vocab
    vector_length = 3
    with make_tempdir() as tmp_dir:
        kb_dir = tmp_dir / "kb"
        nlp1 = English()
        assert "Q2146908" not in nlp1.vocab.strings
        mykb = KnowledgeBase(nlp1.vocab, entity_vector_length=vector_length)
        mykb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
        mykb.add_alias(alias="Russ Cochran",
                       entities=["Q2146908"],
                       probabilities=[0.8])
        assert "Q2146908" in nlp1.vocab.strings
        mykb.to_disk(kb_dir)

        nlp2 = English()
        assert "RandomWord" not in nlp2.vocab.strings
        nlp2.vocab.strings.add("RandomWord")
        assert "RandomWord" in nlp2.vocab.strings
        assert "Q2146908" not in nlp2.vocab.strings

        # Create the Entity Linker component with the KB from file, and check the final vocab
        entity_linker = nlp2.add_pipe("entity_linker", last=True)
        entity_linker.set_kb(load_kb(kb_dir))
        assert "Q2146908" in nlp2.vocab.strings
        assert "RandomWord" in nlp2.vocab.strings

Esempio n. 8

0

Mostra file

def create_kb():
    """ Step 1: create the Knowledge Base in spaCy and write it to file """
    nlp = spacy.load("en_core_web_lg")
    name_dict, desc_dict = load_entities()

    kb = KnowledgeBase(vocab=nlp.vocab, entity_vector_length=300)

    for qid, desc in desc_dict.items():
        desc_doc = nlp(desc)
        desc_enc = desc_doc.vector
        kb.add_entity(entity=qid, entity_vector=desc_enc, freq=342)  # 342 is an arbitrary value here

    for qid, name in name_dict.items():
        kb.add_alias(alias=name, entities=[qid], probabilities=[1])  # 100% prior probability P(entity|alias)

    qids = name_dict.keys()
    probs = [0.3 for qid in qids]
    kb.add_alias(alias="Emerson", entities=qids, probabilities=probs)  # sum([probs]) should be <= 1 !

    print(f"Entities in the KB: {kb.get_entity_strings()}")
    print(f"Aliases in the KB: {kb.get_alias_strings()}")
    print()

    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    kb.dump(output_dir / "my_kb")
    nlp.to_disk(output_dir / "my_nlp")

Esempio n. 9

0

Mostra file

def test_kb_valid_entities(nlp):
    """Test the valid construction of a KB with 3 entities and two aliases"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=3)

    # adding entities
    mykb.add_entity(entity="Q1", freq=0.9, entity_vector=[8, 4, 3])
    mykb.add_entity(entity="Q2", freq=0.5, entity_vector=[2, 1, 0])
    mykb.add_entity(entity="Q3", freq=0.5, entity_vector=[-1, -6, 5])

    # adding aliases
    mykb.add_alias(alias="douglas", entities=["Q2", "Q3"], probabilities=[0.8, 0.2])
    mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9])

    # test the size of the corresponding KB
    assert mykb.get_size_entities() == 3
    assert mykb.get_size_aliases() == 2

    # test retrieval of the entity vectors
    assert mykb.get_vector("Q1") == [8, 4, 3]
    assert mykb.get_vector("Q2") == [2, 1, 0]
    assert mykb.get_vector("Q3") == [-1, -6, 5]

    # test retrieval of prior probabilities
    assert_almost_equal(mykb.get_prior_prob(entity="Q2", alias="douglas"), 0.8)
    assert_almost_equal(mykb.get_prior_prob(entity="Q3", alias="douglas"), 0.2)
    assert_almost_equal(mykb.get_prior_prob(entity="Q342", alias="douglas"), 0.0)
    assert_almost_equal(mykb.get_prior_prob(entity="Q3", alias="douglassssss"), 0.0)

Esempio n. 10

0

Mostra file

 def create_kb(vocab):
     kb = KnowledgeBase(vocab, entity_vector_length=1)
     kb.add_entity(entity="Q2", freq=12, entity_vector=[2])
     kb.add_entity(entity="Q3", freq=5, entity_vector=[3])
     kb.add_alias(alias="douglas",
                  entities=["Q2", "Q3"],
                  probabilities=[0.8, 0.1])
     return kb

Esempio n. 11

0

Mostra file

File: test_entity_linker.py Progetto: nptdat/spaCy

 def create_kb(vocab):
     # create artificial KB
     mykb = KnowledgeBase(vocab, entity_vector_length=vector_length)
     mykb.add_entity(entity="Q613241", freq=12, entity_vector=[6, -4, 3])
     mykb.add_alias("Kirby", ["Q613241"], [0.9])
     # Placeholder
     mykb.add_entity(entity="pink", freq=12, entity_vector=[7, 2, -5])
     mykb.add_alias("pink", ["pink"], [0.9])
     return mykb

Esempio n. 12

0

Mostra file

 def create_kb(vocab):
     mykb = KnowledgeBase(vocab, entity_vector_length=vector_length)
     # adding entities
     mykb.add_entity(entity="Q1", freq=19, entity_vector=[1])
     mykb.add_entity(entity="Q2", freq=8, entity_vector=[1])
     # adding aliases
     mykb.add_alias(alias="Boston", entities=["Q1"], probabilities=[0.7])
     mykb.add_alias(alias="Denver", entities=["Q2"], probabilities=[0.6])
     return mykb

Esempio n. 13

0

Mostra file

File: test_entity_linker.py Progetto: nptdat/spaCy

 def create_kb(vocab):
     mykb = KnowledgeBase(vocab, entity_vector_length=vector_length)
     mykb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
     mykb.add_entity(entity="Q7381115", freq=12, entity_vector=[9, 1, -7])
     mykb.add_alias(
         alias="Russ Cochran",
         entities=["Q2146908", "Q7381115"],
         probabilities=[0.5, 0.5],
     )
     return mykb

Esempio n. 14

0

Mostra file

def test_kb_invalid_entity_vector(nlp):
    """Test the invalid construction of a KB with non-matching entity vector lengths"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=3)

    # adding entities
    mykb.add_entity(entity="Q1", freq=0.9, entity_vector=[1, 2, 3])

    # this should fail because the kb's expected entity vector length is 3
    with pytest.raises(ValueError):
        mykb.add_entity(entity="Q2", freq=0.2, entity_vector=[2])

Esempio n. 15

0

Mostra file

File: test_entity_linker.py Progetto: svlandeg/spaCy

 def create_kb(vocab):
     # create artificial KB
     mykb = KnowledgeBase(vocab, entity_vector_length=3)
     mykb.add_entity(entity=entity_id, freq=12, entity_vector=[6, -4, 3])
     mykb.add_alias(
         alias="Mahler",
         entities=[entity_id],
         probabilities=[1 if meet_threshold else 0.01],
     )
     return mykb

Esempio n. 16

0

Mostra file

File: test_el.py Progetto: spacy-io/spaCy

def test_kb_invalid_combination(nlp):
    """Test the invalid construction of a KB with non-matching entity and probability lists"""
    mykb = KnowledgeBase(nlp.vocab)

    # adding entities
    mykb.add_entity(entity=u'Q1', prob=0.9)
    mykb.add_entity(entity=u'Q2', prob=0.2)
    mykb.add_entity(entity=u'Q3', prob=0.5)

    # adding aliases - should fail because the entities and probabilities vectors are not of equal length
    with pytest.raises(ValueError):
        mykb.add_alias(alias=u'douglas', entities=[u'Q2', u'Q3'], probabilities=[0.3, 0.4, 0.1])

Esempio n. 17

0

Mostra file

File: test_el.py Progetto: spacy-io/spaCy

def test_kb_invalid_entities(nlp):
    """Test the invalid construction of a KB with an alias linked to a non-existing entity"""
    mykb = KnowledgeBase(nlp.vocab)

    # adding entities
    mykb.add_entity(entity=u'Q1', prob=0.9)
    mykb.add_entity(entity=u'Q2', prob=0.2)
    mykb.add_entity(entity=u'Q3', prob=0.5)

    # adding aliases - should fail because one of the given IDs is not valid
    with pytest.raises(ValueError):
        mykb.add_alias(alias=u'douglas', entities=[u'Q2', u'Q342'], probabilities=[0.8, 0.2])

Esempio n. 18

0

Mostra file

File: test_el.py Progetto: spacy-io/spaCy

def test_kb_invalid_probabilities(nlp):
    """Test the invalid construction of a KB with wrong prior probabilities"""
    mykb = KnowledgeBase(nlp.vocab)

    # adding entities
    mykb.add_entity(entity=u'Q1', prob=0.9)
    mykb.add_entity(entity=u'Q2', prob=0.2)
    mykb.add_entity(entity=u'Q3', prob=0.5)

    # adding aliases - should fail because the sum of the probabilities exceeds 1
    with pytest.raises(ValueError):
        mykb.add_alias(alias=u'douglas', entities=[u'Q2', u'Q3'], probabilities=[0.8, 0.4])

Esempio n. 19

0

Mostra file

def test_kb_invalid_probabilities(nlp):
    """Test the invalid construction of a KB with wrong prior probabilities"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)

    # adding entities
    mykb.add_entity(entity="Q1", freq=0.9, entity_vector=[1])
    mykb.add_entity(entity="Q2", freq=0.2, entity_vector=[2])
    mykb.add_entity(entity="Q3", freq=0.5, entity_vector=[3])

    # adding aliases - should fail because the sum of the probabilities exceeds 1
    with pytest.raises(ValueError):
        mykb.add_alias(alias="douglas", entities=["Q2", "Q3"], probabilities=[0.8, 0.4])

Esempio n. 20

0

Mostra file

 def create_kb(vocab):
     # create artificial KB - assign same prior weight to the two russ cochran's
     # Q2146908 (Russ Cochran): American golfer
     # Q7381115 (Russ Cochran): publisher
     mykb = KnowledgeBase(vocab, entity_vector_length=vector_length)
     mykb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
     mykb.add_entity(entity="Q7381115", freq=12, entity_vector=[9, 1, -7])
     mykb.add_alias(
         alias="Russ Cochran",
         entities=["Q2146908", "Q7381115"],
         probabilities=[0.5, 0.5],
     )
     return mykb

Esempio n. 21

0

Mostra file

File: test_entity_linker.py Progetto: paolodedios/spaCy

def test_kb_pickle():
    # Test that the KB can be pickled
    nlp = English()
    kb_1 = KnowledgeBase(nlp.vocab, entity_vector_length=3)
    kb_1.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
    assert not kb_1.contains_alias("Russ Cochran")
    kb_1.add_alias(alias="Russ Cochran",
                   entities=["Q2146908"],
                   probabilities=[0.8])
    assert kb_1.contains_alias("Russ Cochran")
    data = pickle.dumps(kb_1)
    kb_2 = pickle.loads(data)
    assert kb_2.contains_alias("Russ Cochran")

Esempio n. 22

0

Mostra file

def test_kb_invalid_entities(nlp):
    """Test the invalid construction of a KB with an alias linked to a non-existing entity"""
    mykb = KnowledgeBase(nlp.vocab)

    # adding entities
    mykb.add_entity(entity=u'Q1', prob=0.9)
    mykb.add_entity(entity=u'Q2', prob=0.2)
    mykb.add_entity(entity=u'Q3', prob=0.5)

    # adding aliases - should fail because one of the given IDs is not valid
    with pytest.raises(ValueError):
        mykb.add_alias(alias=u'douglas',
                       entities=[u'Q2', u'Q342'],
                       probabilities=[0.8, 0.2])

Esempio n. 23

0

Mostra file

File: entity_linking.py Progetto: ToddMorrill/tutorials

def create_kb(kb_dir='sample_kb', nlp_dir='sample_nlp'):
    nlp = spacy.load('en_core_web_lg')
    text = 'Tennis champion Emerson was expected to win Wimbledon.'
    doc = nlp(text)
    file_path = 'entities.csv'
    name_dict, desc_dict = load_entities(file_path)

    sample_qid, sample_desc = list(desc_dict.items())[0]
    sample_doc = nlp(sample_desc)
    entity_vector_length = len(sample_doc.vector)  # should be 300
    kb = KnowledgeBase(vocab=nlp.vocab,
                       entity_vector_length=entity_vector_length)

    for qid, desc in desc_dict.items():
        desc_doc = nlp(desc)
        desc_enc = desc_doc.vector
        # NB: entity_vector could be any encoding
        # freq is the count of times the word appears in the corpus
        # not used in this tutorial
        kb.add_entity(entity=qid, entity_vector=desc_enc, freq=42)

    # add provided alias
    for qid, name in name_dict.items():
        # probabilities is P(entity|alias) = 1.0
        # we assume that each alias only maps to one entity
        kb.add_alias(alias=name, entities=[qid], probabilities=[1])

    # add additional alias with equal probability
    # this could be learned from data
    qids = name_dict.keys()
    probs = [0.3 for qid in qids]
    kb.add_alias(alias='Emerson', entities=qids, probabilities=probs)

    print(f'Entities in the KB: {kb.get_entity_strings()}')
    print(f'Aliases in the KB: {kb.get_alias_strings()}')
    print()
    # questions here are:
    # 1) what matching function is being used? - is this deterministic?
    # 2) what threshold is being used to determine how many candidates are presented?
    entities = [
        c.entity_ for c in kb.get_alias_candidates('Roy Stanley Emerson')
    ]
    print(f'Candidates for \'Roy Stanley Emerson\': {entities}')
    entities = [c.entity_ for c in kb.get_alias_candidates('Emerson')]
    print(f'Candidates for \'Emerson\': {entities}')
    entities = [c.entity_ for c in kb.get_alias_candidates('Todd')]
    print(f'Candidates for \'Todd\': {entities}')

    kb.to_disk(kb_dir)
    nlp.to_disk(nlp_dir)

Esempio n. 24

0

Mostra file

def test_kb_invalid_combination(nlp):
    """Test the invalid construction of a KB with non-matching entity and probability lists"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)

    # adding entities
    mykb.add_entity(entity="Q1", freq=0.9, entity_vector=[1])
    mykb.add_entity(entity="Q2", freq=0.2, entity_vector=[2])
    mykb.add_entity(entity="Q3", freq=0.5, entity_vector=[3])

    # adding aliases - should fail because the entities and probabilities vectors are not of equal length
    with pytest.raises(ValueError):
        mykb.add_alias(
            alias="douglas", entities=["Q2", "Q3"], probabilities=[0.3, 0.4, 0.1]
        )

Esempio n. 25

0

Mostra file

def test_kb_invalid_combination(nlp):
    """Test the invalid construction of a KB with non-matching entity and probability lists"""
    mykb = KnowledgeBase(nlp.vocab)

    # adding entities
    mykb.add_entity(entity=u'Q1', prob=0.9)
    mykb.add_entity(entity=u'Q2', prob=0.2)
    mykb.add_entity(entity=u'Q3', prob=0.5)

    # adding aliases - should fail because the entities and probabilities vectors are not of equal length
    with pytest.raises(ValueError):
        mykb.add_alias(alias=u'douglas',
                       entities=[u'Q2', u'Q3'],
                       probabilities=[0.3, 0.4, 0.1])

Esempio n. 26

0

Mostra file

def test_kb_invalid_entities(nlp):
    """Test the invalid construction of a KB with an alias linked to a non-existing entity"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)

    # adding entities
    mykb.add_entity(entity="Q1", freq=0.9, entity_vector=[1])
    mykb.add_entity(entity="Q2", freq=0.2, entity_vector=[2])
    mykb.add_entity(entity="Q3", freq=0.5, entity_vector=[3])

    # adding aliases - should fail because one of the given IDs is not valid
    with pytest.raises(ValueError):
        mykb.add_alias(
            alias="douglas", entities=["Q2", "Q342"], probabilities=[0.8, 0.2]
        )

Esempio n. 27

0

Mostra file

def test_kb_invalid_probabilities(nlp):
    """Test the invalid construction of a KB with wrong prior probabilities"""
    mykb = KnowledgeBase(nlp.vocab)

    # adding entities
    mykb.add_entity(entity=u'Q1', prob=0.9)
    mykb.add_entity(entity=u'Q2', prob=0.2)
    mykb.add_entity(entity=u'Q3', prob=0.5)

    # adding aliases - should fail because the sum of the probabilities exceeds 1
    with pytest.raises(ValueError):
        mykb.add_alias(alias=u'douglas',
                       entities=[u'Q2', u'Q3'],
                       probabilities=[0.8, 0.4])

Esempio n. 28

0

Mostra file

def test_kb_serialize_vocab(nlp):
    """Test serialization of the KB and custom strings"""
    entity = "MyFunnyID"
    assert entity not in nlp.vocab.strings
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
    assert not mykb.contains_entity(entity)
    mykb.add_entity(entity, freq=342, entity_vector=[3])
    assert mykb.contains_entity(entity)
    assert entity in mykb.vocab.strings
    with make_tempdir() as d:
        # normal read-write behaviour
        mykb.to_disk(d / "kb")
        mykb_new = KnowledgeBase(Vocab(), entity_vector_length=1)
        mykb_new.from_disk(d / "kb")
        assert entity in mykb_new.vocab.strings

Esempio n. 29

0

Mostra file

def test_append_invalid_alias(nlp):
    """Test that append an alias will throw an error if prior probs are exceeding 1"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)

    # adding entities
    mykb.add_entity(entity="Q1", freq=27, entity_vector=[1])
    mykb.add_entity(entity="Q2", freq=12, entity_vector=[2])
    mykb.add_entity(entity="Q3", freq=5, entity_vector=[3])

    # adding aliases
    mykb.add_alias(alias="douglas", entities=["Q2", "Q3"], probabilities=[0.8, 0.1])
    mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9])

    # append an alias - should fail because the entities and probabilities vectors are not of equal length
    with pytest.raises(ValueError):
        mykb.append_alias(alias="douglas", entity="Q1", prior_prob=0.2)

Esempio n. 30

0

Mostra file

 def create_kb(vocab):
     # create artificial KB
     mykb = KnowledgeBase(vocab, entity_vector_length=vector_length)
     mykb.add_entity(entity="Q270853", freq=12, entity_vector=[9, 1, -7])
     mykb.add_alias(
         alias="No. 8",
         entities=["Q270853"],
         probabilities=[1.0],
     )
     mykb.add_entity(entity="Q7304", freq=12, entity_vector=[6, -4, 3])
     mykb.add_alias(
         alias="Mahler",
         entities=["Q7304"],
         probabilities=[1.0],
     )
     return mykb

Esempio n. 31

0

Mostra file

def test_preserving_links_asdoc(nlp):
    """Test that Span.as_doc preserves the existing entity links"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)

    # adding entities
    mykb.add_entity(entity="Q1", freq=19, entity_vector=[1])
    mykb.add_entity(entity="Q2", freq=8, entity_vector=[1])

    # adding aliases
    mykb.add_alias(alias="Boston", entities=["Q1"], probabilities=[0.7])
    mykb.add_alias(alias="Denver", entities=["Q2"], probabilities=[0.6])

    # set up pipeline with NER (Entity Ruler) and NEL (prior probability only, model not trained)
    sentencizer = nlp.create_pipe("sentencizer")
    nlp.add_pipe(sentencizer)

    ruler = EntityRuler(nlp)
    patterns = [
        {
            "label": "GPE",
            "pattern": "Boston"
        },
        {
            "label": "GPE",
            "pattern": "Denver"
        },
    ]
    ruler.add_patterns(patterns)
    nlp.add_pipe(ruler)

    el_pipe = nlp.create_pipe(name="entity_linker")
    el_pipe.set_kb(mykb)
    el_pipe.begin_training()
    el_pipe.incl_context = False
    el_pipe.incl_prior = True
    nlp.add_pipe(el_pipe, last=True)

    # test whether the entity links are preserved by the `as_doc()` function
    text = "She lives in Boston. He lives in Denver."
    doc = nlp(text)
    for ent in doc.ents:
        orig_text = ent.text
        orig_kb_id = ent.kb_id_
        sent_doc = ent.sent.as_doc()
        for s_ent in sent_doc.ents:
            if s_ent.text == orig_text:
                assert s_ent.kb_id_ == orig_kb_id

Esempio n. 32

0

Mostra file

File: test_el.py Progetto: spacy-io/spaCy

def test_candidate_generation(nlp):
    """Test correct candidate generation"""
    mykb = KnowledgeBase(nlp.vocab)

    # adding entities
    mykb.add_entity(entity=u'Q1', prob=0.9)
    mykb.add_entity(entity=u'Q2', prob=0.2)
    mykb.add_entity(entity=u'Q3', prob=0.5)

    # adding aliases
    mykb.add_alias(alias=u'douglas', entities=[u'Q2', u'Q3'], probabilities=[0.8, 0.2])
    mykb.add_alias(alias=u'adam', entities=[u'Q2'], probabilities=[0.9])

    # test the size of the relevant candidates
    assert(len(mykb.get_candidates(u'douglas')) == 2)
    assert(len(mykb.get_candidates(u'adam')) == 1)
    assert(len(mykb.get_candidates(u'shrubbery')) == 0)

Esempio n. 33

0

Mostra file

def test_kb_valid_entities(nlp):
    """Test the valid construction of a KB with 3 entities and two aliases"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)

    # adding entities
    mykb.add_entity(entity='Q1', prob=0.9, entity_vector=[1])
    mykb.add_entity(entity='Q2', prob=0.5, entity_vector=[2])
    mykb.add_entity(entity='Q3', prob=0.5, entity_vector=[3])

    # adding aliases
    mykb.add_alias(alias='douglas',
                   entities=['Q2', 'Q3'],
                   probabilities=[0.8, 0.2])
    mykb.add_alias(alias='adam', entities=['Q2'], probabilities=[0.9])

    # test the size of the corresponding KB
    assert (mykb.get_size_entities() == 3)
    assert (mykb.get_size_aliases() == 2)

Esempio n. 34

0

Mostra file

def test_issue6730(en_vocab):
    """Ensure that the KB does not accept empty strings, but otherwise IO works fine."""
    from spacy.kb import KnowledgeBase

    kb = KnowledgeBase(en_vocab, entity_vector_length=3)
    kb.add_entity(entity="1", freq=148, entity_vector=[1, 2, 3])

    with pytest.raises(ValueError):
        kb.add_alias(alias="", entities=["1"], probabilities=[0.4])
    assert kb.contains_alias("") is False

    kb.add_alias(alias="x", entities=["1"], probabilities=[0.2])
    kb.add_alias(alias="y", entities=["1"], probabilities=[0.1])

    with make_tempdir() as tmp_dir:
        kb.to_disk(tmp_dir)
        kb.from_disk(tmp_dir)
    assert kb.get_size_aliases() == 2
    assert set(kb.get_alias_strings()) == {"x", "y"}

Esempio n. 35

0

Mostra file

File: dummy_entity_linking.py Progetto: spacy-io/spaCy

def create_kb(vocab):
    kb = KnowledgeBase(vocab=vocab)

    # adding entities
    entity_0 = "Q1004791_Douglas"
    print("adding entity", entity_0)
    kb.add_entity(entity=entity_0, prob=0.5)

    entity_1 = "Q42_Douglas_Adams"
    print("adding entity", entity_1)
    kb.add_entity(entity=entity_1, prob=0.5)

    entity_2 = "Q5301561_Douglas_Haig"
    print("adding entity", entity_2)
    kb.add_entity(entity=entity_2, prob=0.5)

    # adding aliases
    print()
    alias_0 = "Douglas"
    print("adding alias", alias_0)
    kb.add_alias(alias=alias_0, entities=[entity_0, entity_1, entity_2], probabilities=[0.1, 0.6, 0.2])

    alias_1 = "Douglas Adams"
    print("adding alias", alias_1)
    kb.add_alias(alias=alias_1, entities=[entity_1], probabilities=[0.9])

    print()
    print("kb size:", len(kb), kb.get_size_entities(), kb.get_size_aliases())

    return kb

Esempio n. 36

0

Mostra file

File: test_el.py Progetto: spacy-io/spaCy

def test_kb_valid_entities(nlp):
    """Test the valid construction of a KB with 3 entities and two aliases"""
    mykb = KnowledgeBase(nlp.vocab)

    # adding entities
    mykb.add_entity(entity=u'Q1', prob=0.9)
    mykb.add_entity(entity=u'Q2')
    mykb.add_entity(entity=u'Q3', prob=0.5)

    # adding aliases
    mykb.add_alias(alias=u'douglas', entities=[u'Q2', u'Q3'], probabilities=[0.8, 0.2])
    mykb.add_alias(alias=u'adam', entities=[u'Q2'], probabilities=[0.9])

    # test the size of the corresponding KB
    assert(mykb.get_size_entities() == 3)
    assert(mykb.get_size_aliases() == 2)