Esempio n. 1
0
def test_candidate_generation(nlp):
    """Test correct candidate generation"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
    doc = nlp("douglas adam Adam shrubbery")

    douglas_ent = doc[0:1]
    adam_ent = doc[1:2]
    Adam_ent = doc[2:3]
    shrubbery_ent = doc[3:4]

    # adding entities
    mykb.add_entity(entity="Q1", freq=27, entity_vector=[1])
    mykb.add_entity(entity="Q2", freq=12, entity_vector=[2])
    mykb.add_entity(entity="Q3", freq=5, entity_vector=[3])

    # adding aliases
    mykb.add_alias(alias="douglas",
                   entities=["Q2", "Q3"],
                   probabilities=[0.8, 0.1])
    mykb.add_alias(alias="adam", entities=["Q2"], probabilities=[0.9])

    # test the size of the relevant candidates
    assert len(get_candidates(mykb, douglas_ent)) == 2
    assert len(get_candidates(mykb, adam_ent)) == 1
    assert len(get_candidates(mykb, Adam_ent)) == 0  # default case sensitive
    assert len(get_candidates(mykb, shrubbery_ent)) == 0

    # test the content of the candidates
    assert get_candidates(mykb, adam_ent)[0].entity_ == "Q2"
    assert get_candidates(mykb, adam_ent)[0].alias_ == "adam"
    assert_almost_equal(get_candidates(mykb, adam_ent)[0].entity_freq, 12)
    assert_almost_equal(get_candidates(mykb, adam_ent)[0].prior_prob, 0.9)
Esempio n. 2
0
def entity_linker_manual(dataset, source, nlp_dir, kb_loc, entity_loc):
    # Load the NLP and KB objects from file
    nlp = spacy.load(nlp_dir)
    kb = KnowledgeBase(vocab=nlp.vocab, entity_vector_length=1)
    kb.load_bulk(kb_loc)
    model = EntityRecognizer(nlp)

    # Read the pre-defined CSV file into dictionaries mapping QIDs to the full names and descriptions
    id_dict = dict()
    with entity_loc.open("r", encoding="utf8") as csvfile:
        csvreader = csv.reader(csvfile, delimiter=",")
        for row in csvreader:
            id_dict[row[0]] = (row[1], row[2])

    # Initialize the Prodigy stream by running the NER model
    stream = TXT(source)
    stream = [set_hashes(eg) for eg in stream]
    stream = (eg for score, eg in model(stream))

    # For each NER mention, add the candidates from the KB to the annotation task
    stream = _add_options(stream, kb, id_dict)
    stream = filter_duplicates(stream, by_input=True, by_task=False)

    return {
        "dataset": dataset,
        "stream": stream,
        "view_id": "choice",
        "config": {
            "choice_auto_accept": True
        },
    }
Esempio n. 3
0
def create_kb():
    """ Step 1: create the Knowledge Base in spaCy and write it to file """
    nlp = spacy.load("en_core_web_lg")
    name_dict, desc_dict = load_entities()

    kb = KnowledgeBase(vocab=nlp.vocab, entity_vector_length=300)

    for qid, desc in desc_dict.items():
        desc_doc = nlp(desc)
        desc_enc = desc_doc.vector
        kb.add_entity(entity=qid, entity_vector=desc_enc, freq=342)  # 342 is an arbitrary value here

    for qid, name in name_dict.items():
        kb.add_alias(alias=name, entities=[qid], probabilities=[1])  # 100% prior probability P(entity|alias)

    qids = name_dict.keys()
    probs = [0.3 for qid in qids]
    kb.add_alias(alias="Emerson", entities=qids, probabilities=probs)  # sum([probs]) should be <= 1 !

    print(f"Entities in the KB: {kb.get_entity_strings()}")
    print(f"Aliases in the KB: {kb.get_alias_strings()}")
    print()

    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    kb.dump(output_dir / "my_kb")
    nlp.to_disk(output_dir / "my_nlp")
Esempio n. 4
0
 def __init__(self, kb_folder, vectors_loc, lang='sv', stz=True, vectors_name='fasttext'):
     self.nlp = create_model(vectors_loc=vectors_loc, lang=lang, stz=stz, vectors_name=vectors_name, max_items=1000)
     self.kb = KnowledgeBase(vocab=self.nlp.vocab)
     print(kb_folder)
     self.kb.load_bulk(kb_folder)
     print()
     _print_kb(self.kb)
Esempio n. 5
0
 def create_kb(vocab):
     # create artificial KB
     mykb = KnowledgeBase(vocab, entity_vector_length=vector_length)
     mykb.add_entity(entity="Q613241", freq=12, entity_vector=[6, -4, 3])
     mykb.add_alias("Kirby", ["Q613241"], [0.9])
     # Placeholder
     mykb.add_entity(entity="pink", freq=12, entity_vector=[7, 2, -5])
     mykb.add_alias("pink", ["pink"], [0.9])
     return mykb
 def load(self, output_dir):
     kb_path = os.path.join(output_dir, "kb")
     vocab_path = os.path.join(output_dir, "vocab")
     print("Loading vocab from", vocab_path)
     print("Loading KB from", kb_path)
     vocab = Vocab().from_disk(vocab_path)
     kb = KnowledgeBase(vocab=vocab)
     kb.load_bulk(kb_path)
     self.kb = kb
     return self.kb
Esempio n. 7
0
def test_kb_invalid_probabilities(nlp):
    """Test the invalid construction of a KB with wrong prior probabilities"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)

    # adding entities
    mykb.add_entity(entity="Q1", freq=19, entity_vector=[1])
    mykb.add_entity(entity="Q2", freq=5, entity_vector=[2])
    mykb.add_entity(entity="Q3", freq=25, entity_vector=[3])

    # adding aliases - should fail because the sum of the probabilities exceeds 1
    with pytest.raises(ValueError):
        mykb.add_alias(alias="douglas", entities=["Q2", "Q3"], probabilities=[0.8, 0.4])
Esempio n. 8
0
    def get_kb(self):
        """
        :returns: self.kb, reading it from file if not loaded
        """
        if not self.kb:
            print("Loading vocabulary...")
            vocab = Vocab().from_disk(self.spacy_nlp_vocab_dir)

            print("Loading KB...")
            self.kb = KnowledgeBase(vocab=vocab)
            self.kb.load_bulk(self.spacy_kb_file)
            print("KB loaded!")
        return self.kb
Esempio n. 9
0
def test_kb_serialization():
    # Test that the KB can be used in a pipeline with a different vocab
    vector_length = 3
    with make_tempdir() as tmp_dir:
        kb_dir = tmp_dir / "kb"
        nlp1 = English()
        assert "Q2146908" not in nlp1.vocab.strings
        mykb = KnowledgeBase(nlp1.vocab, entity_vector_length=vector_length)
        mykb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
        mykb.add_alias(alias="Russ Cochran",
                       entities=["Q2146908"],
                       probabilities=[0.8])
        assert "Q2146908" in nlp1.vocab.strings
        mykb.to_disk(kb_dir)

        nlp2 = English()
        assert "RandomWord" not in nlp2.vocab.strings
        nlp2.vocab.strings.add("RandomWord")
        assert "RandomWord" in nlp2.vocab.strings
        assert "Q2146908" not in nlp2.vocab.strings

        # Create the Entity Linker component with the KB from file, and check the final vocab
        entity_linker = nlp2.add_pipe("entity_linker", last=True)
        entity_linker.set_kb(load_kb(kb_dir))
        assert "Q2146908" in nlp2.vocab.strings
        assert "RandomWord" in nlp2.vocab.strings
Esempio n. 10
0
def test_kb_invalid_entities(nlp):
    """Test the invalid construction of a KB with an alias linked to a non-existing entity"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)

    # adding entities
    mykb.add_entity(entity="Q1", freq=19, entity_vector=[1])
    mykb.add_entity(entity="Q2", freq=5, entity_vector=[2])
    mykb.add_entity(entity="Q3", freq=25, entity_vector=[3])

    # adding aliases - should fail because one of the given IDs is not valid
    with pytest.raises(ValueError):
        mykb.add_alias(alias="douglas",
                       entities=["Q2", "Q342"],
                       probabilities=[0.8, 0.2])
Esempio n. 11
0
def test_kb_invalid_combination(nlp):
    """Test the invalid construction of a KB with non-matching entity and probability lists"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)

    # adding entities
    mykb.add_entity(entity="Q1", freq=19, entity_vector=[1])
    mykb.add_entity(entity="Q2", freq=5, entity_vector=[2])
    mykb.add_entity(entity="Q3", freq=25, entity_vector=[3])

    # adding aliases - should fail because the entities and probabilities vectors are not of equal length
    with pytest.raises(ValueError):
        mykb.add_alias(alias="douglas",
                       entities=["Q2", "Q3"],
                       probabilities=[0.3, 0.4, 0.1])
Esempio n. 12
0
 def create_kb(vocab):
     kb = KnowledgeBase(vocab, entity_vector_length=1)
     kb.add_entity(entity="Q2", freq=12, entity_vector=[2])
     kb.add_entity(entity="Q3", freq=5, entity_vector=[3])
     kb.add_alias(alias="douglas",
                  entities=["Q2", "Q3"],
                  probabilities=[0.8, 0.1])
     return kb
Esempio n. 13
0
 def load(self, output_dir):
     kb_path = os.path.join(output_dir, "kb")
     vocab_path = os.path.join(output_dir, "vocab")
     kb_info_path = os.path.join(output_dir, "kb_info.txt")
     print("Loading vocab from", vocab_path)
     print("Loading KB from", kb_path)
     print("Loading KB info from", kb_info_path)
     with open(kb_info_path, "r") as file:
         # The first line is the entity_vector_length
         entity_vector_length = int(file.readline().strip())
     vocab = Vocab().from_disk(vocab_path)
     kb = KnowledgeBase(vocab=vocab,
                        entity_vector_length=entity_vector_length)
     kb.from_disk(kb_path)
     self.kb = kb
     return self.kb
Esempio n. 14
0
 def create_kb(vocab):
     # create artificial KB
     mykb = KnowledgeBase(vocab, entity_vector_length=vector_length)
     mykb.add_entity(entity="Q270853", freq=12, entity_vector=[9, 1, -7])
     mykb.add_alias(
         alias="No. 8",
         entities=["Q270853"],
         probabilities=[1.0],
     )
     mykb.add_entity(entity="Q7304", freq=12, entity_vector=[6, -4, 3])
     mykb.add_alias(
         alias="Mahler",
         entities=["Q7304"],
         probabilities=[1.0],
     )
     return mykb
Esempio n. 15
0
 def create_kb(vocab):
     mykb = KnowledgeBase(vocab, entity_vector_length=vector_length)
     # adding entities
     mykb.add_entity(entity="Q1", freq=19, entity_vector=[1])
     mykb.add_entity(entity="Q2", freq=8, entity_vector=[1])
     # adding aliases
     mykb.add_alias(alias="Boston", entities=["Q1"], probabilities=[0.7])
     mykb.add_alias(alias="Denver", entities=["Q2"], probabilities=[0.6])
     return mykb
Esempio n. 16
0
def test_serialize_kb_disk(en_vocab):
    # baseline assertions
    kb1 = _get_dummy_kb(en_vocab)
    _check_kb(kb1)

    # dumping to file & loading back in
    with make_tempdir() as d:
        dir_path = ensure_path(d)
        if not dir_path.exists():
            dir_path.mkdir()
        file_path = dir_path / "kb"
        kb1.to_disk(str(file_path))
        kb2 = KnowledgeBase(vocab=en_vocab, entity_vector_length=3)
        kb2.from_disk(str(file_path))

    # final assertions
    _check_kb(kb2)
def entity_linker():
    nlp = Language()
    nlp.add_pipe(nlp.create_pipe("entity_linker"))
    entity_linker = nlp.get_pipe("entity_linker")
    # need to add model for two reasons:
    # 1. no model leads to error in serialization,
    # 2. the affected line is the one for model serialization
    kb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
    entity_linker.set_kb(kb)
    entity_linker.begin_training(pipeline=nlp.pipeline)
    return entity_linker
Esempio n. 18
0
def test_kb_pickle():
    # Test that the KB can be pickled
    nlp = English()
    kb_1 = KnowledgeBase(nlp.vocab, entity_vector_length=3)
    kb_1.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
    assert not kb_1.contains_alias("Russ Cochran")
    kb_1.add_alias(alias="Russ Cochran",
                   entities=["Q2146908"],
                   probabilities=[0.8])
    assert kb_1.contains_alias("Russ Cochran")
    data = pickle.dumps(kb_1)
    kb_2 = pickle.loads(data)
    assert kb_2.contains_alias("Russ Cochran")
Esempio n. 19
0
def read_nlp_kb(model_dir, kb_file):
    nlp = spacy.load(model_dir)
    kb = KnowledgeBase(vocab=nlp.vocab)
    kb.load_bulk(kb_file)
    logger.info("kb entities: {}".format(kb.get_size_entities()))
    logger.info("kb aliases: {}".format(kb.get_size_aliases()))
    return nlp, kb
Esempio n. 20
0
 def create_kb(vocab):
     kb = KnowledgeBase(vocab, entity_vector_length=3)
     kb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
     kb.add_alias(alias="Russ Cochran",
                  entities=["Q2146908"],
                  probabilities=[0.8])
     return kb
Esempio n. 21
0
def test_kb_invalid_probabilities(nlp):
    """Test the invalid construction of a KB with wrong prior probabilities"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)

    # adding entities
    mykb.add_entity(entity='Q1', prob=0.9, entity_vector=[1])
    mykb.add_entity(entity='Q2', prob=0.2, entity_vector=[2])
    mykb.add_entity(entity='Q3', prob=0.5, entity_vector=[3])

    # adding aliases - should fail because the sum of the probabilities exceeds 1
    with pytest.raises(ValueError):
        mykb.add_alias(alias='douglas',
                       entities=['Q2', 'Q3'],
                       probabilities=[0.8, 0.4])
Esempio n. 22
0
def test_kb_invalid_probabilities(nlp):
    """Test the invalid construction of a KB with wrong prior probabilities"""
    mykb = KnowledgeBase(nlp.vocab)

    # adding entities
    mykb.add_entity(entity=u'Q1', prob=0.9)
    mykb.add_entity(entity=u'Q2', prob=0.2)
    mykb.add_entity(entity=u'Q3', prob=0.5)

    # adding aliases - should fail because the sum of the probabilities exceeds 1
    with pytest.raises(ValueError):
        mykb.add_alias(alias=u'douglas',
                       entities=[u'Q2', u'Q3'],
                       probabilities=[0.8, 0.4])
Esempio n. 23
0
def test_kb_invalid_combination(nlp):
    """Test the invalid construction of a KB with non-matching entity and probability lists"""
    mykb = KnowledgeBase(nlp.vocab)

    # adding entities
    mykb.add_entity(entity=u'Q1', prob=0.9)
    mykb.add_entity(entity=u'Q2', prob=0.2)
    mykb.add_entity(entity=u'Q3', prob=0.5)

    # adding aliases - should fail because the entities and probabilities vectors are not of equal length
    with pytest.raises(ValueError):
        mykb.add_alias(alias=u'douglas',
                       entities=[u'Q2', u'Q3'],
                       probabilities=[0.3, 0.4, 0.1])
Esempio n. 24
0
def test_kb_invalid_entities(nlp):
    """Test the invalid construction of a KB with an alias linked to a non-existing entity"""
    mykb = KnowledgeBase(nlp.vocab)

    # adding entities
    mykb.add_entity(entity=u'Q1', prob=0.9)
    mykb.add_entity(entity=u'Q2', prob=0.2)
    mykb.add_entity(entity=u'Q3', prob=0.5)

    # adding aliases - should fail because one of the given IDs is not valid
    with pytest.raises(ValueError):
        mykb.add_alias(alias=u'douglas',
                       entities=[u'Q2', u'Q342'],
                       probabilities=[0.8, 0.2])
def test_save_and_load_knowledge_base():
    nlp = Language()
    kb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
    with make_tempdir() as d:
        path = d / "kb"
        try:
            kb.dump(path)
        except Exception as e:
            pytest.fail(str(e))

        try:
            kb_loaded = KnowledgeBase(nlp.vocab, entity_vector_length=1)
            kb_loaded.load_bulk(path)
        except Exception as e:
            pytest.fail(str(e))
Esempio n. 26
0
def test_kb_invalid_entities(nlp):
    """Test the invalid construction of a KB with an alias linked to a non-existing entity"""
    mykb = KnowledgeBase(nlp.vocab)

    # adding entities
    mykb.add_entity(entity=u'Q1', prob=0.9)
    mykb.add_entity(entity=u'Q2', prob=0.2)
    mykb.add_entity(entity=u'Q3', prob=0.5)

    # adding aliases - should fail because one of the given IDs is not valid
    with pytest.raises(ValueError):
        mykb.add_alias(alias=u'douglas', entities=[u'Q2', u'Q342'], probabilities=[0.8, 0.2])
Esempio n. 27
0
def test_kb_invalid_probabilities(nlp):
    """Test the invalid construction of a KB with wrong prior probabilities"""
    mykb = KnowledgeBase(nlp.vocab)

    # adding entities
    mykb.add_entity(entity=u'Q1', prob=0.9)
    mykb.add_entity(entity=u'Q2', prob=0.2)
    mykb.add_entity(entity=u'Q3', prob=0.5)

    # adding aliases - should fail because the sum of the probabilities exceeds 1
    with pytest.raises(ValueError):
        mykb.add_alias(alias=u'douglas', entities=[u'Q2', u'Q3'], probabilities=[0.8, 0.4])
Esempio n. 28
0
def test_kb_invalid_combination(nlp):
    """Test the invalid construction of a KB with non-matching entity and probability lists"""
    mykb = KnowledgeBase(nlp.vocab)

    # adding entities
    mykb.add_entity(entity=u'Q1', prob=0.9)
    mykb.add_entity(entity=u'Q2', prob=0.2)
    mykb.add_entity(entity=u'Q3', prob=0.5)

    # adding aliases - should fail because the entities and probabilities vectors are not of equal length
    with pytest.raises(ValueError):
        mykb.add_alias(alias=u'douglas', entities=[u'Q2', u'Q3'], probabilities=[0.3, 0.4, 0.1])
Esempio n. 29
0
    def from_disk(self, path: Path, **kwargs):
        """Deserialize saved AnnLinker from disk.
        
        path (Path): directory to deserialize from
        
        RETURNS (AnnLinker): Initialized AnnLinker
        """        
        path = util.ensure_path(path)

        kb = KnowledgeBase(self.nlp.vocab, 300)
        kb.load_bulk(path / "kb")
        self.set_kb(kb)

        cg = CandidateGenerator().from_disk(path)
        self.set_cg(cg)

        cfg = srsly.read_json(path / "cfg")
        
        self.threshold = cfg.get("threshold", 0.7)
        self.no_description_threshold = cfg.get("no_description_threshold", 0.95)
        self.disambiguate = cfg.get("disambiguate", True)

        return self
Esempio n. 30
0
def test_preserving_links_asdoc(nlp):
    """Test that Span.as_doc preserves the existing entity links"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=1)

    # adding entities
    mykb.add_entity(entity="Q1", freq=19, entity_vector=[1])
    mykb.add_entity(entity="Q2", freq=8, entity_vector=[1])

    # adding aliases
    mykb.add_alias(alias="Boston", entities=["Q1"], probabilities=[0.7])
    mykb.add_alias(alias="Denver", entities=["Q2"], probabilities=[0.6])

    # set up pipeline with NER (Entity Ruler) and NEL (prior probability only, model not trained)
    sentencizer = nlp.create_pipe("sentencizer")
    nlp.add_pipe(sentencizer)

    ruler = EntityRuler(nlp)
    patterns = [
        {
            "label": "GPE",
            "pattern": "Boston"
        },
        {
            "label": "GPE",
            "pattern": "Denver"
        },
    ]
    ruler.add_patterns(patterns)
    nlp.add_pipe(ruler)

    el_pipe = nlp.create_pipe(name="entity_linker")
    el_pipe.set_kb(mykb)
    el_pipe.begin_training()
    el_pipe.incl_context = False
    el_pipe.incl_prior = True
    nlp.add_pipe(el_pipe, last=True)

    # test whether the entity links are preserved by the `as_doc()` function
    text = "She lives in Boston. He lives in Denver."
    doc = nlp(text)
    for ent in doc.ents:
        orig_text = ent.text
        orig_kb_id = ent.kb_id_
        sent_doc = ent.sent.as_doc()
        for s_ent in sent_doc.ents:
            if s_ent.text == orig_text:
                assert s_ent.kb_id_ == orig_kb_id
Esempio n. 31
0
    def train(self, entities, list_aliases):
        """
        Args:
            entities: a dict of each entity, it's description and it's corpus frequency
            list_aliases: a list of dicts for each entity e.g.::

                    [{
                        'alias':'Farrar',
                        'entities': ['Q1', 'Q2'],
                        'probabilities': [0.4, 0.6]
                    }]

                probabilities are 'prior probabilities' and must sum to < 1
        """
        try:
            nlp = spacy.load(self.kb_model)
        except IOError:
            subprocess.run(
                ["python", "-m", "spacy", "download", self.kb_model])
            # pkg_resources need to be reloaded to pick up the newly installed models
            import pkg_resources
            import imp

            imp.reload(pkg_resources)
            nlp = spacy.load(self.kb_model)

        print("Loaded model '%s'" % self.kb_model)

        # set up the data
        entity_ids = []
        embeddings = []
        freqs = []
        for key, value in entities.items():
            desc, freq = value
            entity_ids.append(key)
            embeddings.append(nlp(desc).vector)
            freqs.append(freq)

        self.entity_vector_length = len(
            embeddings[0])  # This is needed in loading a kb
        kb = KnowledgeBase(vocab=nlp.vocab,
                           entity_vector_length=self.entity_vector_length)

        # set the entities, can also be done by calling `kb.add_entity` for each entity
        kb.set_entities(entity_list=entity_ids,
                        freq_list=freqs,
                        vector_list=embeddings)

        # adding aliases, the entities need to be defined in the KB beforehand
        for alias in list_aliases:
            kb.add_alias(
                alias=alias["alias"],
                entities=alias["entities"],
                probabilities=alias["probabilities"],
            )
        self.kb = kb
        return self.kb
Esempio n. 32
0
def test_kb_invalid_entity_vector(nlp):
    """Test the invalid construction of a KB with non-matching entity vector lengths"""
    mykb = KnowledgeBase(nlp.vocab, entity_vector_length=3)

    # adding entities
    mykb.add_entity(entity="Q1", freq=0.9, entity_vector=[1, 2, 3])

    # this should fail because the kb's expected entity vector length is 3
    with pytest.raises(ValueError):
        mykb.add_entity(entity="Q2", freq=0.2, entity_vector=[2])
Esempio n. 33
0
 def create_kb(vocab):
     mykb = KnowledgeBase(vocab, entity_vector_length=vector_length)
     mykb.add_entity(entity="Q2146908", freq=12, entity_vector=[6, -4, 3])
     mykb.add_entity(entity="Q7381115", freq=12, entity_vector=[9, 1, -7])
     mykb.add_alias(
         alias="Russ Cochran",
         entities=["Q2146908", "Q7381115"],
         probabilities=[0.5, 0.5],
     )
     return mykb
Esempio n. 34
0
def create_kb(vocab):
    kb = KnowledgeBase(vocab=vocab)

    # adding entities
    entity_0 = "Q1004791_Douglas"
    print("adding entity", entity_0)
    kb.add_entity(entity=entity_0, prob=0.5)

    entity_1 = "Q42_Douglas_Adams"
    print("adding entity", entity_1)
    kb.add_entity(entity=entity_1, prob=0.5)

    entity_2 = "Q5301561_Douglas_Haig"
    print("adding entity", entity_2)
    kb.add_entity(entity=entity_2, prob=0.5)

    # adding aliases
    print()
    alias_0 = "Douglas"
    print("adding alias", alias_0)
    kb.add_alias(alias=alias_0, entities=[entity_0, entity_1, entity_2], probabilities=[0.1, 0.6, 0.2])

    alias_1 = "Douglas Adams"
    print("adding alias", alias_1)
    kb.add_alias(alias=alias_1, entities=[entity_1], probabilities=[0.9])

    print()
    print("kb size:", len(kb), kb.get_size_entities(), kb.get_size_aliases())

    return kb
Esempio n. 35
0
def test_kb_valid_entities(nlp):
    """Test the valid construction of a KB with 3 entities and two aliases"""
    mykb = KnowledgeBase(nlp.vocab)

    # adding entities
    mykb.add_entity(entity=u'Q1', prob=0.9)
    mykb.add_entity(entity=u'Q2')
    mykb.add_entity(entity=u'Q3', prob=0.5)

    # adding aliases
    mykb.add_alias(alias=u'douglas', entities=[u'Q2', u'Q3'], probabilities=[0.8, 0.2])
    mykb.add_alias(alias=u'adam', entities=[u'Q2'], probabilities=[0.9])

    # test the size of the corresponding KB
    assert(mykb.get_size_entities() == 3)
    assert(mykb.get_size_aliases() == 2)
Esempio n. 36
0
def test_candidate_generation(nlp):
    """Test correct candidate generation"""
    mykb = KnowledgeBase(nlp.vocab)

    # adding entities
    mykb.add_entity(entity=u'Q1', prob=0.9)
    mykb.add_entity(entity=u'Q2', prob=0.2)
    mykb.add_entity(entity=u'Q3', prob=0.5)

    # adding aliases
    mykb.add_alias(alias=u'douglas', entities=[u'Q2', u'Q3'], probabilities=[0.8, 0.2])
    mykb.add_alias(alias=u'adam', entities=[u'Q2'], probabilities=[0.9])

    # test the size of the relevant candidates
    assert(len(mykb.get_candidates(u'douglas')) == 2)
    assert(len(mykb.get_candidates(u'adam')) == 1)
    assert(len(mykb.get_candidates(u'shrubbery')) == 0)