Python EntityAggregator Exemples, aleph.logic.extractors.aggregate.EntityAggregator Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : test_aggregate.py Projet : pudo/aleph

 def test_phonenumber(self):
     """Check extraction of a phone number embedded in English text.

     The test expects both the number itself and the label ``'in'`` to be
     among the aggregated entity labels.
     """
     aggregator = EntityAggregator()
     aggregator.extract(
         "Mr. Flubby Flubber called the number tel:+919988111222 twice",
         ['en'],
     )
     # Each entity is a (label, category, weight) triple; only labels matter.
     labels = [label for label, _category, _weight in aggregator.entities]
     assert '+919988111222' in labels
     # NOTE(review): 'in' is presumably the country code derived from the
     # +91 prefix -- confirm against the extractor.
     assert 'in' in labels

Exemple #2

0

Afficher le fichier

Fichier : test_aggregate.py Projet : pudo/aleph

 def test_merkel(self):
     """Check that a person name in repeated German text is extracted.

     The sentence is repeated five times before extraction with the
     languages ``['de', 'en']``.
     """
     sentence = 'Das ist der Pudel von Angela Merkel. '
     repeated = sentence * 5
     aggregator = EntityAggregator()
     aggregator.extract(repeated, ['de', 'en'])
     # Each entity is a (label, category, weight) triple; only labels matter.
     labels = [label for label, _category, _weight in aggregator.entities]
     assert 'Angela Merkel' in labels, labels

Exemple #3

0

Afficher le fichier

Fichier : test_aggregate.py Projet : vishalbelsare/aleph

 def test_phonenumber(self):
     """Check extraction of a phone number embedded in English text.

     The test expects both the number itself and the label ``'in'`` to be
     among the aggregated entity labels.
     """
     aggregator = EntityAggregator()
     aggregator.extract(
         "Mr. Flubby Flubber called the number tel:+919988111222 twice",
         ['en'],
     )
     # Each entity is a (label, category, weight) triple; only labels matter.
     labels = [label for label, _category, _weight in aggregator.entities]
     assert '+919988111222' in labels
     # NOTE(review): 'in' is presumably the country code derived from the
     # +91 prefix -- confirm against the extractor.
     assert 'in' in labels

Exemple #4

0

Afficher le fichier

Fichier : test_aggregate.py Projet : vishalbelsare/aleph

 def test_merkel(self):
     """Check that a person name in repeated German text is extracted.

     The sentence is repeated five times before extraction with the
     languages ``['de', 'en']``.
     """
     sentence = 'Das ist der Pudel von Angela Merkel. '
     repeated = sentence * 5
     aggregator = EntityAggregator()
     aggregator.extract(repeated, ['de', 'en'])
     # Each entity is a (label, category, weight) triple; only labels matter.
     labels = [label for label, _category, _weight in aggregator.entities]
     assert 'Angela Merkel' in labels, labels

Exemple #5

0

Afficher le fichier

Fichier : test_aggregate.py Projet : pudo/aleph

 def test_multi(self):
     """Check that a name mentioned twice in one text is extracted."""
     aggregator = EntityAggregator()
     # The passage is extractor input and is kept verbatim on purpose.
     passage = (
         "This is a text about Foo Blubb, a leader in "
         "this industry. The should not be confused with Foo Blubb, "
         "a smaller firm."
     )
     aggregator.extract(passage, ['en'])
     # Each entity is a (label, category, weight) triple; only labels matter.
     labels = [label for label, _category, _weight in aggregator.entities]
     assert 'Foo Blubb' in labels, labels

Exemple #6

0

Afficher le fichier

Fichier : test_aggregate.py Projet : vishalbelsare/aleph

 def test_multi(self):
     """Check that a name mentioned twice in one text is extracted."""
     aggregator = EntityAggregator()
     # The passage is extractor input and is kept verbatim on purpose.
     passage = (
         "This is a text about Foo Blubb, a leader in "
         "this industry. The should not be confused with Foo Blubb, "
         "a smaller firm."
     )
     aggregator.extract(passage, ['en'])
     # Each entity is a (label, category, weight) triple; only labels matter.
     labels = [label for label, _category, _weight in aggregator.entities]
     assert 'Foo Blubb' in labels, labels

Exemple #7

0

Afficher le fichier

Fichier : test_aggregate.py Projet : pudo/aleph

 def test_entities(self):
     """Check that variants of one person name merge into a single label.

     Three results are added ('Mr. Max Banana' twice and 'max Banana' once).
     Every aggregated entity is expected to carry the label 'Max Banana'
     with a weight of 3.
     """
     agg = EntityAggregator()
     for name in ('Mr. Max Banana', 'Mr. Max Banana', 'max Banana'):
         agg.add(PersonResult.create(agg, name, 0, 12))

     # Materialise first: the per-entity assertions below would never run
     # (and the test would pass vacuously) if nothing had been aggregated.
     entities = list(agg.entities)
     assert entities, 'no entities were aggregated'

     # The category of each entity is deliberately not asserted here.
     for label, _category, weight in entities:
         assert label == 'Max Banana', label
         assert weight == 3, weight

Exemple #8

0

Afficher le fichier

Fichier : test_aggregate.py Projet : vishalbelsare/aleph

 def test_aggregator(self):
     """Check the aggregator's length after each added person result.

     Expected lengths: 0 after 'Banana', 1 after 'Mr. Max Banana', and
     still 1 after 'Max Banana'.
     """
     agg = EntityAggregator()
     # (name to add, expected len(agg) right after adding it)
     steps = (
         ('Banana', 0),
         ('Mr. Max Banana', 1),
         ('Max Banana', 1),
     )
     for name, expected in steps:
         agg.add(PersonResult.create(agg, name, 0, 12))
         assert len(agg) == expected, agg

Exemple #9

0

Afficher le fichier

Fichier : test_aggregate.py Projet : pudo/aleph

 def test_aggregator(self):
     """Check the aggregator's length after each added person result.

     Expected lengths: 0 after 'Banana', 1 after 'Mr. Max Banana', and
     still 1 after 'Max Banana'.
     """
     agg = EntityAggregator()
     # (name to add, expected len(agg) right after adding it)
     steps = (
         ('Banana', 0),
         ('Mr. Max Banana', 1),
         ('Max Banana', 1),
     )
     for name, expected in steps:
         agg.add(PersonResult.create(agg, name, 0, 12))
         assert len(agg) == expected, agg

Exemple #10

0

Afficher le fichier

 def test_ner_service(self):
     """Check that ``extract_entities`` finds the countries in English text.

     The paragraph is repeated four times, and the string form of each
     returned result is compared against the expected names.
     """
     ctx = EntityAggregator()
     paragraph = """This is a document about the United States. But also about
     Syria and Germany.
     """
     results = extract_entities(ctx, paragraph * 4, 'en')
     found = [str(result) for result in results]
     for expected in ('United States', 'Germany', 'Syria'):
         assert expected in found, found

Exemple #11

0

Afficher le fichier

def extract_document_tags(document):
    """Extract entities from a document's texts and save them as 'ner' tags.

    Does nothing unless ``document.status`` equals
    ``Document.STATUS_SUCCESS``. Otherwise every text of the document is fed
    to an ``EntityAggregator``, the aggregated entities are emitted into a
    ``DocumentTagCollector`` with origin 'ner', and the document is added to
    the database session, which is then committed.
    """
    if document.status != Document.STATUS_SUCCESS:
        return
    load_places()
    log.info("Tagging [%s]: %s", document.id, document.name)

    # Use the configured default language when the document declares none.
    languages = list(document.languages) or [settings.DEFAULT_LANGUAGE]

    aggregator = EntityAggregator()
    for text in document.texts:
        aggregator.extract(text, languages)

    # NOTE(review): these collectors are saved without emitting anything;
    # presumably this clears tags from older extractors -- confirm.
    for origin in ('polyglot', 'spacy'):
        DocumentTagCollector(document, origin).save()

    collector = DocumentTagCollector(document, 'ner')
    for label, category, weight in aggregator.entities:
        collector.emit(label, category, weight=weight)
    log.info("Extracted tags: %s", len(collector))
    collector.save()

    db.session.add(document)
    db.session.commit()

Exemple #12

0

Afficher le fichier

Fichier : test_aggregate.py Projet : vishalbelsare/aleph

 def test_entities(self):
     """Check that variants of one person name merge into a single label.

     Three results are added ('Mr. Max Banana' twice and 'max Banana' once).
     Every aggregated entity is expected to carry the label 'Max Banana'
     with a weight of 3.
     """
     agg = EntityAggregator()
     for name in ('Mr. Max Banana', 'Mr. Max Banana', 'max Banana'):
         agg.add(PersonResult.create(agg, name, 0, 12))

     # Materialise first: the per-entity assertions below would never run
     # (and the test would pass vacuously) if nothing had been aggregated.
     entities = list(agg.entities)
     assert entities, 'no entities were aggregated'

     # The category of each entity is deliberately not asserted here.
     for label, _category, weight in entities:
         assert label == 'Max Banana', label
         assert weight == 3, weight