Exemple #1
0
 def test_phonenumber(self):
     """Extract from English text containing a ``tel:`` phone number.

     The labels yielded by the aggregator must include the number itself
     and the label ``'in'`` (presumably the country derived from the +91
     prefix -- confirm against the aggregator implementation).
     """
     aggregator = EntityAggregator()
     aggregator.extract(
         "Mr. Flubby Flubber called the number tel:+919988111222 twice",
         ['en'],
     )
     # Each entity is a (label, category, weight) triple; keep labels only.
     labels = [label for label, _category, _weight in aggregator.entities]
     assert '+919988111222' in labels
     assert 'in' in labels
Exemple #2
0
 def test_merkel(self):
     """Extract a person name from a repeated German sentence.

     The sentence is repeated five times and processed with the language
     list ``['de', 'en']``; ``'Angela Merkel'`` must be among the labels.
     """
     sentence = 'Das ist der Pudel von Angela Merkel. '
     aggregator = EntityAggregator()
     aggregator.extract(sentence * 5, ['de', 'en'])
     # Each entity is a (label, category, weight) triple; keep labels only.
     labels = [label for label, _category, _weight in aggregator.entities]
     assert 'Angela Merkel' in labels, labels
 def test_phonenumber(self):
     """Check that a ``tel:`` number embedded in English text is picked up.

     Both the phone number and the label ``'in'`` must appear among the
     aggregated labels (``'in'`` is presumably the country code for the
     +91 prefix -- TODO confirm).
     """
     sample = "Mr. Flubby Flubber called the number tel:+919988111222 twice"
     aggregator = EntityAggregator()
     aggregator.extract(sample, ['en'])
     found = [label for label, _category, _weight in aggregator.entities]
     # Checked in the same order as before: the number first, then 'in'.
     for expected in ('+919988111222', 'in'):
         assert expected in found
 def test_merkel(self):
     """Check that 'Angela Merkel' is extracted from German text.

     The input is one sentence repeated five times, processed with the
     languages ``['de', 'en']``.
     """
     aggregator = EntityAggregator()
     repeated = 'Das ist der Pudel von Angela Merkel. ' * 5
     aggregator.extract(repeated, ['de', 'en'])
     found = [label for label, _category, _weight in aggregator.entities]
     # The list of labels doubles as the failure message.
     assert 'Angela Merkel' in found, found
Exemple #5
0
 def test_multi(self):
     """Extract a name that is mentioned twice in the same English text.

     ``'Foo Blubb'`` occurs two times in the input and must be present
     among the aggregated labels.
     """
     passage = (
         "This is a text about Foo Blubb, a leader in "
         "this industry. The should not be confused with Foo Blubb, "
         "a smaller firm."
     )
     aggregator = EntityAggregator()
     aggregator.extract(passage, ['en'])
     # Each entity is a (label, category, weight) triple; keep labels only.
     labels = [label for label, _category, _weight in aggregator.entities]
     assert 'Foo Blubb' in labels, labels
 def test_multi(self):
     """Check that a label mentioned more than once is still reported.

     The English input names ``'Foo Blubb'`` twice; the label must be in
     the aggregator's entities after extraction.
     """
     aggregator = EntityAggregator()
     aggregator.extract(
         "This is a text about Foo Blubb, a leader in "
         "this industry. The should not be confused with Foo Blubb, "
         "a smaller firm.",
         ['en'],
     )
     found = [label for label, _category, _weight in aggregator.entities]
     # The list of labels doubles as the failure message.
     assert 'Foo Blubb' in found, found
Exemple #7
0
 def test_entities(self):
     """Add three spellings of one person and check label and weight.

     Every entity the aggregator yields must be labelled ``'Max Banana'``
     and carry a weight of 3. Nothing is asserted if no entity is yielded.
     """
     aggregator = EntityAggregator()
     spellings = ('Mr. Max Banana', 'Mr. Max Banana', 'max Banana')
     for spelling in spellings:
         aggregator.add(PersonResult.create(aggregator, spelling, 0, 12))

     # The category is deliberately not checked here.
     for label, _category, weight in aggregator.entities:
         assert label == 'Max Banana', label
         assert weight == 3, weight
 def test_aggregator(self):
     """Check the aggregator's length after each successive ``add``.

     A lone ``'Banana'`` result leaves the length at 0; adding
     ``'Mr. Max Banana'`` brings it to 1, and adding ``'Max Banana'``
     afterwards keeps it at 1.
     """
     aggregator = EntityAggregator()
     # (name to add, expected len(aggregator) right after adding it)
     steps = (
         ('Banana', 0),
         ('Mr. Max Banana', 1),
         ('Max Banana', 1),
     )
     for name, expected_size in steps:
         aggregator.add(PersonResult.create(aggregator, name, 0, 12))
         assert len(aggregator) == expected_size, aggregator
Exemple #9
0
 def test_aggregator(self):
     """Verify how ``len(aggregator)`` changes as person results are added.

     Expected lengths: 0 after ``'Banana'``, 1 after ``'Mr. Max Banana'``,
     and still 1 after ``'Max Banana'``.
     """
     aggregator = EntityAggregator()

     surname_only = PersonResult.create(aggregator, 'Banana', 0, 12)
     aggregator.add(surname_only)
     assert len(aggregator) == 0, aggregator

     titled_name = PersonResult.create(aggregator, 'Mr. Max Banana', 0, 12)
     aggregator.add(titled_name)
     assert len(aggregator) == 1, aggregator

     plain_name = PersonResult.create(aggregator, 'Max Banana', 0, 12)
     aggregator.add(plain_name)
     assert len(aggregator) == 1, aggregator
Exemple #10
0
 def test_ner_service(self):
     """Call ``extract_entities`` on English text naming three countries.

     The two-line passage is repeated four times; the string forms of the
     returned results must include 'United States', 'Germany' and 'Syria'.
     """
     aggregator = EntityAggregator()
     passage = """This is a document about the United States. But also about
     Syria and Germany.
     """
     results = extract_entities(aggregator, passage * 4, 'en')
     names = [str(result) for result in results]
     # Checked in the same order as before.
     for expected in ('United States', 'Germany', 'Syria'):
         assert expected in names, names
Exemple #11
0
def extract_document_tags(document):
    """Extract entities from a document's texts and save them as tags.

    Documents whose status is not ``Document.STATUS_SUCCESS`` are skipped
    without any work. Otherwise every text in ``document.texts`` is passed
    to an ``EntityAggregator`` together with the document's languages
    (``settings.DEFAULT_LANGUAGE`` is used when the document has none).
    Each aggregated ``(label, category, weight)`` entity is emitted into a
    ``DocumentTagCollector(document, 'ner')``, which is saved before the
    document is added to the database session and the session committed.

    Returns ``None`` in all cases.
    """
    if document.status != Document.STATUS_SUCCESS:
        return

    load_places()
    log.info("Tagging [%s]: %s", document.id, document.name)

    # Fall back to the configured default when no languages are set.
    languages = list(document.languages) or [settings.DEFAULT_LANGUAGE]

    aggregator = EntityAggregator()
    for text in document.texts:
        aggregator.extract(text, languages)

    # These two collectors are saved without emitting anything --
    # presumably to clear tags left by earlier extractors (TODO confirm).
    for origin in ('polyglot', 'spacy'):
        DocumentTagCollector(document, origin).save()

    collector = DocumentTagCollector(document, 'ner')
    for label, category, weight in aggregator.entities:
        collector.emit(label, category, weight=weight)
    log.info("Extracted tags: %s", len(collector))
    collector.save()

    session = db.session
    session.add(document)
    session.commit()
Exemple #12
0
 def test_entities(self):
     """Check label and weight after adding variants of one person name.

     Two ``'Mr. Max Banana'`` results and one ``'max Banana'`` result are
     added; each yielded entity must have the label ``'Max Banana'`` and
     weight 3. An empty ``entities`` sequence makes the loop a no-op.
     """
     aggregator = EntityAggregator()
     for variant in ['Mr. Max Banana'] * 2 + ['max Banana']:
         person = PersonResult.create(aggregator, variant, 0, 12)
         aggregator.add(person)

     # The category is intentionally left unchecked.
     for label, _category, weight in aggregator.entities:
         assert label == 'Max Banana', label
         assert weight == 3, weight