Ejemplo n.º 1
0
class LocationInterpretationTestCase(BaseTestCase):
    """Checks how location matches are turned into ``LocationObject`` items."""

    def setUp(self):
        # The engine is created first, then the base fixture runs.
        # ``self.combinator`` is not assigned in this class; presumably the
        # base ``setUp`` provides it -- confirm against BaseTestCase.
        self.engine = InterpretationEngine(LocationObject)
        super(LocationInterpretationTestCase, self).setUp()

    def _interpret(self, text):
        """Return the list of objects the engine yields for *text*.

        The text is run through ``self.combinator.extract``, the result is
        passed to ``resolve_matches`` and the resolved matches are handed
        straight to ``self.engine.extract``.
        """
        extracted = self.combinator.extract(text)
        resolved = self.combinator.resolve_matches(extracted)
        return list(self.engine.extract(resolved))

    def test_get_location_object(self):
        # Name followed by a descriptor word.
        found = self._interpret('Российская Федерация')
        self.assertEqual(len(found), 1)
        location = found[0]
        self.assertEqual(location.name.value, 'Российская')
        self.assertEqual(location.descriptor.value, 'Федерация')

        # Single-word name, no descriptor.
        found = self._interpret('Москва')
        self.assertEqual(len(found), 1)
        location = found[0]
        self.assertEqual(location.name.value, 'Москва')
        self.assertEqual(location.descriptor, None)

        # Two-word name: ``name`` is iterated as a sequence of tokens here.
        found = self._interpret('Нижний Новгород')
        self.assertEqual(len(found), 1)
        location = found[0]
        name_parts = [token.value for token in location.name]
        self.assertEqual(name_parts, ['Нижний', 'Новгород'])
        self.assertEqual(location.descriptor, None)
Ejemplo n.º 2
0
class OrganisationInterpretationTestCase(BaseTestCase):
    """Checks how organisation matches become ``OrganisationObject`` items."""

    def setUp(self):
        # The engine is created first, then the base fixture runs.
        # ``self.combinator`` is not assigned in this class; presumably the
        # base ``setUp`` provides it -- confirm against BaseTestCase.
        self.engine = InterpretationEngine(OrganisationObject)
        super(OrganisationInterpretationTestCase, self).setUp()

    def _interpret(self, text, eager=False):
        """Return the list of objects the engine yields for *text*.

        When *eager* is true the resolved matches are collected into a list
        before being handed to the engine; otherwise the result of
        ``resolve_matches`` is passed through untouched.
        """
        extracted = self.combinator.extract(text)
        resolved = self.combinator.resolve_matches(extracted)
        if eager:
            resolved = list(resolved)
        return list(self.engine.extract(resolved))

    def test_get_organisation_object(self):
        # Descriptor in front of a quoted name.
        found = self._interpret('ООО "Рога и копыта"')
        self.assertEqual(len(found), 1)
        organisation = found[0]
        self.assertEqual(organisation.descriptor.value, 'ООО')
        name_parts = [token.value for token in organisation.name]
        self.assertEqual(name_parts, ['Рога', 'и', 'копыта'])

        # Descriptor at the end of the phrase.
        found = self._interpret(
            'Санкт-Петербургский государственный университет')
        self.assertEqual(len(found), 1)
        organisation = found[0]
        self.assertEqual(organisation.descriptor.value, 'университет')
        name_parts = [token.value for token in organisation.name]
        self.assertEqual(name_parts,
                         ['Санкт-Петербургский', 'государственный'])

        # Two descriptor words: ``descriptor`` is iterated as a sequence.
        found = self._interpret('филиал ООО "Рога и копыта"')
        self.assertEqual(len(found), 1)
        organisation = found[0]
        descriptor_parts = [token.value for token in organisation.descriptor]
        self.assertEqual(descriptor_parts, ['филиал', 'ООО'])
        name_parts = [token.value for token in organisation.name]
        self.assertEqual(name_parts, ['Рога', 'и', 'копыта'])

    def test_coreference_solving(self):
        # Two differently spelled mentions must compare equal.
        found = self._interpret(
            'ооо "Рога и КаПыта" или общество "рога и копыто"')
        self.assertEqual(len(found), 2)
        first, second = found
        self.assertEqual(first, second)

        # Full name plus abbreviation; matches are materialised up front
        # for this case.
        found = self._interpret(
            'федеральная служба безопасности (сокращенно, ФСБ)', eager=True)
        self.assertEqual(len(found), 2)
        first, second = found
        self.assertEqual(first.abbr, {'фсб'})
        self.assertEqual(second.abbr, {'фсб'})
        self.assertEqual(first, second)
Ejemplo n.º 3
0
def find_facts(text, news_id):
    """Find the persons and locations mentioned in a news text.

    Args:
        text: the news text to analyse.
        news_id: identifier of the news item; it is passed to ``Facts``.

    Returns:
        The result of ``Facts.get_values()``. Per the original description
        this is a dictionary with:
            news_id
            persons -- array of {name, middlename, lastname, descriptor}
            locations -- array of names
    """
    engine_per = InterpretationEngine(PersonObject)
    engine_loc = InterpretationEngine(LocationObject)
    combinator = Combinator([
        Person,
        Location,
    ])
    facts = Facts(news_id)

    # Run extraction and match resolution once and keep the result in a list,
    # so both engines can read the same matches without parsing the text a
    # second time (the resolved matches may be a one-shot iterator).
    matches = list(combinator.resolve_matches(combinator.extract(text)))

    for person in engine_per.extract(matches):
        facts.add_to_persons(Person_(person))

    for location in engine_loc.extract(matches):
        facts.add_to_locations(Location_(location))

    return facts.get_values()
Ejemplo n.º 4
0
 def setUp(self):
     """Create the organisation interpretation engine, then run the base set-up."""
     # The engine is assigned before delegating to the parent fixture.
     self.engine = InterpretationEngine(OrganisationObject)
     super(OrganisationInterpretationTestCase, self).setUp()
Ejemplo n.º 5
0
from natasha.grammars.person import PersonObject
from natasha.grammars.organisation import OrganisationObject
from natasha.grammars.location import LocationObject, AddressObject

# Sample input. NOTE(review): this first value is never used -- it is
# overwritten by the multi-line assignment right below.
text = 'василий петрович пришел в Санкт-Петербургский государственный университет'

# Multi-line sample that actually gets processed.
text = """
Иванов Иван Иванович был профессором в Санкт-Петербургском государственном университете.
Он жил в городе Санкт-Петербург на набережной реки Мойки, дом 33.
Улица Арбат дом 7 квартира 17
"""

# Combinator restricted to the person, organisation and location grammars.
combinator = Combinator([Person, Organisation, Location])
# Person detection
matches = combinator.resolve_matches(combinator.extract(text))
enjine_person = InterpretationEngine(PersonObject)
# Materialise the interpreted persons so they can be indexed below.
persons = list(enjine_person.extract(matches))
for i in range(len(persons)):
    print(
        'имя:',
        str(persons[i].firstname).split('\'')[1]
        if str(persons[i].firstname) != 'None' else 'None')
    print(
        'отчество:',
        str(persons[i].middlename).split('\'')[1]
        if str(persons[i].middlename) != 'None' else 'None')
    print(
        'фамилия:',
        str(persons[i].lastname).split('\'')[1]
        if str(persons[i].lastname) != 'None' else 'None')
    print(
Ejemplo n.º 6
0
 def setUp(self):
     """Build an address-only combinator and an ``AddressObject`` engine."""
     # Only the Address grammar is passed to the combinator.
     self.combinator = natasha.Combinator([natasha.Address])
     self.engine = InterpretationEngine(AddressObject)
Ejemplo n.º 7
0
class PersonInterpretationTestCase(BaseTestCase):
    """Checks how person matches are turned into ``PersonObject`` items."""

    def setUp(self):
        # The engine is created first, then the base fixture runs.
        # ``self.combinator`` is not assigned in this class; presumably the
        # base ``setUp`` provides it -- confirm against BaseTestCase.
        self.engine = InterpretationEngine(PersonObject)
        super(PersonInterpretationTestCase, self).setUp()

    def _interpret(self, text, eager=False):
        """Return the list of objects the engine yields for *text*.

        When *eager* is true the resolved matches are collected into a list
        before being handed to the engine; otherwise the result of
        ``resolve_matches`` is passed through untouched.
        """
        extracted = self.combinator.extract(text)
        resolved = self.combinator.resolve_matches(extracted)
        if eager:
            resolved = list(resolved)
        return list(self.engine.extract(resolved))

    def test_get_person_object(self):
        # Last name, first name, patronymic.
        people = self._interpret('иванов иван иванович')
        self.assertEqual(len(people), 1)
        person = people[0]
        self.assertEqual(person.firstname.value, 'иван')
        self.assertEqual(person.middlename.value, 'иванович')
        self.assertEqual(person.lastname.value, 'иванов')
        self.assertEqual(person.descriptor, None)

        # Quoted nickname between first and last name.
        people = self._interpret('Владимир «Ленин» Ульянов')
        self.assertEqual(len(people), 1)
        person = people[0]
        self.assertEqual(person.firstname.value, 'Владимир')
        self.assertEqual(person.middlename, None)
        self.assertEqual(person.lastname.value, 'Ульянов')
        self.assertEqual(person.nickname.value, 'Ленин')
        self.assertEqual(person.descriptor, None)

    def test_get_person_gender(self):
        people = self._interpret('канцлер ФРГ ангела меркель')
        self.assertEqual(len(people), 1)
        person = people[0]
        self.assertEqual(person.firstname.value, 'ангела')
        self.assertEqual(person.lastname.value, 'меркель')
        self.assertEqual(person.descriptor.value, 'канцлер')
        # The two most common gender entries, sorted for a stable comparison.
        gender_counts = sorted(person.gender.most_common(2))
        self.assertEqual(gender_counts, [('femn', 1), ('masc', 1)])

        people = self._interpret('президент РФ владимир путин')
        self.assertEqual(len(people), 1)
        person = people[0]
        self.assertEqual(person.firstname.value, 'владимир')
        self.assertEqual(person.lastname.value, 'путин')
        self.assertEqual(person.descriptor.value, 'президент')
        gender_counts = sorted(person.gender.most_common(2))
        self.assertEqual(gender_counts, [('masc', 4)])

        people = self._interpret('пётр порошенко')
        self.assertEqual(len(people), 1)
        person = people[0]
        self.assertEqual(person.firstname.value, 'пётр')
        self.assertEqual(person.lastname.value, 'порошенко')
        self.assertEqual(person.descriptor, None)
        gender_counts = sorted(person.gender.most_common(2))
        self.assertEqual(gender_counts, [('masc', 1)])

    def test_coreference_solving(self):
        # Initials with a surname and the spelled-out name must compare equal.
        people = self._interpret(
            'Н. Н. Вертинская - Надежда Николаевна', eager=True)
        self.assertEqual(len(people), 2)
        first, second = people
        self.assertEqual(first, second)

        # Merging is expected to keep the fullest value of every field.
        merged = first.merge(second)
        self.assertEqual(merged.firstname.value, 'Надежда')
        self.assertEqual(merged.middlename.value, 'Николаевна')
        self.assertEqual(merged.lastname.value, 'Вертинская')

        # Two unrelated persons must not compare equal.
        people = self._interpret(
            'Иван Иванович, а не Надежда Николаевна', eager=True)
        self.assertEqual(len(people), 2)
        first, second = people
        self.assertNotEqual(first, second)