Example #1
0
class Brand(Enum):
    """Rules describing brand names as sequences of tokens.

    Each member value is a list of rule dicts.  A dict lists the ``labels``
    a token has to satisfy and, where given, how the token is normalized
    and which ``OrganisationObject`` attribute it is interpreted as.
    """

    # Capitalized tokens tagged 'LATN' (repeatable), optionally followed
    # by one token tagged 'INT'.
    Latin = [
        {
            'labels': [gram('LATN'), is_capitalized(True)],
            'repeatable': True,
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': [OrganisationObject.Attributes.Name],
            },
        },
        {
            'labels': [gram('INT')],
            'optional': True,
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': [OrganisationObject.Attributes.Name],
            },
        },
    ]

    # Two name parts joined by '&' or '/'.  Both ends are the very same
    # dict object as Latin[0] (no copy is made).
    WithConj = [
        Latin[0],
        {
            'labels': [in_({'&', '/'})],
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': [OrganisationObject.Attributes.Name],
            },
        },
        Latin[0],
    ]

    # A single token carrying the 'Trad' grammeme.
    Trademark = [
        {
            'labels': [gram('Trad')],
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': [OrganisationObject.Attributes.Name],
            },
        },
    ]
Example #2
0
class ProbabilisticOrganisation(Enum):
    """Organisation rules built from quoted names and from existing rules.

    ``NounQuoted`` is spelled out in place; every other member is a
    concatenation of an ``Organisation`` member value with rule lists
    defined elsewhere in the module.
    """

    # A quoted capitalized name, e.g. '"Kommersant" reported ...'.
    # The two quote tokens are flagged with 'skip'.
    NounQuoted = [
        {
            'labels': [gram('QUOTE')],
            'skip': True,
        },
        {
            'labels': [
                is_capitalized(True),
                gram_any({'NOUN', 'ADJF', 'LATN'}),
            ],
            'repeatable': True,
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Name,
            },
        },
        {
            'labels': [gram('QUOTE')],
            'skip': True,
        },
    ]

    # <organisation rule> + initials prefix + probabilistic initials rule.
    EducationalWithInitials = (
        Organisation.Educational.value
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + PROBABILISTIC_NAMED_ORG_INITIALS_RULE
    )
    SocialWithInitials = (
        Organisation.Social.value
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + PROBABILISTIC_NAMED_ORG_INITIALS_RULE
    )
    AdjSocialWithInitials = (
        Organisation.AdjSocial.value
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + PROBABILISTIC_NAMED_ORG_INITIALS_RULE
    )
    AdjCommercialWithInitials = (
        Organisation.AdjCommercial.value
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + PROBABILISTIC_NAMED_ORG_INITIALS_RULE
    )

    # <organisation rule> + initials prefix + possible-lastname grammar.
    EducationalWithLastname = (
        Organisation.Educational.value
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + POSSIBLE_LASTNAME_GRAMMAR
    )
    SocialWithLastname = (
        Organisation.Social.value
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + POSSIBLE_LASTNAME_GRAMMAR
    )
    AdjSocialWithLastname = (
        Organisation.AdjSocial.value
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + POSSIBLE_LASTNAME_GRAMMAR
    )
    AdjCommercialWithLastname = (
        Organisation.AdjCommercial.value
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + POSSIBLE_LASTNAME_GRAMMAR
    )
Example #3
0
class Location(Enum):
    """Rules describing location names as sequences of tokens.

    Each member value is a list of rule dicts, one dict per expected
    token.  ``labels`` holds the predicates the token has to satisfy,
    ``normalization`` the requested normalization type, and
    ``interpretation`` the ``LocationObject`` attribute (``Name`` or
    ``Descriptor``) the token is stored under.  ``optional`` and
    ``repeatable`` are set on the rules that carry them.

    NOTE(review): ``gnc_match(i, ...)`` presumably demands
    gender/number/case agreement with the token at index ``i``
    (``-1`` = previous token, ``0`` = first token) -- confirm against the
    label implementation.
    """

    # <adjective from the federal-district dictionary> + the two
    # descriptor words listed in the dictionaries below.
    FederalDistrict = [
        {
            'labels': [
                gram('ADJF'),
                dictionary(FEDERAL_DISTRICT_DICTIONARY),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Name,
            },
        },
        {
            'labels': [
                gnc_match(-1, solve_disambiguation=True),
                dictionary({'федеральный'}),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Descriptor,
            },
        },
        {
            'labels': [
                gnc_match(-1, solve_disambiguation=True),
                dictionary({'округ'}),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Descriptor,
            },
        },
    ]

    # Same leading adjective, but the descriptor is the literal token
    # 'ФО'; no normalization is requested for it.
    FederalDistrictAbbr = [
        {
            'labels': [
                gram('ADJF'),
                dictionary(FEDERAL_DISTRICT_DICTIONARY),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Name,
            },
        },
        {
            'labels': [eq('ФО')],
            'interpretation': {
                'attribute': LocationObject.Attributes.Descriptor,
            },
        },
    ]

    # <any adjective> + the two descriptor words listed below.
    AutonomousDistrict = [
        {
            'labels': [gram('ADJF')],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Name,
            },
        },
        {
            'labels': [
                gnc_match(-1, solve_disambiguation=True),
                dictionary({'автономный'}),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Descriptor,
            },
        },
        {
            'labels': [
                gnc_match(-1, solve_disambiguation=True),
                dictionary({'округ'}),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Descriptor,
            },
        },
    ]

    # <any adjective> + the literal token 'АО' as descriptor.
    AutonomousDistrictAbbr = [
        {
            'labels': [gram('ADJF')],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Name,
            },
        },
        {
            'labels': [eq('АО')],
            'interpretation': {
                'attribute': LocationObject.Attributes.Descriptor,
            },
        },
    ]

    # <any adjective> + a word from the region-type dictionary.
    Region = [
        {
            'labels': [gram('ADJF')],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Name,
            },
        },
        {
            'labels': [
                dictionary(REGION_TYPE_DICTIONARY),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Descriptor,
            },
        },
    ]

    # <adjective prefix from dictionary> + <noun tagged 'Geox'>; both
    # tokens are stored as Name.
    ComplexObject = [
        {
            'labels': [
                gram('ADJF'),
                dictionary(COMPLEX_OBJECT_PREFIX_DICTIONARY),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Name,
            },
        },
        {
            'labels': [
                gram('NOUN'),
                gram('Geox'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Name,
            },
        },
    ]

    # <noun prefix from dictionary> + <noun tagged 'Geox'>; both tokens
    # are stored as Name.
    PartialObject = [
        {
            'labels': [
                gram('NOUN'),
                dictionary(PARTIAL_OBJECT_PREFIX_DICTIONARY),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Name,
            },
        },
        {
            'labels': [
                gram('NOUN'),
                gram('Geox'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Name,
            },
        },
    ]

    # e.g. "Donetsk People's Republic" / "Russian Federation":
    # capitalized adjective, optional further adjectives, then one of
    # the descriptor nouns from the dictionary below.
    AdjfFederation = [
        {
            'labels': [gram('ADJF'), is_capitalized(True)],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Name,
            },
        },
        {
            'labels': [
                gram('ADJF'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'optional': True,
            'repeatable': True,
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Name,
            },
        },
        {
            'labels': [
                gnc_match(0, solve_disambiguation=True),
                dictionary({
                    'федерация',
                    'республика',
                    'империя',
                }),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Descriptor,
            },
        },
    ]

    # e.g. "United States" / "United States of America".  Unlike
    # AdjfFederation, the dictionary noun is stored as Name, and an
    # optional trailing token tagged 'gent' is accepted.
    AdjxFederation = [
        {
            'labels': [gram('Adjx'), is_capitalized(True)],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Name,
            },
        },
        {
            'labels': [
                gram('Adjx'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'optional': True,
            'repeatable': True,
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Name,
            },
        },
        {
            'labels': [
                gnc_match(0, solve_disambiguation=True),
                dictionary({
                    'штат',
                    'эмират',
                }),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Name,
            },
        },
        {
            'labels': [gram('gent')],
            'optional': True,
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Name,
            },
        },
    ]

    # A single capitalized token tagged 'Geox'.
    Object = [
        {
            'labels': [is_capitalized(True), gram('Geox')],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': LocationObject.Attributes.Name,
            },
        },
    ]
Example #4
0
]

# One-element rule list: an OR over the short grammar, the full grammar,
# and a list holding only the last rule of the full grammar.
HOUSE_NUMBER_GRAMMAR = [
    OR(
        HOUSE_NUMBER_SHORT_GRAMMAR,
        HOUSE_NUMBER_FULL_GRAMMAR,
        HOUSE_NUMBER_FULL_GRAMMAR[-1:],
    ),
]

# Two-token rule: a word from the one-entry dictionary below, then a
# capitalized token that passes length_eq(1).  Only the second token is
# interpreted (as the house-number letter), with original normalization.
HOUSE_LETTER_FULL_GRAMMAR = [
    {
        'labels': [dictionary({'литер'})],
    },
    {
        'labels': [is_capitalized(True), length_eq(1)],
        'normalization': NormalizationType.Original,
        'interpretation': {
            'attribute': AddressObject.Attributes.House_Number_Letter,
        },
    },
]

HOUSE_LETTER_SHORT_GRAMMAR = [
    {
        'labels': [
            or_((
                eq('лит'),  # литер
                eq('л'),
            )),
Example #5
0
class ProbabilisticPerson(Enum):
    """Grammars for token sequences that look like person names.

    The matched words may turn out not to be names at all.  These rules
    are not included in natasha's DEFAULT_GRAMMARS, but show good results
    on the factRuEval-16 test set.

    Most members reuse rules from ``Person`` and put
    ``POSSIBLE_LASTNAME_GRAMMAR`` where ``Person`` has its surname rule.
    """

    # First-name rule followed by a possible last name.
    FirstnameAndLastname = [
        Person.Firstname.value[0],
        POSSIBLE_LASTNAME_GRAMMAR,
    ]

    # First four rules of Person.InitialsAndLastname (initial, '.',
    # initial, '.') followed by a possible last name.
    InitialsAndLastname = (
        Person.InitialsAndLastname.value[:4] + [POSSIBLE_LASTNAME_GRAMMAR]
    )

    # Same parts, last name first.
    LastnameAndInitials = (
        [POSSIBLE_LASTNAME_GRAMMAR] + Person.InitialsAndLastname.value[:4]
    )

    # A single initial with its period, then a possible last name.
    FirstnameAsInitialsAndLastname = (
        Person.InitialsAndLastname.value[:2] + [POSSIBLE_LASTNAME_GRAMMAR]
    )

    # Same parts, last name first.
    LastnameAndfirstnameAsInitials = (
        [POSSIBLE_LASTNAME_GRAMMAR] + Person.InitialsAndLastname.value[:2]
    )

    # e.g. "John H. Doe": first name, middle initial with period,
    # possible last name.
    FirstnameAndMiddlenameAsInitialsWithLastname = (
        FirstnameAndLastname[:1]
        + Person.InitialsAndLastname.value[2:4]
        + [POSSIBLE_LASTNAME_GRAMMAR]
    )

    # First name, a nobility particle from the dictionary (stored as part
    # of the last name), then a possible last name.
    FirstnameAndLastnameWithNobilityParticle = [
        Person.Firstname.value[0],
        {
            'labels': [dictionary(NAME_NOBILITY_PARTICLE_DICTIONARY)],
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
        POSSIBLE_LASTNAME_GRAMMAR,
    ]

    # Person.WithPosition with its final rule swapped for the last rule
    # of FirstnameAndLastname above (the possible last name).
    FirstnameAndLastnameWithPosition = (
        Person.WithPosition.value[:-1] + [FirstnameAndLastname[-1]]
    )

    # e.g. 'Ernesto "Che" Guevara': first name, the inner rules of the
    # Person quoted-nickname grammar, then a possible last name.
    FirstnameAndLastnameWithQuotedNickname = (
        Person.Firstname.value[:1]
        + Person.FirstnameAndLastnameWithQuotedNickname.value[1:-1]
        + [POSSIBLE_LASTNAME_GRAMMAR]
    )

    # e.g. "S.P. von Derviz": initials, nobility particle, possible last
    # name.
    InitialsAndLastnameWithNobilityParticle = InitialsAndLastname[:4] + [
        FirstnameAndLastnameWithNobilityParticle[1],
        POSSIBLE_LASTNAME_GRAMMAR,
    ]

    # e.g. "John S. Doe": three capitalized tokens tagged 'LATN' with a
    # period after the second one.
    Latin = [
        {
            'labels': [gram('LATN'), is_capitalized(True)],
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
        },
        {
            'labels': [gram('LATN'), is_capitalized(True)],
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': PersonObject.Attributes.Middlename,
            },
        },
        {
            'labels': [
                gram('PUNCT'),
                eq('.'),
            ],
        },
        {
            'labels': [gram('LATN'), is_capitalized(True)],
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
    ]
Example #6
0
class Person(Enum):
    """Rules describing person names as sequences of tokens.

    Each member value is a list of rule dicts, one dict per expected
    token.  ``labels`` holds the predicates the token has to satisfy,
    ``normalization`` the requested normalization type, and
    ``interpretation`` the ``PersonObject`` attribute the token is stored
    under.  ``optional`` and ``repeatable`` are set on the rules that
    carry them.

    Later members reuse rule dicts of earlier ones by indexing or slicing
    (the same dict objects are shared, not copied).

    NOTE(review): ``gnc_match(i, ...)``, ``case_match(i, ...)`` and
    ``number_match(i, ...)`` presumably demand grammatical agreement with
    the token at index ``i`` (``-1`` = previous token, ``0`` = first
    token) -- confirm against the label implementations.
    """

    # e.g. "Ivanov Ivan Ivanovich": surname, first name, patronymic.
    Full = [
        {
            'labels': [
                gram('Surn'),
                gram_not('Abbr'),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
        {
            'labels': [
                gram('Name'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
        },
        {
            'labels': [
                gram('Patr'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Middlename,
            },
        },
    ]

    # e.g. "Ivan Ivanovich Ivanov": first name, patronymic, surname.
    FullReversed = [
        {
            'labels': [
                gram('Name'),
                gram_not('Abbr'),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
        },
        {
            'labels': [
                gram('Patr'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Middlename,
            },
        },
        {
            'labels': [
                gram('Surn'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
    ]

    # e.g. "Felipe Rodriguez Fernandez": the middle name(s) are tokens
    # tagged 'Name' rather than patronymics.
    # https://www.englishelp.ru/business-english/other/284-patronymic-vs-middle-name.html
    FullReversedWithLatinMiddlename = [
        {
            'labels': [
                gram('Name'),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
        },
        {
            'labels': [
                gram('Name'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'repeatable': True,
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Middlename,
            },
        },
        {
            'labels': [
                gram('Surn'),
                gnc_match(0, solve_disambiguation=True),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
    ]

    # e.g. "L. A. Ranevskaya".  Rule layout (other members slice it):
    #   0: first-name initial   1: '.'
    #   2: middle-name initial  3: '.'
    #   4: surname
    InitialsAndLastname = [
        {
            'labels': [
                gram_in(['Name', 'Abbr']),
            ],
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
            'normalization': NormalizationType.Original,
        },
        {
            'labels': [
                gram('PUNCT'),
                eq('.'),
            ],
        },
        {
            'labels': [
                gram_in(['Patr', 'Abbr']),
                gnc_match(0, solve_disambiguation=True),
            ],
            'interpretation': {
                'attribute': PersonObject.Attributes.Middlename,
            },
            'normalization': NormalizationType.Original,
        },
        {
            'labels': [
                gram('PUNCT'),
                eq('.'),
            ],
        },
        {
            'labels': [
                gram('Surn'),
                gram_not('Abbr'),
                gnc_match(0, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
    ]

    # e.g. "Ivan Ivanov": first name, surname.
    FirstnameAndLastname = [
        {
            'labels': [
                gram('Name'),
                gram_not('Abbr'),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
        },
        {
            'labels': [
                gram('Surn'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
    ]

    # First name, a nickname between quote tokens, surname.
    # 'labels' are lists here, like in every other rule of this class
    # (they used to be set literals, which have no defined order).
    FirstnameAndLastnameWithQuotedNickname = [
        FirstnameAndLastname[0],
        {
            'labels': [
                gram('QUOTE'),
                gram_any({
                    'G-QUOTE',
                    'L-QUOTE',
                }),
            ],
            'normalization': NormalizationType.Original,
        },
        {
            'labels': [
                gram_not_in({
                    'QUOTE',
                    'PUNCT',
                }),
            ],
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': PersonObject.Attributes.Nickname,
            },
        },
        {
            'labels': [
                gram('QUOTE'),
            ],
            'normalization': NormalizationType.Original,
        },
        InitialsAndLastname[-1],
    ]

    # e.g. "Ivanov Ivan": surname, first name.
    LastnameAndFirstname = [
        {
            'labels': [
                gram('Surn'),
                gram_not('Abbr'),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
        {
            'labels': [
                gram('Name'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
        },
    ]

    # e.g. "Aleksandr F. Sklyar": first name, middle initial, '.', surname.
    # The tail has to start at index 2 of InitialsAndLastname so that the
    # middle-initial rule is included; starting at 3 would begin with the
    # period and yield "<name> . <surname>".
    FullReversedWithMiddlenameAsInitials = (
        FullReversed[:1] + InitialsAndLastname[2:]
    )

    # e.g. "Ranevskaya L. A.": surname, then both initials with periods.
    LastnameAndInitials = [
        LastnameAndFirstname[0],
    ] + InitialsAndLastname[:4]

    # e.g. "Ranevskaya L.": surname, then one initial with its period.
    LastnameAndFirstnameAsInitials = [
        LastnameAndFirstname[0],
    ] + InitialsAndLastname[:2]

    # e.g. "L. Ranevskaya": one initial with its period, then surname.
    FirstnameAsInitialsAndLastname = InitialsAndLastname[:2] + [
        InitialsAndLastname[-1],
    ]

    # e.g. "Ivan Ivanovich": first name, patronymic.
    FirstnameAndMiddlename = [
        {
            'labels': [
                gram('Name'),
                gram_not('Abbr'),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
        },
        {
            'labels': [
                gram('Patr'),
                gram_not('Abbr'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Middlename,
            },
        },
    ]

    # e.g. "Ivanov": a lone capitalized surname.
    Lastname = [
        {
            'labels': [
                gram('Surn'),
                gram_any({
                    'sing',
                    'Stgm',
                }),
                gram_not('Abbr'),
                is_capitalized(True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
    ]

    # e.g. "Ivanovich": a lone capitalized patronymic.
    Middlename = [
        {
            'labels': [
                gram('Patr'),
                gram_any({
                    'sing',
                    'Stgm',
                }),
                gram_not('Abbr'),
                is_capitalized(True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Middlename,
            },
        },
    ]

    # e.g. "Ivan": a lone capitalized first name.
    Firstname = [
        {
            'labels': [
                gram('Name'),
                gram_any({
                    'sing',
                    'Stgm',
                }),
                gram_not('Abbr'),
                is_capitalized(True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
        },
    ]

    # e.g. "Otto von Bismarck": the particle from the dictionary is stored
    # under Lastname together with the surname itself.
    FirstnameAndLastnameWithNobilityParticle = [
        FullReversed[0],
        {
            'labels': [
                dictionary(NAME_NOBILITY_PARTICLE_DICTIONARY),
            ],
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
        {
            'labels': [
                gram('Surn'),
                gram_not('Abbr'),
                gnc_match(0, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
    ]

    # e.g. "Prime Minister of the RF Dmitry Medvedev".  Rule layout (the
    # next member slices it at index 2):
    #   0: position word (descriptor)
    #   1: optional, repeatable tokens between position and name
    #   2: first name   3: optional patronymic   4: surname
    WithPosition = [
        {
            'labels': [
                gram('Person/Position'),
            ],
            'interpretation': {
                'attribute': PersonObject.Attributes.Descriptor,
            },
        },
        {
            # Either (one of the listed cases or 'Fixd', and not a name
            # part), or an abbreviation, or a 'LATN' token.
            'labels': [
                or_((
                    and_((
                        or_((
                            gram_any({
                                'ablt',
                                'loct',
                                'gent',
                            }),
                            gram('Fixd'),
                        )),
                        gram_not_in({
                            'Name',
                            'Patr',
                            'Surn',
                        }),
                    )),
                    gram('Abbr'),
                    gram('LATN'),
                )),
            ],
            'optional': True,
            'repeatable': True,
            'normalization': NormalizationType.Original,
        },
        {
            'labels': [
                gram('Name'),
                case_match(0, solve_disambiguation=True),
                number_match(0, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
        },
        {
            'labels': [
                gram('Patr'),
                case_match(0, solve_disambiguation=True),
                number_match(0, solve_disambiguation=True),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'optional': True,
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Middlename,
            },
        },
        {
            'labels': [
                gram('Surn'),
                case_match(0, solve_disambiguation=True),
                number_match(0, solve_disambiguation=True),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
    ]

    # e.g. 'Press secretary of "Rosneft" Mikhail Leontyev': WithPosition
    # with a quoted organisation name inserted before the person's name.
    WithPositionAndQuotedOrganisationName = (WithPosition[:2] + [
        {
            'labels': [
                gram('QUOTE'),
            ],
            'normalization': NormalizationType.Original,
        },
        {
            'labels': [
                gram_not('QUOTE'),
                # Pass a collection, as every other gram_not_in call does;
                # a bare string would be treated as a sequence of single
                # characters by any implementation that iterates it.
                gram_not_in({'END-OF-LINE'}),
            ],
            'repeatable': True,
            'normalization': NormalizationType.Original,
        },
        {
            'labels': [
                gram('QUOTE'),
            ],
            'normalization': NormalizationType.Original,
        },
    ] + WithPosition[2:])

    # e.g. "Count de Cristo": position word, nobility particle, surname.
    # Only the surname is interpreted.
    PositionAndNobilitySurname = [
        {
            'labels': [
                gram('Person/Position'),
            ],
        },
        {
            'labels': [
                dictionary(NAME_NOBILITY_PARTICLE_DICTIONARY),
            ],
        },
        {
            'labels': [
                gram('Surn'),
                gnc_match(0, solve_disambiguation=True),
            ],
            'interpretation': {
                'attribute': PersonObject.Attributes.Lastname,
            },
        },
    ]

    # e.g. "Henry the Eighth" / "Charles XII": a name followed by either
    # an agreeing 'ADJF'+'Anum' token or a token tagged 'ROMN'.  Only the
    # name is interpreted.
    NameWithNumericPart = [
        {
            'labels': [
                gram_in({
                    'Name',
                    'sing',
                }),
                gram_not('Abbr'),
            ],
            'interpretation': {
                'attribute': PersonObject.Attributes.Firstname,
            },
        },
        {
            'labels': [
                or_((
                    and_((
                        gram_in({
                            'ADJF',
                            'Anum',
                        }),
                        gnc_match(-1, solve_disambiguation=True),
                    )),
                    gram('ROMN'),
                )),
            ],
        },
    ]
Example #7
0
class Organisation(Enum):

    # Organisation-type descriptor followed by a name enclosed in quotes.
    OfficialQuoted = [
        {
            # Descriptor: a commercial, social or abbreviated organisation
            # type token.
            'labels': [
                or_((
                    gram('Orgn/Commercial'),
                    gram('Orgn/Social'),
                    gram('Orgn/Abbr'),
                )),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Descriptor,
            },
        },
        {
            # Optional, repeatable abbreviation tokens; also interpreted as
            # part of the descriptor.
            'labels': [
                is_abbr(True),
            ],
            'optional': True,
            'repeatable': True,
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Descriptor,
            },
        },
        {
            # Opening quote (no interpretation attached).
            'labels': [
                gram('QUOTE'),
            ],
        },
        {
            # Name tokens: anything that is not a quote, END-OF-LINE or '.'.
            'labels': [
                gram_not('QUOTE'),
                # A single grammeme is excluded with gram_not. The previous
                # gram_not_in('END-OF-LINE') passed a bare string, while
                # gram_not_in is given a set of grammemes elsewhere in this
                # file; if the helper iterates its argument, a string would
                # be seen as individual characters and never match.
                gram_not('END-OF-LINE'),
                not_eq('.'),
            ],
            'repeatable': True,
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Name,
            },
        },
        {
            # Closing quote (no interpretation attached).
            'labels': [
                gram('QUOTE'),
            ],
        },
    ]

    # A single token tagged both Abbr and Orgn, but not Orgn/Abbr; it is
    # interpreted as the organisation name.
    Abbr = [{
        'labels': [
            gram('Abbr'),
            gram('Orgn'),
            gram_not('Orgn/Abbr'),
        ],
        'normalization': NormalizationType.Original,
        'interpretation': {'attribute': OrganisationObject.Attributes.Name},
    }]

    # The literal token 'ИП' followed by the first three rules of
    # Person.Full (reused as-is from the Person grammar).
    IndividualEntrepreneur = [{
        'labels': [eq('ИП')],
        'normalization': NormalizationType.Original,
    }] + [
        Person.Full.value[0],
        Person.Full.value[1],
        Person.Full.value[2],
    ]

    # Commercial organisation descriptor followed by one or more
    # LATN / NUMBER tokens that form the name.
    SimpleLatin = [{
        # Descriptor token.
        'labels': [gram('Orgn/Commercial')],
        'normalization': NormalizationType.Normalized,
        'interpretation': {
            'attribute': OrganisationObject.Attributes.Descriptor,
        },
    }, {
        # Name tokens (repeatable).
        'labels': [gram_any({'LATN', 'NUMBER'})],
        'repeatable': True,
        'normalization': NormalizationType.Original,
        'interpretation': {
            'attribute': OrganisationObject.Attributes.Name,
        },
    }]

    # Adjective(s) + educational descriptor + optional trailing words, e.g.
    # "Санкт-Петербургский Государственный университет"
    # (Saint Petersburg State University).
    Educational = [
        {
            # Leading capitalized adjective; part of the name.
            'labels': [
                gram('ADJF'),
                is_capitalized(True),
            ],
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Name,
            },
        },
        {
            # Optional further adjectives, each checked with gnc_match(-1, ...).
            'labels': [
                gram('ADJF'),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'optional': True,
            'repeatable': True,
            'normalization': NormalizationType.Inflected,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Name,
            },
        },
        {
            # Educational descriptor, checked with gnc_match against both
            # index 0 and index -1.
            'labels': [
                gram('Orgn/Educational'),
                gnc_match(0, solve_disambiguation=True),
                gnc_match(-1, solve_disambiguation=True),
            ],
            'normalization': NormalizationType.Normalized,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Descriptor,
            },
        },
        {
            # Optional trailing name tokens in ablt/gent, excluding
            # prepositions and the dictionary word 'имя'.
            'labels': [
                gram_any({'ablt', 'gent'}),
                gram_not_in({'PREP'}),
                dictionary_not({'имя'}),
            ],
            'normalization': NormalizationType.Original,
            'interpretation': {
                'attribute': OrganisationObject.Attributes.Name,
            },
            'optional': True,
            'repeatable': True,
        },
    ]

    # Educational followed by the named-organisation prefix and initials
    # rules, e.g. "Публичная библиотека имени М. Е. Салтыкова-Щедрина"
    # (Public library named after M. E. Saltykov-Shchedrin).
    EducationalWithInitials = (
        Educational
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + NAMED_ORG_INITIALS_RULE
    )
    # Educational followed by the same prefix rule and a last name,
    # e.g. "Публичная библиотека имени Салтыкова-Щедрина"
    # (Public library named after Saltykov-Shchedrin).
    EducationalWithLastname = (
        Educational
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + LASTNAME_GRAMMAR
    )

    # Adjective(s) + commercial descriptor, e.g. "Кировский завод"
    # (Kirov Plant). Reuses the two adjective rules and the trailing rule of
    # Educational around a commercial descriptor.
    AdjCommercial = Educational[:2] + [{
        'labels': [
            gram('Orgn/Commercial'),
            gnc_match(0, solve_disambiguation=True),
            gnc_match(-1, solve_disambiguation=True),
        ],
        'normalization': NormalizationType.Normalized,
        'interpretation': {
            'attribute': OrganisationObject.Attributes.Descriptor,
        },
    }] + Educational[-1:]

    # AdjCommercial extended with the named-organisation prefix rule and
    # either the initials rule or a last name.
    AdjCommercialWithInitials = (
        AdjCommercial
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + NAMED_ORG_INITIALS_RULE
    )
    AdjCommercialWithLastname = (
        AdjCommercial
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + LASTNAME_GRAMMAR
    )

    # Social descriptor followed by name words, e.g.
    # "Общества андрологии и сексуальной медицины"
    # (of the Society of andrology and sexual medicine).
    Social = [{
        # Singular social-organisation descriptor.
        'labels': [
            gram('Orgn/Social'),
            gram('sing'),
        ],
        'normalization': NormalizationType.Inflected,
        'interpretation': {
            'attribute': OrganisationObject.Attributes.Descriptor,
        },
    }, {
        # First name token: accs/datv/gent, not a preposition or conjunction.
        'labels': [
            gram_not_in({'PREP', 'CONJ'}),
            gram_any({'accs', 'datv', 'gent'}),
        ],
        'normalization': NormalizationType.Original,
        'interpretation': {
            'attribute': OrganisationObject.Attributes.Name,
        },
    }, {
        # Optional further name tokens: gent/accs/ablt, excluding
        # prepositions and person-name parts (Name, Patr, Surn).
        'labels': [
            gram_any({'gent', 'accs', 'ablt'}),
            gram_not_in({'PREP', 'Name', 'Patr', 'Surn'}),
        ],
        'optional': True,
        'repeatable': True,
        'normalization': NormalizationType.Original,
        'interpretation': {
            'attribute': OrganisationObject.Attributes.Name,
        },
    }]

    # Social extended with the named-organisation prefix rule and either the
    # initials rule or a last name.
    SocialWithInitials = (
        Social
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + NAMED_ORG_INITIALS_RULE
    )
    SocialWithLastname = (
        Social
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + LASTNAME_GRAMMAR
    )

    # Adjective(s) + social descriptor, finished with the last rule of Social
    # (optional trailing name tokens).
    AdjSocial = [{
        # Leading adjective; part of the name.
        'labels': [gram('ADJF')],
        'normalization': NormalizationType.Inflected,
        'interpretation': {
            'attribute': OrganisationObject.Attributes.Name,
        },
    }, {
        # Optional further adjectives, each checked with gnc_match(-1, ...).
        'labels': [
            gram('ADJF'),
            gnc_match(-1, solve_disambiguation=True),
        ],
        'optional': True,
        'repeatable': True,
        'normalization': NormalizationType.Inflected,
        'interpretation': {
            'attribute': OrganisationObject.Attributes.Name,
        },
    }, {
        # Social descriptor.
        # NOTE(review): unlike the descriptor rules in Educational and
        # AdjCommercial, no 'normalization' key is set here -- confirm this
        # is intentional.
        'labels': [
            gram('Orgn/Social'),
            gnc_match(0, solve_disambiguation=True),
            gnc_match(-1, solve_disambiguation=True),
        ],
        'interpretation': {
            'attribute': OrganisationObject.Attributes.Descriptor,
        },
    }] + Social[-1:]

    # AdjSocial extended with the named-organisation prefix rule and either
    # the initials rule or a last name.
    AdjSocialWithInitials = (
        AdjSocial
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + NAMED_ORG_INITIALS_RULE
    )
    AdjSocialWithLastname = (
        AdjSocial
        + NAMED_ORG_INITIALS_PREFIX_RULE
        + LASTNAME_GRAMMAR
    )