Example #1
0
def test_person():
    """Parse a position plus first/last name into a nested ``Person`` fact.

    The grammar is: a token carrying the 'Position' grammeme (declared by
    the local ``PositionPipeline``), followed by a first-name token and a
    surname token that are both matched against one shared
    ``gnc_relation()``. Exactly one match is expected for the sample phrase.
    """
    name_fact = fact('Name', ['first', 'last'])
    person_fact = fact('Person', ['position', 'name'])

    # Both name parts exclude tokens tagged 'Abbr'.
    surname = and_(gram('Surn'), not_(gram('Abbr')))
    given_name = and_(gram('Name'), not_(gram('Abbr')))

    class PositionPipeline(MorphPipeline):
        grammemes = {'Position'}
        keys = ['управляющий директор', 'вице-мэр']

    position = gram('Position')

    # A single relation object is shared by both name parts.
    agreement = gnc_relation()

    first_part = given_name.match(agreement).interpretation(name_fact.first)
    last_part = surname.match(agreement).interpretation(name_fact.last)
    full_name = rule(first_part, last_part).interpretation(name_fact)

    person = rule(
        position.interpretation(person_fact.position),
        full_name.interpretation(person_fact.name),
    ).interpretation(person_fact)

    parser = Parser(person, pipelines=[PositionPipeline()])

    found = list(parser.match('управляющий директор Иван Ульянов'))
    assert len(found) == 1

    actual = found[0].fact
    expected = person_fact(
        position='управляющий директор',
        name=name_fact(first='Иван', last='Ульянов'),
    )
    assert actual == expected
Example #2
0
# coding: utf-8
from __future__ import unicode_literals

from yargy import (rule, and_, or_, fact)
from yargy.predicates import (
    eq,
    gte,
    lte,
    dictionary,
    normalized,
)

# Fact type with 'year', 'month' and 'day' attributes.
Date = fact('Date', ['year', 'month', 'day'])

# Month word -> calendar number; numbers follow the position in the list,
# starting at 1.
MONTHS = {
    name: number
    for number, name in enumerate(
        [
            'январь',
            'февраль',
            'март',
            'апрель',
            'май',
            'июнь',
            'июль',
            'август',
            'сентябрь',
            'октябрь',
            'ноябрь',
            'декабрь',
        ],
        start=1
    )
}

# Predicate built from MONTHS; a match is interpreted as Date.month
# through .normalized().
MONTH_NAME = dictionary(MONTHS).interpretation(
    Date.month.normalized()
)
Example #3
0
# coding: utf-8
from __future__ import unicode_literals
from yargy import (rule, fact, not_, and_, or_, attribute,)
from yargy.predicates import (gram, caseless, normalized, is_title, dictionary, custom,)
#from yargy.relations import (gnc_relation, case_relation,)

## 1 - FACT INIT
# Fact type with a single 'name' attribute. Nothing in this section attaches
# it to a rule through .interpretation().
DateRelative = fact('DateRelative', ['name'])
# NOTE(review): mid-file import. DATERELATIVE_DICT is presumably a ready-made
# yargy predicate/rule for relative-date words -- confirm in
# natasha.dictionaries.daterelative.
from natasha.dictionaries.daterelative import DATERELATIVE_DICT
###

### 2 - INIT GRAMS & GRAM RULES (pymorphy2)
# Single-token predicates: three grammeme checks and an is_title() check.
# None of them is used by the rules visible in this section.
ADJF = gram('ADJF')
NOUN = gram('NOUN')
INT = gram('INT')
TITLE = is_title()

###


### 1ST RING RULES
# A rule consisting of DATERELATIVE_DICT alone, marked .repeatable().
R1_SIMPLE = rule(
   DATERELATIVE_DICT,
).repeatable()
###


### 2ND RING RULES
# (no rules are defined in this section)
###
    
Example #4
0
# coding: utf-8
from __future__ import unicode_literals

from yargy import (rule, and_, or_, not_, fact)

from yargy.predicates import (caseless, normalized, eq, length_eq, gram,
                              dictionary, is_single, is_title)

from yargy.relations import gnc_relation

# Fact type with a single 'name' attribute.
Location = fact(
    'Location',
    ['name'],
)

# Relation object shared by both tokens of REGION below.
# NOTE(review): "gnc" presumably stands for gender/number/case agreement --
# confirm against the yargy relations documentation.
gnc = gnc_relation()

# Region name: an ADJF token followed by one of the listed nouns, both
# matched against the same ``gnc`` relation. The whole rule is interpreted
# as Location.name through .inflected().
REGION = rule(
    gram('ADJF').match(gnc),
    dictionary({
        # krai, district, oblast, governorate, uyezd
        'край',
        'район',
        'область',
        'губерния',
        'уезд',
    }).match(gnc),
).interpretation(Location.name.inflected())

# Two more independent relation objects; the rules that use them are not
# visible in this part of the file.
gnc1 = gnc_relation()
gnc2 = gnc_relation()
Example #5
0
# coding: utf-8
from __future__ import unicode_literals

from yargy import (rule, and_, or_, fact)
from yargy.predicates import (eq, in_, gram, normalized, caseless)

# Fact type with 'amount' and 'currency' attributes.
Money = fact('Money', ['amount', 'currency'])

# Euro: built with normalized('евро').
EURO = normalized('евро')

# Dollar: either normalized('доллар') or the literal '$' token.
DOLLARS = or_(normalized('доллар'), eq('$'))

# Rouble: either the full word (normalized('рубль')) or the short forms
# 'руб' / 'р' (caseless) followed by an optional '.' token.
RUBLES = or_(rule(normalized('рубль')),
             rule(or_(caseless('руб'), caseless('р')),
                  eq('.').optional()))

# Any of the three currencies; the match is interpreted as Money.currency.
CURRENCY = or_(rule(EURO), rule(DOLLARS),
               RUBLES).interpretation(Money.currency)

INT = gram('INT')

# Whole part of an amount: one, two or three INT tokens in a row, or two or
# three INT tokens separated by '.'.
# NOTE(review): presumably digit groups such as "1 000 000" / "1.000.000" --
# confirm against the tokenizer behaviour.
AMOUNT_ = or_(
    rule(INT),
    rule(INT, INT),
    rule(INT, INT, INT),
    rule(INT, '.', INT),
    rule(INT, '.', INT, '.', INT),
)

# AMOUNT_ followed by ',' or '.' and one more INT token.
# NOTE(review): the name looks truncated (FRACTION_AMOUNT?). Other code may
# already refer to this spelling, so check all users before renaming.
FRACTION_AMOUN = rule(AMOUNT_, in_({',', '.'}), INT)
Example #6
0
# coding: utf-8
from __future__ import unicode_literals

from yargy import Parser, rule, fact
from yargy.predicates import gram, dictionary

# Fact type with 'count', 'base' and 'currency' attributes.
Money = fact('Money', ['count', 'base', 'currency'])


def test_constant_attribute():
    """Check that ``.const()`` puts a fixed value into the fact attribute.

    The 'тысяча' token is interpreted as ``Money.base.const(10**3)``, and
    the resulting fact is expected to carry ``base=1000``.
    """
    count_part = gram('INT').interpretation(Money.count)
    base_part = dictionary({'тысяча'}).interpretation(Money.base.const(10**3))
    currency_part = dictionary({'рубль', 'доллар'}).interpretation(
        Money.currency
    )
    money_rule = rule(
        count_part,
        base_part,
        currency_part,
    ).interpretation(Money)

    found = list(Parser(money_rule).match('1 тысяча рублей'))

    actual = found[0].fact
    expected = Money(count=1, base=1000, currency='рублей')
    assert actual == expected
Example #7
0
    normalized,
    is_capitalized,
)

from yargy.relations import (
    gnc_relation,
    case_relation,
)
from yargy.pipelines import MorphPipeline


from natasha.grammars.name import NAME_
from natasha.grammars.person import PERSON_


# Fact type with a single 'name' attribute.
Organisation = fact('Organisation', ['name'])


class OrganisationTypePipeline(MorphPipeline):
    grammemes = {'OrganisationType'}
    keys = [
        'АО',
        'ОАО',
        'ООО',
        'ЗАО',
        'ПАО',

        # TODO Check abbrs
        # 'ик',
        # 'нк',
        # 'хк',
Example #8
0
# coding: utf-8
from __future__ import unicode_literals
from yargy import (rule, fact, not_, and_, or_, attribute,)
from yargy.predicates import (gram, caseless, normalized, is_title, dictionary, custom,)
#from yargy.relations import (gnc_relation, case_relation,)

## 1 - FACT INIT
# Fact type with a single 'name' attribute. Nothing in this section attaches
# it to a rule through .interpretation().
Enum = fact('Enum', ['name'])
# NOTE(review): mid-file import. ENUM_DICT is presumably a ready-made yargy
# predicate/rule -- confirm in natasha.dictionaries.enum.
from natasha.dictionaries.enum import ENUM_DICT
###

### 2 - INIT GRAMS & GRAM RULES (pymorphy2)
# Single-token predicates: three grammeme checks and an is_title() check.
# None of them is used by the rules visible in this section.
ADJF = gram('ADJF')
NOUN = gram('NOUN')
INT = gram('INT')
TITLE = is_title()

###


### 1ST RING RULES
# A rule consisting of ENUM_DICT alone, marked .repeatable().
R1_SIMPLE = rule(
   ENUM_DICT,
).repeatable()
###


### 2ND RING RULES
# (no rules are defined in this section)
###
    
Example #9
0
# coding: utf-8
from __future__ import unicode_literals

from yargy import (rule, fact, attribute, or_, and_)
from yargy.predicates import (eq, lte, gte, gram, length_eq, in_, in_caseless,
                              dictionary, normalized, caseless, is_title)
from yargy.pipelines import (MorphPipeline)

# Address fact: one attribute, 'parts', declared as repeatable.
Address = fact('Address', [attribute('parts').repeatable()])
# Fact types for the individual address components.
Index = fact('Index', ['value'])
Country = fact('Country', ['name'])
Region = fact('Region', ['name', 'type'])
Settlement = fact('Settlement', ['name', 'type'])
Street = fact('Street', ['name', 'type'])
Building = fact('Building', ['number', 'type'])
Room = fact('Room', ['number', 'type'])

# Punctuation predicates: literal '-' and '.' tokens.
DASH = eq('-')
DOT = eq('.')

# Single-token predicates: three grammeme checks and an is_title() check.
ADJF = gram('ADJF')
NOUN = gram('NOUN')
INT = gram('INT')
TITLE = is_title()

# An INT token, an optional dash, then one of the listed endings
# (caseless).
# NOTE(review): presumably an ordinal written with digits, e.g. "1-я" or
# "2й" -- confirm with the rules that use ANUM.
ANUM = rule(INT, DASH.optional(),
            in_caseless({'я', 'й', 'е', 'ое', 'ая', 'ий', 'ой'}))

#########
#
#  STRANA
Example #10
0
# coding: utf-8
from __future__ import unicode_literals

from yargy import (rule, fact, or_)
from yargy.predicates import gram
from yargy.pipelines import MorphPipeline

from .name import (Name, FIRST_LAST, LAST_FIRST, TITLE_FIRST_LAST,
                   TITLE_LAST_FIRST, ABBR_FIRST_LAST, LAST_ABBR_FIRST,
                   ABBR_FIRST_MIDDLE_LAST, LAST_ABBR_FIRST_MIDDLE,
                   TITLE_FIRST_MIDDLE, TITLE_FIRST_MIDDLE_LAST,
                   TITLE_LAST_FIRST_MIDDLE, JUST_FIRST, JUST_LAST)

# Fact type with 'position' and 'name' attributes.
Person = fact('Person', ['position', 'name'])


class PositionsPipeline(MorphPipeline):
    grammemes = {'Position'}
    keys = [
        'святой',
        'патриарх',
        'митрополит',
        'царь',
        'король',
        'царица',
        'император',
        'императрица',
        'принц',
        'принцесса',
        'князь',
        'граф',
Example #11
0
# coding: utf-8
from __future__ import unicode_literals

from yargy import (rule, and_, or_, not_, fact)
from yargy.predicates import (eq, length_eq, gram, dictionary, is_single,
                              is_capitalized)
from yargy.relations import gnc_relation

from natasha.data import load_lines

# Fact type with 'first', 'last', 'middle' and 'nick' attributes.
Name = fact('Name', ['first', 'last', 'middle', 'nick'])

# Word lists read with load_lines() (from natasha.data) and stored as sets.
# NOTE(review): the file names suggest confirmed vs. candidate first/last
# names -- confirm against the data files.
FIRST_DICT = set(load_lines('first.txt'))
MAYBE_FIRST_DICT = set(load_lines('maybe_first.txt'))
LAST_DICT = set(load_lines('last.txt'))
MAYBE_LAST_DICT = set(load_lines('maybe_last.txt'))

##########
#
#  COMPONENTS
#
###########

# Token that is listed in FIRST_DICT.
IS_FIRST = dictionary(FIRST_DICT)

# Either a token with the 'Name' grammeme that is not tagged 'Abbr',
# or a token listed in MAYBE_FIRST_DICT.
MAYBE_FIRST = or_(
    and_(
        gram('Name'),
        not_(gram('Abbr'))  # excludes e.g. the "А." in "А. Леонидов"
    ),
    dictionary(MAYBE_FIRST_DICT))
Example #12
0
# coding: utf-8
from __future__ import unicode_literals
from yargy import (rule, fact, not_, and_, or_, attribute,)
from yargy.predicates import (gram, caseless, normalized, is_title, dictionary, custom,)
#from yargy.relations import (gnc_relation, case_relation,)

## 1 - FACT INIT
# Fact type with a single 'name' attribute. Nothing in this section attaches
# it to a rule through .interpretation().
Country = fact('Country', ['name'])
# NOTE(review): mid-file import. COUNTRY_DICT is presumably a ready-made
# yargy predicate/rule for country names -- confirm in
# natasha.dictionaries.country.
from natasha.dictionaries.country import COUNTRY_DICT
###

### 2 - INIT GRAMS & GRAM RULES (pymorphy2)
# Single-token predicates: three grammeme checks and an is_title() check.
# None of them is used by the rules visible in this section.
ADJF = gram('ADJF')
NOUN = gram('NOUN')
INT = gram('INT')
TITLE = is_title()

###


### 1ST RING RULES
# A rule consisting of COUNTRY_DICT alone, marked .repeatable().
R1_SIMPLE = rule(
   COUNTRY_DICT,
).repeatable()
###


### 2ND RING RULES
# (no rules are defined in this section)
###