コード例 #1
0
def test_type_errors():
    """Accessing ``match.fact`` is expected to raise TypeError for both grammars."""
    # Grammar 1: a custom(int) interpretation nested under the attribute F.a.
    F = fact('F', ['a'])
    typed_one = eq('1').interpretation(custom(int))
    grammar = rule('a', typed_one).interpretation(F.a)
    found = Parser(grammar).match('a 1')
    with pytest.raises(TypeError):
        found.fact

    # Grammar 2: a custom(int) interpretation nested under custom(str).
    F = fact('F', ['a'])
    typed_one = eq('1').interpretation(custom(int))
    grammar = rule('a', typed_one).interpretation(custom(str))
    found = Parser(grammar).match('a 1')
    with pytest.raises(TypeError):
        found.fact
コード例 #2
0
def test_nested_facts():
    """A fact F interpreted into attribute G.b yields the nested record G(b=F(...))."""
    F = fact('F', ['a'])
    G = fact('G', ['b'])

    inner = rule(eq('a').interpretation(F.a)).interpretation(F)
    grammar = inner.interpretation(G.b).interpretation(G)

    result = Parser(grammar).match('a').fact
    assert result == G(b=F(a='a'))
    assert result.spans == [(0, 1)]
    assert result.as_json == {'b': {'a': 'a'}}
コード例 #3
0
def test_pipeline_key():
    """Check interpretations attached directly to a morph pipeline.

    The first grammar stores the normalized pipeline match in a fact
    attribute; the second one normalizes it to a bare value.
    """
    from yargy.pipelines import morph_pipeline

    phrases = morph_pipeline(['закрытое общество', 'завод'])
    F = fact('F', ['a'])

    # Pipeline match kept as a normalized attribute of F.
    as_fact = phrases.interpretation(F.a.normalized()).interpretation(F)
    found = Parser(as_fact).match('закрытом обществе')
    assert found.fact == F(a='закрытое общество')

    # Pipeline match normalized without any fact wrapper.
    as_value = phrases.interpretation(normalized())
    found = Parser(as_value).match('заводе')
    assert found.fact == 'завод'
コード例 #4
0
class ConsoleGame:
    """Find mentions of a console game in text with a yargy grammar.

    The grammar matches one of ``names`` followed by an optional version
    number, an optional version name and an optional console, and interprets
    the match as a ``Game`` fact.
    """

    __game = fact(
        'Game',
        ['name', 'version_number', 'version_name', 'console']
    )
    # Facts collected by every matches() call. This is a class attribute, so
    # it is shared by all ConsoleGame instances.
    __amount_of_games = []

    def __init__(self, names: list = None, version_numbers: list = None,
                 version_names: list = None, consoles: list = None):
        """Build the parser for one game.

        Args:
            names: spellings of the game title. Must be non-empty: names[0]
                is stored as the constant ``name`` of every fact, so an empty
                list raises IndexError.
            version_numbers: phrases accepted as the version number.
            version_names: phrases accepted as the version name.
            consoles: phrases accepted as the console.
        """
        # Use None as the default instead of a shared mutable [] object.
        names = [] if names is None else names
        version_numbers = [] if version_numbers is None else version_numbers
        version_names = [] if version_names is None else version_names
        consoles = [] if consoles is None else consoles

        rules = rule(morph_pipeline(names).interpretation(self.__game.name.const(names[0])),
                     morph_pipeline(version_numbers).interpretation(self.__game.version_number).optional(),
                     morph_pipeline(version_names).interpretation(self.__game.version_name).optional(),
                     morph_pipeline(consoles).interpretation(self.__game.console).optional())
        game = or_(rules).interpretation(self.__game)
        self.parser = Parser(game)

    def matches(self, data):
        """Print every game fact found in ``data.text`` and the running total.

        Only the first 9000 items of ``data.text`` are scanned.
        NOTE(review): presumably ``data.text`` is a sequence of sentences
        (e.g. a DataFrame column) — confirm against the caller.
        """
        found = []

        for sent in data.text[:9000]:
            for match in self.parser.findall(sent):
                game = match.fact
                found.append(game)
                # Record the fact itself rather than a reference to the
                # growing result list; the total count stays the same.
                self.__amount_of_games.append(game)

        for m in found:
            print(m.name, m.version_number, m.version_name, m.console)

        print(len(self.__amount_of_games))
コード例 #5
0
def test_type_errors():
    """Both grammars below are expected to raise TypeError on ``match.fact``."""
    # custom(int) nested under the attribute interpretation F.a.
    F = fact('F', ['a'])
    int_token = eq('1').interpretation(custom(int))
    grammar = rule('a', int_token).interpretation(F.a)
    outcome = Parser(grammar).match('a 1')
    with pytest.raises(TypeError):
        outcome.fact

    # custom(int) nested under another custom interpretation, custom(str).
    F = fact('F', ['a'])
    int_token = eq('1').interpretation(custom(int))
    grammar = rule('a', int_token).interpretation(custom(str))
    outcome = Parser(grammar).match('a 1')
    with pytest.raises(TypeError):
        outcome.fact
コード例 #6
0
def test_pipeline_key():
    """Check interpretations attached directly to a morph pipeline.

    The first grammar stores the normalized pipeline match in a fact
    attribute; the second one normalizes it to a bare value.
    """
    # Only morph_pipeline is needed here; the unused ``or_`` import was dropped.
    from yargy.pipelines import morph_pipeline

    pipeline = morph_pipeline([
        'закрытое общество',
        'завод'
    ])

    F = fact('F', ['a'])

    # Pipeline match kept as a normalized attribute of F.
    RULE = pipeline.interpretation(
        F.a.normalized()
    ).interpretation(
        F
    )
    parser = Parser(RULE)
    match = parser.match('закрытом обществе')
    record = match.fact
    assert record == F(a='закрытое общество')

    # Pipeline match normalized without any fact wrapper.
    RULE = pipeline.interpretation(
        normalized()
    )
    parser = Parser(RULE)
    match = parser.match('заводе')
    value = match.fact
    assert value == 'завод'
コード例 #7
0
def test_name():
    """First and last name are inflected under a shared gnc relation.

    The last input is expected to produce no match at all.
    """
    Name = fact('Name', ['first', 'last'])
    agreement = gnc_relation()

    first = gram('Name').interpretation(Name.first.inflected()).match(agreement)
    last = gram('Surn').interpretation(Name.last.inflected()).match(agreement)
    full_name = rule(first, last).interpretation(Name)

    engine = Parser(full_name)

    found = engine.match('саше иванову')
    assert found.fact == Name(first='саша', last='иванов')

    found = engine.match('сашу иванову')
    assert found.fact == Name(first='саша', last='иванова')

    found = engine.match('сашу ивановой')
    assert not found
コード例 #8
0
    def __new__(mcs, typename, base_classes, class_attr):
        """Create a fact definition class.

        A class whose body sets a truthy ``_ROOT_FACT_DEFINITION`` is stored
        on the metaclass as the single root definition class. Any other class
        must list that root as its only base and declare annotations; it is
        then rebuilt on top of a yargy fact generated from the annotation
        names.

        Args:
            typename: name of the class being created.
            base_classes: tuple of declared base classes.
            class_attr: namespace of the class body.

        Returns:
            The newly created class.

        Raises:
            TypeError: if the root class is declared twice, if the bases are
                anything other than exactly the root class, or if no
                annotations are declared.
        """
        if class_attr.get('_ROOT_FACT_DEFINITION', False):
            # Checking for attempt to redeclare base definition class
            if mcs.BASE_FACT_DEFINITION_CLS is not None:
                raise TypeError(
                    f"Attempt to redeclare base fact definition class "
                    f"'{mcs.BASE_FACT_DEFINITION_CLS.__name__}' by '{typename}'"
                )

            # Saving root class to metaclass attributes
            mcs.BASE_FACT_DEFINITION_CLS = super().__new__(
                mcs, typename, base_classes, class_attr)
            return mcs.BASE_FACT_DEFINITION_CLS

        # TODO CONSIDER: research ability of mixins support
        # TODO CONSIDER: research ability of transitive inheritance from base class
        if base_classes != (mcs.BASE_FACT_DEFINITION_CLS, ):
            raise TypeError(
                f"Class {typename} must be inherited directly from FactDefinition only."
                f" Mixins and transitive inheritance are not currently supported"
            )

        annotations = class_attr.get("__annotations__")

        if not annotations:
            raise TypeError(
                f"No annotations declared in fact definition class '{typename}'"
            )

        # Name the generated fact after the class being defined, so that each
        # definition gets its own readable "<ClassName>AutoGen" fact name.
        generated_fact_cls = fact(f"{typename}AutoGen", list(annotations))

        # The generated fact replaces the declared base class.
        new_base_classes = (generated_fact_cls, )

        return super().__new__(mcs, typename, new_base_classes, class_attr)
コード例 #9
0
def test_normalized_custom_attribute():
    """The normalized token is passed through a custom lookup before storing."""
    F = fact('F', ['a'])
    month_numbers = {'январь': 1}

    month = rule('январе')
    to_number = F.a.normalized().custom(month_numbers.get)
    grammar = month.interpretation(to_number).interpretation(F)

    found = Parser(grammar).match('январе')
    assert found.fact == F(a=1)
コード例 #10
0
def test_inflected_custom_attribute():
    """The inflected token is passed through a custom lookup before storing."""
    F = fact('F', ['a'])
    month_numbers = {'январь': 1}

    month = rule('январе')
    to_number = F.a.inflected({'nomn', 'sing'}).custom(month_numbers.get)
    grammar = month.interpretation(to_number).interpretation(F)

    found = Parser(grammar).match('январе')
    assert found.fact == F(a=1)
コード例 #11
0
def test_nested_facts():
    """A fact F interpreted into attribute G.b yields the nested record G(b=F(...))."""
    F = fact('F', ['a'])
    G = fact('G', ['b'])

    # Inner fact first, then wrap it as the value of G.b.
    inner_fact = rule(eq('a').interpretation(F.a)).interpretation(F)
    outer_fact = inner_fact.interpretation(G.b).interpretation(G)

    nested = Parser(outer_fact).match('a').fact
    assert nested == G(b=F(a='a'))
    assert nested.spans == [(0, 1)]
    assert nested.as_json == {'b': {'a': 'a'}}
コード例 #12
0
def test_attribute_custom():
    """An attribute with ``custom(int)`` stores the converted integer."""
    F = fact('F', 'a')
    digit = rule('1')
    grammar = digit.interpretation(F.a.custom(int)).interpretation(F)

    result = Parser(grammar).match('1').fact
    assert result == F(a=1)
    assert result.spans == [(0, 1)]
    assert result.as_json == {'a': 1}
コード例 #13
0
def test_attribute_custom_custom():
    """Two chained ``custom`` steps: lower-case the token, then map it."""
    F = fact('F', 'a')
    lookup = {'a': 1}

    letter = rule('A')
    lowered_then_mapped = F.a.custom(str.lower).custom(lookup.get)
    grammar = letter.interpretation(lowered_then_mapped).interpretation(F)

    result = Parser(grammar).match('A').fact
    assert result == F(a=1)
コード例 #14
0
def test_attribute_normalized():
    """A normalized attribute stores the normal form of the matched token."""
    F = fact('F', 'a')
    month = rule('январе')
    grammar = month.interpretation(F.a.normalized()).interpretation(F)

    result = Parser(grammar).match('январе').fact
    assert result == F(a='январь')
    assert result.spans == [(0, 6)]
    assert result.as_json == {'a': 'январь'}
コード例 #15
0
def test_rule_attribute():
    """An attribute attached to a two-token rule stores the tokens joined by one space."""
    F = fact('F', ['a'])
    pair = rule('a', 'A')
    grammar = pair.interpretation(F.a).interpretation(F)

    # The input has three spaces between tokens; the span covers all of it.
    result = Parser(grammar).match('a   A').fact
    assert result == F(a='a A')
    assert result.spans == [(0, 5)]
    assert result.as_json == {'a': 'a A'}
コード例 #16
0
def test_attribute():
    """An attribute interpretation without a fact wrapper yields the bare value."""
    F = fact('F', 'a')
    grammar = rule('a').interpretation(F.a)
    found = Parser(grammar).match('a')
    assert found.fact == 'a'
コード例 #17
0
def test_repeatable():
    """A repeatable attribute collects every value assigned to it into a list."""
    F = fact('F', [attribute('a').repeatable()])

    first = eq('a').interpretation(F.a)
    second = eq('b').interpretation(F.a)
    grammar = rule(first, second).interpretation(F)

    result = Parser(grammar).match('a b').fact
    assert result == F(a=['a', 'b'])
    assert result.spans == [(0, 1), (2, 3)]
    assert result.as_json == {'a': ['a', 'b']}
コード例 #18
0
def test_attribute_const():
    """A ``const`` attribute interpretation yields the constant, not the token."""
    F = fact('F', 'a')
    grammar = rule('январь').interpretation(F.a.const(1))
    found = Parser(grammar).match('январь')
    assert found.fact == 1
コード例 #19
0
def test_insted_attributes():
    """An outer attribute (F.b) wrapped around an inner one (F.a) takes the value."""
    F = fact('F', ['a', 'b'])

    inner = rule(eq('a').interpretation(F.a))
    grammar = inner.interpretation(F.b).interpretation(F)

    result = Parser(grammar).match('a').fact
    # Only ``b`` is filled; ``a`` stays None and is absent from the JSON form.
    assert result == F(a=None, b='a')
    assert result.spans == [(0, 1)]
    assert result.as_json == {'b': 'a'}
コード例 #20
0
def test_attribute_const():
    """Matching 'январь' with ``F.a.const(1)`` is expected to produce the value 1."""
    F = fact('F', 'a')
    constant_one = F.a.const(1)
    grammar = rule('январь').interpretation(constant_one)
    outcome = Parser(grammar).match('январь')
    assert outcome.fact == 1
コード例 #21
0
def test_attribute():
    """Matching 'a' with a bare ``F.a`` interpretation is expected to produce 'a'."""
    F = fact('F', 'a')
    letter = rule('a')
    grammar = letter.interpretation(F.a)
    outcome = Parser(grammar).match('a')
    assert outcome.fact == 'a'
コード例 #22
0
def test_attribute_inflected():
    """An inflected attribute stores the token put into the requested grammemes."""
    F = fact('F', 'a')
    month = rule('январе')
    plural_nominative = F.a.inflected({'nomn', 'plur'})
    grammar = month.interpretation(plural_nominative).interpretation(F)

    result = Parser(grammar).match('январе').fact
    assert result == F(a='январи')
    assert result.spans == [(0, 6)]
    assert result.as_json == {'a': 'январи'}
コード例 #23
0
def test_predicate_attribute():
    """An attribute attached to a predicate stores the matched token."""
    F = fact('F', ['a'])
    letter = eq('a').interpretation(F.a)
    grammar = rule(letter).interpretation(F)

    result = Parser(grammar).match('a').fact
    assert result == F(a='a')
    assert result.spans == [(0, 1)]
    assert result.as_json == {'a': 'a'}
コード例 #24
0
def test_merge_facts():
    """Two partial F facts under one F interpretation merge into a single record."""
    F = fact('F', ['a', 'b'])

    only_a = rule(eq('a').interpretation(F.a)).interpretation(F)
    only_b = rule(eq('b').interpretation(F.b)).interpretation(F)
    grammar = rule(only_a, only_b).interpretation(F)

    result = Parser(grammar).match('a b').fact
    assert result == F(a='a', b='b')
    assert result.spans == [(0, 1), (2, 3)]
    assert result.as_json == {'a': 'a', 'b': 'b'}
コード例 #25
0
def test_rule_attribute_custom():
    """``custom(int)`` applied on top of an attribute interpretation yields an int."""
    F = fact('F', ['a'])
    grammar = rule('1').interpretation(F.a).interpretation(custom(int))
    found = Parser(grammar).match('1')
    assert found.fact == 1
コード例 #26
0
def test_rule_attribute_custom():
    """Matching '1' through ``F.a`` and then ``custom(int)`` is expected to give 1."""
    F = fact('F', ['a'])
    as_attribute = rule('1').interpretation(F.a)
    grammar = as_attribute.interpretation(custom(int))
    outcome = Parser(grammar).match('1')
    assert outcome.fact == 1
コード例 #27
0
def test_insted_attributes():
    """With F.a nested inside F.b, only ``b`` of the resulting fact is filled."""
    F = fact('F', ['a', 'b'])

    nested = rule(eq('a').interpretation(F.a)).interpretation(F.b)
    grammar = nested.interpretation(F)

    outcome = Parser(grammar).match('a')
    result = outcome.fact
    assert result == F(a=None, b='a')
    assert result.spans == [(0, 1)]
    assert result.as_json == {'b': 'a'}
コード例 #28
0
def test_attribute_custom_custom():
    """Chained ``custom`` calls are applied in order: ``str.lower`` then the mapping."""
    F = fact('F', 'a')
    lookup = {'a': 1}
    grammar = rule('A').interpretation(
        F.a.custom(str.lower).custom(lookup.get)
    ).interpretation(F)

    outcome = Parser(grammar).match('A')
    assert outcome.fact == F(a=1)
コード例 #29
0
def test_repeatable():
    """Two tokens interpreted into one repeatable attribute end up in a list."""
    F = fact('F', [attribute('a').repeatable()])
    grammar = rule(
        eq('a').interpretation(F.a), eq('b').interpretation(F.a)
    ).interpretation(F)

    outcome = Parser(grammar).match('a b')
    collected = outcome.fact
    assert collected == F(a=['a', 'b'])
    assert collected.spans == [(0, 1), (2, 3)]
    assert collected.as_json == {'a': ['a', 'b']}
コード例 #30
0
def test_attribute_normalized():
    """Matching 'январе' with a normalized attribute is expected to store 'январь'."""
    F = fact('F', 'a')
    normal_form = F.a.normalized()
    grammar = rule('январе').interpretation(normal_form).interpretation(F)

    outcome = Parser(grammar).match('январе')
    result = outcome.fact
    assert result == F(a='январь')
    assert result.spans == [(0, 6)]
    assert result.as_json == {'a': 'январь'}
コード例 #31
0
def test_attribute_inflected():
    """Matching 'январе' inflected to {'nomn', 'plur'} is expected to store 'январи'."""
    F = fact('F', 'a')
    grammar = rule('январе').interpretation(
        F.a.inflected({'nomn', 'plur'})).interpretation(F)

    outcome = Parser(grammar).match('январе')
    result = outcome.fact
    assert result == F(a='январи')
    assert result.spans == [(0, 6)]
    assert result.as_json == {'a': 'январи'}
コード例 #32
0
def test_attribute_custom():
    """Matching '1' with ``F.a.custom(int)`` is expected to store the integer 1."""
    F = fact('F', 'a')
    as_int = F.a.custom(int)
    grammar = rule('1').interpretation(as_int).interpretation(F)

    outcome = Parser(grammar).match('1')
    result = outcome.fact
    assert result == F(a=1)
    assert result.spans == [(0, 1)]
    assert result.as_json == {'a': 1}
コード例 #33
0
def test_normalized_custom_attribute():
    """'январе' normalized and looked up in the month table is expected to give 1."""
    F = fact('F', ['a'])
    month_numbers = {'январь': 1}
    grammar = rule('январе').interpretation(
        F.a.normalized().custom(month_numbers.get)).interpretation(F)

    outcome = Parser(grammar).match('январе')
    assert outcome.fact == F(a=1)
コード例 #34
0
def test_inflected_custom_attribute():
    """'январе' inflected and looked up in the month table is expected to give 1."""
    F = fact('F', ['a'])
    month_numbers = {'январь': 1}
    grammar = rule('январе').interpretation(
        F.a.inflected({'nomn', 'sing'}).custom(month_numbers.get)).interpretation(F)

    outcome = Parser(grammar).match('январе')
    assert outcome.fact == F(a=1)
コード例 #35
0
def test_merge_facts():
    """Facts produced by sub-rules A and B are merged by the enclosing F rule."""
    F = fact('F', ['a', 'b'])
    left = rule(eq('a').interpretation(F.a)).interpretation(F)
    right = rule(eq('b').interpretation(F.b)).interpretation(F)
    merged_rule = rule(left, right).interpretation(F)

    outcome = Parser(merged_rule).match('a b')
    merged = outcome.fact
    assert merged == F(a='a', b='b')
    assert merged.spans == [(0, 1), (2, 3)]
    assert merged.as_json == {'a': 'a', 'b': 'b'}
コード例 #36
0
def yargy_smart_home(msg):
    """Extract smart-home commands from *msg*.

    Three word orders are recognised: action + object + place with an optional
    trailing 'комната', the same without it, and place first followed by
    action and object.

    Returns:
        A list with one dict per match, holding the normalized action, object
        and place under the keys 'Действие', 'Объект' and 'Место'.
    """
    Do = fact('Entity', ['action', 'object', 'place'])

    Actions = dictionary({'Включи', 'Отключи', 'Выключи'})

    Objects = dictionary(
        {'Лампочку', 'Свет', 'Розетку', 'Видеокамеру', 'Камеру'})

    Prep = dictionary({'в', 'на'})

    Place = dictionary({
        'Гостевой', 'Ванной', 'спальной', 'спальне', 'холле', 'коридоре',
        'кухне'
    })

    ActionPhrase = or_(
        rule(Actions.interpretation(Do.action.normalized()),
             Objects.interpretation(Do.object.normalized()), Prep.optional(),
             Place.interpretation(Do.place.normalized()),
             rule(normalized('комната')).optional()),
        rule(Actions.interpretation(Do.action.normalized()),
             Objects.interpretation(Do.object.normalized()), Prep.optional(),
             Place.interpretation(Do.place.normalized())),
        rule(Prep.optional(), Place.interpretation(Do.place.normalized()),
             rule(normalized('комната')).optional(),
             Actions.interpretation(Do.action.normalized()),
             Objects.interpretation(
                 Do.object.normalized()))).interpretation(Do)

    parser = Parser(ActionPhrase)
    return [
        {
            'Действие': match.fact.action,
            'Объект': match.fact.object,
            'Место': match.fact.place,
        }
        for match in parser.findall(msg)
    ]
コード例 #37
0
def test_bnf():
    """Check the BNF lines expected for rules with interpretations and relations.

    Each ``assert_bnf`` call passes a rule followed by the expected BNF lines.
    NOTE(review): ``assert_bnf`` is defined outside this snippet; presumably it
    compares the rule's BNF rendering with the given lines — confirm.
    """
    from yargy.interpretation import fact
    from yargy.relations import gnc_relation

    F = fact('F', ['a'])
    gnc = gnc_relation()

    # The fact interpretation names the production 'F', not 'A'.
    assert_bnf(rule('a').named('A').interpretation(F), "F -> 'a'")
    # Attribute nested in a fact: one production per interpretation.
    assert_bnf(
        rule('a').interpretation(F.a).interpretation(F), 'F -> F.a',
        "F.a -> 'a'")
    # A relation shows up as a '^gnc' suffix on the production name.
    assert_bnf(rule('a').match(gnc).interpretation(F.a), "F.a^gnc -> 'a'")
    # repeatable() applied after vs. before the interpretation.
    assert_bnf(
        rule('a').interpretation(F.a).repeatable(), 'R0 -> F.a | F.a R0',
        "F.a -> 'a'")
    assert_bnf(
        rule('a').repeatable().interpretation(F.a), 'F.a -> R1',
        "R1 -> 'a' | 'a' R1")
コード例 #38
0
def test_bnf():
    """Check the BNF lines expected for interpreted, related and named rules.

    Each ``assert_bnf`` call passes a rule followed by the expected BNF lines.
    NOTE(review): ``assert_bnf`` is defined outside this snippet; presumably it
    compares the rule's BNF rendering with the given lines — confirm.
    """
    from yargy.interpretation import fact
    from yargy.relations import gnc_relation

    F = fact('F', ['a'])
    gnc = gnc_relation()

    # The fact interpretation names the production 'F', not 'A'.
    assert_bnf(
        rule('a').named('A').interpretation(F),
        "F -> 'a'"
    )
    # Attribute nested in a fact: one production per interpretation.
    assert_bnf(
        rule('a').interpretation(F.a).interpretation(F),
        'F -> F.a',
        "F.a -> 'a'"
    )
    # A relation shows up as a '^gnc' suffix on the production name.
    assert_bnf(
        rule('a').match(gnc).interpretation(F.a),
        "F.a^gnc -> 'a'"
    )
    # repeatable() applied after vs. before the interpretation.
    assert_bnf(
        rule('a').interpretation(F.a).repeatable(),
        'R0 -> F.a R0 | F.a',
        "F.a -> 'a'"
    )
    assert_bnf(
        rule('a').repeatable().interpretation(F.a),
        'F.a -> R1',
        "R1 -> 'a' R1 | 'a'"
    )

    # One anonymous rule reused under two names is expected to be emitted
    # once (R0) and referenced from both B and C.
    A = rule('a')
    B = A.named('B')
    C = A.named('C')
    D = rule(B, C).named('D')
    assert_bnf(
        D,
        'D -> B C',
        'B -> R0',
        'C -> R0',
        "R0 -> 'a'"
    )
コード例 #39
0
def yargy_get_genre(msg):
    """Return the inflected genre phrase of every genre mention found in *msg*.

    A mention is a genre name with an optional 'жанр'/'раздел' word either
    after or before it.
    """
    Genre = fact('Genre', ['genre'])

    genre_names = {
        'ужасы', 'ужастики', 'мелодрама', 'комедия', 'боевик', 'триллер',
        'мультик', 'мультфильм', 'драма'
    }

    name = dictionary(genre_names)
    marker_word = or_(rule(normalized('жанр')), rule(normalized('раздел')))

    name_then_word = rule(name, marker_word.optional())
    word_then_name = rule(marker_word.optional(), name)
    phrase = or_(name_then_word, word_then_name)
    phrase = phrase.interpretation(Genre.genre.inflected())
    phrase = phrase.interpretation(Genre)

    genres = []
    engine = Parser(phrase)
    for found in engine.findall(msg):
        genres.append(found.fact.genre)
    return genres
コード例 #40
0
def yargy_get_channel(msg):
    """Return the known channel names mentioned in *msg*.

    For every phrase matched by the grammar, each channel from the known set
    whose lower-cased name is a substring of the matched phrase is appended
    to the result.
    """
    Channel = fact('Channel', ['name'])

    known_channels = {
        'Первый', 'Россия', 'ТВЦ', 'НТВ', 'ТНТ', 'СТС', 'Культура', 'Дождь',
        'Спас'
    }
    channel_name = dictionary(known_channels)
    channel_word = or_(
        rule(normalized('канал')),
        rule(normalized('программа')),
    )

    word_then_name = rule(channel_word, channel_name)
    name_then_word = rule(channel_name, channel_word.optional())
    phrase = or_(word_then_name, name_then_word)
    phrase = phrase.interpretation(Channel.name.inflected())
    phrase = phrase.interpretation(Channel)

    mentioned = []
    engine = Parser(phrase)
    for found in engine.findall(msg):
        mentioned.extend(
            channel for channel in known_channels
            if channel.lower() in found.fact.name
        )
    return mentioned
コード例 #41
0
from yargy import Parser, rule, and_, not_, or_
from yargy.interpretation import fact
from yargy.predicates import gram
from yargy.relations import gnc_relation
from yargy.pipelines import morph_pipeline

import pymorphy2

# Morphological analyzer instance.
morph = pymorphy2.MorphAnalyzer()

# Fact types used by the grammar below.
LimbState = fact('LimbState', ['state'])
Limb = fact('limb', ['name'])
# NOTE(review): this fact is registered under the name 'Person' although it is
# bound to ``Disease`` — looks like a copy-paste leftover; confirm.
Disease = fact('Person', ['limb', 'limbstate'])

# A single verb, stored as LimbState.state.
LIMBSTATE = rule(
    gram('VERB').interpretation(LimbState.state)
).interpretation(
    LimbState
)

# A single noun, stored as Limb.name.
LIMB = rule(
    gram('NOUN').interpretation(Limb.name)
).interpretation(
    Limb
)

DISEASE = or_(
    rule(
        LIMB.interpretation(Disease.limb),
        or_(
コード例 #42
0
from .name import SIMPLE_NAME
from .person import POSITION_NAME

from yargy.rule.transformators import RuleTransformator


class StripInterpretationTransformator(RuleTransformator):
    """Rule transformator that replaces an interpretation rule with its inner rule."""

    def visit_InterpretationRule(self, item):
        # Continue visiting the wrapped rule and return that result instead
        # of the interpretation rule itself.
        wrapped = item.rule
        return self.visit(wrapped)


# Name and person grammars passed through StripInterpretationTransformator,
# which replaces each interpretation rule with the rule it wraps.
NAME = SIMPLE_NAME.transform(StripInterpretationTransformator)
PERSON = POSITION_NAME.transform(StripInterpretationTransformator)


# Fact with a single 'name' attribute.
Organisation = fact('Organisation', ['name'])


TYPE = morph_pipeline([
    'АО',
    'ОАО',
    'ООО',
    'ЗАО',
    'ПАО',

    # TODO Check abbrs
    # 'ик',
    # 'нк',
    # 'хк',
    # 'ип',
    # 'чп',
コード例 #43
0
ファイル: City.py プロジェクト: yazimut/NLP
from yargy import rule, and_, or_, not_
from yargy.predicates import eq, type as _type, normalized, custom
from yargy.pipelines import morph_pipeline
from yargy.interpretation import fact

# Fact with an optional prefix word and the city title.
CityFact = fact('city', ['prefix', 'title'])

# Known city names; the normalized match is stored as the title.
CityTitle = morph_pipeline({
    'липецк', 'сургут', 'нальчик', 'москва', 'санкт-петербург', 'питер',
    'нижний новгород', 'видное'
}).interpretation(
    CityFact.title.normalized()
)

# Optional 'город' prefix, the city title, and an optional trailing ';'.
CityRule = rule(
    normalized('город').optional().interpretation(CityFact.prefix),
    CityTitle,
    eq(';').optional()
).interpretation(
    CityFact
)
コード例 #44
0
from yargy import (rule, or_)
from yargy.interpretation import (fact)
from yargy.predicates import (dictionary, is_capitalized, eq, caseless)
from yargy.pipelines import caseless_pipeline, morph_pipeline

from natasha.extractors import Extractor

# Fact describing a job position: seniority level, field and position name.
Position = fact('position', ['level', 'field', 'name'])

# Seniority level, matched case-insensitively and stored as Position.level.
LEVEL = rule(
    caseless_pipeline([
        'junior', 'middle', 'senior', 'lead', 'chief', 'head', 'team lead',
        "старший", "младший", "руководитель направления"
    ]).interpretation(
        Position.level
    )
)

# TODO: slang variants (датасаентолог, датасатанист, etc.) need to be covered;
# most likely they will have to be parsed with regular expressions.
# Position name: an English title, a capitalized DS/DE abbreviation, or a
# Russian title; the inflected match is stored as Position.name.
NAME = rule(
    or_(
        caseless_pipeline([
            'data scientist', 'data engineer', 'engineer', 'analyst',
            'data analyst', 'data manager', 'scientist', 'researcher',
            "developer", "intern"
        ]),
        rule(dictionary(['DS', 'DE']), is_capitalized()),
        morph_pipeline(["аналитик", "разработчик", "стажер"])
    ).interpretation(
        Position.name.inflected()
    )
)

FIELD = rule(
    caseless_pipeline([
        'ML', 'DL', 'CV', 'computer vision', 'NLP', 'bi', 'machine learning',
        'deep learning', 'software', 'research', 'big data', 'python', 'c++',
        "scala", "java", 'ios', "android", 'devops', "backend", 'frontend'
コード例 #45
0
# coding: utf-8
from __future__ import unicode_literals

from yargy import (
    rule,
    and_, or_
)
from yargy.interpretation import fact, attribute
from yargy.predicates import (
    eq, gte, lte, length_eq,
    dictionary, normalized,
)


# Fact describing a parsed date: year, month, day and a 'current_era'
# attribute built with attribute('current_era', True).
# NOTE(review): presumably True is the attribute's default value — confirm
# against yargy's ``attribute`` signature.
Date = fact(
    'Date',
    ['year', 'month', 'day', attribute('current_era', True)]
)


MONTHS = {
    'январь': 1,
    'февраль': 2,
    'март': 3,
    'апрель': 4,
    'май': 5,
    'июнь': 6,
    'июль': 7,
    'август': 8,
    'сентябрь': 9,
    'октябрь': 10,
    'ноябрь': 11,
コード例 #46
0
from yargy import (
    Parser,
    rule,
    and_, or_
)
from yargy.pipelines import morph_pipeline
from yargy.interpretation import fact, attribute
from yargy.predicates import (
    eq, gte, lte, length_eq,
    dictionary, normalized, gram
)
# Fact for a legal code reference: structural units (point, subpoint, part,
# article, ...) interleaved with 'n0'..'n8' attributes, plus type and codex.
# NOTE(review): presumably each nK holds the number of the neighbouring unit —
# confirm against the CODEX rule.
COD = fact('Codex', [
    'n0', 'point', 'n1', 'subpoint', 'n2', 'part', 'n3', 'article', 'n4',
    'par', 'n5', 'subsection', 'n6', 'section', 'n7', 'chapter', 'n8',
    'type', 'codex',
])

# Fact for a court mention.
COURT_ = fact('Court', ['smth', 'type', 'court', 'rf'])

# Token that is a number from 1 to 10000 inclusive.
NUM = and_(gte(1), lte(10000))

# A number optionally followed by further numbers, each optionally preceded
# by a dot.
NUMBERS = rule(
    NUM,
    rule(
        eq('.').optional(),
        NUM
    ).repeatable().optional()
)


CODEX = rule(
        or_(rule(normalized('пункт')),
            rule('п', eq('.').optional())
コード例 #47
0
ファイル: address.py プロジェクト: bureaucratic-labs/natasha
    or_, and_
)
from yargy.interpretation import fact, attribute
from yargy.predicates import (
    eq, lte, gte, gram, type, tag,
    length_eq,
    in_, in_caseless, dictionary,
    normalized, caseless,
    is_title
)
from yargy.pipelines import morph_pipeline
from yargy.tokenizer import QUOTES


# Fact types for address parsing.
# Address keeps its components in a single repeatable 'parts' attribute.
Address = fact('Address', [attribute('parts').repeatable()])
Index = fact('Index', ['value'])
Country = fact('Country', ['name'])
Region = fact('Region', ['name', 'type'])
Settlement = fact(
    'Settlement',
コード例 #48
0
ファイル: person.py プロジェクト: bureaucratic-labs/natasha
from yargy import (
    rule,
    or_
)
from yargy.interpretation import fact
from yargy.predicates import gram
from yargy.pipelines import morph_pipeline

from .name import (
    NAME,
    SIMPLE_NAME
)


# Fact with two attributes: 'position' and 'name'.
Person = fact(
    'Person',
    ['position', 'name']
)


POSITION = morph_pipeline([
    'святой',
    'патриарх',
    'митрополит',

    'царь',
    'король',
    'царица',
    'император',
    'императрица',
    'принц',
    'принцесса',
コード例 #49
0
ファイル: fact.py プロジェクト: dmitrypurtov/analyzedata
from ipymarkup import show_markup
from yargy import rule, and_, or_
from yargy.interpretation import fact, attribute
from yargy.predicates import dictionary, normalized

# Fact holding the recognised profile type.
ProfileTypeFact = fact('ProfileTypeFact', ['profile'])

# Russian profile word -> profile identifier.
PROFILES = {
    'модель': 'Model',
    'визажист': 'Visagiste',
    'фотограф': 'Photographer',
    'стилист': 'Stylist',
}

# Any key of PROFILES; the normalized match is mapped to its identifier
# through PROFILES.__getitem__.
NAME = dictionary(PROFILES).interpretation(
    ProfileTypeFact.profile.normalized().custom(
        PROFILES.__getitem__
    )
)

PROFILE_TYPE_PARSER = or_(
    rule(NAME),
).interpretation(
    ProfileTypeFact
)
コード例 #50
0
ファイル: name.py プロジェクト: bureaucratic-labs/natasha
    eq, length_eq,
    gram, tag,
    is_single, is_capitalized
)
from yargy.predicates.bank import DictionaryPredicate as dictionary
from yargy.relations import gnc_relation

from natasha.data import load_dict

from yargy.rule.transformators import RuleTransformator
from yargy.rule.constructors import Rule
from yargy.predicates.constructors import AndPredicate


# Fact describing a person's name: first, middle, last and nick.
Name = fact(
    'Name',
    ['first', 'middle', 'last', 'nick']
)


# Word sets loaded once at import time via natasha.data.load_dict.
# NOTE(review): presumably first names, ambiguous first names and last names,
# judging by the file names — confirm against the data files.
FIRST_DICT = set(load_dict('first.txt'))
MAYBE_FIRST_DICT = set(load_dict('maybe_first.txt'))
LAST_DICT = set(load_dict('last.txt'))


##########
#
#  COMPONENTS
#
###########

コード例 #51
0
ファイル: money.py プロジェクト: bureaucratic-labs/natasha
    eq, length_eq,
    in_, in_caseless,
    gram, type,
    normalized, caseless, dictionary
)

from natasha.utils import Record

from natasha.dsl import (
    Normalizable,
    money as dsl
)


# Generated fact class; the class declared right below reuses the same name
# and extends it.
Money = fact(
    'Money',
    ['integer', 'fraction', 'multiplier', 'currency', 'coins']
)


class Money(Money, Normalizable):
    """Money fact extended with a ``normalized`` property."""

    @property
    def normalized(self):
        """Return ``dsl.Money`` built from the combined amount and the currency.

        The amount starts from ``integer``; a truthy ``fraction`` adds
        ``fraction / 100``, a truthy ``multiplier`` scales the running amount,
        and a truthy ``coins`` adds ``coins / 100`` after the scaling.
        """
        total = self.integer
        if self.fraction:
            total += self.fraction / 100
        if self.multiplier:
            total *= self.multiplier
        if self.coins:
            # Coins are added after the multiplier, so they are not scaled.
            total += self.coins / 100
        return dsl.Money(total, self.currency)
コード例 #52
0
ファイル: location.py プロジェクト: bureaucratic-labs/natasha
from yargy import (
    rule,
    and_, or_, not_
)
from yargy.interpretation import fact
from yargy.predicates import (
    caseless, normalized,
    eq, length_eq,
    gram, dictionary,
    is_single, is_title
)
from yargy.relations import gnc_relation


# Fact with a single 'name' attribute.
Location = fact('Location', ['name'])


# Relation shared by both tokens of the REGION rule.
gnc = gnc_relation()

# An adjective followed by a region-type noun, both matched under ``gnc``;
# the inflected phrase is stored as Location.name.
REGION = rule(
    gram('ADJF').match(gnc),
    dictionary({
        'край',
        'район',
        'область',
        'губерния',
        'уезд',
    }).match(gnc),
).interpretation(
    Location.name.inflected()
)