def test_type_errors():
    # An attribute interpretation (F.a) applied on top of a rule whose token
    # was already converted to a non-string via custom(int) must raise
    # TypeError when .fact materializes the value.
    F = fact('F', ['a'])
    RULE = rule(
        'a',
        eq('1').interpretation(
            custom(int)
        )
    ).interpretation(
        F.a
    )
    parser = Parser(RULE)
    match = parser.match('a 1')
    with pytest.raises(TypeError):
        match.fact

    # Likewise, chaining a rule-level custom(str) over a custom(int) token
    # is rejected at materialization time.
    F = fact('F', ['a'])
    RULE = rule(
        'a',
        eq('1').interpretation(
            custom(int)
        )
    ).interpretation(
        custom(str)
    )
    parser = Parser(RULE)
    match = parser.match('a 1')
    with pytest.raises(TypeError):
        match.fact
def test_nested_facts():
    # Interpreting a finished F fact into attribute G.b nests one fact
    # inside another; as_json reflects the nesting.
    F = fact('F', ['a'])
    G = fact('G', ['b'])
    RULE = rule(eq('a').interpretation(F.a)).interpretation(F).interpretation(
        G.b).interpretation(G)
    parser = Parser(RULE)
    match = parser.match('a')
    record = match.fact
    assert record == G(b=F(a='a'))
    assert record.spans == [(0, 1)]
    assert record.as_json == {'b': {'a': 'a'}}
def test_pipeline_key():
    from yargy.pipelines import morph_pipeline

    # morph_pipeline matches inflected forms of its keys; normalized()
    # recovers the dictionary key itself.
    pipeline = morph_pipeline([
        'закрытое общество',
        'завод'
    ])
    F = fact('F', ['a'])
    RULE = pipeline.interpretation(
        F.a.normalized()
    ).interpretation(
        F
    )
    parser = Parser(RULE)
    match = parser.match('закрытом обществе')
    record = match.fact
    assert record == F(a='закрытое общество')

    # Without a fact attribute, the interpretation yields the bare key.
    RULE = pipeline.interpretation(
        normalized()
    )
    parser = Parser(RULE)
    match = parser.match('заводе')
    value = match.fact
    assert value == 'завод'
class ConsoleGame:
    """Finds mentions of a console game (name, version, console) in text.

    Built on yargy morph pipelines: one pipeline per attribute, combined
    into a single interpreted rule.
    """

    __game = fact(
        'Game',
        ['name', 'version_number', 'version_name', 'console']
    )
    # NOTE: class-level list, deliberately shared across all instances
    # (accumulates match batches from every ConsoleGame object).
    __amount_of_games = []

    def __init__(self, names: list = None, version_numbers: list = None,
                 version_names: list = None, consoles: list = None):
        """Build the parser.

        :param names: game-name variants; the first entry is stored as the
            canonical name (required, must be non-empty).
        :param version_numbers: optional version-number variants.
        :param version_names: optional version-name variants.
        :param consoles: optional console-name variants.
        """
        # BUG FIX: the original used mutable default arguments (`list = []`),
        # which are shared between calls; the empty default also crashed on
        # names[0]. Use None sentinels and fail loudly on missing names.
        if not names:
            raise ValueError('names must contain at least one entry')
        version_numbers = version_numbers or []
        version_names = version_names or []
        consoles = consoles or []

        rules = rule(
            morph_pipeline(names).interpretation(
                self.__game.name.const(names[0])),
            morph_pipeline(version_numbers).interpretation(
                self.__game.version_number).optional(),
            morph_pipeline(version_names).interpretation(
                self.__game.version_name).optional(),
            morph_pipeline(consoles).interpretation(
                self.__game.console).optional())
        game = or_(rules).interpretation(self.__game)
        self.parser = Parser(game)

    def matches(self, data):
        """Collect game facts from the first 9000 sentences of data.text,
        record the batch, and print each match plus the running batch count.
        """
        matches = []
        for sent in data.text[:9000]:
            for match in self.parser.findall(sent):
                matches.append(match.fact)
        self.__amount_of_games.append(matches)
        for m in matches:
            print(m.name, m.version_number, m.version_name, m.console)
        print(len(self.__amount_of_games))
def test_type_errors():
    # Attribute interpretation over an already-converted (custom(int))
    # token must raise TypeError at fact materialization.
    F = fact('F', ['a'])
    RULE = rule('a', eq('1').interpretation(custom(int))).interpretation(F.a)
    parser = Parser(RULE)
    match = parser.match('a 1')
    with pytest.raises(TypeError):
        match.fact

    # Same for a second rule-level custom(str) over custom(int).
    F = fact('F', ['a'])
    RULE = rule('a', eq('1').interpretation(custom(int))).interpretation(
        custom(str))
    parser = Parser(RULE)
    match = parser.match('a 1')
    with pytest.raises(TypeError):
        match.fact
def test_pipeline_key():
    from yargy import or_
    from yargy.pipelines import morph_pipeline

    # morph_pipeline matches inflected forms of its keys; normalized()
    # recovers the dictionary key itself.
    pipeline = morph_pipeline([
        'закрытое общество',
        'завод'
    ])
    F = fact('F', ['a'])
    RULE = pipeline.interpretation(
        F.a.normalized()
    ).interpretation(
        F
    )
    parser = Parser(RULE)
    match = parser.match('закрытом обществе')
    record = match.fact
    assert record == F(a='закрытое общество')

    # Without a fact attribute, the interpretation yields the bare key.
    RULE = pipeline.interpretation(
        normalized()
    )
    parser = Parser(RULE)
    match = parser.match('заводе')
    value = match.fact
    assert value == 'завод'
def test_name():
    Name = fact(
        'Name',
        ['first', 'last']
    )
    # gnc_relation ties the two tokens by gender/number/case agreement.
    gnc = gnc_relation()
    FIRST = gram('Name').interpretation(
        Name.first.inflected()
    ).match(gnc)
    LAST = gram('Surn').interpretation(
        Name.last.inflected()
    ).match(gnc)
    NAME = rule(
        FIRST,
        LAST
    ).interpretation(Name)
    parser = Parser(NAME)
    match = parser.match('саше иванову')
    assert match.fact == Name(first='саша', last='иванов')

    match = parser.match('сашу иванову')
    assert match.fact == Name(first='саша', last='иванова')

    # Forms that do not agree must not match at all.
    match = parser.match('сашу ивановой')
    assert not match
def __new__(mcs, typename, base_classes, class_attr):
    """Create a fact-definition class from an annotated class body.

    The root class (marked with ``_ROOT_FACT_DEFINITION``) is registered
    once on the metaclass; every other class must inherit directly from it
    and declare attribute annotations, which become the attribute list of
    an auto-generated yargy ``fact`` class inserted as the real base.

    :raises TypeError: on root-class redeclaration, on mixins/transitive
        inheritance, or when no annotations are declared.
    """
    if class_attr.get('_ROOT_FACT_DEFINITION', False):
        # Checking for attempt to redeclare base definition class
        if mcs.BASE_FACT_DEFINITION_CLS is not None:
            raise TypeError(
                f"Attempt to redeclare base fact definition class "
                f"'{mcs.BASE_FACT_DEFINITION_CLS.__name__}' by '{typename}'"
            )
        # Saving root class to metaclass attributes
        mcs.BASE_FACT_DEFINITION_CLS = super().__new__(
            mcs, typename, base_classes, class_attr)
        return mcs.BASE_FACT_DEFINITION_CLS

    # TODO CONSIDER: research ability of mixins support
    # TODO CONSIDER: research ability of transitive inheritance from base class
    if base_classes != (mcs.BASE_FACT_DEFINITION_CLS, ):
        raise TypeError(
            f"Class {typename} must be inherited directly from FactDefinition only."
            f" Mixins and transitive inheritance are not currently supported"
        )

    annotations = class_attr.get("__annotations__")
    if not annotations:
        raise TypeError(
            f"No annotations declared in fact definition class '{typename}'"
        )

    # BUG FIX: the original interpolated the builtin `type` here
    # (f"{type}AutoGen" -> "<class 'type'>AutoGen"); the generated fact
    # must be named after the class being defined.
    generated_fact_cls = fact(f"{typename}AutoGen", list(annotations))
    new_base_classes = (generated_fact_cls, )
    return super().__new__(mcs, typename, new_base_classes, class_attr)
def test_normalized_custom_attribute():
    F = fact('F', ['a'])
    MONTHS = {'январь': 1}
    # normalized() lemmatizes the token first, then custom() maps the lemma.
    RULE = rule('январе').interpretation(F.a.normalized().custom(
        MONTHS.get)).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('январе')
    assert match.fact == F(a=1)
def test_inflected_custom_attribute():
    F = fact('F', ['a'])
    MONTHS = {'январь': 1}
    # inflected({'nomn', 'sing'}) yields the nominative-singular form,
    # which is then mapped through MONTHS by custom().
    RULE = rule('январе').interpretation(
        F.a.inflected({'nomn', 'sing'}).custom(MONTHS.get)).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('январе')
    assert match.fact == F(a=1)
def test_nested_facts():
    # Interpreting a finished F fact into attribute G.b nests one fact
    # inside another; as_json reflects the nesting.
    F = fact('F', ['a'])
    G = fact('G', ['b'])
    RULE = rule(
        eq('a').interpretation(F.a)
    ).interpretation(
        F
    ).interpretation(
        G.b
    ).interpretation(
        G
    )
    parser = Parser(RULE)
    match = parser.match('a')
    record = match.fact
    assert record == G(b=F(a='a'))
    assert record.spans == [(0, 1)]
    assert record.as_json == {'b': {'a': 'a'}}
def test_attribute_custom():
    F = fact('F', 'a')
    # custom(int) converts the matched token '1' into the integer 1.
    RULE = rule('1').interpretation(F.a.custom(int)).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('1')
    record = match.fact
    assert record == F(a=1)
    assert record.spans == [(0, 1)]
    assert record.as_json == {'a': 1}
def test_attribute_custom_custom():
    F = fact('F', 'a')
    MAPPING = {'a': 1}
    # Two chained custom() steps: lowercase the token, then dict lookup.
    RULE = rule('A').interpretation(F.a.custom(str.lower).custom(
        MAPPING.get)).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('A')
    record = match.fact
    assert record == F(a=1)
def test_attribute_normalized():
    F = fact('F', 'a')
    # normalized() stores the lemma of the matched token.
    RULE = rule('январе').interpretation(F.a.normalized()).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('январе')
    record = match.fact
    assert record == F(a='январь')
    assert record.spans == [(0, 6)]
    assert record.as_json == {'a': 'январь'}
def test_rule_attribute():
    F = fact('F', ['a'])
    # A multi-token rule interpreted as one attribute keeps the joined text.
    RULE = rule('a', 'A').interpretation(F.a).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('a A')
    record = match.fact
    assert record == F(a='a A')
    assert record.spans == [(0, 5)]
    assert record.as_json == {'a': 'a A'}
def test_attribute():
    """A bare attribute interpretation yields the raw matched text."""
    schema = fact('F', 'a')
    grammar = rule('a').interpretation(schema.a)
    result = Parser(grammar).match('a')
    assert result.fact == 'a'
def test_repeatable():
    # attribute(...).repeatable() collects repeated assignments into a list.
    F = fact('F', [attribute('a').repeatable()])
    RULE = rule(eq('a').interpretation(F.a),
                eq('b').interpretation(F.a)).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('a b')
    record = match.fact
    assert record == F(a=['a', 'b'])
    assert record.spans == [(0, 1), (2, 3)]
    assert record.as_json == {'a': ['a', 'b']}
def test_attribute_const():
    """const(...) ignores the matched text and stores the fixed value."""
    schema = fact('F', 'a')
    grammar = rule('январь').interpretation(schema.a.const(1))
    result = Parser(grammar).match('январь')
    assert result.fact == 1
def test_insted_attributes():
    # NOTE(review): 'insted' looks like a typo for 'instead'; name kept,
    # since pytest collects the test under this exact name.
    F = fact('F', ['a', 'b'])
    # The outer F.b interpretation wins over the inner F.a one:
    # the value lands in b, and a stays None.
    RULE = rule(eq('a').interpretation(F.a)).interpretation(
        F.b).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('a')
    record = match.fact
    assert record == F(a=None, b='a')
    assert record.spans == [(0, 1)]
    assert record.as_json == {'b': 'a'}
def test_attribute_inflected():
    F = fact('F', 'a')
    # inflected() re-inflects the token to the requested grammemes
    # (nominative plural here).
    RULE = rule('январе').interpretation(
        F.a.inflected({'nomn', 'plur'})).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('январе')
    record = match.fact
    assert record == F(a='январи')
    assert record.spans == [(0, 6)]
    assert record.as_json == {'a': 'январи'}
def test_predicate_attribute():
    F = fact('F', ['a'])
    # A predicate-level attribute interpretation fills F.a with the token.
    RULE = rule(
        eq('a').interpretation(F.a)
    ).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('a')
    record = match.fact
    assert record == F(a='a')
    assert record.spans == [(0, 1)]
    assert record.as_json == {'a': 'a'}
def test_merge_facts():
    # Two sub-rules each fill a different attribute of F; the outer
    # interpretation merges the partial facts into one.
    F = fact('F', ['a', 'b'])
    A = rule(eq('a').interpretation(F.a)).interpretation(F)
    B = rule(eq('b').interpretation(F.b)).interpretation(F)
    RULE = rule(A, B).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('a b')
    record = match.fact
    assert record == F(a='a', b='b')
    assert record.spans == [(0, 1), (2, 3)]
    assert record.as_json == {'a': 'a', 'b': 'b'}
def test_rule_attribute_custom():
    F = fact('F', ['a'])
    # Rule-level custom(int) applies on top of the attribute interpretation,
    # so the final fact value is the converted int.
    RULE = rule(
        '1'
    ).interpretation(
        F.a
    ).interpretation(
        custom(int)
    )
    parser = Parser(RULE)
    match = parser.match('1')
    assert match.fact == 1
def test_insted_attributes():
    # NOTE(review): 'insted' looks like a typo for 'instead'; name kept,
    # since pytest collects the test under this exact name.
    F = fact('F', ['a', 'b'])
    # The outer F.b interpretation wins over the inner F.a one:
    # the value lands in b, and a stays None.
    RULE = rule(
        eq('a').interpretation(F.a)
    ).interpretation(
        F.b
    ).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('a')
    record = match.fact
    assert record == F(a=None, b='a')
    assert record.spans == [(0, 1)]
    assert record.as_json == {'b': 'a'}
def test_attribute_custom_custom():
    F = fact('F', 'a')
    MAPPING = {'a': 1}
    # Two chained custom() steps: lowercase the token, then dict lookup.
    RULE = rule(
        'A'
    ).interpretation(
        F.a.custom(str.lower).custom(MAPPING.get)
    ).interpretation(
        F
    )
    parser = Parser(RULE)
    match = parser.match('A')
    record = match.fact
    assert record == F(a=1)
def test_repeatable():
    # attribute(...).repeatable() collects repeated assignments into a list.
    F = fact('F', [attribute('a').repeatable()])
    RULE = rule(
        eq('a').interpretation(F.a),
        eq('b').interpretation(F.a)
    ).interpretation(
        F
    )
    parser = Parser(RULE)
    match = parser.match('a b')
    record = match.fact
    assert record == F(a=['a', 'b'])
    assert record.spans == [(0, 1), (2, 3)]
    assert record.as_json == {'a': ['a', 'b']}
def test_attribute_normalized():
    F = fact('F', 'a')
    # normalized() stores the lemma of the matched token.
    RULE = rule(
        'январе'
    ).interpretation(
        F.a.normalized()
    ).interpretation(
        F
    )
    parser = Parser(RULE)
    match = parser.match('январе')
    record = match.fact
    assert record == F(a='январь')
    assert record.spans == [(0, 6)]
    assert record.as_json == {'a': 'январь'}
def test_attribute_inflected():
    F = fact('F', 'a')
    # inflected() re-inflects the token to the requested grammemes
    # (nominative plural here).
    RULE = rule(
        'январе'
    ).interpretation(
        F.a.inflected({'nomn', 'plur'})
    ).interpretation(
        F
    )
    parser = Parser(RULE)
    match = parser.match('январе')
    record = match.fact
    assert record == F(a='январи')
    assert record.spans == [(0, 6)]
    assert record.as_json == {'a': 'январи'}
def test_attribute_custom():
    F = fact('F', 'a')
    # custom(int) converts the matched token '1' into the integer 1.
    RULE = rule(
        '1'
    ).interpretation(
        F.a.custom(int)
    ).interpretation(
        F
    )
    parser = Parser(RULE)
    match = parser.match('1')
    record = match.fact
    assert record == F(a=1)
    assert record.spans == [(0, 1)]
    assert record.as_json == {'a': 1}
def test_normalized_custom_attribute():
    F = fact('F', ['a'])
    MONTHS = {
        'январь': 1
    }
    # normalized() lemmatizes the token first, then custom() maps the lemma.
    RULE = rule(
        'январе'
    ).interpretation(
        F.a.normalized().custom(MONTHS.get)
    ).interpretation(
        F
    )
    parser = Parser(RULE)
    match = parser.match('январе')
    assert match.fact == F(a=1)
def test_inflected_custom_attribute():
    F = fact('F', ['a'])
    MONTHS = {
        'январь': 1
    }
    # inflected({'nomn', 'sing'}) yields the nominative-singular form,
    # which is then mapped through MONTHS by custom().
    RULE = rule(
        'январе'
    ).interpretation(
        F.a.inflected({'nomn', 'sing'}).custom(MONTHS.get)
    ).interpretation(
        F
    )
    parser = Parser(RULE)
    match = parser.match('январе')
    assert match.fact == F(a=1)
def test_merge_facts():
    # Two sub-rules each fill a different attribute of F; the outer
    # interpretation merges the partial facts into one.
    F = fact('F', ['a', 'b'])
    A = rule(
        eq('a').interpretation(F.a)
    ).interpretation(F)
    B = rule(
        eq('b').interpretation(F.b)
    ).interpretation(F)
    RULE = rule(
        A,
        B
    ).interpretation(F)
    parser = Parser(RULE)
    match = parser.match('a b')
    record = match.fact
    assert record == F(a='a', b='b')
    assert record.spans == [(0, 1), (2, 3)]
    assert record.as_json == {'a': 'a', 'b': 'b'}
def yargy_smart_home(msg):
    """Extract smart-home commands (action, object, place) from msg.

    Returns a list of dicts with Russian keys 'Действие'/'Объект'/'Место',
    one per match found in the text.
    """
    Do = fact('Entity', ['action', 'object', 'place'])
    Actions = dictionary({'Включи', 'Отключи', 'Выключи'})
    Objects = dictionary(
        {'Лампочку', 'Свет', 'Розетку', 'Видеокамеру', 'Камеру'})
    # NOTE(review): ObjectsList and Room are defined but never used below.
    ObjectsList = or_(
        rule(Objects),
        rule(Objects, Objects),
    )
    Prep = dictionary({'в', 'на'})
    Place = dictionary({
        'Гостевой', 'Ванной', 'спальной', 'спальне', 'холле', 'коридоре',
        'кухне'
    })
    Room = {'комната'}
    # Three token orders are accepted: action-object-[prep]-place
    # (with an optional trailing 'комната'), the same without the trailing
    # word, and place-first followed by action-object.
    ActionPhrase = or_(
        rule(Actions.interpretation(Do.action.normalized()),
             Objects.interpretation(Do.object.normalized()),
             Prep.optional(),
             Place.interpretation(Do.place.normalized()),
             rule(normalized('комната')).optional()),
        rule(Actions.interpretation(Do.action.normalized()),
             Objects.interpretation(Do.object.normalized()),
             Prep.optional(),
             Place.interpretation(Do.place.normalized())),
        rule(Prep.optional(),
             Place.interpretation(Do.place.normalized()),
             rule(normalized('комната')).optional(),
             Actions.interpretation(Do.action.normalized()),
             Objects.interpretation(
                 Do.object.normalized()))).interpretation(Do)
    res = []
    parser = Parser(ActionPhrase)
    for match in parser.findall(msg):
        res.append({
            'Действие': match.fact.action,
            'Объект': match.fact.object,
            'Место': match.fact.place,
        })
    return res
def test_bnf():
    from yargy.interpretation import fact
    from yargy.relations import gnc_relation

    # Verifies the BNF rendering of interpreted, relation-bound and
    # repeatable rules.
    F = fact('F', ['a'])
    gnc = gnc_relation()
    assert_bnf(rule('a').named('A').interpretation(F), "F -> 'a'")
    assert_bnf(
        rule('a').interpretation(F.a).interpretation(F), 'F -> F.a',
        "F.a -> 'a'")
    assert_bnf(rule('a').match(gnc).interpretation(F.a), "F.a^gnc -> 'a'")
    # repeatable() after interpretation wraps F.a in a fresh production;
    # interpretation after repeatable() wraps the generated rule instead.
    assert_bnf(
        rule('a').interpretation(F.a).repeatable(), 'R0 -> F.a | F.a R0',
        "F.a -> 'a'")
    assert_bnf(
        rule('a').repeatable().interpretation(F.a), 'F.a -> R1',
        "R1 -> 'a' | 'a' R1")
def test_bnf():
    from yargy.interpretation import fact
    from yargy.relations import gnc_relation

    # Verifies the BNF rendering of interpreted, relation-bound,
    # repeatable and shared sub-rules.
    F = fact('F', ['a'])
    gnc = gnc_relation()
    assert_bnf(
        rule('a').named('A').interpretation(F),
        "F -> 'a'"
    )
    assert_bnf(
        rule('a').interpretation(F.a).interpretation(F),
        'F -> F.a',
        "F.a -> 'a'"
    )
    assert_bnf(
        rule('a').match(gnc).interpretation(F.a),
        "F.a^gnc -> 'a'"
    )
    # repeatable() after interpretation wraps F.a in a fresh production;
    # interpretation after repeatable() wraps the generated rule instead.
    assert_bnf(
        rule('a').interpretation(F.a).repeatable(),
        'R0 -> F.a R0 | F.a',
        "F.a -> 'a'"
    )
    assert_bnf(
        rule('a').repeatable().interpretation(F.a),
        'F.a -> R1',
        "R1 -> 'a' R1 | 'a'"
    )

    # A shared sub-rule (A) is emitted once and referenced from both
    # named wrappers.
    A = rule('a')
    B = A.named('B')
    C = A.named('C')
    D = rule(B, C).named('D')
    assert_bnf(
        D,
        'D -> B C',
        'B -> R0',
        'C -> R0',
        "R0 -> 'a'"
    )
def yargy_get_genre(msg):
    """Return the lemmatized genre mentions found in msg."""
    Genre = fact('Genre', ['genre'])
    GENRES = {
        'ужасы', 'ужастики', 'мелодрама', 'комедия', 'боевик', 'триллер',
        'мультик', 'мультфильм', 'драма'
    }
    GENRES_NAME = dictionary(GENRES)
    GENRES_WORDS = or_(rule(normalized('жанр')), rule(normalized('раздел')))
    # Genre name with an optional 'жанр'/'раздел' word on either side.
    GENRE_PHRASE = or_(rule(GENRES_NAME, GENRES_WORDS.optional()),
                       rule(GENRES_WORDS.optional(), GENRES_NAME)).interpretation(
                           Genre.genre.inflected()).interpretation(Genre)
    res = []
    parser = Parser(GENRE_PHRASE)
    for match in parser.findall(msg):
        res.append(match.fact.genre)
    return res
def yargy_get_channel(msg):
    """Return canonical TV-channel names mentioned in msg."""
    Channel = fact('Channel', ['name'])
    # NOTE(review): 'CNANNELS' looks like a typo for 'CHANNELS'; kept as-is
    # since it is a local name only.
    CNANNELS = {
        'Первый', 'Россия', 'ТВЦ', 'НТВ', 'ТНТ', 'СТС', 'Культура', 'Дождь',
        'Спас'
    }
    CNANNELS_NAME = dictionary(CNANNELS)
    CHANNEL_WORDS = or_(rule(normalized('канал')),
                        rule(normalized('программа')))
    # Channel name preceded or optionally followed by 'канал'/'программа'.
    CHANNEL_PHRASE = or_(
        rule(CHANNEL_WORDS, CNANNELS_NAME),
        rule(CNANNELS_NAME, CHANNEL_WORDS.optional())).interpretation(
            Channel.name.inflected()).interpretation(Channel)
    res = []
    parser = Parser(CHANNEL_PHRASE)
    for match in parser.findall(msg):
        # print(match.fact)
        # Map the inflected match back to its canonical spelling.
        for channel in CNANNELS:
            if channel.lower() in match.fact.name:
                res.append(channel)
    return res
from yargy import Parser, rule, and_, not_, or_ from yargy.interpretation import fact from yargy.predicates import gram from yargy.relations import gnc_relation from yargy.pipelines import morph_pipeline import pymorphy2 morph = pymorphy2.MorphAnalyzer() LimbState = fact( 'LimbState', ['state'], ) Limb = fact( 'limb', ['name'], ) Disease = fact('Person', ['limb', 'limbstate']) LIMBSTATE = rule(gram('VERB').interpretation( LimbState.state)).interpretation(LimbState) LIMB = rule(gram('NOUN').interpretation(Limb.name)).interpretation(Limb) DISEASE = or_( rule( LIMB.interpretation(Disease.limb), or_(
from .name import SIMPLE_NAME from .person import POSITION_NAME from yargy.rule.transformators import RuleTransformator class StripInterpretationTransformator(RuleTransformator): def visit_InterpretationRule(self, item): return self.visit(item.rule) NAME = SIMPLE_NAME.transform(StripInterpretationTransformator) PERSON = POSITION_NAME.transform(StripInterpretationTransformator) Organisation = fact('Organisation', ['name']) TYPE = morph_pipeline([ 'АО', 'ОАО', 'ООО', 'ЗАО', 'ПАО', # TODO Check abbrs # 'ик', # 'нк', # 'хк', # 'ип', # 'чп',
from yargy import rule, and_, or_, not_
from yargy.predicates import eq, type as _type, normalized, custom
from yargy.pipelines import morph_pipeline
from yargy.interpretation import fact

# A city mention: optional 'город' prefix, a known city title (stored
# lemmatized), and an optional trailing ';'.
CityFact = fact('city', ['prefix', 'title'])

CityTitle = morph_pipeline({
    'липецк', 'сургут', 'нальчик', 'москва', 'санкт-петербург', 'питер',
    'нижний новгород', 'видное'
}).interpretation(CityFact.title.normalized())

CityRule = rule(
    normalized('город').optional().interpretation(CityFact.prefix),
    CityTitle,
    eq(';').optional()).interpretation(CityFact)
from yargy import (rule, or_) from yargy.interpretation import (fact) from yargy.predicates import (dictionary, is_capitalized, eq, caseless) from yargy.pipelines import caseless_pipeline, morph_pipeline from natasha.extractors import Extractor Position = fact('position', ['level', 'field', 'name']) LEVEL = rule( caseless_pipeline([ 'junior', 'middle', 'senior', 'lead', 'chief', 'head', 'team lead', "старший", "младший", "руководитель направления" ]).interpretation(Position.level)) # TODO: нужно учесть жаргонные варианты (датасаентолог, датасатанист и т.д.) Скорее всего, придется парсить регулярками NAME = rule( or_( caseless_pipeline([ 'data scientist', 'data engineer', 'engineer', 'analyst', 'data analyst', 'data manager', 'scientist', 'researcher', "developer", "intern" ]), rule(dictionary(['DS', 'DE']), is_capitalized()), morph_pipeline(["аналитик", "разработчик", "стажер"])).interpretation(Position.name.inflected())) FIELD = rule( caseless_pipeline([ 'ML', 'DL', 'CV', 'computer vision', 'NLP', 'bi', 'machine learning', 'deep learning', 'software', 'research', 'big data', 'python', 'c++', "scala", "java", 'ios', "android", 'devops', "backend", 'frontend'
# coding: utf-8 from __future__ import unicode_literals from yargy import ( rule, and_, or_ ) from yargy.interpretation import fact, attribute from yargy.predicates import ( eq, gte, lte, length_eq, dictionary, normalized, ) Date = fact( 'Date', ['year', 'month', 'day', attribute('current_era', True)] ) MONTHS = { 'январь': 1, 'февраль': 2, 'март': 3, 'апрель': 4, 'май': 5, 'июнь': 6, 'июль': 7, 'август': 8, 'сентябрь': 9, 'октябрь': 10, 'ноябрь': 11,
from yargy import ( Parser, rule, and_, or_ ) from yargy.pipelines import morph_pipeline from yargy.interpretation import fact, attribute from yargy.predicates import ( eq, gte, lte, length_eq, dictionary, normalized, gram ) COD = fact( 'Codex', ['n0', 'point', 'n1', 'subpoint', 'n2', 'part', 'n3', 'article', 'n4', 'par', 'n5', 'subsection', 'n6', 'section', 'n7', 'chapter', 'n8', 'type', 'codex'] ) COURT_ = fact( 'Court', ['smth', 'type', 'court', 'rf'] ) NUM = and_(gte(1), lte(10000)) NUMBERS = rule(NUM, rule(eq('.').optional(), NUM).repeatable().optional()) CODEX = rule( or_(rule(normalized('пункт')), rule('п', eq('.').optional())
or_, and_ ) from yargy.interpretation import fact, attribute from yargy.predicates import ( eq, lte, gte, gram, type, tag, length_eq, in_, in_caseless, dictionary, normalized, caseless, is_title ) from yargy.pipelines import morph_pipeline from yargy.tokenizer import QUOTES Address = fact( 'Address', [attribute('parts').repeatable()] ) Index = fact( 'Index', ['value'] ) Country = fact( 'Country', ['name'] ) Region = fact( 'Region', ['name', 'type'] ) Settlement = fact( 'Settlement',
from yargy import ( rule, or_ ) from yargy.interpretation import fact from yargy.predicates import gram from yargy.pipelines import morph_pipeline from .name import ( NAME, SIMPLE_NAME ) Person = fact( 'Person', ['position', 'name'] ) POSITION = morph_pipeline([ 'святой', 'патриарх', 'митрополит', 'царь', 'король', 'царица', 'император', 'императрица', 'принц', 'принцесса',
from ipymarkup import show_markup
from yargy import rule, and_, or_
from yargy.interpretation import fact, attribute
from yargy.predicates import dictionary, normalized

ProfileTypeFact = fact('ProfileTypeFact', ['profile'])

# Maps Russian profession lemmas to canonical English labels.
PROFILES = {
    'модель': 'Model',
    'визажист': 'Visagiste',
    'фотограф': 'Photographer',
    'стилист': 'Stylist',
}

# Lemmatize the matched token, then translate it via the PROFILES mapping.
NAME = dictionary(PROFILES).interpretation(
    ProfileTypeFact.profile.normalized().custom(PROFILES.__getitem__))

PROFILE_TYPE_PARSER = or_(rule(NAME), ).interpretation(ProfileTypeFact)
eq, length_eq, gram, tag, is_single, is_capitalized ) from yargy.predicates.bank import DictionaryPredicate as dictionary from yargy.relations import gnc_relation from natasha.data import load_dict from yargy.rule.transformators import RuleTransformator from yargy.rule.constructors import Rule from yargy.predicates.constructors import AndPredicate Name = fact( 'Name', ['first', 'middle', 'last', 'nick'] ) FIRST_DICT = set(load_dict('first.txt')) MAYBE_FIRST_DICT = set(load_dict('maybe_first.txt')) LAST_DICT = set(load_dict('last.txt')) ########## # # COMPONENTS # ###########
eq, length_eq, in_, in_caseless, gram, type, normalized, caseless, dictionary ) from natasha.utils import Record from natasha.dsl import ( Normalizable, money as dsl ) Money = fact( 'Money', ['integer', 'fraction', 'multiplier', 'currency', 'coins'] ) class Money(Money, Normalizable): @property def normalized(self): amount = self.integer if self.fraction: amount += self.fraction / 100 if self.multiplier: amount *= self.multiplier if self.coins: amount += self.coins / 100 return dsl.Money(amount, self.currency)
from yargy import (
    rule,
    and_, or_, not_
)
from yargy.interpretation import fact
from yargy.predicates import (
    caseless, normalized, eq, length_eq,
    gram, dictionary, is_single, is_title
)
from yargy.relations import gnc_relation

Location = fact(
    'Location',
    ['name'],
)

gnc = gnc_relation()

# Adjective + region-type noun agreeing via gnc (gender/number/case),
# stored inflected into Location.name.
REGION = rule(
    gram('ADJF').match(gnc),
    dictionary({
        'край',
        'район',
        'область',
        'губерния',
        'уезд',
    }).match(gnc),
).interpretation(Location.name.inflected())