Ejemplo n.º 1
0
def req_deverbal(require_deverbal_noun: str = "?"):
    """Build the predicate selecting deverbal nouns and/or verbs.

    Args:
        require_deverbal_noun: ``"1"`` to accept only nouns listed in
            ``deverbal_nouns``, ``"0"`` to accept only ``VERB``/``INFN``
            tokens, ``"?"`` (default) to accept either.

    Returns:
        The predicate combined via ``y.and_`` / ``y.or_``.

    Raises:
        ValueError: if the flag is none of ``"1"``, ``"0"``, ``"?"``.
    """

    # Builders are kept lazy so nothing is constructed for an invalid flag.
    def deverbal_noun():
        return y.and_(yp.gram("NOUN"), yp.in_caseless(deverbal_nouns))

    def verb_forms():
        return yp.gram("VERB"), yp.gram("INFN")

    if require_deverbal_noun == "1":  # strictly deverbal noun
        return deverbal_noun()
    if require_deverbal_noun == "0":  # strictly regular verb
        return y.or_(*verb_forms())
    if require_deverbal_noun == "?":  # anything
        return y.or_(deverbal_noun(), *verb_forms())
    raise ValueError("Incorrect deverbal status")
Ejemplo n.º 2
0
)


# Literal punctuation predicates.
DASH = eq('-')
DOT = eq('.')

# Token-type, title-case and grammeme predicates.
# NOTE(review): ``type`` here is presumably the yargy predicate, not the builtin.
INT = type('INT')
TITLE = is_title()
ADJF = gram('ADJF')
NOUN = gram('NOUN')

# An integer, an optional dash, then one of the listed endings
# (e.g. "1-я", "2ой").
ANUM = rule(
    INT,
    DASH.optional(),
    in_caseless({'я', 'й', 'е', 'ое', 'ая', 'ий', 'ой'}),
)


#########
#
#  STRANA
#
##########


# TODO
COUNTRY_VALUE = dictionary({
    'россия',
    'украина'
Ejemplo n.º 3
0
    'сутки',
    'смена',
    'm',
    'month',
    'м'
    'месяц',
    'мес',
    'y',
    'year',
    'г',
    'год',
}

# Dictionary-based matcher over the PERIODS word set.
PERIOD = dictionary(PERIODS)

# Separator between an amount and its period: a slash or one of the
# listed prepositions.
PER = or_(
    eq('/'),
    in_caseless({'per', 'за', 'в'}),
)

# Separator followed by a period; the period is stored in Money.period.
RATE = rule(
    PER,
    PERIOD.interpretation(Money.period),
)

MONEY = rule(
    or_(
        in_({
            '•', ':', '`', '~', '*', '-', '–', '—', ';', '.', '(', 'от', 'from'
        }),
        type('RU'),
        type('LATIN'),
    ).optional(),
    CURRENCY.interpretation(Money.currency).optional(),
    eq('+').optional(),
    eq('*').optional(),
    INTEGER.interpretation(Money.integer_min.custom(normalize_integer)),
# Any of the magnitude words; the matched one is stored in Money.multiplier.
MULTIPLIER = or_(MILLIARD, MILLION, THOUSAND).interpretation(Money.multiplier)

########
#
#  NUMERAL
#
#######

# Numeral tokens: anything tagged NUMR plus two words listed explicitly
# (see the linked OpenCorpora issue for why they are added by hand).
NUMR = or_(
    gram('NUMR'),
    # https://github.com/OpenCorpora/opencorpora/issues/818
    dictionary({'ноль', 'один'}),
)
# TODO: fractional parts could be dropped to reduce false positives; they
#  never occur in real salary ranges anyway.
#  Although one vacancy at Tampere University of Technology really did have fractions.
# Fraction words: "wholes", "hundredths", "tenths".
MODIFIER = in_caseless({'целых', 'сотых', 'десятых'})

# One element of a spelled-out amount: a number-like token, a magnitude
# word, or a currency marker.
PART = or_(rule(or_(INT, NUMR, MODIFIER)), MILLIARD, MILLION, THOUSAND,
           CURRENCY, COINS_CURRENCY)
# TODO: this could be adjusted here so that phone numbers are not parsed.
# NOTE(review): a string is passed, so membership is tested against the
# text '()//' (the second '/' is redundant) — confirm that is intended.
BOUND = in_('()//')

# One or more PART elements enclosed between two BOUND tokens.
NUMERAL = rule(BOUND, PART.repeatable(), BOUND)

#######
#
#   AMOUNT
#
########

Ejemplo n.º 5
0
# Closing quote: the right guillemet or any character from ``__quotes``.
RIGHT_QUOTE = in_("»" + __quotes)

# A station mention: optional "station"/"metro" words, an optionally quoted
# title stored in Station.name, and an optional (dash +) list of numerals
# stored in Station.num.
STATION = rule(
    STATION_WORD.optional(),
    METRO_WORD.optional(),
    LEFT_QUOTE.optional(),
    (
        STATION_TITLE
        .interpretation(meaning.custom(lambda title: title.value))
        .interpretation(Station.name)
    ),
    rule(
        eq('-').optional(),
        LIST_OF_NUMERALS.interpretation(Station.num),
    ).optional(),
    RIGHT_QUOTE.optional(),
).interpretation(Station)

# One station followed by any number of "<separator> station" pairs.
# All stations are collected into Array.element and the rule's result is
# that collected value.
# NOTE(review): the separator predicate is given the string 'и,-', so
# membership is tested against that text — presumably "и", "," or "-".
LIST_OF_STATIONS = (
    rule(
        STATION.means(Array.element),
        rule(
            in_caseless('и,-'),
            STATION.means(Array.element),
        ).repeatable().optional(),
    )
    .interpretation(Array)
    .interpretation(meaning.custom(lambda collected: collected.element))
)

# "с/со <station> на <station>": both stations are collected into
# Array.element and the rule's result is that collected value.
# TODO: accept LIST_OF_STATIONS as the destination, e.g.
#   "со спасской на садовую и сенную"
#   ("from Spasskaya to Sadovaya and Sennaya").
FROM_STATION_TO_STATION = (
    rule(
        or_(caseless('с'), caseless('со')),
        STATION.means(Array.element),
        caseless('на'),
        STATION.means(Array.element),
    )
    .interpretation(Array)
    .interpretation(meaning.custom(lambda collected: collected.element))
)
Ejemplo n.º 6
0
class Building(Building):
    """Subclass that rebinds the ``Building`` name and adds a ``value`` accessor."""

    # ``value`` on the right-hand side is resolved from the enclosing module;
    # presumably it is a helper returning a property that reads the named
    # attribute ('buildingName') — confirm against its definition.
    value = value('buildingName')


class AddrPart(AddrPart):
    """Subclass that rebinds ``AddrPart`` and adds a natasha object view."""

    @property
    def obj(self):
        """Return ``natasha.obj.AddrPart`` built from the wrapped value."""
        # Imported lazily, at property access time.
        from natasha import obj as natasha_obj

        wrapped = self.value
        return natasha_obj.AddrPart(wrapped.value, wrapped.type)


# NOTE(review): ``type`` here is presumably the yargy predicate, not the builtin.
INT = type('INT')

# A single letter from the listed alphabet subset (not every Cyrillic
# letter is present — NOTE(review): confirm the omissions are deliberate).
LETTER = in_caseless(set('абвгдежзиклмнопрстуфхшщэюя'))

# Type words; each match is stored in the corresponding fact attribute.
TYPE_CITY = dictionary(
    {'город'}
).interpretation(City.typeCity)

STRUCTURE_TYPE = dictionary(
    {'строение', 'ст'}
).interpretation(Structure.structureType)

TYPE_APPART = dictionary(
    {'квартира'}
).interpretation(Appart.typeAppart)

BUILDING_TYPE = dictionary(
    {'дом', 'шоссе', 'проспект', 'улица'}
).interpretation(Building.buildingType)

# A number optionally followed by one letter, e.g. "12" or "12а".
VALUE = rule(
    INT,
    LETTER.optional(),
)

# Separator characters: slash, backslash, dash (tested against the string).
SEP = in_(r'/\-')
Ejemplo n.º 7
0
from yargy.tokenizer import QUOTES

# Token-type, grammeme and title-case predicates.
# NOTE(review): ``type`` here is presumably the yargy predicate, not the builtin.
INT = type('INT')
ADJF = gram('ADJF')
NOUN = gram('NOUN')
TITLE = is_title()

# Literal punctuation predicates.
DOT = eq('.')
DASH = eq('-')
SLASH = eq('/')
QUOTE = in_(QUOTES)

# An integer, an optional dash, then one of the listed endings
# (e.g. "1-я", "2ой").
ANUM = rule(
    INT,
    DASH.optional(),
    in_caseless({'я', 'й', 'е', 'ое', 'ая', 'ий', 'ой'}),
)

def value(key):
    """Return a read-only property that forwards to the attribute named *key*."""

    # No docstring on the getter, so the resulting property keeps an empty __doc__.
    def read_attribute(instance):
        return getattr(instance, key)

    return property(read_attribute)

# Fact with a single attribute, ``name``.
OnlyNameStreet = fact('OnlyNameStreet', ['name'])

class OnlyNameStreet(OnlyNameStreet):
Ejemplo n.º 8
0
# Any of the magnitude words; the matched one is stored in Money.multiplier.
MULTIPLIER = or_(
    MILLIARD,
    MILLION,
    THOUSAND,
).interpretation(Money.multiplier)

# ---------------------------------------------------------------------
# NUMERAL
# ---------------------------------------------------------------------

# Numeral tokens: anything tagged NUMR plus two words listed explicitly,
# see https://github.com/OpenCorpora/opencorpora/issues/818
NUMR = or_(
    gram('NUMR'),
    dictionary({
        'ноль',
        'один',
    }),
)

# Fraction words: "wholes", "hundredths", "tenths".
MODIFIER = in_caseless({
    'целых',
    'сотых',
    'десятых',
})

# One element of a spelled-out amount.
PART = or_(
    rule(
        or_(
            INT,
            NUMR,
            MODIFIER,
        )
    ),
    MILLIARD,
    MILLION,
    THOUSAND,
    CURRENCY,
    COINS_CURRENCY,
)

# Delimiter token; membership is tested against the string itself.
BOUND = in_('()//')

# One or more PART elements enclosed between two BOUND tokens.
NUMERAL = rule(
    BOUND,
    PART.repeatable(),
    BOUND,
)

#######
#
#   AMOUNT
#
########

Ejemplo n.º 9
0
        return obj.AddrPart(part.value, part.type)


# Token-type and title-case predicates.
# NOTE(review): ``type`` here is presumably the yargy predicate, not the builtin.
INT = type('INT')
TITLE = is_title()

# Grammeme predicates.
NOUN = gram('NOUN')
ADJF = gram('ADJF')

# Literal punctuation predicates.
DOT = eq('.')
DASH = eq('-')

# An integer, an optional dash, then one of the listed endings
# (e.g. "1-я", "2ой").
ANUM = rule(
    INT,
    DASH.optional(),
    in_caseless({
        'я',
        'й',
        'е',
        'ое',
        'ая',
        'ий',
        'ой',
    }),
)


#########
#
#  STRANA
#
##########


# TODO
COUNTRY_VALUE = dictionary({
    'россия',
    'украина'
Ejemplo n.º 10
0
from yargy import or_, rule
from yargy.interpretation import attribute, fact
import yargy.interpretation as meaning
from yargy.predicates import caseless, gram, in_caseless, normalized

from .station import FROM_STATION_TO_STATION, LIST_OF_STATIONS, STATION

# Fact describing a transfer; ``to`` holds the destination station(s).
# NOTE(review): the default is a list literal — confirm yargy does not
# share that object between fact instances.
Transfer = fact(
    'Transfer',
    [attribute('to', default=[])],
)

# "[adjective] переход [destination]". The destination is either a
# "from station to station" phrase or an optionally prefixed station list;
# either way it is stored in Transfer.to.
TRANSFER = rule(
    # Optional adjective, e.g. "пешеходный" (pedestrian).
    gram('ADJF').optional(),
    normalized('переход'),
    or_(
        FROM_STATION_TO_STATION.interpretation(Transfer.to),
        rule(
            or_(
                caseless('на'),
                caseless('между'),
                caseless('с'),
            ).optional(),
            LIST_OF_STATIONS.interpretation(Transfer.to),
        ),
    ).optional(),
).interpretation(Transfer)

# Fact pairing a station with the destinations of an attached transfer.
StationAndTransfer = fact(
    'StationAndTransfer',
    ['station', 'transfer'],
)

# A station, optionally followed by a separator and a transfer phrase.
# Only the transfer's ``to`` value is kept in StationAndTransfer.transfer.
# NOTE(review): the separator predicate is given the string 'и,', so
# membership is tested against that text — presumably "и" or ",".
STATION_AND_TRANSFER = rule(
    STATION.interpretation(StationAndTransfer.station),
    rule(
        in_caseless('и,'),
        (
            TRANSFER
            .interpretation(meaning.custom(lambda transfer: transfer.to))
            .interpretation(StationAndTransfer.transfer)
        ),
    ).optional(),
).interpretation(StationAndTransfer)
Ejemplo n.º 11
0
#
#######


# Numeral tokens: anything tagged NUMR plus two words listed explicitly,
# see https://github.com/OpenCorpora/opencorpora/issues/818
NUMR = or_(gram('NUMR'), dictionary({'ноль', 'один'}))

# Fraction words: "wholes", "hundredths", "tenths".
MODIFIER = in_caseless({'целых', 'сотых', 'десятых'})

PART = or_(
    rule(
        or_(
            INT,
            NUMR,
            MODIFIER
        )
    ),
    MILLIARD,
    MILLION,
    THOUSAND,
    CURRENCY,
Ejemplo n.º 12
0

# Fact accumulating a repeatable list of numbers in ``values``.
Nums = fact(
    'Nums',
    [attribute('values').repeatable()],
)

# Spelled-out numbers "one" .. "nine" mapped to 1..9, in ascending order.
__literals = {
    word: number
    for number, word in enumerate(
        (
            'один',
            'два',
            'три',
            'четыре',
            'пять',
            'шесть',
            'семь',
            'восемь',
            'девять',
        ),
        start=1,
    )
}

# A spelled-out number; its normalized form is looked up in ``__literals``.
LITERAL = dictionary(__literals).means(
    interp.normalized().custom(__literals.get)
)

# Separators between listed numbers (tested against the string itself).
CONJ_NUMS = in_caseless('-и,')

# A single digit "1".."9", interpreted through ``int``.
NUMERAL = or_(
    *(eq(str(number)) for number in __literals.values())
).means(interp.custom(int))

# Digits joined by separators, e.g. "вестибюль 1 и 2" ("vestibule 1 and 2").
# The result is the sorted list of distinct values.
LIST_OF_NUMERALS = (
    connect(NUMERAL.means(Nums.values), CONJ_NUMS)
    .means(Nums)
    .means(meaning.custom(lambda nums: sorted(set(nums.values))))
)

# Spelled-out numbers joined by separators; original example:
# "первый и второй вестибюли" ("first and second vestibules").
# The result is the sorted list of distinct values.
LIST_OF_LITERALS = (
    connect(LITERAL.means(Nums.values), CONJ_NUMS)
    .means(Nums)
    .means(meaning.custom(lambda nums: sorted(set(nums.values))))
)
Ejemplo n.º 13
0
# Grammeme predicates for prepositions and conjunctions.
PREP = gram('PREP')
CONJ = gram('CONJ')

# Catch-all pattern: optional nouns, an optional adjective, an optional
# integer. Original author's note: added for the "критикалов" and
# "береговой" cases.
SIMPLE_WILDCARD = rule(
    NOUN.repeatable().optional(),
    ADJF.optional(),
    INT.optional(),
)

TITLE = is_title()

# An integer, an optional dash, then one of the listed endings
# (e.g. "1-я", "2ой").
ANUM = rule(
    INT,
    DASH.optional(),
    in_caseless({'я', 'й', 'е', 'ое', 'ая', 'ий', 'ой'}),
)

# # # # # # # # # Адрес # # # # # # # # # # #

# Fact holding the parts of an address. Attribute names are runtime
# strings and are kept exactly as spelled.
Address = fact(
    'Address',
    [
        'City',
        'Street',
        'House',
        'Building',
        'Appartment',
    ],
)

# # # # # # # # # Города # # # # # # # # # # #

# Сначала беру готовые списки названий городов + добавляю пару своих