def test_parsing():
    """Check that ``rule_engine.parse(RULES1)`` yields the expected rules.

    The parsed result is compared against a list of three
    ``(list, list)`` tuples, in this exact order.
    """
    first_rule = (['NOUN'], ['_S'])
    second_rule = (['COMP', 'Cmp2'], ['_A', '_comp2'])
    third_rule = (['|'], ['_='])
    expected = [first_rule, second_rule, third_rule]
    assert rule_engine.parse(RULES1) == expected
def _transform_tag(tag, **kwargs):
    """Apply the rules parsed from ``RULES2`` to a comma-separated tag.

    ``tag`` is split on commas, the pieces are passed to
    ``rule_engine.apply_rules`` together with the parsed rules and any
    extra keyword arguments, and the result is joined back with commas.
    """
    parsed_rules = rule_engine.parse(RULES2)
    converted = rule_engine.apply_rules(
        parsed_rules, tag.split(','), **kwargs
    )
    return ','.join(converted)
Example #3
0
def test_parsing():
    """Verify the output of ``rule_engine.parse`` for ``RULES1``.

    The result must equal a list of three ``(list, list)`` tuples.
    """
    expected = [
        (['NOUN'], ['_S']),
        (['COMP', 'Cmp2'], ['_A', '_comp2']),
        (['|'], ['_=']),
    ]
    actual = rule_engine.parse(RULES1)
    assert actual == expected
Example #4
0
# Rule set built by passing the rule text below to ``rule_engine.parse``
# when this statement executes.
#
# Each rule line in the text has the form
#     SRC[,SRC...] => DST[,DST...]
# i.e. comma-separated items on both sides of ``=>``.  Lines starting
# with ``#`` inside the string are part of the string literal (not Python
# comments); presumably ``rule_engine.parse`` skips them -- TODO confirm.
#
# NOTE(review): rules with several left-hand items are listed before the
# shorter rules that share a prefix with them (e.g. ``COMP,Cmp2`` before
# ``COMP``), which suggests rule order matters when the rules are applied
# -- confirm against ``rule_engine.apply_rules``.
# NOTE(review): judging by the constant's name and the "RNC" remark in the
# rule text, this converts OpenCorpora-style tags to an RNC-style tagset
# -- confirm.
FROM_OPENCORPORA = rule_engine.parse("""
# part of speech
NOUN => S

ADJF,Apro,Subx,Anph => A-PRO
ADJF,Apro,Subx => S-PRO
ADJF,Apro => A-PRO
ADJF,Anum => ANUM
ADJF => A,plen
ADJS => A,brev

COMP,Cmp2 => A,comp2
COMP,Supr => A,supr
COMP => A,comp

# INFN => V,inf,med
# INFN => V,inf,act
INFN => V,inf

PRTF,inan => V,partcp,plen
PRTF,anim => V,partcp,plen
PRTF => V,partcp,plen

PRTS,intr => V,intr,partcp,plen
PRTS => V,tran,partcp,brev

GRND => V,ger

VERB,impr,incl,sing => V,1p,imper,pl
VERB,impr,incl,plur => V,1p,imper2,pl
VERB,impr,excl => V,2p,imper
VERB,impr,incl => V,1p,imper

VERB => V

NUMR,inan => NUM
NUMR => NUM

PRED,pres => PRAEDIC
PRED => PRAEDIC

ADVB,Ques => ADV-PRO
ADVB => ADV

NPRO => S-PRO
PREP => PR
PRCL => PART
INTJ => INTJ

CONJ,Prnt => PARENTH
CONJ => CONJ

# animacy
anim => anim
inan => inan

# gender
femn,Ms-f => m-f
masc,Ms-f => m-f
neut,Ms-f => m-f
Ms-f => m-f

masc => m
femn => f
neut => n

# number
sing => sg
plur => pl
Fixd => 0

# case
nomn => nom
gent => gen
datv => dat
accs => acc
ablt => ins
loct => loc
voct => voc
gen1 => gen
gen2 => gen2
acc2 => acc2
loc1 => loc
loc2 => loc2

# Abbr doesn't mean the same, but convert it nevertheless
Abbr => abbr

Name => persn
Surn => famn
Patr => patrn
Dist => distort
Arch => anom

# ??
Litr => anom
# Infr => anom
# V-oy => anom
V-be => distort

Supr => supr

# --
# ??
Coun => adnum

# aspect
perf => pf
impf => ipf

# transitivity
tran => tran
intr => intr

# voice
actv => act
pssv => pass

# person
1per => 1p
2per => 2p
3per => 3p

# tense
pres => praes
past => praet
futr => fut

# mood
indc => indic
impr => imper

# extra grammemes
LATN => NONLEX
ROMN => ANUM,ciph

# non-standard & pymorphy2-specific
# (RNC doesn't tag punctuation marks)
PNCT => PNCT
NUMB => NUM,ciph

# hack to preserve whitespace info:
| => =
""")
Example #5
0
def _transform_tag(tag, **kwargs):
    """Run a comma-separated tag through the rules parsed from ``RULES2``.

    The tag is split on commas, handed to ``rule_engine.apply_rules``
    (extra keyword arguments are forwarded unchanged), and the resulting
    items are joined back into a single comma-separated string.
    """
    sep = ','
    parsed_rules = rule_engine.parse(RULES2)
    new_parts = rule_engine.apply_rules(parsed_rules, tag.split(sep), **kwargs)
    return sep.join(new_parts)