def from_opencorpora_int(open_tag):
    """
    Translate an OpenCorpora tag into a www.ruscorpora.com tag.

    Example::

        >>> print(from_opencorpora_int('NOUN,inan,masc sing,nomn'))
        S,inan,m=sg,nom

    Returns ``'INIT=abbr'`` when ``_is_initials`` accepts the grammemes,
    and ``'NONLEX'`` when the conversion produces an empty string.
    """
    # The space inside an OpenCorpora tag has to become "=" in the output.
    # To get there, the space is first expanded to ",|," so that "|" ends up
    # as a token of its own after splitting on commas; the conversion rules
    # are expected to turn that token into "=".
    tokens = open_tag.replace(' ', ',|,').split(',')

    if _is_initials(tokens):
        return 'INIT=abbr'

    converted = rule_engine.apply_rules(FROM_OPENCORPORA, tokens)
    joined = ','.join(converted)

    # Drop the commas surrounding "=" so both halves are glued by it ...
    joined = joined.replace(',=,', '=')
    # ... and discard a ",=" that has nothing following it.
    joined = joined.replace(',=', '')

    # An empty conversion result is reported as a non-lexical token.
    return joined or 'NONLEX'
def _transform_tag(tag, **kwargs):
    """
    Rewrite a comma-separated tag using the ``RULES2`` rule set.

    ``RULES2`` is parsed with ``rule_engine.parse``, the tag is split on
    commas, and the pieces are passed through ``rule_engine.apply_rules``.
    The resulting items are joined back together with commas.

    Any extra keyword arguments are forwarded unchanged to
    ``rule_engine.apply_rules``.
    """
    # NOTE(review): the rules are re-parsed on every call.
    parsed_rules = rule_engine.parse(RULES2)
    grammemes = tag.split(',')
    converted = rule_engine.apply_rules(parsed_rules, grammemes, **kwargs)
    return ','.join(converted)
def _transform_tag(tag, **kwargs):
    """
    Apply the ``RULES2`` rules to a comma-separated tag string.

    The tag is broken into its comma-separated parts, which are handed to
    ``rule_engine.apply_rules`` together with the rules obtained from
    ``rule_engine.parse(RULES2)``; ``**kwargs`` are passed straight through
    to ``apply_rules``. The output items are returned as one
    comma-separated string.
    """
    rule_set = rule_engine.parse(RULES2)
    parts = tag.split(',')
    transformed = rule_engine.apply_rules(rule_set, parts, **kwargs)
    separator = ','
    return separator.join(transformed)