Esempio n. 1
0
def from_opencorpora_int(open_tag):
    """
    Convert OpenCorpora tag to www.ruscorpora.com tag::

        >>> print(from_opencorpora_int('NOUN,inan,masc sing,nomn'))
        S,inan,m=sg,nom

    The tag is split into grammemes, passed through the
    ``FROM_OPENCORPORA`` rules and joined back into a single string.
    ``'INIT=abbr'`` is returned when ``_is_initials`` accepts the
    grammemes, and ``'NONLEX'`` when the conversion yields an empty
    string.
    """

    # The space separating the two halves of an OpenCorpora tag has to end
    # up as "=" in the output.  To get there, the space is first turned
    # into a standalone "|" grammeme (",|,"); the rules are expected to map
    # that "|" token to "="; finally the commas surrounding "=" are
    # stripped from the joined string.
    tokens = open_tag.replace(' ', ',|,').split(',')

    if _is_initials(tokens):
        return 'INIT=abbr'

    converted = rule_engine.apply_rules(FROM_OPENCORPORA, tokens)

    joined = ','.join(converted)
    # "a,=,b" -> "a=b"
    joined = joined.replace(',=,', '=')
    # Any ",=" still left (e.g. a trailing separator with nothing after
    # it) is dropped entirely.
    joined = joined.replace(',=', '')

    # An empty conversion result is reported as NONLEX.
    return joined or 'NONLEX'
def _transform_tag(tag, **kwargs):
    """Apply the ``RULES2`` rule set to a comma-separated tag.

    ``tag`` is split on commas, the pieces are run through
    ``rule_engine.apply_rules`` together with the rules parsed from
    ``RULES2`` (any keyword arguments are forwarded unchanged), and the
    resulting pieces are joined with commas again.
    """
    parsed_rules = rule_engine.parse(RULES2)
    grammemes = tag.split(',')
    transformed = rule_engine.apply_rules(parsed_rules, grammemes, **kwargs)
    return ','.join(transformed)
Esempio n. 3
0
def _transform_tag(tag, **kwargs):
    """Run a comma-separated tag through the rules parsed from ``RULES2``.

    Extra keyword arguments are passed straight to
    ``rule_engine.apply_rules``.  Returns the transformed pieces joined
    with commas.
    """
    rule_set = rule_engine.parse(RULES2)
    pieces = tag.split(',')
    # Re-assemble whatever the rule engine produced into a single tag string.
    return ','.join(
        rule_engine.apply_rules(rule_set, pieces, **kwargs)
    )