Beispiel #1
0
def _get_variant_config(rules, normalization_rules):
    """ Convert the variant definition from the configuration into
        replacement sets.
    """
    immediate = defaultdict(list)
    chars = set()

    if rules:
        vset = set()
        rules = flatten_config_list(rules, 'variants')

        vmaker = _VariantMaker(normalization_rules)

        for section in rules:
            for rule in (section.get('words') or []):
                vset.update(vmaker.compute(rule))

        # Intermediate reorder by source. Also compute required character set.
        for variant in vset:
            if variant.source[-1] == ' ' and variant.replacement[-1] == ' ':
                replstr = variant.replacement[:-1]
            else:
                replstr = variant.replacement
            immediate[variant.source].append(replstr)
            chars.update(variant.source)

    return list(immediate.items()), ''.join(chars)
Beispiel #2
0
def test_flatten_config_list_nested():
    """ Lists nested over several levels are flattened into a single list
        that keeps the order of the items. Dictionaries (including empty
        ones) are expected to be kept as items.
    """
    nested = [
        34,
        [{'first': '1st', 'second': '2nd'}, {}],
        [[2, 3], [45, [56, 78], 66]],
        'end',
    ]
    expected = [
        34,
        {'first': '1st', 'second': '2nd'},
        {},
        2, 3, 45, 56, 78, 66,
        'end',
    ]

    assert flatten_config_list(nested) == expected
Beispiel #3
0
    def _cfg_to_icu_rules(rules, section):
        """ Build a single ICU rule string from the given section of the
            rules.

            Returns an empty string when the lookup of the section yields
            None. Otherwise the content of the section is flattened and
            the resulting rules are joined with ';', followed by a final
            ';'.

            NOTE(review): loading rules from a file named by a plain string
            section is not done in this function; presumably that is
            handled by the helpers called here or before - confirm.
        """
        content = _get_section(rules, section)

        if content is not None:
            rule_lines = flatten_config_list(content, section)
            # Every rule is terminated by a semicolon, including the last.
            return ';'.join(rule_lines) + ';'

        return ''
Beispiel #4
0
def test_flatten_config_list_allready_flat():
    """ A list without any nesting comes back with the same items in the
        same order.
    """
    flat = [1, 2, 456]

    assert flatten_config_list(flat) == [1, 2, 456]
Beispiel #5
0
def test_flatten_config_list_no_list(content):
    """ Flattening the given content must fail with a UsageError.

        NOTE(review): `content` is supplied from outside this function
        (fixture or parametrization not visible here); presumably it
        holds values that are not lists - confirm.
    """
    expect_usage_error = pytest.raises(UsageError)

    with expect_usage_error:
        flatten_config_list(content)
Beispiel #6
0
def test_flatten_config_list_empty(content):
    """ Flattening the given content must result in an empty list.

        NOTE(review): `content` is supplied from outside this function
        (fixture or parametrization not visible here); presumably it
        holds the different forms of empty input - confirm.
    """
    flattened = flatten_config_list(content)

    assert flattened == []