import argparse
import collections
import itertools
import pathlib
import re  # required by pattern_re / norm_re below

import gramtool

from databot.commands import CommandsManager, Command
from botlib.compositions import compositions, strjoin

# Matches one brace-delimited chunk such as ``{name}``; the capture group
# includes the surrounding braces.
pattern_re = re.compile(r'({[^}]+})', re.UNICODE)

# Matches a run of one or more non-word characters.
norm_re = re.compile(r'\W+', re.UNICODE)

# Named string transformations. The gramtool-backed entries fall back to the
# unchanged input value when gramtool returns a falsy result.
FLAGS = {
    'title': str.title,
    'lemma': lambda v: (gramtool.get_lemma(v) or v),
    'genitive': lambda v: (gramtool.change_form(v, case='genitive') or v),
}


def norm(value):
    """Return *value* normalized for comparison.

    Every run of non-word characters is collapsed into a single space, then
    leading/trailing whitespace is stripped and the result is lower-cased.
    """
    return norm_re.sub(' ', value).strip().lower()


class IndexFinder(object):
    """Builds lookup structures (index, aliases, patterns) from a raw index."""

    def __init__(self, index, debug_info=None):
        """Derive ``index``, ``aliases`` and ``patterns`` from *index*.

        Args:
            index: Raw index data handed to ``create_index`` and
                ``create_aliases`` (its structure is not visible here).
            debug_info: Optional value forwarded unchanged to
                ``create_aliases``.
        """
        # NOTE(review): the raw index is stored first and then immediately
        # replaced by the processed one. ``create_index`` is not visible from
        # here, so the initial assignment is kept in case it reads
        # ``self.index`` -- confirm and drop it if it does not.
        self.index = index
        self.index = self.create_index(index)
        self.aliases, self.patterns = self.create_aliases(index, debug_info)
def test_lemma():
    """Check that several forms of 'Vilnius' all lemmatize to 'Vilnius'."""
    # Each comparison must be asserted; a bare ``==`` expression statement is
    # evaluated and discarded, so the test could never fail.
    assert gramtool.get_lemma('Vilnius') == 'Vilnius'
    assert gramtool.get_lemma('Vilnių') == 'Vilnius'
    assert gramtool.get_lemma('Vilniaus') == 'Vilnius'
    assert gramtool.get_lemma('Vilniui') == 'Vilnius'
    assert gramtool.get_lemma('Vilniuje') == 'Vilnius'
    assert gramtool.get_lemma('Vilniau') == 'Vilnius'
def assertLemma(self, word, lemma):
    """Assert that ``gramtool.get_lemma(word)`` equals *lemma*."""
    actual = gramtool.get_lemma(word)
    self.assertEqual(actual, lemma)
def test_phrase():
    """Check that a two-word phrase is lemmatized as a whole."""
    # Asserted explicitly; the bare comparison had no effect.
    assert gramtool.get_lemma('Šiaulių banko') == 'Šiaulių bankas'
def test_suo():
    """Check that 'šunį' lemmatizes to 'šuo'."""
    # Asserted explicitly; the bare comparison had no effect.
    assert gramtool.get_lemma('šunį') == 'šuo'