def test_real():
    """Check that decimal strings are tagged as ``_REAL``.

    Both the dot-separated and the comma-separated inputs are expected
    to receive the ``_REAL`` tag at their respective positions.
    """
    tagger = NumericalTagger()
    expected_tags = [
        (0, _REAL),
        (1, _REAL),
    ]
    sample_words = ['123.1231', '1231,34555']
    assert_tags_equal(
        tagger=tagger,
        expected=expected_tags,
        words=sample_words,
    )
def test_integer():
    """Check that plain digit strings are tagged as ``_INTEGER``."""
    tagger = NumericalTagger()
    expected_tags = [
        (0, _INTEGER),
        (1, _INTEGER),
    ]
    sample_words = ['123', '51515']
    assert_tags_equal(
        tagger=tagger,
        expected=expected_tags,
        words=sample_words,
    )
def test_numerical_range():
    """Check tagging of digit groups joined by ``-`` or ``/``.

    The hyphen-joined inputs (``'16-18'``, ``'1942-1944'``) and the
    slash-joined input (``'2/3'``) are all expected to be tagged
    ``_INTEGER``.
    """
    tagger = NumericalTagger()
    expected_tags = [
        (0, _INTEGER),
        (1, _INTEGER),
        (2, _INTEGER),
    ]
    sample_words = ['16-18', '1942-1944', '2/3']
    assert_tags_equal(
        tagger=tagger,
        expected=expected_tags,
        words=sample_words,
    )
def test_indices():
    """Check tagging when an explicit ``indices`` list is supplied.

    Three words are given but only positions 0 and 2 are passed as
    ``indices``; the expected tags cover exactly those two positions.
    """
    tagger = NumericalTagger()
    expected_tags = [
        (0, _REAL),
        (2, _INTEGER),
    ]
    sample_words = ['1.1', '123', '567']
    selected_positions = [0, 2]
    assert_tags_equal(
        tagger=tagger,
        expected=expected_tags,
        words=sample_words,
        indices=selected_positions,
    )
def get_analyzer(
    tagger: str = 'linear',
    lemmatizer: str = 'pymorphy',
    cache_size: int = 15000,
):
    """Assemble an ``Analyzer`` from a tagger chain and a lemmatizer.

    Args:
        tagger: Name forwarded to ``get_tagger`` to pick the last tagger
            in the chain.
        lemmatizer: Name forwarded to ``get_lemmatizer``.
        cache_size: Cache size forwarded to both ``get_tagger`` and
            ``get_lemmatizer``.

    Returns:
        An ``Analyzer`` whose taggers are, in order, a
        ``PunctuationTagger``, a ``NumericalTagger`` and the tagger
        selected by name.
    """
    tagger_chain = [
        PunctuationTagger(),
        NumericalTagger(),
        get_tagger(name=tagger, cache_size=cache_size),
    ]
    chosen_lemmatizer = get_lemmatizer(name=lemmatizer, cache_size=cache_size)
    return Analyzer(taggers=tagger_chain, lemmatizer=chosen_lemmatizer)
def test_non_numerical():
    """Check that none of the sample words receives a tag.

    The inputs include an empty string, whitespace, punctuation, a
    Roman-numeral-like token, a plain word, and digits wrapped in or
    followed by other characters; the expected tag list is empty.
    """
    tagger = NumericalTagger()
    no_tags = []
    sample_words = ['', ' ', '!!!!', 'XV', 'unknown', '<<123>>', '23years']
    assert_tags_equal(
        tagger=tagger,
        expected=no_tags,
        words=sample_words,
    )
def create_tagger():
    """Return a newly constructed ``NumericalTagger``."""
    numerical_tagger = NumericalTagger()
    return numerical_tagger