Example #1
0
def test_real():
    """Decimal literals written with '.' or ',' are expected to be tagged _REAL."""
    samples = ['123.1231', '1231,34555']
    # Every sample position is expected to carry the _REAL tag.
    wanted = [(position, _REAL) for position, _ in enumerate(samples)]
    assert_tags_equal(tagger=NumericalTagger(), expected=wanted, words=samples)
Example #2
0
def test_integer():
    """Plain digit strings are expected to be tagged _INTEGER."""
    numerical_tagger = NumericalTagger()
    wanted = [(0, _INTEGER), (1, _INTEGER)]
    samples = ['123', '51515']
    assert_tags_equal(
        tagger=numerical_tagger,
        expected=wanted,
        words=samples,
    )
Example #3
0
def test_numerical_range():
    """Hyphenated ranges and the slash form '2/3' are expected to be tagged _INTEGER."""
    samples = ['16-18', '1942-1944', '2/3']
    # All three positions (0, 1, 2) are expected to carry the _INTEGER tag.
    wanted = [(position, _INTEGER) for position in range(len(samples))]
    assert_tags_equal(tagger=NumericalTagger(), expected=wanted, words=samples)
Example #4
0
def test_indices():
    """Check tagging when an explicit ``indices`` list is supplied.

    Only positions 0 and 2 are passed as ``indices``; the expected result
    lists tags for exactly those positions ('1.1' as _REAL, '567' as
    _INTEGER) and nothing for position 1.
    """
    numerical_tagger = NumericalTagger()
    selected = [0, 2]
    # Pair each selected position with the tag expected at that position.
    wanted = list(zip(selected, (_REAL, _INTEGER)))
    assert_tags_equal(
        tagger=numerical_tagger,
        expected=wanted,
        words=['1.1', '123', '567'],
        indices=selected,
    )
Example #5
0
def get_analyzer(
    tagger: str = 'linear',
    lemmatizer: str = 'pymorphy',
    cache_size: int = 15000,
):
    """Assemble an ``Analyzer`` from named tagger and lemmatizer components.

    Args:
        tagger: Name passed to ``get_tagger`` to select the final tagger.
        lemmatizer: Name passed to ``get_lemmatizer``.
        cache_size: Forwarded unchanged to both ``get_tagger`` and
            ``get_lemmatizer``.

    Returns:
        An ``Analyzer`` whose tagger list is, in order, a
        ``PunctuationTagger``, a ``NumericalTagger`` and the tagger
        selected by name, together with the selected lemmatizer.
    """
    # Taggers are built before the lemmatizer, in list order.
    tagger_chain = [
        PunctuationTagger(),
        NumericalTagger(),
        get_tagger(name=tagger, cache_size=cache_size),
    ]
    chosen_lemmatizer = get_lemmatizer(name=lemmatizer, cache_size=cache_size)
    return Analyzer(taggers=tagger_chain, lemmatizer=chosen_lemmatizer)
Example #6
0
def test_non_numerical():
    """Empty, blank, punctuation, alphabetic and mixed tokens are expected to get no tags."""
    samples = ['', '  ', '!!!!', 'XV', 'unknown', '<<123>>', '23years']
    no_tags = []
    assert_tags_equal(tagger=NumericalTagger(), expected=no_tags, words=samples)
Example #7
0
def create_tagger():
    """Return a new ``NumericalTagger`` built with no arguments."""
    numerical_tagger = NumericalTagger()
    return numerical_tagger