Example #1
0
def test__calc_flex_warns_if_flex_longer_than_query(
    nlp: Language,
    searcher: FuzzySearcher,
) -> None:
    """It emits a FlexWarning when flex is larger than the query length."""
    oversized_flex = 5
    short_query = nlp.make_doc("Test query.")
    # Only the _calc_flex call sits inside the warning check.
    with pytest.warns(FlexWarning):
        searcher._calc_flex(short_query, oversized_flex)
Example #2
0
def test__calc_flex_raises_error_if_non_valid_value(
    nlp: Language,
    searcher: FuzzySearcher,
) -> None:
    """It raises a TypeError when flex is neither an int nor "default"."""
    invalid_flex = None
    parsed_query = nlp("Test query.")
    # The query is built outside the context manager so only _calc_flex
    # can satisfy the expected TypeError.
    with pytest.raises(TypeError):
        searcher._calc_flex(parsed_query, invalid_flex)
Example #3
0
def test_match_raises_error_if_query_not_Doc(
    searcher: FuzzySearcher, nlp: Language
) -> None:
    """It raises a TypeError when the query is a plain string, not a Doc."""
    target_doc = nlp("This is a doc")
    string_query = "Not a doc"
    with pytest.raises(TypeError):
        searcher.match(target_doc, string_query)
Example #4
0
def test_match_raises_error_when_doc_not_Doc(
    searcher: FuzzySearcher, nlp: Language
) -> None:
    """It raises a TypeError when the doc argument is a plain string."""
    string_doc = "G-rant Anderson lives in TN."
    parsed_query = nlp("xenomorph")
    with pytest.raises(TypeError):
        searcher.match(string_doc, parsed_query)
Example #5
0
def test_best_match_raises_error_when_query_not_Doc(
    searcher: FuzzySearcher,
    nlp: Language,
) -> None:
    """It raises a TypeError when best_match gets a string query."""
    parsed_doc = nlp("G-rant Anderson lives in TN.")
    string_query = "xenomorph"
    with pytest.raises(TypeError):
        searcher.best_match(parsed_doc, string_query)
Example #6
0
def test_multi_match_with_n_less_than_actual_matches(
    searcher: FuzzySearcher,
    nlp: Language,
) -> None:
    """It returns only the n best matches, in the expected order, when n=2."""
    haystack = nlp("cow, cow, cow, cow")
    needle = nlp("cow")
    found = searcher.multi_match(haystack, needle, n=2)
    # Each tuple is compared verbatim against the multi_match output.
    assert found == [(0, 1, 100), (2, 3, 100)]
Example #7
0
def test_multi_match_return_empty_list_when_no_matches_after_adjust(
    searcher: FuzzySearcher,
    nlp: Language,
) -> None:
    """It yields an empty list when nothing meets the min_r2 threshold."""
    haystack = nlp("G-rant Anderson lives in TN.")
    needle = nlp("Garth, Anderdella")
    found = searcher.multi_match(haystack, needle)
    assert found == []
Example #8
0
def test_best_match_return_none_when_no_matches(
    searcher: FuzzySearcher,
    nlp: Language,
) -> None:
    """It yields None when no fuzzy match meets the threshold."""
    haystack = nlp("G-rant Anderson lives in TN.")
    needle = nlp("xenomorph")
    best = searcher.best_match(haystack, needle)
    assert best is None
Example #9
0
def test__scan_doc_with_no_matches(
    searcher: FuzzySearcher, nlp: Language, scan_example: Doc
) -> None:
    """It yields None when no span reaches min_r1."""
    needle = nlp.make_doc("xenomorph")
    scan_result = searcher._scan_doc(
        scan_example,
        needle,
        fuzzy_func="simple",
        min_r1=30,
        ignore_case=True,
    )
    assert scan_result is None
Example #10
0
def test__scan_doc_returns_all_matches_with_no_min_r1(
    searcher: FuzzySearcher,
    nlp: Language,
    scan_example: Doc,
) -> None:
    """It keeps every scanned position in the result when min_r1 is 0."""
    needle = nlp.make_doc("Shirley")
    scan_result = searcher._scan_doc(
        scan_example,
        needle,
        fuzzy_func="simple",
        min_r1=0,
        ignore_case=True,
    )
    # Mapping of position -> ratio expected for the scan_example fixture.
    assert scan_result == {0: 0, 1: 0, 2: 18, 3: 22, 4: 86}
Example #11
0
def test_multi_match_finds_best_matches(
    searcher: FuzzySearcher, nlp: Language
) -> None:
    """It returns every match meeting the threshold, in the expected order."""
    haystack = nlp("chiken from Popeyes is better than chken from Chick-fil-A")
    needle = nlp("chicken")
    found = searcher.multi_match(haystack, needle, ignore_case=False)
    assert found == [(0, 1, 92), (6, 7, 83)]
Example #12
0
def test__scan_doc_returns_matches_over_min_r1(
    searcher: FuzzySearcher,
    nlp: Language,
    scan_example: Doc,
) -> None:
    """It keeps only the positions whose ratio is at least min_r1."""
    needle = nlp.make_doc("Shirley")
    scan_result = searcher._scan_doc(
        scan_example,
        needle,
        fuzzy_func="simple",
        min_r1=30,
        ignore_case=True,
    )
    assert scan_result == {4: 86}
Example #13
0
def test__adjust_left_right_positions_finds_better_match2(
    searcher: FuzzySearcher, nlp: Language, adjust_example: Doc
) -> None:
    """It adjusts the initial match at pos 7 and returns the span (8, 11, 89)."""
    needle = nlp.make_doc("Kareem Abdul-Jabbar")
    initial_ratios = {0: 33, 1: 39, 2: 41, 3: 33, 5: 37, 6: 59, 7: 84}
    adjusted = searcher._adjust_left_right_positions(
        adjust_example,
        needle,
        initial_ratios,
        pos=7,
        fuzzy_func="simple",
        min_r2=70,
        ignore_case=True,
        flex=4,
    )
    assert adjusted == (8, 11, 89)
Example #14
0
def test__adjust_left_right_positions_finds_better_match(
    searcher: FuzzySearcher, nlp: Language
) -> None:
    """It adjusts the initial match at pos 3 and returns the span (3, 5, 97)."""
    haystack = nlp.make_doc("Patient was prescribed Zithromax tablets.")
    needle = nlp.make_doc("zithromax tablet")
    initial_ratios = {0: 30, 2: 50, 3: 97, 4: 50}
    adjusted = searcher._adjust_left_right_positions(
        haystack,
        needle,
        initial_ratios,
        pos=3,
        fuzzy_func="simple",
        min_r2=70,
        ignore_case=True,
        flex=2,
    )
    assert adjusted == (3, 5, 97)
Example #15
0
def test__adjust_left_right_positions_with_no_flex(
    searcher: FuzzySearcher, nlp: Language
) -> None:
    """It returns the initial match when the flex value is 0."""
    haystack = nlp.make_doc("Patient was prescribed Zithroma tablets.")
    needle = nlp.make_doc("zithromax")
    initial_ratios = {3: 94}
    adjusted = searcher._adjust_left_right_positions(
        haystack,
        needle,
        initial_ratios,
        pos=3,
        fuzzy_func="simple",
        min_r2=70,
        ignore_case=True,
        flex=0,
    )
    assert adjusted == (3, 4, 94)
Example #16
0
def test__indice_maxes_returns_all_keys_if_n_is_0(
    searcher: FuzzySearcher, initial_matches: Dict[int, int]
) -> None:
    """It returns every key of the input mapping when n is 0."""
    all_keys = searcher._indice_maxes(initial_matches, 0)
    assert all_keys == [1, 4, 5, 8, 9]
Example #17
0
def test__indice_maxes_returns_n_keys_with_max_values(
    searcher: FuzzySearcher, initial_matches: Dict[int, int]
) -> None:
    """It returns the n selected keys in the expected order when n=3."""
    top_keys = searcher._indice_maxes(initial_matches, 3)
    assert top_keys == [8, 9, 4]
Example #18
0
def test__calc_flex_passes_through_valid_value(
    nlp: Language, searcher: FuzzySearcher
) -> None:
    """It hands back a valid flex value (<= len(query)) unchanged."""
    valid_flex = 1
    needle = nlp.make_doc("Test query.")
    assert searcher._calc_flex(needle, valid_flex) == valid_flex
Example #19
0
def test__calc_flex_with_default(
    nlp: Language, searcher: FuzzySearcher
) -> None:
    """It resolves "default" to len(query), which is 3 for this query."""
    needle = nlp.make_doc("Test query.")
    resolved_flex = searcher._calc_flex(needle, "default")
    assert resolved_flex == 3
Example #20
0
def test_compare_without_ignore_case(searcher: FuzzySearcher) -> None:
    """It scores strings differing only by case as 0 when ignore_case=False."""
    ratio = searcher.compare("SPACZZ", "spaczz", ignore_case=False)
    assert ratio == 0
Example #21
0
def test_compare_works_with_defaults(searcher: FuzzySearcher) -> None:
    """It scores "spaczz" against "spacy" as 73 with default arguments."""
    ratio = searcher.compare("spaczz", "spacy")
    assert ratio == 73
Example #22
0
def test_get_fuzzy_alg_raises_error_with_unknown_name(
    searcher: FuzzySearcher,
) -> None:
    """It raises a ValueError when fuzzy_func is not a predefined key name."""
    unregistered_name = "unkown"
    with pytest.raises(ValueError):
        searcher.get_fuzzy_func(unregistered_name)
Example #23
0
def test_get_fuzzy_alg_returns_alg(searcher: FuzzySearcher) -> None:
    """It resolves the "simple" key to fuzz.ratio."""
    assert searcher.get_fuzzy_func("simple") == fuzz.ratio
Example #24
0
def test_fuzzysearcherer_uses_passed_config() -> None:
    """It has truthy fuzzy funcs when built from a FuzzyConfig instance."""
    searcher = FuzzySearcher(config=FuzzyConfig())
    # Only truthiness is checked here, not identity with the passed config.
    assert searcher._config._fuzzy_funcs
Example #25
0
def test_fuzzysearcher_raises_error_if_config_is_not_fuzzyconfig() -> None:
    """It raises a TypeError for an unrecognized string passed as config."""
    unrecognized_config = "Will cause error"
    with pytest.raises(TypeError):
        FuzzySearcher(config=unrecognized_config)
Example #26
0
def test_match_return_empty_list_when_no_matches_after_scan(
    searcher: FuzzySearcher, nlp: Language
) -> None:
    """It yields an empty list when nothing meets the min_r1 threshold."""
    haystack = nlp("G-rant Anderson lives in TN.")
    needle = nlp("xenomorph")
    found = searcher.match(haystack, needle)
    assert found == []
Example #27
0
def test__filter_overlapping_matches_filters_correctly(
    searcher: FuzzySearcher,
) -> None:
    """It keeps only the first match when several cover the same tokens."""
    overlapping = [(1, 2, 80), (1, 3, 70)]
    kept = searcher._filter_overlapping_matches(overlapping)
    assert kept == [(1, 2, 80)]
Example #28
0
def test_fuzzysearcher_has_empty_config_if_empty_passed() -> None:
    """It has no fuzzy funcs configured when built with config="empty"."""
    empty_searcher = FuzzySearcher(config="empty")
    assert not empty_searcher._config._fuzzy_funcs
Example #29
0
def test_match_works_with_defaults(searcher: FuzzySearcher) -> None:
    """It scores "spaczz" against "spacy" as 73 via match with defaults."""
    # NOTE(review): other tests in this file expect `match` to raise
    # TypeError for str arguments and obtain this same ratio from
    # `compare` -- confirm which API version this test targets.
    ratio = searcher.match("spaczz", "spacy")
    assert ratio == 73
Example #30
0
def searcher() -> FuzzySearcher:
    """Build a FuzzySearcher using its default constructor arguments."""
    # NOTE(review): tests request `searcher` as a pytest fixture, so this
    # presumably needs a @pytest.fixture decorator -- confirm it is present
    # in the real module.
    default_searcher = FuzzySearcher()
    return default_searcher