def test_match_raises_error_when_doc_not_doc_obj(searcher: FuzzySearcher, nlp: Language) -> None: """It raises a TypeError if doc is not a Doc object.""" doc = "G-rant Anderson lives in TN." query = nlp("xenomorph") with pytest.raises(TypeError): searcher.match(doc, query)
def test_match_raises_error_if_query_not_doc_obj(searcher: FuzzySearcher, nlp: Language) -> None: """It raises a TypeError if query not a doc.""" doc = nlp("This is a doc") query = "Not a doc" with pytest.raises(TypeError): searcher.match(doc, query)
def test_compare_raises_error_with_unknown_func_name(searcher: FuzzySearcher, nlp: Language) -> None: """It raises a ValueError if fuzzy_func does not match a predefined key name.""" with pytest.raises(ValueError): assert searcher.compare(nlp("spaczz"), nlp("spacy"), fuzzy_func="unknown")
def test__calc_flex_warns_if_flex_longer_than_query( nlp: Language, searcher: FuzzySearcher) -> None: """It provides UserWarning if flex > len(query).""" query = nlp("Test query") with pytest.warns(FlexWarning): flex = searcher._calc_flex(query, 5) assert flex == 2
def test__scan_with_no_matches(searcher: FuzzySearcher, nlp: Language, scan_example: Doc) -> None: """It returns None if no matches >= min_r1.""" query = nlp("xenomorph") assert (searcher._scan( scan_example, query, fuzzy_func="simple", min_r1=30, ignore_case=True) is None)
def test__calc_flex_warns_if_flex_less_than_0(nlp: Language, searcher: FuzzySearcher) -> None: """It provides UserWarning if flex < 0.""" query = nlp("Test query") with pytest.warns(FlexWarning): flex = searcher._calc_flex(query, -1) assert flex == 0
def test__scan_returns_none_w_empty_query(searcher: FuzzySearcher, nlp: Language, scan_example: Doc) -> None: """It returns None if passed an empty query string.""" query = nlp("") assert (searcher._scan( scan_example, query, fuzzy_func="simple", min_r1=25, ignore_case=True) is None)
def test_match_finds_best_matches3(searcher: FuzzySearcher, nlp: Language) -> None: """It returns all the fuzzy matches that meet threshold correctly sorted.""" doc = nlp("My favorite wine is white gold riesling.") query = nlp("goldriesling") assert searcher.match(doc, query, flex="max") == [ (5, 7, 96), ]
def test_match_finds_best_matches(searcher: FuzzySearcher, nlp: Language) -> None: """It returns all the fuzzy matches that meet threshold correctly sorted.""" doc = nlp("chiken from Popeyes is better than chken from Chick-fil-A") query = nlp("chicken") assert searcher.match(doc, query, ignore_case=False) == [ (0, 1, 92), (6, 7, 83), ]
def test__scan_returns_matches_over_min_r1(searcher: FuzzySearcher, nlp: Language, scan_example: Doc) -> None: """It returns all spans of len(query) in doc if ratio >= min_r1.""" query = nlp("Shirley") assert searcher._scan(scan_example, query, fuzzy_func="simple", min_r1=30, ignore_case=True) == { 4: 86 }
def test__optimize_finds_better_match_with_max_flex( searcher: FuzzySearcher, nlp: Language, adjust_example: Doc) -> None: """It optimizes the initial match to find a better match when flex = max.""" query = nlp("Kareem Abdul-Jabbar") match_values = {0: 33, 1: 39, 2: 41, 3: 33, 5: 37, 6: 59, 7: 84} assert searcher._optimize( adjust_example, query, match_values, pos=7, fuzzy_func="simple", min_r2=70, ignore_case=True, flex=4, thresh=100, ) == (8, 11, 89)
def test__scan_returns_all_matches_with_no_min_r1(searcher: FuzzySearcher, nlp: Language, scan_example: Doc) -> None: """It returns all spans of len(query) in doc if min_r1 = 0.""" query = nlp("Shirley") assert searcher._scan(scan_example, query, fuzzy_func="simple", min_r1=0, ignore_case=True) == { 0: 0, 1: 0, 2: 18, 3: 22, 4: 86 }
def test__optimize_with_no_flex(searcher: FuzzySearcher, nlp: Language) -> None: """It returns the intial match when flex value = 0.""" doc = nlp("Patient was prescribed Zithroma tablets.") query = nlp("zithromax") match_values = {3: 94} assert searcher._optimize( doc, query, match_values, pos=3, fuzzy_func="simple", min_r2=70, ignore_case=True, flex=0, thresh=100, ) == (3, 4, 94)
def searcher(nlp: Language) -> FuzzySearcher: """It returns a fuzzy searcher.""" return FuzzySearcher(vocab=nlp.vocab)
def test_compare_without_ignore_case(searcher: FuzzySearcher, nlp: Language) -> None: """Checks ignore_case is working.""" assert searcher.compare(nlp("SPACZZ"), nlp("spaczz"), ignore_case=False) == 0
def test_compare_works_with_defaults(searcher: FuzzySearcher, nlp: Language) -> None: """Checks compare is working as intended.""" assert searcher.compare(nlp("spaczz"), nlp("spacy")) == 73
def test_match_returns_empty_list_if_doc_query_empty(searcher: FuzzySearcher, nlp: Language) -> None: """Returns empty list if doc is empty string.""" doc = nlp("") query = nlp("") assert searcher.match(doc, query) == []
def test__check_ratios_passes_valid_values_wo_flex( searcher: FuzzySearcher) -> None: """It passes through valid ratios changing `min_r1` to equal `min_r2`.""" assert searcher._check_ratios(50, 75, 100, 0) == (75, 75, 100)
def test__calc_flex_with_min(nlp: Language, searcher: FuzzySearcher) -> None: """It returns 0 if set with "min".""" query = nlp("Test query") assert searcher._calc_flex(query, "min") == 0
def test__check_ratios_warns_if_thresh_less_min_r2( searcher: FuzzySearcher) -> None: """It raises a `RatioWarning`.""" with pytest.warns(RatioWarning): ratios = searcher._check_ratios(50, 75, 70, 1) assert ratios == (50, 75, 75)
def test__check_ratios_passes_valid_values_w_flex( searcher: FuzzySearcher) -> None: """It passes through valid ratios with no changes.""" assert searcher._check_ratios(50, 75, 100, 1) == (50, 75, 100)
def test__filter_overlapping_matches_filters_correctly( searcher: FuzzySearcher, ) -> None: """It only returns the first match if more than one encompass the same tokens.""" matches = [(1, 2, 80), (1, 3, 70)] assert searcher._filter_overlapping_matches(matches) == [(1, 2, 80)]
def test__optimize_where_bpl_would_equal_bpr(searcher: FuzzySearcher, nlp: Language) -> None: """It returns the intial match when flex value = 0.""" doc = nlp("trabalho, investimento e escolhas corajosas,") query = nlp("Courtillier Musqué") assert searcher.match(doc, query, flex="max") == []
def test__calc_flex_with_default(nlp: Language, searcher: FuzzySearcher) -> None: """It returns len(query) // 2 if set with "default".""" query = nlp("Test query") assert searcher._calc_flex(query, "default") == 1
def test__calc_flex_with_max(nlp: Language, searcher: FuzzySearcher) -> None: """It returns len(query) if set with "max".""" query = nlp("Test query two") assert searcher._calc_flex(query, "max") == 3
def test_match_return_empty_list_when_no_matches_after_adjust( searcher: FuzzySearcher, nlp: Language) -> None: """It returns an empty list if no fuzzy matches meet min_r2 threshold.""" doc = nlp("G-rant Anderson lives in TN.") query = nlp("Garth, Anderdella") assert searcher.match(doc, query) == []
def test__calc_flex_passes_through_valid_value( nlp: Language, searcher: FuzzySearcher) -> None: """It passes through a valid flex value (<= len(query)).""" query = nlp("Test query") assert searcher._calc_flex(query, 0) == 0
def test__check_ratios_warns_if_minr1_greater_min_r2( searcher: FuzzySearcher) -> None: """It raises a `RatioWarning`.""" with pytest.warns(RatioWarning): ratios = searcher._check_ratios(80, 75, 100, 1) assert ratios == (75, 75, 100)
def test__calc_flex_raises_error_if_non_valid_value( nlp: Language, searcher: FuzzySearcher) -> None: """It raises TypeError if flex is not an int or "default".""" query = nlp("Test query.") with pytest.raises(TypeError): searcher._calc_flex(query, None) # type: ignore
def test__check_ratios_ignores_issues_wo_flex(searcher: FuzzySearcher) -> None: """Changes `min_r1` to equal `min_r2` but ignores unnecessary `thresh`.""" assert searcher._check_ratios(80, 75, 70, 0) == (75, 75, 70)