def test_multi_match_will_not_match_if_regex_starts_ends_with_space(
    searcher: RegexSearcher, nlp: Language
) -> None:
    """Expect an empty result for a pattern wrapped in whitespace classes."""
    # The pattern requires a whitespace character on both sides of the term.
    whitespace_wrapped_pattern = "\\s[Uu](nited|\\.?) ?[Ss](tates|\\.?)\\s"
    text = "We want to identify US but will fail because regex includes whitespaces."
    doc = nlp(text)

    found = searcher.match(doc, whitespace_wrapped_pattern)

    assert found == []
def test_multi_match_will_expand_on_partial_match_if_partials(
    searcher: RegexSearcher, nlp: Language
) -> None:
    """Expect a partial regex hit to be reported as the span (5, 6)."""
    country_pattern = "[Uu](nited|\\.?) ?[Ss](tates|\\.?)"
    text = "We want to identify 'USA' even though only first two letters will matched."
    doc = nlp(text)
    expected = [(5, 6)]

    # No ``partial`` argument is passed, so the searcher's default applies.
    found = searcher.match(doc, country_pattern)

    assert found == expected
def test_multi_match_will_not_expand_if_not_partials(
    searcher: RegexSearcher, nlp: Language
) -> None:
    """Expect no matches for the same text when ``partial=False`` is passed."""
    country_pattern = "[Uu](nited|\\.?) ?[Ss](tates|\\.?)"
    text = "We want to identify 'USA' even though only first two letters will matched."
    doc = nlp(text)

    found = searcher.match(doc, country_pattern, partial=False)

    assert found == []
def test_multi_match_raises_error_if_regex_str_not_str(
    searcher: RegexSearcher, nlp: Language
) -> None:
    """Expect ``TypeError`` when an integer is passed in place of the regex string."""
    doc = nlp("My phone number is (555) 555-5555.")
    not_a_string = 1

    with pytest.raises(TypeError):
        searcher.match(doc, not_a_string, predef=True)
def test_multi_match(searcher: RegexSearcher, nlp: Language) -> None:
    """Expect two spans when matching the predefined ``"phones"`` pattern."""
    text = "My phone number is (555) 555-5555, not (554) 554-5554."
    doc = nlp(text)
    expected = [(4, 10), (12, 18)]

    # ``predef=True`` is passed together with the name "phones".
    found = searcher.match(doc, "phones", predef=True)

    assert found == expected