Esempio n. 1
0
def test_multi_match_will_expand_on_partial_match_if_partials(
    searcher: RegexSearcher, nlp: Language
) -> None:
    """Check that a partial regex hit is still reported as a span.

    The pattern only covers the leading ``US`` of the quoted ``USA`` in the
    sample sentence, and ``multi_match`` is called without a ``partial``
    argument. The expected result is the single span ``(5, 6)``.
    """
    text = "We want to identify 'USA' even though only first two letters will matched."
    pattern = "[Uu](nited|\\.?) ?[Ss](tates|\\.?)"
    expected_spans = [(5, 6)]

    found_spans = searcher.multi_match(nlp(text), pattern)

    assert found_spans == expected_spans
Esempio n. 2
0
def test_multi_match_will_not_match_if_regex_starts_ends_with_space(
    searcher: RegexSearcher, nlp: Language
) -> None:
    """Check that a pattern wrapped in whitespace matchers yields no spans.

    The pattern begins and ends with ``\\s``; the test expects
    ``multi_match`` to return an empty list for the sample sentence.
    """
    text = "We want to identify US but will fail because regex includes whitespaces."
    # Same country pattern as the other tests, but anchored by whitespace
    # on both sides.
    pattern = "\\s[Uu](nited|\\.?) ?[Ss](tates|\\.?)\\s"

    found_spans = searcher.multi_match(nlp(text), pattern)

    assert found_spans == []
Esempio n. 3
0
def test_multi_match_will_not_expand_if_not_partials(
    searcher: RegexSearcher, nlp: Language
) -> None:
    """Check that ``partial=False`` suppresses the partial hit.

    The pattern only covers the leading ``US`` of the quoted ``USA`` in the
    sample sentence. With ``partial=False`` the test expects an empty
    result.
    """
    text = "We want to identify 'USA' even though only first two letters will matched."
    pattern = "[Uu](nited|\\.?) ?[Ss](tates|\\.?)"

    found_spans = searcher.multi_match(nlp(text), pattern, partial=False)

    assert found_spans == []
Esempio n. 4
0
def test_multi_match_raises_error_if_regex_str_not_str(
    searcher: RegexSearcher, nlp: Language
) -> None:
    """Check that a non-string pattern argument raises ``TypeError``."""
    parsed = nlp("My phone number is (555) 555-5555.")
    # An integer stands in for the pattern argument.
    not_a_string = 1

    with pytest.raises(TypeError):
        searcher.multi_match(parsed, not_a_string, predef=True)
Esempio n. 5
0
def test_multi_match(searcher: RegexSearcher, nlp: Language) -> None:
    """Check that the ``"phones"`` pattern with ``predef=True`` finds both numbers.

    The sample sentence contains two phone numbers; the expected spans are
    ``(4, 10)`` and ``(12, 18)``.
    """
    text = "My phone number is (555) 555-5555, not (554) 554-5554."
    expected_spans = [(4, 10), (12, 18)]

    found_spans = searcher.multi_match(nlp(text), "phones", predef=True)

    assert found_spans == expected_spans