Exemple #1
0
def test_add_with_more_explicit_kwargs_than_patterns_warns(
    matcher: RegexMatcher,
) -> None:
    """A KwargsWarning is emitted when kwargs dicts outnumber the patterns."""
    patterns = ["Test1"]
    # Two kwargs dicts are supplied for a single pattern.
    surplus_kwargs = [{"ignore_case": True}, {"ignore_case": True}]
    with pytest.warns(KwargsWarning):
        matcher.add("TEST", patterns, surplus_kwargs)
Exemple #2
0
def test_matcher_pipe_with_context(nlp: Language) -> None:
    """Piping (doc, context) pairs with as_tuples=True yields the same pairs."""
    texts = ("test doc 1: United States", "test doc 2: US")
    # Every doc is paired with the same "Country" context value.
    doc_stream = tuple((nlp.make_doc(text), "Country") for text in texts)
    regex_matcher = RegexMatcher(nlp.vocab)
    piped = regex_matcher.pipe(doc_stream, as_tuples=True)
    assert list(piped) == list(doc_stream)
Exemple #3
0
def test_matcher_pipe(nlp: Language) -> None:
    """Piping docs through a matcher yields the same docs back, in order."""
    texts = ("test doc 1: United States", "test doc 2: US")
    doc_stream = tuple(nlp.make_doc(text) for text in texts)
    regex_matcher = RegexMatcher(nlp.vocab)
    piped = regex_matcher.pipe(doc_stream)
    assert list(piped) == list(doc_stream)
Exemple #4
0
def test_matcher_pipe_with_matches(nlp: Language) -> None:
    """With return_matches=True each piped entry carries its matches at index 1."""
    texts = ("test doc 1: United States", "test doc 2: US")
    doc_stream = tuple(nlp.make_doc(text) for text in texts)
    regex_matcher = RegexMatcher(nlp.vocab)
    regex_matcher.add("GPE", ["[Uu](nited|\\.?) ?[Ss](tates|\\.?)"])
    piped = regex_matcher.pipe(doc_stream, return_matches=True)

    # Keep only the matches half of every entry.
    found = []
    for entry in piped:
        found.append(entry[1])

    expected = [[("GPE", 4, 6)], [("GPE", 4, 5)]]
    assert found == expected
Exemple #5
0
def test_matcher_pipe_with_matches_and_context(nlp: Language) -> None:
    """With return_matches and as_tuples set, entries hold matches and context."""
    texts = ("test doc 1: United States", "test doc 2: US")
    doc_stream = tuple((nlp.make_doc(text), "Country") for text in texts)
    regex_matcher = RegexMatcher(nlp.vocab)
    regex_matcher.add("GPE", ["[Uu](nited|\\.?) ?[Ss](tates|\\.?)"])
    piped = regex_matcher.pipe(doc_stream, return_matches=True, as_tuples=True)

    # entry[0][1] holds the matches and entry[1] the context; the doc is dropped.
    found = []
    for entry in piped:
        found.append((entry[0][1], entry[1]))

    expected = [
        ([("GPE", 4, 6)], "Country"),
        ([("GPE", 4, 5)], "Country"),
    ]
    assert found == expected
def matcher(nlp: Language) -> RegexMatcher:
    """Build a RegexMatcher preloaded with GPE, STREET and ZIP patterns."""
    # NOTE(review): tests request `matcher` as an argument, but no
    # @pytest.fixture decorator is visible on this function - confirm.
    regex_matcher = RegexMatcher(nlp.vocab)
    regex_matcher.add(
        "GPE",
        ["(?i)[U](nited|\\.?) ?[S](tates|\\.?)"],
        on_match=add_gpe_ent,
    )
    # Both remaining labels are registered with predef=True; presumably the
    # pattern strings name predefined regexes - verify against RegexMatcher.
    predef_labels = (
        ("STREET", "street_addresses"),
        ("ZIP", "zip_codes"),
    )
    for label, pattern_name in predef_labels:
        regex_matcher.add(label, [pattern_name], kwargs=[{"predef": True}])
    return regex_matcher
Exemple #7
0
def test_add_where_kwargs_are_not_dicts_raises_error(
    matcher: RegexMatcher,
) -> None:
    """A TypeError is raised when the kwargs entries are not dicts."""
    # A bare string stands in where a kwargs dict is expected.
    not_dict_kwargs = ["ignore_case"]
    with pytest.raises(TypeError):
        matcher.add("TEST", ["Test1"], not_dict_kwargs)
Exemple #8
0
def test_add_str_pattern_outside_list_raises_error(
    matcher: RegexMatcher,
) -> None:
    """A TypeError is raised when patterns is a lone string, not an iterable of them."""
    bare_pattern = "Test1"
    with pytest.raises(TypeError):
        matcher.add("TEST", bare_pattern)
Exemple #9
0
def test_add_without_string_pattern_raises_error(
    matcher: RegexMatcher, nlp: Language
) -> None:
    """A TypeError is raised when a pattern is not a string."""
    with pytest.raises(TypeError):
        # The pattern is a Doc rather than a str.
        doc_pattern = nlp.make_doc("Test1")
        matcher.add("TEST", [doc_pattern])
Exemple #10
0
def test_remove_label_raises_error_if_label_not_in_matcher(
    matcher: RegexMatcher,
) -> None:
    """Removing a label that was never added raises a ValueError."""
    absent_label = "TEST"
    with pytest.raises(ValueError):
        matcher.remove(absent_label)
Exemple #11
0
def test_remove_label(matcher: RegexMatcher) -> None:
    """A label added to the matcher is no longer contained after remove()."""
    label = "TEST"
    matcher.add(label, ["test"])
    # Confirm the label is present first so the removal check is meaningful.
    assert label in matcher
    matcher.remove(label)
    assert label not in matcher