Example #1
0
def test_matcher_pipe_with_context(nlp: Language) -> None:
    """It returns a stream of Doc objects as tuples with context."""
    doc_stream = (
        (nlp.make_doc("test doc 1: United States"), "Country"),
        (nlp.make_doc("test doc 2: US"), "Country"),
    )
    matcher = RegexMatcher(nlp.vocab)
    output = matcher.pipe(doc_stream, as_tuples=True)
    assert list(output) == list(doc_stream)
Example #2
0
def test_matcher_pipe(nlp: Language) -> None:
    """It returns a stream of Doc objects."""
    doc_stream = (
        nlp.make_doc("test doc 1: United States"),
        nlp.make_doc("test doc 2: US"),
    )
    matcher = RegexMatcher(nlp.vocab)
    output = matcher.pipe(doc_stream)
    assert list(output) == list(doc_stream)
Example #3
0
def test_matcher_pipe_with_matches(nlp: Language) -> None:
    """It returns a stream of Doc objects and matches as tuples."""
    doc_stream = (
        nlp.make_doc("test doc 1: United States"),
        nlp.make_doc("test doc 2: US"),
    )
    matcher = RegexMatcher(nlp.vocab)
    matcher.add("GPE", ["[Uu](nited|\\.?) ?[Ss](tates|\\.?)"])
    output = matcher.pipe(doc_stream, return_matches=True)
    matches = [entry[1] for entry in output]
    assert matches == [[("GPE", 4, 6)], [("GPE", 4, 5)]]
Example #4
0
def test_matcher_pipe_with_matches_and_context(nlp: Language) -> None:
    """It returns a stream of Doc objects, matches, and context as a tuple."""
    doc_stream = (
        (nlp.make_doc("test doc 1: United States"), "Country"),
        (nlp.make_doc("test doc 2: US"), "Country"),
    )
    matcher = RegexMatcher(nlp.vocab)
    matcher.add("GPE", ["[Uu](nited|\\.?) ?[Ss](tates|\\.?)"])
    output = matcher.pipe(doc_stream, return_matches=True, as_tuples=True)
    matches = [(entry[0][1], entry[1]) for entry in output]
    assert matches == [([("GPE", 4, 6)], "Country"),
                       ([("GPE", 4, 5)], "Country")]