def test_matcher_pipe_with_context(nlp: Language) -> None:
    """It passes (Doc, context) tuples through unchanged when `as_tuples=True`."""
    texts = ("test doc 1: United States", "test doc 2: US")
    # Kept as a tuple (not a generator) so it can be re-read for the comparison.
    doc_context_pairs = tuple((nlp.make_doc(text), "Country") for text in texts)
    regex_matcher = RegexMatcher(nlp.vocab)
    piped = regex_matcher.pipe(doc_context_pairs, as_tuples=True)
    assert list(piped) == list(doc_context_pairs)
def test_matcher_pipe(nlp: Language) -> None:
    """It yields the same Doc objects that were fed into `pipe`."""
    texts = ("test doc 1: United States", "test doc 2: US")
    # Kept as a tuple (not a generator) so it can be re-read for the comparison.
    docs = tuple(nlp.make_doc(text) for text in texts)
    regex_matcher = RegexMatcher(nlp.vocab)
    piped = regex_matcher.pipe(docs)
    assert list(piped) == list(docs)
def test_matcher_pipe_with_matches(nlp: Language) -> None:
    """It yields tuples whose second item holds the matches for each Doc."""
    texts = ("test doc 1: United States", "test doc 2: US")
    docs = tuple(nlp.make_doc(text) for text in texts)
    gpe_pattern = "[Uu](nited|\\.?) ?[Ss](tates|\\.?)"
    expected = [[("GPE", 4, 6)], [("GPE", 4, 5)]]

    regex_matcher = RegexMatcher(nlp.vocab)
    regex_matcher.add("GPE", [gpe_pattern])
    piped = regex_matcher.pipe(docs, return_matches=True)

    # Index 1 of every yielded entry is compared; index 0 is ignored here.
    assert [item[1] for item in piped] == expected
def test_matcher_pipe_with_matches_and_context(nlp: Language) -> None:
    """It yields ((Doc, matches), context) tuples when both flags are set."""
    texts = ("test doc 1: United States", "test doc 2: US")
    doc_context_pairs = tuple((nlp.make_doc(text), "Country") for text in texts)
    gpe_pattern = "[Uu](nited|\\.?) ?[Ss](tates|\\.?)"
    expected = [
        ([("GPE", 4, 6)], "Country"),
        ([("GPE", 4, 5)], "Country"),
    ]

    regex_matcher = RegexMatcher(nlp.vocab)
    regex_matcher.add("GPE", [gpe_pattern])
    piped = regex_matcher.pipe(doc_context_pairs, return_matches=True, as_tuples=True)

    # item[0][1] is the matches part of the first element; item[1] is the context.
    assert [(item[0][1], item[1]) for item in piped] == expected