def test_matcher_warns_if_unknown_pattern_elements(nlp: Language) -> None:
    """It emits a `UserWarning` when a pattern contains unknown elements."""
    matcher = TokenMatcher(nlp.vocab)
    # The lowercase "fuzzy" key is the deliberately unknown element here; the
    # other patterns in this module spell that predicate as "FUZZY".
    matcher.add("TEST", [[{"TEXT": {"fuzzy": "test"}}]])
    doc = nlp("test")
    with pytest.warns(UserWarning):
        matcher(doc)
def test_matcher_pipe_with_context(nlp: Language) -> None:
    """Piping `(doc, context)` pairs with `as_tuples=True` yields them back."""
    first_pair = (nlp("test doc 1: Corvold"), "Jund")
    second_pair = (nlp("test doc 2: Prosh"), "Jund")
    doc_stream = (first_pair, second_pair)

    token_matcher = TokenMatcher(nlp.vocab)
    # No patterns are added, so the stream is expected to come back as-is.
    piped = list(token_matcher.pipe(doc_stream, as_tuples=True))

    assert piped == list(doc_stream)
def test_matcher_pipe(nlp: Language) -> None:
    """Piping plain `Doc` objects through the matcher yields the same docs."""
    first_doc = nlp("test doc 1: Corvold")
    second_doc = nlp("test doc 2: Prosh")
    doc_stream = (first_doc, second_doc)

    token_matcher = TokenMatcher(nlp.vocab)
    piped = list(token_matcher.pipe(doc_stream))

    assert piped == list(doc_stream)
def test_matcher_pipe_with_matches(nlp: Language) -> None:
    """With `return_matches=True` each piped entry carries its doc's matches."""
    docs = (
        nlp("test doc 1: Corvold"),
        nlp("test doc 2: Prosh"),
    )
    token_matcher = TokenMatcher(nlp.vocab)
    dragon_patterns = [
        [{"TEXT": {"FUZZY": "Korvold"}}],
        [{"TEXT": {"FUZZY": "Prossh"}}],
    ]
    token_matcher.add("DRAGON", dragon_patterns)

    matches = []
    for entry in token_matcher.pipe(docs, return_matches=True):
        # The match list sits in the second slot of every yielded entry.
        matches.append(entry[1])

    expected_per_doc = [("DRAGON", 4, 5, None)]
    assert matches == [expected_per_doc, expected_per_doc]
def matcher(model: Language) -> TokenMatcher:
    """Build a token matcher preloaded with the "DATA" and "NAME" labels."""
    # NOTE(review): several tests in this module take `matcher` as an
    # argument, pytest-fixture style, but no fixture decorator is visible on
    # this function from here -- confirm it is registered as a fixture.
    token_matcher = TokenMatcher(vocab=model.vocab)

    # Two alternative token sequences registered under the "DATA" label.
    sql_database_access = [
        {"TEXT": "SQL"},
        {"LOWER": {"FREGEX": "(database){s<=1}"}},
        {"LOWER": {"FUZZY": "access"}, "POS": "NOUN"},
    ]
    sequel_db = [{"TEXT": {"FUZZY": "Sequel"}}, {"LOWER": "db"}]
    token_matcher.add("DATA", [sql_database_access, sequel_db])

    # A single one-token pattern registered under the "NAME" label.
    garfield = [{"TEXT": {"FUZZY": "Garfield"}}]
    token_matcher.add("NAME", [garfield])

    return token_matcher
def test_add_with_zero_len_pattern(matcher: TokenMatcher) -> None:
    """Adding a pattern that contains no tokens raises a ValueError."""
    empty_pattern = []
    with pytest.raises(ValueError):
        matcher.add("TEST", [empty_pattern])
def test_add_without_sequence_of_patterns_raises_error(
    matcher: TokenMatcher,
) -> None:
    """Adding a flat list of token dicts, not a list of patterns, raises a TypeError."""
    # One nesting level short: a single pattern passed where a sequence of
    # patterns is required.
    flat_pattern = [{"TEXT": "error"}]
    with pytest.raises(TypeError):
        matcher.add("TEST", flat_pattern)  # type: ignore
def test_matcher_returns_empty_list_if_no_matches(nlp: Language) -> None:
    """The matcher returns an empty list for a `Doc` that matches nothing."""
    token_matcher = TokenMatcher(nlp.vocab)
    blah_pattern = [{"TEXT": {"FUZZY": "blah"}}]
    token_matcher.add("TEST", [blah_pattern])

    doc = nlp("No matches here.")
    found = token_matcher(doc)

    assert found == []
def test_remove_label_raises_error_if_label_not_in_matcher(
    matcher: TokenMatcher,
) -> None:
    """Removing a label the matcher does not hold raises a ValueError."""
    absent_label = "TEST"
    with pytest.raises(ValueError):
        matcher.remove(absent_label)
def test_remove_label(matcher: TokenMatcher) -> None:
    """A label that was added is gone from the matcher after `remove`."""
    label = "TEST"
    matcher.add(label, [[{"TEXT": "test"}]])
    # Confirm the label is present first so the removal check is meaningful.
    assert label in matcher

    matcher.remove(label)
    assert label not in matcher