def test_matcher_match_zero_plus(en_vocab):
    """Check a "*" quantifier placed between two double-quote tokens.

    The pattern is: a literal double quote, zero or more non-punctuation
    tokens, then another literal double quote. Exactly one match is
    expected on the sample sentence.
    """
    opening_quote = {"ORTH": '"'}
    non_punct_run = {"OP": "*", "IS_PUNCT": False}
    closing_quote = {"ORTH": '"'}

    quote_matcher = Matcher(en_vocab)
    quote_matcher.add("Quote", [[opening_quote, non_punct_run, closing_quote]])

    tokens = 'He said , " some words " ...'.split()
    found = quote_matcher(Doc(en_vocab, words=tokens))
    assert len(found) == 1
def test_matcher_any_token_operator(en_vocab):
    """Ensure the "any token" spec {} can be combined with an operator.

    A literal "test" followed by a "*"-quantified wildcard token should
    give a single match covering the whole three-token doc.
    """
    wildcard_matcher = Matcher(en_vocab)
    token_specs = [{"ORTH": "test"}, {"OP": "*"}]
    wildcard_matcher.add("TEST", [token_specs])

    doc = Doc(en_vocab, words=["test", "hello", "world"])

    # Collect the text of every matched span; the match id is not needed.
    matches = []
    for _match_id, begin, stop in wildcard_matcher(doc):
        matches.append(doc[begin:stop].text)

    assert len(matches) == 1
    assert matches[0] == "test hello world"
def test_matcher_operator_shadow(en_vocab):
    """Check a "+" alphabetic token placed between literal "a" and "c".

    On the doc ["a", "b", "c"] the pattern must produce exactly one
    match, spanning token offsets 0 to 3.
    """
    token_specs = [
        {"ORTH": "a"},
        {"IS_ALPHA": True, "OP": "+"},
        {"ORTH": "c"},
    ]
    shadow_matcher = Matcher(en_vocab)
    shadow_matcher.add("A.C", [token_specs])

    doc = Doc(en_vocab, words=["a", "b", "c"])
    found = shadow_matcher(doc)
    assert len(found) == 1

    # Drop the leading match id and compare only the (start, end) offsets.
    span_bounds = found[0][1:]
    assert span_bounds == (0, 3)
def test_matcher_match_one_plus(matcher, en_vocab):
    """Compare a single-token pattern with a "1" followed by "+" pattern.

    A control matcher with a plain "Philippe" pattern must find two
    matches in a doc of two "Philippe" tokens. After adding a pattern of
    "Philippe" (OP "1") followed by "Philippe" (OP "+") to the supplied
    ``matcher`` fixture, that matcher must report exactly one match.
    """
    doc = Doc(en_vocab, words=["Philippe", "Philippe"])

    # Baseline: one match per token for the single-token pattern.
    control = Matcher(en_vocab)
    control.add("BasicPhilippe", [[{"ORTH": "Philippe"}]])
    control_matches = control(doc)
    assert len(control_matches) == 2

    exactly_one = {"ORTH": "Philippe", "OP": "1"}
    one_or_more = {"ORTH": "Philippe", "OP": "+"}
    matcher.add("KleenePhilippe", [[exactly_one, one_or_more]])
    kleene_matches = matcher(doc)
    assert len(kleene_matches) == 1