예제 #1
0
def test_matcher_match_zero_plus(en_vocab):
    words = 'He said , " some words " ...'.split()
    pattern = [{"ORTH": '"'}, {"OP": "*", "IS_PUNCT": False}, {"ORTH": '"'}]
    matcher = Matcher(en_vocab)
    matcher.add("Quote", [pattern])
    doc = Doc(en_vocab, words=words)
    assert len(matcher(doc)) == 1
예제 #2
0
def test_matcher_any_token_operator(en_vocab):
    """Test that patterns with "any token" {} work with operators."""
    matcher = Matcher(en_vocab)
    matcher.add("TEST", [[{"ORTH": "test"}, {"OP": "*"}]])
    doc = Doc(en_vocab, words=["test", "hello", "world"])
    matches = [doc[start:end].text for _, start, end in matcher(doc)]
    assert len(matches) == 1
    assert matches[0] == "test hello world"
예제 #3
0
def test_matcher_operator_shadow(en_vocab):
    matcher = Matcher(en_vocab)
    doc = Doc(en_vocab, words=["a", "b", "c"])
    pattern = [{"ORTH": "a"}, {"IS_ALPHA": True, "OP": "+"}, {"ORTH": "c"}]
    matcher.add("A.C", [pattern])
    matches = matcher(doc)
    assert len(matches) == 1
    assert matches[0][1:] == (0, 3)
예제 #4
0
def test_matcher_match_one_plus(matcher, en_vocab):
    control = Matcher(en_vocab)
    control.add("BasicPhilippe", [[{"ORTH": "Philippe"}]])
    doc = Doc(en_vocab, words=["Philippe", "Philippe"])
    m = control(doc)
    assert len(m) == 2
    pattern = [
        {"ORTH": "Philippe", "OP": "1"},
        {"ORTH": "Philippe", "OP": "+"},
    ]
    matcher.add("KleenePhilippe", [pattern])
    m = matcher(doc)
    assert len(m) == 1