def test_match(self):
    """Assert that Constraint.match() accepts/rejects Words per the pattern syntax.

    Covers literal words, wildcards (*), alternatives (|), negation (!),
    sentence-first anchor (^), tag/chunk/role constraints, lemma and
    case-insensitive matching, and taxonomy-based matching.
    """
    # Assert Constraint-Word matching.
    # R parses a constraint string; S parses a tagged Sentence; W builds a bare Word.
    R = search.Constraint.fromstring
    S = lambda s: Sentence(parse(s, relations=True, lemmata=True))
    W = lambda s, tag=None, index=0: search.Word(None, s, tag, index)
    # Each entry pairs a constraint with (word, expected-match) test cases;
    # expected values are 0/1 and compared via bool() below.
    for constraint, tests in (
      (R("cat|dog"),  [(W("cat"), 1), (W("dog"), 1), (W("fish"), 0)]),
      (R("cat*"),     [(W("cats"), 1)]),
      (R("*cat"),     [(W("tomcat"), 1)]),
      (R("c*t|d*g"),  [(W("cat"), 1), (W("cut"), 1), (W("dog"), 1), (W("dig"), 1)]),
      (R("cats|NN*"), [(W("cats", "NNS"), 1), (W("cats"), 0)]),
      (R("^cat"),     [(W("cat", "NN", index=0), 1), (W("cat", "NN", index=1), 0)]),
      (R("*|!cat"),   [(W("cat"), 0), (W("dog"), 1), (W("fish"), 1)]),
      (R("my cat"),   [(W("cat"), 0)]),
      (R("my cat"),   [(S("my cat").words[1], 1)]),  # "my cat" is an overspecification of "cat"
      (R("my_cat"),   [(S("my cat").words[1], 1)]),
      (R("cat|NP"),   [(S("my cat").words[1], 1)]),
      (R("dog|VP"),   [(S("my dog").words[1], 0)]),
      (R("cat|SBJ"),  [(S("the cat is sleeping").words[1], 1)]),
      (R("dog"),      [(S("MY DOGS").words[1], 1)]),  # lemma matches
      (R("dog"),      [(S("MY DOG").words[1], 1)])):  # case-insensitive
        for test, b in tests:
            self.assertEqual(constraint.match(test), bool(b))
    # Assert Constraint-Taxa matching: "BIRD" matches any taxonomy member of type "bird".
    t = search.Taxonomy()
    t.append("Tweety", type="bird")
    t.append("Steven", type="bird")
    v = search.Constraint.fromstring("BIRD", taxonomy=t)
    self.assertTrue(v.match(W("bird")))
    self.assertTrue(v.match(S("tweeties")[0]))  # matched via lemma "tweety"
    self.assertTrue(v.match(W("Steven")))
    print "pattern.search.Constraint.match()"
def test_taxonomy(self): # Assert Taxonomy search. t = search.Taxonomy() t.append("King Arthur", type="knight", value=1) t.append("Sir Bedevere", type="knight", value=2) t.append("Sir Lancelot", type="knight", value=3) t.append("Sir Gallahad", type="knight", value=4) t.append("Sir Robin", type="knight", value=5) t.append("John Cleese", type="Sir Lancelot") t.append("John Cleese", type="Basil Fawlty") # Matching is case-insensitive, results are lowercase. self.assertTrue("John Cleese" in t) self.assertTrue("john cleese" in t) self.assertEqual(t.classify("King Arthur"), "knight") self.assertEqual(t.value("King Arthur"), 1) self.assertEqual(t.parents("John Cleese"), ["basil fawlty", "sir lancelot"]) self.assertEqual(t.parents("John Cleese", recursive=True), [ "basil fawlty", "sir lancelot", "knight"]) self.assertEqual(t.children("knight"), [ "sir robin", "sir gallahad", "sir lancelot", "sir bedevere", "king arthur"]) self.assertEqual(t.children("knight", recursive=True), [ "sir robin", "sir gallahad", "sir lancelot", "sir bedevere", "king arthur", "john cleese"]) print "pattern.search.Taxonomy"
def test_fromstring(self): # Assert Constraint string syntax. for s, kwargs in (("cats", dict(words=["cats"])), ("Cat*", dict(words=["cat*"])), ("\\[cat\\]", dict(words=["[cat]"])), ("[black cats]", dict(words=["black cats"])), ("black_cats", dict(words=["black cats"])), ("black\\_cats", dict(words=["black_cats"])), ("NNS", dict(tags=["NNS"])), ("NN*|VB*", dict(tags=["NN*", "VB*"])), ("NP", dict(chunks=["NP"])), ("SBJ", dict(roles=["SBJ"])), ("CATS", dict(taxa=["cats"])), ("(cats)", dict(words=["cats"], optional=True)), ("\\(cats\\)", dict(words=["(cats)"])), ("cats+", dict(words=["cats"], multiple=True)), ("cats\\+", dict(words=["cats+"])), ("cats+dogs", dict(words=["cats+dogs"])), ("(cats+)", dict(words=["cats+"], optional=True)), ("cats\\|dogs", dict(words=["cats|dogs"])), ("cats|dogs", dict(words=["cats", "dogs"])), ("^cat", dict(words=["cat"], first=True)), ("\\^cat", dict(words=["^cat"])), ("(cat*)+", dict(words=["cat*"], optional=True, multiple=True)), ("^black_cat+", dict(words=["black cat"], multiple=True, first=True)), ("cats|NN*", dict(words=["cats"], tags=["NN*"]))): self._test_constraint(search.Constraint.fromstring(s), **kwargs) # Assert non-alpha taxonomy items. t = search.Taxonomy() t.append("0.5", type="0.5") t.append("half", type="0.5") v = search.Constraint.fromstring("0.5", taxonomy=t) # Assert non-alpha words without taxonomy. self.assertTrue(v.taxa == ["0.5"]) v = search.Constraint.fromstring("0.5") # Assert exclude Constraint. self.assertTrue(v.words == ["0.5"]) v = search.Constraint.fromstring("\\!cats|!dogs|!fish") self.assertTrue(v.words == ["!cats"]) self.assertTrue(v.exclude.words == ["dogs", "fish"]) print "pattern.search.Constraint.fromstring" print "pattern.search.Constraint.fromstring"
def test_wordnet_classifier(self): # Assert WordNet classifier parents & children. c = search.WordNetClassifier() t = search.Taxonomy() t.classifiers.append(c) self.assertEqual(t.classify("cat"), "feline") self.assertEqual(t.classify("dog"), "canine") self.assertTrue("domestic cat" in t.children("cat")) self.assertTrue("puppy" in t.children("dog")) print "pattern.search.WordNetClassifier"
def test_classifier(self): # Assert taxonomy classifier + keyword arguments. c1 = search.Classifier(parents=lambda word, chunk=None: word.endswith("ness") and ["quality"] or []) c2 = search.Classifier(parents=lambda word, chunk=None: chunk=="VP" and ["action"] or []) t = search.Taxonomy() t.classifiers.append(c1) t.classifiers.append(c2) self.assertEqual(t.classify("fuzziness"), "quality") self.assertEqual(t.classify("run", chunk="VP"), "action") print "pattern.search.Classifier"
def test_compile_function(self): # Assert creating and caching Pattern with compile(). t = search.Taxonomy() p = search.compile("JJ?+ NN*", search.STRICT, taxonomy=t) self.assertEqual(p.strict, True) self.assertEqual(p[0].optional, True) self.assertEqual(p[0].tags, ["JJ"]) self.assertEqual(p[1].tags, ["NN*"]) self.assertEqual(p[1].taxonomy, t) # Assert regular expression input. p = search.compile(re.compile(r"[0-9|\.]+")) self.assertTrue(isinstance(p[0].words[0], search.regexp)) # Assert TypeError for other input. self.assertRaises(TypeError, search.compile, 1) print "pattern.search.compile()"
def test_match(self):
    """Assert Pattern.match() on plain strings and parsed Sentences.

    Covers anchors, wildcards, alternatives, tag/chunk constraints,
    STRICT vs. greedy chunk matching, lemmatized matching, negation,
    multi-word "[...]" groups, custom greedy() functions, taxonomy
    terms and regular-expression constraints.
    """
    # P parses a pattern string; X enables strict (non-greedy) chunk matching;
    # S parses a tagged Sentence.
    P = search.Pattern.fromstring
    X = search.STRICT
    S = lambda s: Sentence(parse(s, relations=True, lemmata=True))
    # Each entry: (pattern, input, expected Match.string or None for no match).
    for i, (pattern, test, match) in enumerate((
      (P("^rabbit"), "white rabbit", None),                                          # 0
      (P("^rabbit"), "rabbit", "rabbit"),                                            # 1
      (P("rabbit"), "big white rabbit", "rabbit"),                                   # 2
      (P("rabbit*"), "big white rabbits", "rabbits"),                                # 3
      (P("JJ|NN"), S("big white rabbits"), "big"),                                   # 4
      (P("JJ+"), S("big white rabbits"), "big white"),                               # 5
      (P("JJ+ NN*"), S("big white rabbits"), "big white rabbits"),                   # 6
      (P("JJ black|white NN*"), S("big white rabbits"), "big white rabbits"),        # 7
      (P("NP"), S("big white rabbit"), "big white rabbit"),                          # 8
      (P("big? rabbit", X), S("big white rabbit"), "rabbit"),                        # 9 strict
      (P("big? rabbit|NN"), S("big white rabbit"), "rabbit"),                        # 10 explicit
      (P("big? rabbit"), S("big white rabbit"), "big white rabbit"),                 # 11 greedy
      (P("rabbit VP JJ"), S("the rabbit was huge"), "the rabbit was huge"),          # 12
      (P("rabbit be JJ"), S("the rabbit was huge"), "the rabbit was huge"),          # 13 lemma
      (P("rabbit be JJ", X), S("the rabbit was huge"), "rabbit was huge"),           # 14
      (P("rabbit is JJ"), S("the rabbit was huge"), None),                           # 15
      (P("the NP"), S("the rabid rodents"), "the rabid rodents"),                    # 16 overlap
      (P("t*|r*+"), S("the rabid rodents"), "the rabid rodents"),                    # 17
      (P("(DT) JJ? NN*"), S("the rabid rodents"), "the rabid rodents"),              # 18
      (P("(DT) JJ? NN*"), S("the rabbit"), "the rabbit"),                            # 19
      (P("rabbit"), S("the big rabbit"), "the big rabbit"),                          # 20 greedy
      (P("eat carrot"), S("is eating a carrot"), "is eating a carrot"),              # 21
      (P("eat carrot|NP"), S("is eating a carrot"), "is eating a carrot"),           # 22
      (P("eat NP"), S("is eating a carrot"), "is eating a carrot"),                  # 23
      (P("eat a"), S("is eating a carrot"), "is eating a"),                          # 24
      (P("!NP carrot"), S("is eating a carrot"), "is eating a carrot"),              # 25
      (P("eat !pizza"), S("is eating a carrot"), "is eating a carrot"),              # 26
      (P("eating a"), S("is eating a carrot"), "is eating a"),                       # 27
      (P("eating !carrot", X), S("is eating a carrot"), "eating a"),                 # 28
      (P("eat !carrot"), S("is eating a carrot"), None),                             # 29 NP chunk is a carrot
      (P("eat !DT"), S("is eating a carrot"), None),                                 # 30 eat followed by DT
      (P("eat !NN"), S("is eating a carrot"), "is eating a"),                        # 31 a/DT is not NN
      (P("!be carrot"), S("is eating a carrot"), "is eating a carrot"),              # 32 is eating == eat != is
      (P("!eat|VP carrot"), S("is eating a carrot"), None),                          # 33 VP chunk == eat
      (P("white_rabbit"), S("big white rabbit"), None),                              # 34
      (P("[white rabbit]"), S("big white rabbit"), None),                            # 35
      (P("[* white rabbit]"), S("big white rabbit"), "big white rabbit"),            # 36
      (P("[big * rabbit]"), S("big white rabbit"), "big white rabbit"),              # 37
      (P("big [big * rabbit]"), S("big white rabbit"), "big white rabbit"),          # 38
      (P("[*+ rabbit]"), S("big white rabbit"), None),                               # 39 bad pattern: "+" is literal
      )):
        m = pattern.match(test)
        #print i, match, "<=>", m and m.string or None
        self.assertTrue(getattr(m, "string", None) == match)
    # Assert chunk with head at the front: matching the head matches the whole chunk.
    s = S("Felix the cat")
    s.chunks[0]._head = lambda ch: ch.words[0]  # head = "Felix" (it's a hack)
    self.assertEqual(P("felix").match(s).string, "Felix the cat")
    # Assert negation + custom greedy() function.
    s = S("the big white rabbit")
    # g accepts a chunk only if every word in it satisfies the constraint.
    g = lambda chunk, constraint: len([w for w in chunk if not constraint.match(w)]) == 0
    self.assertEqual(P("!white").match(s).string, "the big white rabbit")  # a rabbit != white
    self.assertEqual(P("!white", greedy=g).match(s), None)                 # a white rabbit == white
    # Assert taxonomy items with spaces.
    s = S("Bugs Bunny is a giant talking rabbit.")
    t = search.Taxonomy()
    t.append("rabbit", type="rodent")
    t.append("Bugs Bunny", type="rabbit")
    self.assertEqual(P("RABBIT", taxonomy=t).match(s).string, "Bugs Bunny")
    # Assert None, the syntax cannot handle taxonomy items that span multiple chunks.
    s = S("Elmer Fudd fires a cannon")
    t = search.Taxonomy()
    t.append("fire cannon", type="violence")
    self.assertEqual(P("VIOLENCE").match(s), None)
    # Assert regular expressions: "[]" is an empty group filled with a regexp constraint.
    s = S("a sack with 3.5 rabbits")
    p = search.Pattern.fromstring("[] NNS")
    p[0].words.append(re.compile(r"[0-9|\.]+"))
    self.assertEqual(p.match(s).string, "3.5 rabbits")
    print "pattern.search.Pattern.match()"