Example #1
0
 def test_match(self):
     # Assert Constraint-Word matching.
     from_string = search.Constraint.fromstring

     def sentence(text):
         # Sentence parsed with relations and lemmata enabled.
         return Sentence(parse(text, relations=True, lemmata=True))

     def word(text, tag=None, index=0):
         # Word built directly from a string, with an optional tag and index.
         return search.Word(None, text, tag, index)

     # Each case pairs a constraint with (candidate word, expected result) tuples.
     cases = (
         (from_string("cat|dog"), [(word("cat"), True), (word("dog"), True), (word("fish"), False)]),
         (from_string("cat*"), [(word("cats"), True)]),
         (from_string("*cat"), [(word("tomcat"), True)]),
         (from_string("c*t|d*g"), [(word("cat"), True), (word("cut"), True), (word("dog"), True), (word("dig"), True)]),
         (from_string("cats|NN*"), [(word("cats", "NNS"), True), (word("cats"), False)]),
         (from_string("^cat"), [(word("cat", "NN", index=0), True), (word("cat", "NN", index=1), False)]),
         (from_string("*|!cat"), [(word("cat"), False), (word("dog"), True), (word("fish"), True)]),
         (from_string("my cat"), [(word("cat"), False)]),
         # "my cat" is an overspecification of "cat"
         (from_string("my cat"), [(sentence("my cat").words[1], True)]),
         (from_string("my_cat"), [(sentence("my cat").words[1], True)]),
         (from_string("cat|NP"), [(sentence("my cat").words[1], True)]),
         (from_string("dog|VP"), [(sentence("my dog").words[1], False)]),
         (from_string("cat|SBJ"), [(sentence("the cat is sleeping").words[1], True)]),
         # lemma matches
         (from_string("dog"), [(sentence("MY DOGS").words[1], True)]),
         # case-insensitive
         (from_string("dog"), [(sentence("MY DOG").words[1], True)]),
     )
     for constraint, candidates in cases:
         for candidate, expected in candidates:
             self.assertEqual(constraint.match(candidate), expected)
     # Assert Constraint-Taxa matching.
     birds = search.Taxonomy()
     for name in ("Tweety", "Steven"):
         birds.append(name, type="bird")
     bird_constraint = search.Constraint.fromstring("BIRD", taxonomy=birds)
     self.assertTrue(bird_constraint.match(word("bird")))
     self.assertTrue(bird_constraint.match(sentence("tweeties")[0]))
     self.assertTrue(bird_constraint.match(word("Steven")))
     print("pattern.search.Constraint.match()")
Example #2
0
 def test_taxonomy(self):
     # Assert Taxonomy search.
     taxonomy = search.Taxonomy()
     knights = ("King Arthur", "Sir Bedevere", "Sir Lancelot", "Sir Gallahad", "Sir Robin")
     # The knights are numbered 1-5 in the order they are added.
     for position, name in enumerate(knights):
         taxonomy.append(name, type="knight", value=position + 1)
     # "John Cleese" is filed under two different types.
     for role in ("Sir Lancelot", "Basil Fawlty"):
         taxonomy.append("John Cleese", type=role)
     # Matching is case-insensitive, results are lowercase.
     for name in ("John Cleese", "john cleese"):
         self.assertTrue(name in taxonomy)
     self.assertEqual(taxonomy.classify("King Arthur"), "knight")
     self.assertEqual(taxonomy.value("King Arthur"), 1)
     actor_types = ["basil fawlty", "sir lancelot"]
     self.assertEqual(taxonomy.parents("John Cleese"), actor_types)
     self.assertEqual(taxonomy.parents("John Cleese", recursive=True), actor_types + ["knight"])
     knight_names = ["sir robin", "sir gallahad", "sir lancelot", "sir bedevere", "king arthur"]
     self.assertEqual(taxonomy.children("knight"), knight_names)
     self.assertEqual(taxonomy.children("knight", recursive=True), knight_names + ["john cleese"])
     print("pattern.search.Taxonomy")
Example #3
0
 def test_fromstring(self):
     # Assert Constraint string syntax.
     # Each case is (constraint string, expected attributes); the attributes are
     # forwarded as keyword arguments to self._test_constraint().
     fromstring = search.Constraint.fromstring
     cases = (
         ("cats",         dict(words=["cats"])),
         ("Cat*",         dict(words=["cat*"])),
         ("\\[cat\\]",    dict(words=["[cat]"])),
         ("[black cats]", dict(words=["black cats"])),
         ("black_cats",   dict(words=["black cats"])),
         ("black\\_cats", dict(words=["black_cats"])),
         ("NNS",          dict(tags=["NNS"])),
         ("NN*|VB*",      dict(tags=["NN*", "VB*"])),
         ("NP",           dict(chunks=["NP"])),
         ("SBJ",          dict(roles=["SBJ"])),
         ("CATS",         dict(taxa=["cats"])),
         ("(cats)",       dict(words=["cats"], optional=True)),
         ("\\(cats\\)",   dict(words=["(cats)"])),
         ("cats+",        dict(words=["cats"], multiple=True)),
         ("cats\\+",      dict(words=["cats+"])),
         ("cats+dogs",    dict(words=["cats+dogs"])),
         ("(cats+)",      dict(words=["cats+"], optional=True)),
         ("cats\\|dogs",  dict(words=["cats|dogs"])),
         ("cats|dogs",    dict(words=["cats", "dogs"])),
         ("^cat",         dict(words=["cat"], first=True)),
         ("\\^cat",       dict(words=["^cat"])),
         ("(cat*)+",      dict(words=["cat*"], optional=True, multiple=True)),
         ("^black_cat+",  dict(words=["black cat"], multiple=True, first=True)),
         ("cats|NN*",     dict(words=["cats"], tags=["NN*"])),
     )
     for s, kwargs in cases:
         self._test_constraint(fromstring(s), **kwargs)
     # Assert non-alpha taxonomy items.
     t = search.Taxonomy()
     t.append("0.5", type="0.5")
     t.append("half", type="0.5")
     v = fromstring("0.5", taxonomy=t)
     self.assertEqual(v.taxa, ["0.5"])
     # Assert non-alpha words without taxonomy.
     v = fromstring("0.5")
     self.assertEqual(v.words, ["0.5"])
     # Assert exclude Constraint.
     # The first "!" is escaped and stays part of the word; the others negate.
     v = fromstring("\\!cats|!dogs|!fish")
     self.assertEqual(v.words, ["!cats"])
     self.assertEqual(v.exclude.words, ["dogs", "fish"])
     print("pattern.search.Constraint.fromstring")
Example #4
0
 def test_wordnet_classifier(self):
     # Assert WordNet classifier parents & children.
     wordnet = search.WordNetClassifier()
     taxonomy = search.Taxonomy()
     taxonomy.classifiers.append(wordnet)
     # Parent lookups first, then child lookups.
     for term, parent in (("cat", "feline"), ("dog", "canine")):
         self.assertEqual(taxonomy.classify(term), parent)
     for child, term in (("domestic cat", "cat"), ("puppy", "dog")):
         self.assertTrue(child in taxonomy.children(term))
     print("pattern.search.WordNetClassifier")
Example #5
0
 def test_classifier(self):
     # Assert taxonomy classifier + keyword arguments.
     def quality_parents(word, chunk=None):
         # Words ending in "ness" get the parent "quality".
         if word.endswith("ness"):
             return ["quality"]
         return []

     def action_parents(word, chunk=None):
         # Any word passed along with chunk="VP" gets the parent "action".
         if chunk == "VP":
             return ["action"]
         return []

     by_suffix = search.Classifier(parents=quality_parents)
     by_chunk = search.Classifier(parents=action_parents)
     taxonomy = search.Taxonomy()
     for classifier in (by_suffix, by_chunk):
         taxonomy.classifiers.append(classifier)
     self.assertEqual(taxonomy.classify("fuzziness"), "quality")
     self.assertEqual(taxonomy.classify("run", chunk="VP"), "action")
     print("pattern.search.Classifier")
Example #6
0
 def test_compile_function(self):
     # Assert Pattern properties after compile() with a string, flag and taxonomy.
     taxonomy = search.Taxonomy()
     compiled = search.compile("JJ?+ NN*", search.STRICT, taxonomy=taxonomy)
     self.assertEqual(compiled.strict, True)
     leading = compiled[0]
     self.assertEqual(leading.optional, True)
     self.assertEqual(leading.tags, ["JJ"])
     trailing = compiled[1]
     self.assertEqual(trailing.tags, ["NN*"])
     self.assertEqual(trailing.taxonomy, taxonomy)
     # Assert regular expression input.
     number = re.compile(r"[0-9|\.]+")
     compiled = search.compile(number)
     first_word = compiled[0].words[0]
     self.assertTrue(isinstance(first_word, search.regexp))
     # Assert TypeError for other input.
     self.assertRaises(TypeError, search.compile, 1)
     print("pattern.search.compile()")
Example #7
0
 def test_match(self):
     # Assert Pattern.match()
     # Each case is (pattern, input, expected match string); None means no match.
     # The input is either a plain string or a parsed Sentence.
     P = search.Pattern.fromstring
     X = search.STRICT
     S = lambda s: Sentence(parse(s, relations=True, lemmata=True))
     for i, (pattern, test, match) in enumerate((
       (P("^rabbit"),                  "white rabbit",     None),                  #  0
       (P("^rabbit"),                        "rabbit",     "rabbit"),              #  1
       (P("rabbit"),               "big white rabbit",     "rabbit"),              #  2
       (P("rabbit*"),              "big white rabbits",    "rabbits"),             #  3
       (P("JJ|NN"),              S("big white rabbits"),   "big"),                 #  4
       (P("JJ+"),                S("big white rabbits"),   "big white"),           #  5
       (P("JJ+ NN*"),            S("big white rabbits"),   "big white rabbits"),   #  6
       (P("JJ black|white NN*"), S("big white rabbits"),   "big white rabbits"),   #  7
       (P("NP"),                 S("big white rabbit"),    "big white rabbit"),    #  8
       (P("big? rabbit", X),     S("big white rabbit"),    "rabbit"),              #  9 strict
       (P("big? rabbit|NN"),     S("big white rabbit"),    "rabbit"),              # 10 explicit
       (P("big? rabbit"),        S("big white rabbit"),    "big white rabbit"),    # 11 greedy
       (P("rabbit VP JJ"),       S("the rabbit was huge"), "the rabbit was huge"), # 12
       (P("rabbit be JJ"),       S("the rabbit was huge"), "the rabbit was huge"), # 13 lemma
       (P("rabbit be JJ", X),    S("the rabbit was huge"), "rabbit was huge"),     # 14
       (P("rabbit is JJ"),       S("the rabbit was huge"), None),                  # 15
       (P("the NP"),             S("the rabid rodents"),   "the rabid rodents"),   # 16 overlap
       (P("t*|r*+"),             S("the rabid rodents"),   "the rabid rodents"),   # 17
       (P("(DT) JJ? NN*"),       S("the rabid rodents"),   "the rabid rodents"),   # 18
       (P("(DT) JJ? NN*"),       S("the rabbit"),          "the rabbit"),          # 19
       (P("rabbit"),             S("the big rabbit"),      "the big rabbit"),      # 20 greedy
       (P("eat carrot"),         S("is eating a carrot"),  "is eating a carrot"),  # 21
       (P("eat carrot|NP"),      S("is eating a carrot"),  "is eating a carrot"),  # 22
       (P("eat NP"),             S("is eating a carrot"),  "is eating a carrot"),  # 23
       (P("eat a"),              S("is eating a carrot"),  "is eating a"),         # 24
       (P("!NP carrot"),         S("is eating a carrot"),  "is eating a carrot"),  # 25
       (P("eat !pizza"),         S("is eating a carrot"),  "is eating a carrot"),  # 26
       (P("eating a"),           S("is eating a carrot"),  "is eating a"),         # 27
       (P("eating !carrot", X),  S("is eating a carrot"),  "eating a"),            # 28
       (P("eat !carrot"),        S("is eating a carrot"),  None),                  # 29 NP chunk is a carrot
       (P("eat !DT"),            S("is eating a carrot"),  None),                  # 30 eat followed by DT
       (P("eat !NN"),            S("is eating a carrot"),  "is eating a"),         # 31 a/DT is not NN
       (P("!be carrot"),         S("is eating a carrot"),  "is eating a carrot"),  # 32 is eating == eat != is
       (P("!eat|VP carrot"),     S("is eating a carrot"),  None),                  # 33 VP chunk == eat
       (P("white_rabbit"),       S("big white rabbit"),    None),                  # 34
       (P("[white rabbit]"),     S("big white rabbit"),    None),                  # 35
       (P("[* white rabbit]"),   S("big white rabbit"),    "big white rabbit"),    # 36
       (P("[big * rabbit]"),     S("big white rabbit"),    "big white rabbit"),    # 37
       (P("big [big * rabbit]"), S("big white rabbit"),    "big white rabbit"),    # 38
       (P("[*+ rabbit]"),        S("big white rabbit"),    None),                  # 39 bad pattern: "+" is literal
     )):
         m = pattern.match(test)
         # A failed match has no "string" attribute, so it compares as None.
         found = getattr(m, "string", None)
         # Report the case index (the number in the trailing comment) on failure.
         self.assertEqual(found, match, "case %d: %r != %r" % (i, found, match))
     # Assert chunk with head at the front.
     s = S("Felix the cat")
     s.chunks[0]._head = lambda ch: ch.words[0] # head = "Felix" (it's a hack)
     self.assertEqual(P("felix").match(s).string, "Felix the cat")
     # Assert negation + custom greedy() function.
     s = S("the big white rabbit")
     # g() is true only when every word in the chunk matches the constraint.
     g = lambda chunk, constraint: len([w for w in chunk if not constraint.match(w)]) == 0
     self.assertEqual(P("!white").match(s).string, "the big white rabbit") # a rabbit != white
     self.assertEqual(P("!white", greedy=g).match(s), None)                # a white rabbit == white
     # Assert taxonomy items with spaces.
     s = S("Bugs Bunny is a giant talking rabbit.")
     t = search.Taxonomy()
     t.append("rabbit", type="rodent")
     t.append("Bugs Bunny", type="rabbit")
     self.assertEqual(P("RABBIT", taxonomy=t).match(s).string, "Bugs Bunny")
     # Assert None, the syntax cannot handle taxonomy items that span multiple chunks.
     s = S("Elmer Fudd fires a cannon")
     t = search.Taxonomy()
     t.append("fire cannon", type="violence")
     # NOTE(review): t is not passed to the pattern below, so the taxonomy built
     # above is not used by this assertion -- confirm whether taxonomy=t was intended.
     self.assertEqual(P("VIOLENCE").match(s), None)
     # Assert regular expressions.
     s = S("a sack with 3.5 rabbits")
     p = search.Pattern.fromstring("[] NNS")
     # The first constraint starts out empty and gets a compiled regexp as its only word.
     p[0].words.append(re.compile(r"[0-9|\.]+"))
     self.assertEqual(p.match(s).string, "3.5 rabbits")
     print("pattern.search.Pattern.match()")