Example #1
from pattern.search import Pattern, Constraint, taxonomy
from pattern.en import Sentence, parse

# The search module includes a global taxonomy object
# that can be used to define semantic word types.
# For example, consider that you want to extract flower names from a text.
# Listing every flower name in the pattern itself would make it unwieldy, e.g.:
# Pattern.fromstring("rose|lily|daisy|daffodil|begonia").

# A better approach is to use the taxonomy:
for flower in ("rose", "lily", "daisy", "daffodil", "begonia"):
    taxonomy.append(flower, type="flower")

print taxonomy.children("flower")
print taxonomy.parents("rose")
print taxonomy.classify("rose")  # Yields the most recently added parent.
print

# Taxonomy terms can be included in a pattern:
p = Pattern([Constraint(taxa=["flower"])])  # or
p = Pattern.fromstring("FLOWER")

s = Sentence(parse("A field of white daffodils.", lemmata=True))
m = p.search(s)
print s
print m
print

from pattern.search import search
taxonomy.append("chicken", type="food")
taxonomy.append("chicken", type="bird")
taxonomy.append("penguin", type="bird")
taxonomy.append("bird", type="animal")
print taxonomy.parents("chicken")
print taxonomy.children("animal", recursive=True)
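
# A short follow-up sketch using the search() function imported above; the
# sentence "I'm eating chicken." is an illustration (not from the original)
# and is parsed the same way as the flower example above.
s2 = Sentence(parse("I'm eating chicken.", lemmata=True))
print search("FOOD", s2)  # Matches "chicken" through its "food" parent.
print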
Example #2
from pattern.search import Pattern, Constraint
from pattern.en import Sentence, parse

# (Continues from earlier code in this example, which defined a pattern p
# and a parsed sentence s.)
m = p.search(s)
print m
print

# Sentence chunks can be matched by tag (e.g. NP, VP, ADJP).
# The pattern below matches anything from
# "the rabbit gnaws at your fingers" to
# "the white rabbit looks at the carrots":
p = Pattern.fromstring("rabbit VP at NP", s)
m = p.search(s)
print m
print
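# A self-contained check, re-parsing one of the sentences quoted in the comment
# above (how s was originally built is not shown in this excerpt):
s2 = Sentence(parse("the white rabbit looks at the carrots", lemmata=True))
print p.search(s2)
print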

if m:
    for w in m[0].words:
        print w, " \t=>", m[0].constraint(w)

print
print "-------------------------------------------------------------"
# Finally, constraints can also include regular expressions.
# To include them we need to use the full syntax instead of the search() function:
import re
r = re.compile(r"[0-9.]+")  # numbers such as "9.5"
p = Pattern()
p.sequence.append(Constraint(words=[r]))
p.sequence.append(Constraint(tags=["NN*"]))

s = Sentence(parse("I have 9.5 fingers."))
print s
print p.search(s)
print
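
# A short follow-up sketch that pulls the matched number out of each Match,
# re-using the Match.words loop shown earlier; Word.string is assumed to hold
# the raw word text.
for match in p.search(s):
    for w in match.words:
        if r.match(w.string):
            print w.string  # -> 9.5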