Example #1
0
# Assemble a small table of foods; every row holds [unique id, name, type].
foods = [
    ("broccoli", "vegetable"),
    ("turnip", "vegetable"),
    ("asparagus", "vegetable"),
    ("banana", "fruit"),
]
t = Table(rows=[[uid(), name, kind] for name, kind in foods])

# t.rows is a list of rows.
# t.columns is a list of columns, where each column is a list of values.
print(t.rows[0])
print(t.columns[1])
print("")

# A column can be added (or otherwise edited) just like an item in a Python list.
# Doing so can be slow for large tables; when fast matrix math is needed,
# numpy (http://numpy.scipy.org/) is the better tool.
# Table itself is meant for data storage.
colors = ["green", "purple", "white", "yellow"]
t.columns.append(colors)

# Write the table to a comma-separated (unicode) text file,
# then read it back in from that same file.
path = "food.txt"
t.save(path)
t = Table.load(path)

pprint(t, truncate=50, padding=" ", fill=".")
Example #2
0
# This example demonstrates how table values can be grouped.


def join_with_slash(values):
    """Return the strings in values concatenated into one string, separated by "/".

    Used below as a custom grouping function. It replaces a lambda whose
    parameter was named `list`, which shadowed the builtin of the same name.
    """
    return "/".join(values)


t = Table(rows=[
    #   0-ID    1-NAME       2-TYPE       3-COLOR
    [uid(), "broccoli", "vegetable", "green"],
    [uid(), "turnip", "vegetable", "purple"],
    [uid(), "asparagus", "vegetable", "white"],
    [uid(), "banana", "fruit", "yellow"],
    [uid(), "orange", "fruit", "orange"]
])

# Count the rows per type.
g = t.copy(columns=[2, 0])  # A copy with only the type and id columns.
g = g.group(0, COUNT)  # Group by type, count rows per type.
# Group functions: FIRST, LAST, COUNT, MAX, MIN, SUM, AVG, STDEV.
pprint(g)
print("")

# This will group by type and concatenate all names per type:
g = t.copy(columns=[2, 1])
g = g.group(0, function=join_with_slash)

pprint(g)
print("")

# This will group by type, count the ids per type, and concatenate all names per type.
# Each column is given a different grouping function
# (one function per column, in column order).
# For the column that is being grouped on, simply use FIRST.
g = t.copy(columns=[2, 0, 1])
g = g.group(0, function=(FIRST, COUNT, join_with_slash))
g.columns[1].sort()  # Sort by count.
Example #3
0
# "X IS MORE IMPORTANT THAN Y"
# A rough sketch of how to build a web miner.
# It collects comparative statements from Yahoo! and stores them in a table,
# which can be saved as a text file for further processing later on.

# Pattern matching also works with Sentence objects from the MBSP module.
# MBSP's parser is much more robust (but also slower).
#from MBSP import Sentence, parse

q = '"more important than"'  # Yahoo search query
p = Pattern.fromstring("NP (VP) more important than NP")  # Search pattern.
t = Table()

engine = Yahoo(license=None)
for page in range(1):  # max=10
    results = engine.search(q, start=page + 1, count=100, cached=True)
    for hit in results:
        # Strip the result description down to plain text, then parse it
        # into a Sentence the search pattern can be matched against.
        sentence = Sentence(parse(plaintext(hit.description)))
        for match in p.search(sentence):
            # Last constituent matched by the left NP (constraint 0),
            # first constituent matched by the right NP (constraint 5).
            left = match.constituents(constraint=0)[-1]
            right = match.constituents(constraint=5)[0]
            pair = (left.string.lower(), right.string.lower())
            t.append(pair)

pprint(t)

print("")
print("%s %s" % (len(t), "results."))
Example #4
0
# This example demonstrates how table values can be grouped.


def join_with_slash(values):
    """Return the strings in values concatenated into one string, separated by "/".

    Serves as the custom grouping function in the examples below. A named
    function is used instead of a lambda with a parameter called `list`,
    so that the builtin `list` is not shadowed.
    """
    return "/".join(values)


t = Table(rows=[
#   0-ID    1-NAME       2-TYPE       3-COLOR
    [uid(), "broccoli",  "vegetable", "green" ],
    [uid(), "turnip",    "vegetable", "purple"],
    [uid(), "asparagus", "vegetable", "white" ],
    [uid(), "banana",    "fruit",     "yellow"],
    [uid(), "orange",    "fruit",     "orange"]
])

g = t.copy(columns=[2,0]) # A copy with only the type and id columns.
g = g.group(0, COUNT)     # Group by type, count rows per type.
                          # Group functions: FIRST, LAST, COUNT, MAX, MIN, SUM, AVG, STDEV.
pprint(g)
print("")

# This will group by type and concatenate all names per type:
g = t.copy(columns=[2,1])
g = g.group(0, function=join_with_slash)

pprint(g)
print("")

# This will group by type, count the ids per type, and concatenate all names per type.
# Each column is given a different grouping function
# (the tuple holds one function per column, in column order).
# For the column that is being grouped on, simply use FIRST.
g = t.copy(columns=[2,0,1])
g = g.group(0, function=(FIRST, COUNT, join_with_slash))
g.columns[1].sort() # Sort by count.
Example #5
0
from pattern.table import Table, pprint

# "X IS MORE IMPORTANT THAN Y"
# What follows is a rough example of a web miner.
# Comparative statements are mined from Yahoo! and collected in a table,
# which can be saved as a text file for further processing later on.

# Pattern matching also works with Sentence objects from the MBSP module.
# MBSP's parser is much more robust (but also slower).
#from MBSP import Sentence, parse


def compared_phrases(match):
    """Return the lowercased (left, right) noun phrase strings of a pattern match.

    The left phrase is the last constituent matched by constraint 0,
    the right phrase is the first constituent matched by constraint 5.
    """
    left_np = match.constituents(constraint=0)[-1]
    right_np = match.constituents(constraint=5)[0]
    return (left_np.string.lower(), right_np.string.lower())


q = '"more important than"'  # Yahoo search query
p = "NP (VP) more important than NP"  # Search pattern.
p = Pattern.fromstring(p)
t = Table()

engine = Yahoo(license=None)
for batch in range(1):  # max=10
    for found in engine.search(q, start=batch + 1, count=100, cached=True):
        text = plaintext(found.description)
        parsed = Sentence(parse(text))
        for hit in p.search(parsed):
            t.append(compared_phrases(hit))

pprint(t)

print("")
print("%s %s" % (len(t), "results."))
Example #6
0
# It can be saved as a CSV text file that is both human/machine readable.
# See also: examples/01-web/03-twitter.py
# Values of these types are imported and exported correctly:
# str, unicode, int, float, bool, None
# Other data types can be handled with custom encoder and decoder functions.

# Build the rows one at a time: [unique id, name, type].
rows = []
for name, kind in (
    ("broccoli", "vegetable"),
    ("turnip", "vegetable"),
    ("asparagus", "vegetable"),
    ("banana", "fruit"),
):
    rows.append([uid(), name, kind])
t = Table(rows=rows)

# Table.rows is a list of rows.
print(t.rows[0])
# Table.columns is a list of columns, where each column is a list of values.
print(t.columns[1])
print("")

# Columns can be edited in place, the same way as any other Python list.
# For large tables this can be slow. If fast matrix math is what you need,
# use numpy (http://numpy.scipy.org/) instead.
# Table is intended for data storage.
color_column = ["green", "purple", "white", "yellow"]
t.columns.append(color_column)

# Store the table as a comma-separated (unicode) text file.
filename = "food.txt"
t.save(filename)

# Read the table back from that file.
t = Table.load(filename)

pprint(t, truncate=50, padding=" ", fill=".")