# Build a small table; each row holds a uid() value, a name and a type.
# NOTE(review): Table, uid and pprint are not defined in the visible part of
# this script -- presumably they are imported earlier; confirm.
t = Table(rows=[
    [uid(), "broccoli", "vegetable"],
    [uid(), "turnip", "vegetable"],
    [uid(), "asparagus", "vegetable"],
    [uid(), "banana", "fruit"],
])

# A parenthesised single-argument print produces the same output as a
# Python 2 print statement and also parses as the Python 3 print() function.
print(t.rows[0])     # t.rows is a list of rows; this prints the first row.
print(t.columns[1])  # t.columns is a list of columns (each a list of values);
                     # this prints the second column.
print("")            # Blank separator line.

# Columns can be manipulated directly like any other Python list.
# This can be slow for large tables. If you need a fast way to do matrix math,
# use numpy (http://numpy.scipy.org/) instead.
# The purpose of Table is data storage.
# The new column supplies one value for each of the four rows above.
t.columns.append(["green", "purple", "white", "yellow"])

# Save as a comma-separated (unicode) text file.
t.save("food.txt")

# Load a table from file.
t = Table.load("food.txt")

pprint(t, truncate=50, padding=" ", fill=".")
# This example demonstrates how table values can be grouped.
# NOTE(review): Table, uid, pprint and the group functions (FIRST, COUNT, ...)
# are not defined in the visible part of this script -- presumably they are
# imported earlier; confirm.
t = Table(rows=[
    # 0-ID   1-NAME       2-TYPE       3-COLOR
    [uid(), "broccoli",  "vegetable", "green"],
    [uid(), "turnip",    "vegetable", "purple"],
    [uid(), "asparagus", "vegetable", "white"],
    [uid(), "banana",    "fruit",     "yellow"],
    [uid(), "orange",    "fruit",     "orange"],
])

g = t.copy(columns=[2, 0])  # A copy with only the type and id columns.
g = g.group(0, COUNT)       # Group by type, count rows per type.
# Group functions: FIRST, LAST, COUNT, MAX, MIN, SUM, AVG, STDEV.
pprint(g)
# print("") emits the same blank line as a bare Python 2 `print` statement and
# also works as the Python 3 print() function.
print("")

# This will group by type and concatenate all names per type.
# The lambda parameter is called `names` so it does not shadow the builtin list.
g = t.copy(columns=[2, 1])
g = g.group(0, function=lambda names: "/".join(names))
pprint(g)
print("")

# This will group by type, count the ids per type, and concatenate all names per type.
# Each column is given a different grouping function.
# For the column whose values are being grouped on, simply use FIRST.
g = t.copy(columns=[2, 0, 1])
g = g.group(0, function=(FIRST, COUNT, lambda names: "/".join(names)))
g.columns[1].sort()  # Sort by count.
# "X IS MORE IMPORTANT THAN Y"
# Here is a rough example of how to build a web miner.
# It mines comparative statements from Yahoo! and stores the results in a table,
# which can be saved as a text file for further processing later on.

# Pattern matching also works with Sentence objects from the MBSP module.
# MBSP's parser is much more robust (but also slower).
#from MBSP import Sentence, parse

# NOTE(review): Table, pprint, Pattern, Yahoo, plaintext, Sentence and parse are
# not defined in the visible part of this script -- presumably they are
# imported earlier; confirm.

q = '"more important than"'           # Yahoo search query
p = "NP (VP) more important than NP"  # Search pattern.
p = Pattern.fromstring(p)

t = Table()
engine = Yahoo(license=None)
for i in range(1):  # max=10
    for result in engine.search(q, start=i+1, count=100, cached=True):
        # Strip the result description down to plain text and parse it
        # into a Sentence that the pattern can be matched against.
        s = result.description
        s = plaintext(s)
        s = Sentence(parse(s))
        for m in p.search(s):
            a = m.constituents(constraint=0)[-1]  # Left NP.
            b = m.constituents(constraint=5)[0]   # Right NP.
            t.append((
                a.string.lower(),
                b.string.lower()))

pprint(t)

# Parenthesised single-argument prints give the same output as the original
# Python 2 print statements ("" -> blank line, then "<count> results.") and
# also parse as the Python 3 print() function.
print("")
print("%d results." % len(t))
# This example demonstrates how table values can be grouped.
# NOTE(review): Table, uid, pprint and the group functions (FIRST, COUNT, ...)
# are not defined in the visible part of this script -- presumably they are
# imported earlier; confirm.
t = Table(rows=[
    # 0-ID   1-NAME       2-TYPE       3-COLOR
    [uid(), "broccoli",  "vegetable", "green"],
    [uid(), "turnip",    "vegetable", "purple"],
    [uid(), "asparagus", "vegetable", "white"],
    [uid(), "banana",    "fruit",     "yellow"],
    [uid(), "orange",    "fruit",     "orange"],
])

g = t.copy(columns=[2, 0])  # A copy with only the type and id columns.
g = g.group(0, COUNT)       # Group by type, count rows per type.
# Group functions: FIRST, LAST, COUNT, MAX, MIN, SUM, AVG, STDEV.
pprint(g)
# print("") emits the same blank line as a bare Python 2 `print` statement and
# also works as the Python 3 print() function.
print("")

# This will group by type and concatenate all names per type.
# The lambda parameter is called `names` so it does not shadow the builtin list.
g = t.copy(columns=[2, 1])
g = g.group(0, function=lambda names: "/".join(names))
pprint(g)
print("")

# This will group by type, count the ids per type, and concatenate all names per type.
# Each column is given a different grouping function.
# For the column whose values are being grouped on, simply use FIRST.
g = t.copy(columns=[2, 0, 1])
g = g.group(0, function=(FIRST, COUNT, lambda names: "/".join(names)))
g.columns[1].sort()  # Sort by count.
from pattern.table import Table, pprint

# "X IS MORE IMPORTANT THAN Y"
# Here is a rough example of how to build a web miner.
# It mines comparative statements from Yahoo! and stores the results in a table,
# which can be saved as a text file for further processing later on.

# Pattern matching also works with Sentence objects from the MBSP module.
# MBSP's parser is much more robust (but also slower).
#from MBSP import Sentence, parse

# NOTE(review): Pattern, Yahoo, plaintext, Sentence and parse are not defined
# in the visible part of this script -- presumably they are imported earlier;
# confirm.

q = '"more important than"'           # Yahoo search query
p = "NP (VP) more important than NP"  # Search pattern.
p = Pattern.fromstring(p)

t = Table()
engine = Yahoo(license=None)
for i in range(1):  # max=10
    for result in engine.search(q, start=i + 1, count=100, cached=True):
        # Strip the result description down to plain text and parse it
        # into a Sentence that the pattern can be matched against.
        s = result.description
        s = plaintext(s)
        s = Sentence(parse(s))
        for m in p.search(s):
            a = m.constituents(constraint=0)[-1]  # Left NP.
            b = m.constituents(constraint=5)[0]   # Right NP.
            t.append((a.string.lower(), b.string.lower()))

pprint(t)

# Parenthesised single-argument prints give the same output as the original
# Python 2 print statements ("" -> blank line, then "<count> results.") and
# also parse as the Python 3 print() function.
print("")
print("%d results." % len(t))
# It can be saved as a CSV text file that is both human/machine readable.
# See also: examples/01-web/03-twitter.py

# Supported values that are imported and exported correctly:
# str, unicode, int, float, bool, None
# For other data types, custom encoder and decoder functions can be used.

# NOTE(review): Table, uid and pprint are not defined in the visible part of
# this script -- presumably they are imported earlier; confirm.

# Each row holds a uid() value, a name and a type.
t = Table(rows=[
    [uid(), "broccoli", "vegetable"],
    [uid(), "turnip", "vegetable"],
    [uid(), "asparagus", "vegetable"],
    [uid(), "banana", "fruit"],
])

# A parenthesised single-argument print produces the same output as a
# Python 2 print statement and also parses as the Python 3 print() function.
print(t.rows[0])     # t.rows is a list of rows; this prints the first row.
print(t.columns[1])  # t.columns is a list of columns (each a list of values);
                     # this prints the second column.
print("")            # Blank separator line.

# Columns can be manipulated directly like any other Python list.
# This can be slow for large tables. If you need a fast way to do matrix math,
# use numpy (http://numpy.scipy.org/) instead.
# The purpose of Table is data storage.
# The new column supplies one value for each of the four rows above.
t.columns.append(["green", "purple", "white", "yellow"])

# Save as a comma-separated (unicode) text file.
t.save("food.txt")

# Load a table from file.
t = Table.load("food.txt")

pprint(t, truncate=50, padding=" ", fill=".")