예제 #1
0
from pattern.table import Table
from pattern.table import uid, pprint

# The main purpose of the pattern module is to facilitate automated processes
# for (text) data acquisition and (linguistic) data mining.
# Often, this involves a tangle of messy text files and custom formats to store the data.
# The Table class offers a useful datasheet (cf. MS Excel) in Python code.
# It can be saved as a CSV text file that is both human/machine readable.
# See also: examples/01-web/03-twitter.py
# Supported values that are imported and exported correctly:
# str, unicode, int, float, bool, None
# For other data types, custom encoder and decoder functions can be used.

# Sample data: one (name, type) pair per row of the table.
produce = [
    ("broccoli", "vegetable"),
    ("turnip", "vegetable"),
    ("asparagus", "vegetable"),
    ("banana", "fruit"),
]

# Each row is [id, name, type]; uid() is called once per row, in row order.
t = Table(rows=[[uid(), name, kind] for name, kind in produce])

# t.rows is a list of rows.
# t.columns is a list of columns, where each column is a list of values.
# The parenthesised single-argument print produces the same output as the
# Python 2 print statement.
print(t.rows[0])
print(t.columns[1])
print("")  # Blank separator line.

# Columns can be manipulated directly like any other Python list.
# This can be slow for large tables. If you need a fast way to do matrix math,
# use numpy (http://numpy.scipy.org/) instead. 
# The purpose of Table is data storage.
t.columns.append([
    "green",
예제 #2
0
import os, sys; sys.path.append(os.path.join("..", "..", ".."))

from pattern.table import Table
from pattern.table import uid, pprint, COUNT, FIRST

# This example demonstrates how table values can be grouped.

# Sample table used by the grouping demonstrations below.
t = Table(rows=[
#   0-ID    1-NAME       2-TYPE       3-COLOR
    [uid(), "broccoli",  "vegetable", "green" ],
    [uid(), "turnip",    "vegetable", "purple"],
    [uid(), "asparagus", "vegetable", "white" ],
    [uid(), "banana",    "fruit",     "yellow"],
    [uid(), "orange",    "fruit",     "orange"]
])

# Group functions: FIRST, LAST, COUNT, MAX, MIN, SUM, AVG, STDEV.
g = t.copy(columns=[2,0]) # A copy with only the type and id columns.
g = g.group(0, COUNT)     # Group by type, count rows per type.
pprint(g)
# Blank separator line. print("") gives the same output as a bare Python 2
# print statement and is also valid Python 3 syntax.
print("")

# This will group by type and concatenate all names per type.
# The lambda parameter is called `names` (not `list`) so that the builtin
# list type is not shadowed inside the function.
g = t.copy(columns=[2,1])
g = g.group(0, function=lambda names: "/".join(names))

pprint(g)
print("")

# This will group by type, count the id's per type, and concatenate all names per type.
# Each column is given a different grouping function.
예제 #3
0
import os, sys

sys.path.insert(0, os.path.join("..", "..", ".."))

from pattern.table import Table
from pattern.table import uid, pprint, COUNT, FIRST

# This example demonstrates how table values can be grouped.

# Sample table used by the grouping demonstrations below.
t = Table(rows=[
    #   0-ID    1-NAME       2-TYPE       3-COLOR
    [uid(), "broccoli", "vegetable", "green"],
    [uid(), "turnip", "vegetable", "purple"],
    [uid(), "asparagus", "vegetable", "white"],
    [uid(), "banana", "fruit", "yellow"],
    [uid(), "orange", "fruit", "orange"]
])

# Group functions: FIRST, LAST, COUNT, MAX, MIN, SUM, AVG, STDEV.
g = t.copy(columns=[2, 0])  # A copy with only the type and id columns.
g = g.group(0, COUNT)  # Group by type, count rows per type.
pprint(g)
# Blank separator line. print("") gives the same output as a bare Python 2
# print statement and is also valid Python 3 syntax.
print("")

# This will group by type and concatenate all names per type.
# The lambda parameter is called `names` (not `list`) so that the builtin
# list type is not shadowed inside the function.
g = t.copy(columns=[2, 1])
g = g.group(0, function=lambda names: "/".join(names))

pprint(g)
print("")
예제 #4
0
from pattern.table import Table
from pattern.table import uid, pprint

# The main purpose of the pattern module is to facilitate automated processes
# for (text) data acquisition and (linguistic) data mining.
# Often, this involves a tangle of messy text files and custom formats to store the data.
# The Table class offers a useful datasheet (cf. MS Excel) in Python code.
# It can be saved as a CSV text file that is both human/machine readable.
# See also: examples/01-web/03-twitter.py
# Supported values that are imported and exported correctly:
# str, unicode, int, float, bool, None
# For other data types, custom encoder and decoder functions can be used.

# Sample rows of the form [id, name, type]; uid() supplies each row's id.
sample_rows = [
    [uid(), "broccoli", "vegetable"],
    [uid(), "turnip", "vegetable"],
    [uid(), "asparagus", "vegetable"],
    [uid(), "banana", "fruit"],
]
t = Table(rows=sample_rows)

# t.rows is a list of rows.
# t.columns is a list of columns, where each column is a list of values.
# The parenthesised single-argument print produces the same output as the
# Python 2 print statement.
print(t.rows[0])
print(t.columns[1])
print("")  # Blank separator line.

# Columns can be manipulated directly like any other Python list.
# This can be slow for large tables. If you need a fast way to do matrix math,
# use numpy (http://numpy.scipy.org/) instead.
# The purpose of Table is data storage.
colors = ["green", "purple", "white", "yellow"]  # One value per existing row.
t.columns.append(colors)