import bubbles

# Follow the comments – there is a line to be uncommented

URL = "https://raw.github.com/Stiivi/cubes/master/examples/hello_world/data.csv"

# Prepare list of stores, we just need one temporary SQL store
stores = {
    "target": bubbles.open_store("sql", "sqlite:///")
}

p = bubbles.Pipeline(stores=stores)
p.source_object("csv_source", resource=URL, infer_fields=True)

# We create a table
# Uncomment this line and see the difference in debug messages
# p.create("target", "data")

p.distinct("Category")
p.pretty_print()
p.run()

import bubbles

# Follow the comments – there is a line to be uncommented

URL = "https://raw.github.com/Stiivi/cubes/master/examples/hello_world/data.csv"

# Prepare list of stores, we just need one temporary SQL store
stores = {"target": bubbles.open_store("sql", "sqlite:///")}

p = bubbles.Pipeline(stores=stores)
p.source_object("csv_source", resource=URL, encoding="utf8")
p.retype({"Amount (US$, Millions)": "integer"})

# We create a table
# Uncomment this line and see the difference in debug messages
# p.create("target", "data")

p.aggregate("Category", "Amount (US$, Millions)")
p.pretty_print()
p.run()

from bubbles import Pipeline, FieldList, data_object, open_store

# Sample order data with fields:
fields = FieldList(["id", "integer"],
                   ["customer_id", "integer"],
                   ["year", "integer"],
                   ["amount", "integer"])

data = [
    [1, 1, 2009, 10],
    [2, 1, 2010, 20],
    [3, 1, 2011, 20],
    [4, 1, 2012, 50],
    [5, 2, 2010, 50],
    [6, 2, 2012, 40],
    [7, 3, 2011, 100],
    [8, 3, 2012, 150],
    [9, 3, 2013, 120]
]

# Stores for SQL alternative, if enabled (see below)
stores = {"default": open_store("sql", "sqlite:///")}

#
# Create the pipeline
#
p = Pipeline(stores=stores)
p.source_object("iterable", iterable=data, fields=fields)

# Uncomment this to get SQL operations instead of a Python iterator
# p.create("default", "data")

# Find the year of the last purchase for each customer
last_purchase = p.fork()
last_purchase.aggregate(["customer_id"], [["year", "max"]],
                        include_count=False)
last_purchase.rename_fields({"year_max": "last_purchase_year"})

p.join_details(last_purchase, "customer_id", "customer_id")
p.pretty_print()

csv_dir = '../data'

# the tables to migrate
tables = [
    'departments',
    'dept_emp',
    'dept_manager',
    'employees',
    'salaries',
    'titles',
]

start_time = time.time()

# connect to databases
sqlite_store = open_store('sql', 'sqlite:////' + sqlite_path)
mysql_store = open_store(
    'sql',
    'mysql+mysqldb://' + mysql_user + ':' + mysql_pw
    + '@' + mysql_host + '/' + mysql_db)
csv_store = open_store('csv', csv_dir)

# migrate tables
for table in tables:
    print(table)

    sqlite_obj = sqlite_store.get_object(table)
    fields = sqlite_obj.fields

    # add email column
    if table == 'employees':
        email_field = Field(name='email', storage_type='string', size=255)
        fields.append(email_field)

    mysql_obj = mysql_store.create(table, fields)
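
The excerpt stops right after the empty MySQL table is created, so the actual row copy is not shown. The sketch below is one hypothetical way to finish that step, written as a standalone helper; it assumes the Bubbles data objects expose a rows() iterator and an append(row) method, which are assumptions about the library's data-object interface rather than something shown in the excerpt.

def copy_rows(source_obj, target_obj, pad_email=False):
    """Copy every row from one data object into another.

    Hypothetical helper: rows() and append() are assumed parts of the
    data object interface, not taken from the original script.
    """
    for row in source_obj.rows():
        if pad_email:
            # the newly added email column has no source value yet
            row = list(row) + [None]
        target_obj.append(row)

# Inside the migration loop this could be called as, e.g.:
# copy_rows(sqlite_obj, mysql_obj, pad_email=(table == 'employees'))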