look(table3)


# progress
# Write a dummy table to CSV through progress(), which (per the petl docs)
# reports on rows as they pass through; 10000 is the batch size passed here.
from petl import dummytable, progress, tocsv
d = dummytable(100500)
p = progress(d, 10000)
tocsv(p, 'output.csv')


# clock
# Wrap the table before and after the convert() step in clock() so the time
# spent in each stage of the pipeline can be compared once tocsv() has pulled
# all rows through.
from petl import dummytable, clock, convert, progress, tocsv
t1 = dummytable(100000)
c1 = clock(t1)
t2 = convert(c1, 'foo', lambda v: v**2)
c2 = clock(t2)
p = progress(c2, 10000)
tocsv(p, 'dummy.csv')
# time consumed retrieving rows from t1
c1.time
# time consumed retrieving rows from t2
c2.time
# actual time consumed by the convert step
c2.time - c1.time


# unpackdict
# NOTE(review): the table1 literal below is truncated at this point, so it is
# kept commented out instead of being restored as live (unterminated) code.
# table1 = (('foo', 'bar'),
from __future__ import division, print_function, absolute_import


# progress()
############
# Stream a dummy table to CSV via the fluent API; progress() is given a batch
# size of 10000 (per the petl docs it reports on rows passing through).

import petl as etl
table = etl.dummytable(100000)
table.progress(10000).tocsv('example.csv')


# clock()
#########
# Wrap the table before and after the convert() step in clock() so the time
# spent in each stage can be compared once tocsv() has pulled all rows through.

import petl as etl
t1 = etl.dummytable(100000)
c1 = etl.clock(t1)
t2 = etl.convert(c1, 'foo', lambda v: v**2)
c2 = etl.clock(t2)
p = etl.progress(c2, 10000)
etl.tocsv(p, 'example.csv')
# time consumed retrieving rows from t1
c1.time
# time consumed retrieving rows from t2
c2.time
# actual time consumed by the convert step
c2.time - c1.time
# Narrow the fact table to its key columns plus the raw country value, which
# is still needed for the join below.
f_movie = etl.cut(f_movie, 'imdb_title_id', 'imdb_name_id', 'date_id',
                  'genre_id', 'country')

# foreign key for country id (country)
# presumably d_country carries both 'country' and 'country_id' -- verify
# against where d_country is built.
f_movie = etl.join(f_movie, d_country, key='country')

# get only the necessary ones
f_movie = etl.cut(f_movie, 'imdb_title_id', 'imdb_name_id', 'date_id',
                  'genre_id', 'country_id')

# get the four ratings
r_table = etl.cut(r_table, 'imdb_title_id', 'total_votes',
                  'weighted_average_vote', 'mean_vote', 'median_vote')
f_movie = etl.join(f_movie, r_table, key='imdb_title_id')

print("Transform DONE")

# LOAD
# Each dimension table is wrapped in clock() and progress() (batch size
# 100000) before being written through out_cursor with todb().
# NOTE(review): per the petl docs todb() truncates the target table before
# inserting -- confirm that is intended for these tables.
# NOTE(review): c.time is never read in the visible code.
print('Loading...')

# movie personnel
c = etl.clock(d_movie_personnel)
p = etl.progress(c, 100000)
etl.todb(p, out_cursor, 'D_MOVIE_PERSONNEL')
print("d_movie_personnel loaded!")

# title
c = etl.clock(d_title)
p = etl.progress(c, 100000)
etl.todb(p, out_cursor, 'D_TITLE')
print("d_title loaded!")

# genre
c = etl.clock(d_genre)
p = etl.progress(c, 100000)
etl.todb(p, out_cursor, 'D_GENRE')
print("d_genre loaded!")