Example #1
0
look(table3)    


# progress
#
# Wrap a generated table in progress() and write the wrapped table to CSV.
# NOTE(review): per the petl documentation, progress() reports throughput
# each time `batchsize` rows (10000 here) have passed through it, writing to
# stderr by default -- confirm against the installed petl version.

from petl import dummytable, progress, tocsv
# NOTE(review): the argument is presumably the number of rows to generate.
d = dummytable(100500)
p = progress(d, 10000)
# Rows are presumably only pulled through `p` while tocsv() iterates it, so
# this call is what triggers the progress reporting -- confirm.
tocsv(p, 'output.csv')


# clock
#
# Measure how long each stage of a pipeline spends producing rows: wrap the
# stage in clock() and read the wrapper's `.time` once the pipeline has run.

from petl import dummytable, clock, convert, progress, tocsv
t1 = dummytable(100000)
# c1 times the retrieval of rows from t1.
c1 = clock(t1)
# Apply v**2 to field 'foo'; t2 reads its input through c1.
t2 = convert(c1, 'foo', lambda v: v**2)
# c2 times the retrieval of rows from t2, which includes the time spent in c1.
c2 = clock(t2)
p = progress(c2, 10000)
# The `.time` values below are read only after this call.
# NOTE(review): presumably writing the CSV is what drives rows through the
# whole chain, so the timings are not meaningful before it -- confirm.
tocsv(p, 'dummy.csv')
# time consumed retrieving rows from t1
c1.time
# time consumed retrieving rows from t2
c2.time
# actual time consumed by the convert step
# (c2 wraps a stage that reads through c1, so c1's share is subtracted out)
c2.time - c1.time 


# unpackdict

table1 = (('foo', 'bar'),
Example #2
0
from __future__ import division, print_function, absolute_import


# progress()
############
# Build a 100000-row dummy table, wrap it in progress() with a batch size of
# 10000, and write the wrapped table to 'example.csv'. Written here with the
# module-level petl functions instead of the fluent table methods.

import petl as etl
table = etl.dummytable(100000)
etl.tocsv(etl.progress(table, 10000), 'example.csv')


# clock()
#########
# Measure how long each stage of a pipeline spends producing rows: wrap the
# stage in etl.clock() and read the wrapper's `.time` once the pipeline has
# run.

import petl as etl
t1 = etl.dummytable(100000)
# c1 times the retrieval of rows from t1.
c1 = etl.clock(t1)
# Apply v**2 to field 'foo'; t2 reads its input through c1.
t2 = etl.convert(c1, 'foo', lambda v: v**2)
# c2 times the retrieval of rows from t2, which includes the time spent in c1.
c2 = etl.clock(t2)
p = etl.progress(c2, 10000)
# The `.time` values below are read only after this call.
# NOTE(review): presumably writing the CSV is what drives rows through the
# whole chain, so the timings are not meaningful before it -- confirm.
etl.tocsv(p, 'example.csv')
# time consumed retrieving rows from t1
c1.time
# time consumed retrieving rows from t2
c2.time
# actual time consumed by the convert step
# (c2 wraps a stage that reads through c1, so c1's share is subtracted out)
c2.time - c1.time


Example #3
0
look(table3)    


# progress
#
# Wrap a generated table in progress() and write the wrapped table to CSV.
# NOTE(review): per the petl documentation, progress() reports throughput
# each time `batchsize` rows (10000 here) have passed through it, writing to
# stderr by default -- confirm against the installed petl version.

from petl import dummytable, progress, tocsv
# NOTE(review): the argument is presumably the number of rows to generate.
d = dummytable(100500)
p = progress(d, 10000)
# Rows are presumably only pulled through `p` while tocsv() iterates it, so
# this call is what triggers the progress reporting -- confirm.
tocsv(p, 'output.csv')


# clock
#
# Measure how long each stage of a pipeline spends producing rows: wrap the
# stage in clock() and read the wrapper's `.time` once the pipeline has run.

from petl import dummytable, clock, convert, progress, tocsv
t1 = dummytable(100000)
# c1 times the retrieval of rows from t1.
c1 = clock(t1)
# Apply v**2 to field 'foo'; t2 reads its input through c1.
t2 = convert(c1, 'foo', lambda v: v**2)
# c2 times the retrieval of rows from t2, which includes the time spent in c1.
c2 = clock(t2)
p = progress(c2, 10000)
# The `.time` values below are read only after this call.
# NOTE(review): presumably writing the CSV is what drives rows through the
# whole chain, so the timings are not meaningful before it -- confirm.
tocsv(p, 'dummy.csv')
# time consumed retrieving rows from t1
c1.time
# time consumed retrieving rows from t2
c2.time
# actual time consumed by the convert step
# (c2 wraps a stage that reads through c1, so c1's share is subtracted out)
c2.time - c1.time 


# unpackdict

table1 = (('foo', 'bar'),
Example #4
0
# Narrow f_movie to its id columns; 'country' is kept for now because it is
# the join key used in the next step.
f_movie = etl.cut(f_movie, 'imdb_title_id', 'imdb_name_id', 'date_id', 'genre_id', 'country')
# foreign key for country id (country)
# NOTE(review): d_country is defined outside this section; it presumably
# carries both 'country' and 'country_id' -- confirm. Per the petl
# documentation join() is an inner join, so f_movie rows whose 'country' has
# no match in d_country would be dropped -- verify that is intended.
f_movie = etl.join(f_movie, d_country, key='country')
# get only the necessary ones: 'country' is replaced by 'country_id'
f_movie = etl.cut(f_movie, 'imdb_title_id', 'imdb_name_id', 'date_id', 'genre_id', 'country_id')
# get the four ratings (plus 'imdb_title_id', the key they are joined on)
r_table = etl.cut(r_table, 'imdb_title_id', 'total_votes', 'weighted_average_vote', 'mean_vote', 'median_vote')
# Attach the rating columns to f_movie by title id.
f_movie = etl.join(f_movie, r_table, key='imdb_title_id')

print("Transform DONE")

# LOAD
# Each table below is loaded the same way: wrap it in clock(), wrap that in
# progress() with 100000 as the second argument, then write it through
# out_cursor into the named database table with todb().
# NOTE(review): in the lines visible here `c.time` is never read before `c`
# is rebound, so the clock() wrappers have no visible effect -- confirm
# whether the timings are reported elsewhere or the wrappers can be dropped.
# NOTE(review): per the petl documentation, todb() expects the target table
# to already exist and replaces its existing rows -- confirm. No commit on
# the connection behind out_cursor is visible in this section; verify that
# one happens.
print('Loading...')

# movie personnel
c = etl.clock(d_movie_personnel)
p = etl.progress(c, 100000)
etl.todb(p, out_cursor, 'D_MOVIE_PERSONNEL')
print("d_movie_personnel loaded!")

# title
c = etl.clock(d_title)
p = etl.progress(c, 100000)
etl.todb(p, out_cursor, 'D_TITLE')
print("d_title loaded!")

# genre
c = etl.clock(d_genre)
p = etl.progress(c, 100000)
etl.todb(p, out_cursor, 'D_GENRE')
print("d_genre loaded!")