def iati(): req = urllib2.Request(iati_registry['datasets']) opener = urllib2.build_opener() f = opener.open(req) ds = json.load(f) tbl = etl(zip(ds)).pushheader(['dataset']) print look(tbl)
def test_look_style_minimal():
    """look() honours style='minimal' both per-call and via look.default_style."""
    table = (('foo', 'bar'), ('a', 1), ('b', 2))
    actual = repr(look(table, style='minimal'))
    expect = """'foo' 'bar' 'a' 1 'b' 2 """
    eq_(expect, actual)
    # Restore the module-wide default in a finally block so a failing
    # assertion cannot leak the 'minimal' default into unrelated tests.
    look.default_style = 'minimal'
    try:
        actual = repr(look(table))
        eq_(expect, actual)
    finally:
        look.default_style = 'grid'
def test_look_style_minimal():
    """look() honours style="minimal" both per-call and via look.default_style."""
    table = (("foo", "bar"), ("a", 1), ("b", 2))
    actual = repr(look(table, style="minimal"))
    expect = """'foo' 'bar' 'a' 1 'b' 2 """
    eq_(expect, actual)
    # Restore the module-wide default in a finally block so a failing
    # assertion cannot leak the "minimal" default into unrelated tests.
    look.default_style = "minimal"
    try:
        actual = repr(look(table))
        eq_(expect, actual)
    finally:
        look.default_style = "grid"
def test_look_style_simple():
    """look() honours style="simple" both per-call and via look.default_style."""
    table = (("foo", "bar"), ("a", 1), ("b", 2))
    actual = repr(look(table, style="simple"))
    expect = """===== ===== 'foo' 'bar' ===== ===== 'a' 1 'b' 2 ===== ===== """
    eq_(expect, actual)
    # Restore the module-wide default in a finally block so a failing
    # assertion cannot leak the "simple" default into unrelated tests.
    look.default_style = "simple"
    try:
        actual = repr(look(table))
        eq_(expect, actual)
    finally:
        look.default_style = "grid"
def test_look_style_simple():
    """look() honours style='simple' both per-call and via look.default_style."""
    table = (('foo', 'bar'), ('a', 1), ('b', 2))
    actual = repr(look(table, style='simple'))
    expect = """===== ===== 'foo' 'bar' ===== ===== 'a' 1 'b' 2 ===== ===== """
    eq_(expect, actual)
    # Restore the module-wide default in a finally block so a failing
    # assertion cannot leak the 'simple' default into unrelated tests.
    look.default_style = 'simple'
    try:
        actual = repr(look(table))
        eq_(expect, actual)
    finally:
        look.default_style = 'grid'
def _test_with_schema(dbo, schema):
    """Write then append rows to table 'test' under an explicit schema,
    verifying the table contents after each step."""
    base_rows = (('foo', 'bar'), ('a', 1), ('b', 2))
    doubled_rows = (('foo', 'bar'), ('a', 1), ('b', 2), ('a', 1), ('b', 2))
    fetched = etl.fromdb(dbo, 'SELECT * FROM test')
    print('write some data and verify...')
    etl.todb(base_rows, dbo, 'test', schema=schema)
    ieq(base_rows, fetched)
    print(etl.look(fetched))
    print('append some data and verify...')
    etl.appenddb(base_rows, dbo, 'test', schema=schema)
    ieq(doubled_rows, fetched)
    print(etl.look(fetched))
def _write_to_log(parameter_view, sample_rows, resource_index):
    """Log the processed resource index, its parameters (as pretty JSON),
    and a tabular preview of the sample rows."""
    rendered_parameters = json.dumps(parameter_view, ensure_ascii=False, indent=4)
    rendered_table = petl.look(petl.fromdicts(sample_rows))
    logging.info('Processed resource %s', resource_index)
    logging.info('Parameters = %s', rendered_parameters)
    logging.info('Sample output: \n%s', rendered_table)
def test_repr():
    """repr() of a wrapped table equals str() of etl.look on the raw rows."""
    rows = (('foo', 'bar'), ('a', 1), ('b', 2), ('c', 2))
    expected = str(etl.look(rows))
    observed = repr(etl.wrap(rows))
    eq_(expected, observed)
def format_data_sample(stream):
    """Return a table representation of a sample of the streamed data."""
    sample_dicts = [dict(zip(stream.headers, values)) for values in stream.sample]
    return repr(look(fromdicts(sample_dicts), limit=None))
def print_all_to_screen(data, offset, lines, style):
    """Print a representation for each table currently stored."""
    for name in data.registry:
        tbl = data.get(name)
        # Apply the row offset only when one was requested.
        if offset > 0:
            tbl = tbl.rowslice(offset, offset + lines)
        rendered = petl.look(tbl, limit=lines, style=style)
        unsync.secho('== {} =='.format(name), fg='green')
        unsync.echo(rendered)
def writeToDB(self):
    """Load the hockey data from the CSV file, echo a short preview to the
    console as visual progress feedback, then write the rows (with petl)
    to the hockey database table."""
    rows = fromcsv(self.csvPath)
    print(look(rows, style='minimal'))
    todb(rows, connection, self.tableName)
def exercise_unicode(dbo):
    # Round-trip non-ASCII rows through table 'test_unicode' and verify.
    # NOTE: Python 2 source (print statements, u'' literals).
    # Banner: frame the connection's repr so runs are easy to separate.
    print '=' * len(repr(dbo))
    print 'EXERCISE UNICODE'
    print repr(dbo)
    print '=' * len(repr(dbo))
    print
    # Names in Armenian, German, Azerbaijani Cyrillic and Chinese exercise
    # unicode encoding on the way into and out of the database.
    expect = ((u'name', u'id'),
              (u'Արամ Խաչատրյան', 1),
              (u'Johann Strauß', 2),
              (u'Вагиф Сәмәдоғлу', 3),
              (u'章子怡', 4),
              )
    actual = fromdb(dbo, 'SELECT * FROM test_unicode')
    print 'write some data and verify...'
    todb(expect, dbo, 'test_unicode')
    ieq(expect, actual)
    print look(actual)
def _show(stream):
    """Return a table of sample data."""
    as_dicts = [dict(zip(stream.headers, values)) for values in stream.sample]
    return repr(look(fromdicts(as_dicts), limit=None))
def print(self, sql, title=None, limit=10):
    """Pretty-print a preview of a query result (or an existing table).

    A string ``sql`` is first executed against this object; an optional
    string ``title`` is printed above the preview.  Any SQLError is caught
    and printed rather than propagated.
    """
    try:
        result = execute(sql, self) if isinstance(sql, str) else sql
        if isinstance(title, str):
            print(title)
        print(etl.look(result, limit=limit))
    except SQLError as err:
        print(err)
def exercise_with_schema(dbo, db):
    """Exercise todb/appenddb against table 'test' with an explicit schema,
    verifying the table contents after each write."""
    banner = '=' * len(repr(dbo))
    print(banner)
    print('EXERCISE WITH EXPLICIT SCHEMA NAME')
    print(repr(dbo))
    print(banner)
    print()
    initial = (('foo', 'bar'), ('a', 1), ('b', 1))
    appended = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
    actual = fromdb(dbo, 'SELECT * FROM test')
    print('write some data and verify...')
    todb(initial, dbo, 'test', schema=db)
    ieq(initial, actual)
    print(look(actual))
    print('append some data and verify...')
    appenddb(initial, dbo, 'test', schema=db)
    ieq(appended, actual)
    print(look(actual))
def print_to_screen(data, offset, lines, source, column, style):
    """Print out a text version of the data contained in the source table.

    Applies an optional row offset and an optional column projection before
    rendering ``lines`` rows with petl.look in the requested style.
    """
    d = data.get(source)
    if offset > 0:
        d = d.rowslice(offset, offset + lines)
    # PEP 8 idiom: `is not None` replaces the original `not ... is None`.
    if column[0] is not None:
        d = d.cut(*column)
    a = petl.look(d, limit=lines, style=style)
    unsync.secho('== {} =='.format(source), fg='green')
    unsync.echo(a)
def test_look():
    """Default (grid) rendering of a small two-column table."""
    tbl = (("foo", "bar"), ("a", 1), ("b", 2))
    observed = repr(look(tbl))
    expected = """+-------+-------+ | 'foo' | 'bar' | +=======+=======+ | 'a' | 1 | +-------+-------+ | 'b' | 2 | +-------+-------+ """
    eq_(expected, observed)
def test_look_bool():
    """Grid rendering shows boolean values as True/False."""
    tbl = (('foo', 'bar'), ('a', True), ('b', False))
    observed = repr(look(tbl))
    expected = """+-------+-------+ | 'foo' | 'bar' | +=======+=======+ | 'a' | True | +-------+-------+ | 'b' | False | +-------+-------+ """
    eq_(expected, observed)
def test_look():
    """Check look()'s default grid rendering of a small table."""
    tbl = (('foo', 'bar'), ('a', 1), ('b', 2))
    observed = repr(look(tbl))
    expected = """+-------+-------+ | 'foo' | 'bar' | +=======+=======+ | 'a' | 1 | +-------+-------+ | 'b' | 2 | +-------+-------+ """
    eq_(expected, observed)
def test_look_bool():
    """Booleans render as True/False in the default grid style."""
    tbl = (("foo", "bar"), ("a", True), ("b", False))
    observed = repr(look(tbl))
    expected = """+-------+-------+ | 'foo' | 'bar' | +=======+=======+ | 'a' | True | +-------+-------+ | 'b' | False | +-------+-------+ """
    eq_(expected, observed)
def print_table(ctx):
    """Output a list of pipelines as table."""
    states = [dict(source.state) for source in ctx.obj['sources']]
    secho('\nNumber of pipelines = {}\n'.format(len(states)), **SUCCESS)
    # Only this fixed subset of state fields is shown, ordered by id.
    columns = [
        'id',
        'pipeline_status',
        'validation_status',
        'nb_validation_errors',
        'scraper_required',
        'resource_type',
        'extension',
    ]
    ordered = sort(cut(fromdicts(states), *columns), key='id')
    echo(look(ordered, limit=None))
def test_look_irregular_rows():
    """Rows shorter or longer than the header get padded/extra grid columns."""
    tbl = (('foo', 'bar'), ('a', ), ('b', 2, True))
    observed = repr(look(tbl))
    expected = """+-------+-------+------+ | 'foo' | 'bar' | | +=======+=======+======+ | 'a' | | | +-------+-------+------+ | 'b' | 2 | True | +-------+-------+------+ """
    eq_(expected, observed)
def test_look_irregular_rows():
    """look() pads short rows and adds columns for long rows."""
    tbl = (('foo', 'bar'), ('a',), ('b', 2, True))
    observed = repr(look(tbl))
    expected = """+-------+-------+------+ | 'foo' | 'bar' | | +=======+=======+======+ | 'a' | | | +-------+-------+------+ | 'b' | 2 | True | +-------+-------+------+ """
    eq_(expected, observed)
def _test_unicode(dbo):
    """Round-trip non-ASCII rows through table 'test_unicode' and verify."""
    rows = ((u'name', u'id'),
            (u'Արամ Խաչատրյան', 1),
            (u'Johann Strauß', 2),
            (u'Вагиф Сәмәдоғлу', 3),
            (u'章子怡', 4),
            )
    fetched = etl.fromdb(dbo, 'SELECT * FROM test_unicode')
    print('write some data and verify...')
    etl.todb(rows, dbo, 'test_unicode')
    ieq(rows, fetched)
    print(etl.look(fetched))
def test_look_irregular_rows():
    """Irregular row lengths are handled by padding/extending the grid."""
    tbl = (("foo", "bar"), ("a",), ("b", 2, True))
    observed = repr(look(tbl))
    expected = """+-------+-------+------+ | 'foo' | 'bar' | | +=======+=======+======+ | 'a' | | | +-------+-------+------+ | 'b' | 2 | True | +-------+-------+------+ """
    eq_(expected, observed)
def test_look_irregular_rows():
    """Test the look function on a table whose row lengths vary."""
    tbl = (('foo', 'bar'), ('a',), ('b', 2, True))
    observed = repr(look(tbl))
    expected = """+-------+-------+------+ | 'foo' | 'bar' | | +=======+=======+======+ | 'a' | | | +-------+-------+------+ | 'b' | 2 | True | +-------+-------+------+ """
    assertequal(expected, observed)
def test_look():
    """Test the look function's default grid output."""
    tbl = (('foo', 'bar'), ('a', 1), ('b', 2))
    observed = repr(look(tbl))
    expected = """+-------+-------+ | 'foo' | 'bar' | +=======+=======+ | 'a' | 1 | +-------+-------+ | 'b' | 2 | +-------+-------+ """
    assertequal(expected, observed)
def test_look_irregular_rows():
    """Test the look function when rows are shorter/longer than the header."""
    tbl = (('foo', 'bar'), ('a', ), ('b', 2, True))
    observed = repr(look(tbl))
    expected = """+-------+-------+------+ | 'foo' | 'bar' | | +=======+=======+======+ | 'a' | | | +-------+-------+------+ | 'b' | 2 | True | +-------+-------+------+ """
    assertequal(expected, observed)
def test_petl(self):
    """Load an example Metapack package and print its petl view."""
    from petl import look
    m = MetapackUrl(test_data(
        'packages/example.com/example.com-full-2017-us/metadata.csv'),
        downloader=downloader)
    doc = MetapackDoc(m)
    r = doc.resource('simple-example')
    # Return value is discarded — presumably called for the side effect of
    # materialising/caching the resource target; TODO confirm.
    r.resolved_url.get_resource().get_target()
    p = r.petl()
    print(look(p))
def process_rows(resource_):
    """Yield row_processor(row) for every row of the resource.

    Closure over the enclosing scope: row_processor, parameters,
    pass_row_index, verbose, sample_rows, resource_index, LOG_SAMPLE_SIZE.
    """
    for row_index, row in enumerate(resource_):
        if pass_row_index:
            # NOTE: mutates the shared `parameters` dict in place so the
            # processor receives the current row index as a keyword.
            parameters.update(row_index=row_index)
        new_row = row_processor(row, **parameters)
        yield new_row
        # Keep the first LOG_SAMPLE_SIZE processed rows for the log below.
        if verbose and row_index < LOG_SAMPLE_SIZE:
            sample_rows.append(new_row)
    if verbose:
        # Emitted once, after the whole resource has been consumed.
        table = look(fromdicts(sample_rows), limit=LOG_SAMPLE_SIZE)
        message = 'Output of processor %s for resource %s is...\n%s'
        args = row_processor.__name__, resource_index, table
        logging.info(message, *args)
def _test_dbo(write_dbo, read_dbo=None):
    """Exercise todb/appenddb/cut/head round-trips against table 'test'.

    Writes go through ``write_dbo``; reads use ``read_dbo`` (defaults to the
    same connection).  The table is assumed to exist and start empty.
    """
    if read_dbo is None:
        read_dbo = write_dbo
    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'), ('a', 1), ('b', 2))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 2), ('a', 1), ('b', 2))
    # fromdb is lazy, so `actual` re-queries the table each time it is
    # iterated and can be re-checked after every write below.
    actual = etl.fromdb(read_dbo, 'SELECT * FROM test')
    debug('verify empty to start with...')
    debug(etl.look(actual))
    ieq(expect_empty, actual)
    debug('write some data and verify...')
    etl.todb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)
    debug('append some data and verify...')
    etl.appenddb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect_appended, actual)
    debug('overwrite and verify...')
    etl.todb(expect, write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)
    # Column order in the source table should not matter for todb/appenddb.
    debug('cut, overwrite and verify')
    etl.todb(etl.cut(expect, 'bar', 'foo'), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect, actual)
    debug('cut, append and verify')
    etl.appenddb(etl.cut(expect, 'bar', 'foo'), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(expect_appended, actual)
    debug('try a single row')
    etl.todb(etl.head(expect, 1), write_dbo, 'test')
    debug(etl.look(actual))
    ieq(etl.head(expect, 1), actual)
def exercise(dbo):
    # Exercise todb's create/drop behaviour against table 'testx'.
    # NOTE: Python 2 source (print statements).
    print '=' * len(repr(dbo))
    print repr(dbo)
    print '=' * len(repr(dbo))
    print
    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_extended = (('foo', 'bar', 'baz'), ('a', 1, 2.3), ('b', 1, 4.1))
    actual = fromdb(dbo, 'SELECT * FROM testx')
    print "verify table doesn't exist to start with"
    try:
        # expected to fail: testx has not been created yet
        print look(actual)
    except Exception as e:
        print 'expected exception: ' + str(e)
    else:
        # reaching here means the query succeeded, which is itself a failure
        raise Exception('expected exception not raised')
    print "verify cannot write without create"
    try:
        todb(expect, dbo, 'testx')
    except Exception as e:
        print 'expected exception: ' + str(e)
    else:
        raise Exception('expected exception not raised')
    print 'create table and verify...'
    todb(expect, dbo, 'testx', create=True)
    ieq(expect, actual)
    print look(actual)
    print 'verify cannot overwrite with new cols without recreate...'
    try:
        todb(expect_extended, dbo, 'testx')
    except Exception as e:
        print 'expected exception: ' + str(e)
    else:
        raise Exception('expected exception not raised')
    print 'verify recreate...'
    # drop=True recreates the table with the extended column set
    todb(expect_extended, dbo, 'testx', create=True, drop=True)
    ieq(expect_extended, actual)
    print look(actual)
def exercise_ss_cursor(setup_dbo, ss_dbo):
    """Run write/append/overwrite round-trips, reading via ``ss_dbo``.

    Writes go through ``setup_dbo``; all reads use ``ss_dbo``, which is
    expected to supply server-side cursors.  Table 'test' must exist and
    start empty.
    """
    print('=' * len(repr(ss_dbo)))
    print('EXERCISE WITH SERVER-SIDE CURSOR')
    print(repr(ss_dbo))
    print('=' * len(repr(ss_dbo)))
    print()
    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
    # Lazy table: re-queries through the server-side cursor on each pass.
    actual = fromdb(ss_dbo, 'SELECT * FROM test')
    print('verify empty to start with...')
    ieq(expect_empty, actual)
    print(look(actual))
    print('write some data and verify...')
    todb(expect, setup_dbo, 'test')
    ieq(expect, actual)
    print(look(actual))
    print('append some data and verify...')
    appenddb(expect, setup_dbo, 'test')
    ieq(expect_appended, actual)
    print(look(actual))
    print('overwrite and verify...')
    todb(expect, setup_dbo, 'test')
    ieq(expect, actual)
    print(look(actual))
    # Column order in the source should not matter for writes.
    print('cut, overwrite and verify')
    todb(cut(expect, 'bar', 'foo'), setup_dbo, 'test')
    ieq(expect, actual)
    print(look(actual))
    print('cut, append and verify')
    appenddb(cut(expect, 'bar', 'foo'), setup_dbo, 'test')
    ieq(expect_appended, actual)
    print(look(actual))
def exercise(dbo):
    """Write/append/overwrite/cut round-trips against table 'test'.

    The table must exist and start empty; contents are verified after each
    step with ieq and previewed with look.
    """
    print('=' * len(repr(dbo)))
    print(repr(dbo))
    print('=' * len(repr(dbo)))
    print(
    )
    expect_empty = (('foo', 'bar'),)
    expect = (('foo', 'bar'), ('a', 1), ('b', 1))
    expect_appended = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
    # Lazy table: re-queries the database each time it is iterated.
    actual = fromdb(dbo, 'SELECT * FROM test')
    print('verify empty to start with...')
    ieq(expect_empty, actual)
    print(look(actual))
    print('write some data and verify...')
    todb(expect, dbo, 'test')
    ieq(expect, actual)
    print(look(actual))
    print('append some data and verify...')
    appenddb(expect, dbo, 'test')
    ieq(expect_appended, actual)
    print(look(actual))
    print('overwrite and verify...')
    todb(expect, dbo, 'test')
    ieq(expect, actual)
    print(look(actual))
    # Column order in the source should not matter for writes.
    print('cut, overwrite and verify')
    todb(cut(expect, 'bar', 'foo'), dbo, 'test')
    ieq(expect, actual)
    print(look(actual))
    print('cut, append and verify')
    appenddb(cut(expect, 'bar', 'foo'), dbo, 'test')
    ieq(expect_appended, actual)
    print(look(actual))
# Interactive examples: facet lookups by key, then petl's rename().
fc['b']
fc['b']['foo']
fc['b']['bar']
fc['b']['baz']
fc['c']


# rename
table1 = [['sex', 'age'],
          ['m', 12],
          ['f', 34],
          ['-', 56]]

from petl import look, rename
look(table1)
# rename a single field
table2 = rename(table1, 'sex', 'gender')
look(table2)
# rename multiple fields by passing a dictionary as the second argument
table3 = rename(table1, {'sex': 'gender', 'age': 'age_years'})
look(table3)
# the returned table object can also be used to modify the field mapping using the suffix notation
table4 = rename(table1)
table4['sex'] = 'gender'
table4['age'] = 'age_years'
look(table4)


# cut
def look(self):
    """Print a petl preview of this object's table."""
    preview = etl.look(self.table)
    print(preview)
def log_sample_table(stream):
    """Record a tabular representation of the stream sample to the log."""
    keyed = [dict(zip(stream.headers, values)) for values in stream.sample]
    rendered = look(fromdicts(keyed), limit=len(stream.sample))
    info('Data sample =\n%s', rendered)
    return ' '.join(stnam_list)


def standardize_name(name):
    """Standardize a street name: strip surrounding whitespace, remove all
    punctuation, pass the tokens through the StandardName helper, and
    re-join the result with single spaces."""
    tmp = name.strip()
    # Name standardization:
    tmp_list = re.sub('[' + string.punctuation + ']', '', tmp).split()
    std = StandardName(tmp_list, False).output
    std_name = ' '.join(std)
    return std_name


# Read centerline segments, standardising ST_NAME on the way through,
# and persist them to CSV.
centerline_stmt = '''select trim(PRE_DIR) AS PRE_DIR,trim(ST_NAME) AS ST_NAME,trim(ST_TYPE) AS ST_TYPE,trim(SUF_DIR) AS SUF_DIR, L_F_ADD,L_T_ADD,R_F_ADD,R_T_ADD,ST_CODE,SEG_ID,trim(RESPONSIBL) AS RESPONSIBL from {} order by st_name, st_type, pre_dir, suf_dir, l_f_add, l_t_add, r_f_add, r_t_add, st_code, seg_id'''.format(street_centerline_table_name)
centerline_rows = etl.fromdb(dbo, centerline_stmt).convert('ST_NAME', lambda s: standardize_name(s))
print(etl.look(centerline_rows))
centerline_rows.tocsv(centerline_csv)

# Centerline_streets: distinct street-name records derived from the segments.
centerline_street_rows = centerline_rows.cut('PRE_DIR', 'ST_NAME', 'ST_TYPE') \
    .addfield('STREET_FULL', lambda a: concat_streetname(a)) \
    .addfield('POST_DIR', '') \
    .cut('STREET_FULL', 'PRE_DIR', 'ST_NAME', 'ST_TYPE', 'POST_DIR') \
    .distinct() \
    .sort(key=['ST_NAME', 'ST_TYPE', 'PRE_DIR', 'POST_DIR'])
print(etl.look(centerline_street_rows))
centerline_street_rows.tocsv(centerline_streets_csv, write_header=False)
""" Examples. """ table = [('foo', 'bar', 'baz'), ('apples', 1, 2.5), ('oranges', 3, 4.4), ('pears', 7, .1)] from petl import look from petlx.array import toarray look(table) a = toarray(table) a a = toarray(table, dtype='a4, i2, f4') a a = toarray(table, dtype={'foo': 'a4'}) a table = (('type', 'start', 'stop', 'value'), ('apple', 1, 4, 'foo'), ('apple', 3, 7, 'bar'), ('orange', 4, 9, 'baz')) from petl import look from petlx.interval import facetintervallookup look(table)
# Interactive exercise script (Python 2: print statements).
# Abort unless the user confirmed with an empty answer, 'y' or 'Y'.
if i not in ('', 'y', 'Y'):
    sys.exit(0)

prompt('setup table')
cursor = connection.cursor()
# deal with quote compatibility
cursor.execute('SET SQL_MODE=ANSI_QUOTES')
cursor.execute('DROP TABLE IF EXISTS test')
cursor.execute('CREATE TABLE test (foo TEXT, bar INT)')
cursor.close()
connection.commit()

prompt('exercise the petl functions using a connection')
from petl import look, fromdb, todb, appenddb
# t1 is lazy, so it can be re-printed after each write below.
t1 = fromdb(connection, 'SELECT * FROM test')
print look(t1)
t2 = (('foo', 'bar'), ('a', 1), ('b', 1))
t2app = (('foo', 'bar'), ('a', 1), ('b', 1), ('a', 1), ('b', 1))
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)
appenddb(t2, connection, 'test')
print look(t1)
ieq(t2app, t1)
todb(t2, connection, 'test')
print look(t1)
ieq(t2, t1)

prompt('exercise the petl functions using a cursor')
cursor = connection.cursor()
todb(t2, cursor, 'test')
# Uppercase the mapped source column names — presumably to match the
# column-name case returned by the source database; TODO confirm.
source_field_map_upper = {}
for k, v in source_field_map.items():
    source_field_map_upper[k] = v.upper()

# Read DOR CONDO rows from source
print("Reading condos...")
# TODO: get fieldnames from source_field_map
dor_condo_read_stmt = ''' select condounit, objectid, mapref from {dor_condo_table} where status in (1,3) '''.format(dor_condo_table=source_table_name)
source_dor_condo_rows = etl.fromdb(dbo, dor_condo_read_stmt).fieldmap(source_field_map_upper)
if DEV:
    # preview only in development mode
    print(etl.look(source_dor_condo_rows))

# Read DOR Parcel rows from engine db
print("Reading parcels...")
dor_parcel_read_stmt = ''' select parcel_id, street_address, address_low, address_low_suffix, address_low_frac, address_high, street_predir, street_name, street_suffix, street_postdir, street_full from {dor_parcel_table} '''.format(dor_parcel_table='dor_parcel')
engine_dor_parcel_rows = etl.fromdb(pg_db, dor_parcel_read_stmt)
if DEV:
    print(etl.look(engine_dor_parcel_rows))

# Get duplicate parcel_ids:
non_unique_parcel_id_rows = engine_dor_parcel_rows.duplicates(key='parcel_id')
# Rows whose parcel_id occurs exactly once = all rows minus the duplicates.
unique_parcel_id_rows = etl.complement(engine_dor_parcel_rows, non_unique_parcel_id_rows)
"""Test python etl library https://petl.readthedocs.io/en/stable/io.html""" import psycopg2 from petl import todb, look create_table_script = """ CREATE TABLE public.petl_test ( petl_id serial NOT NULL, description varchar NULL, external_id int4 NULL ); """ # source list of data including headers source = [['description', 'external_id'], ['teach', 2], ['learn', 3]] look(source) # debugging dest_db_table = 'petl_test' # this db table must already exist with psycopg2.connect( "host=localhost dbname=mydb user=guest password=12345") as connection: with connection.cursor() as db_cur: # truncate the table and insert the data todb(table=source, dbo=db_cur, tablename=dest_db_table, schema='public')