def test_constraints():
    """Check that validate() reports every constraint violation in a table.

    The constraints mix per-field ``test`` callables (C1, C2), a per-field
    ``assertion`` (C3) and a row-level assertion that names no field (C4).
    The expected problems table has the columns
    ``name, row, field, value, error``.
    """
    rules = [
        {'name': 'C1', 'field': 'foo', 'test': int},
        {'name': 'C2', 'field': 'bar', 'test': etl.dateparser('%Y-%m-%d')},
        {'name': 'C3', 'field': 'baz', 'assertion': lambda v: v in ['Y', 'N']},
        # No 'field' key: the assertion receives the whole row.
        {'name': 'C4', 'assertion': lambda row: None not in row},
    ]

    source = (
        ('foo', 'bar', 'baz'),
        (1, '2000-01-01', 'Y'),
        ('x', '2010-10-10', 'N'),
        (2, '2000/01/01', 'Y'),
        (3, '2015-12-12', 'x'),
        (4, None, 'N'),
        ('y', '1999-99-99', 'z'),
    )

    # The 'row' values below are the 1-based positions of the data rows
    # in ``source`` (the header is not counted).
    expected = (
        ('name', 'row', 'field', 'value', 'error'),
        ('C1', 2, 'foo', 'x', 'ValueError'),
        ('C2', 3, 'bar', '2000/01/01', 'ValueError'),
        ('C3', 4, 'baz', 'x', 'AssertionError'),
        ('C2', 5, 'bar', None, 'AttributeError'),
        ('C4', 5, None, None, 'AssertionError'),
        ('C1', 6, 'foo', 'y', 'ValueError'),
        ('C2', 6, 'bar', '1999-99-99', 'ValueError'),
        ('C3', 6, 'baz', 'z', 'AssertionError'),
    )

    problems = validate(source, rules)
    debug(problems)

    # Compared twice on purpose; presumably this confirms the result can be
    # iterated more than once -- verify against the ieq helper.
    ieq(expected, problems)
    ieq(expected, problems)
def transform_to_petl(data):
    """Convert a DataFrame into a petl table with parsed ``Date`` values.

    The ``index`` and ``VALUE`` fields are renamed to ``Date`` and ``Value``.
    Each ``Date`` value is then cut to its first 10 characters and passed
    through a ``%Y-%m-%d`` date parser, as two separate conversion passes.

    :param data: the DataFrame handed to ``etl.fromdataframe``
    :return: the resulting petl table
    """
    parse_iso_date = etl.dateparser("%Y-%m-%d")

    def first_ten_chars(value):
        # Presumably strips a time portion from an ISO timestamp string,
        # leaving just YYYY-MM-DD -- confirm against the caller's data.
        return value[:10]

    table = etl.fromdataframe(data)
    table = etl.rename(table, {"index": "Date", "VALUE": "Value"})
    # Two passes, kept separate: truncate first, then parse.
    table = etl.convert(table, "Date", first_ten_chars)
    table = etl.convert(table, "Date", parse_iso_date)
    return table
def build(self):
    """Load every source CSV into the ``businesses`` partition's SQLite table.

    For each ``(name, url)`` in ``self.sources`` the file is downloaded,
    three fields are added (``businesses_id``, ``zip5``, ``zip4``), the
    header is replaced with the partition table's column names, values are
    type-converted, and the rows are appended to the partition database.

    :return: ``True`` once all sources have been processed
    """
    import petl
    import petl.fluent as petlf

    partition = self.partitions.find_or_new(table='businesses')
    parse_date = petl.dateparser('%m/%d/%Y')

    def zip5(row):
        # First five characters of ZIP, or None when ZIP is shorter than that.
        zip_code = row['ZIP']
        if len(zip_code) >= 5:
            return zip_code[0:5]
        return None

    def zip4(row):
        # Last four characters, only for 10-character values
        # (presumably the 'NNNNN-NNNN' ZIP+4 form -- confirm).
        zip_code = row['ZIP']
        if len(zip_code) == 10:
            return zip_code[-4:]
        return None

    for _name, url in self.sources.items():
        self.log("Converting: {}".format(url))
        csv_path = self.filesystem.download(url)

        table = petlf.fromcsv(csv_path)

        # Add the extra fields at fixed positions before renaming the header.
        table = table.addfield('businesses_id', None, index=0)
        table = table.addfield('zip5', zip5, index=7)
        table = table.addfield('zip4', zip4, index=8)

        # From here on, fields are addressed by the partition table's
        # column names.
        table = table.setheader([col.name for col in partition.table.columns])

        table = table.convert(('naics', 'acct_no'), int)
        table = table.convert(('created', 'started', 'expires'), parse_date)
        table = table.convert(
            ('dba', 'address', 'city', 'owner', 'desc'), str.title)
        table = table.convert('incorp_type', str.lower)

        text_columns = [col.name for col in partition.table.columns
                        if col.datatype == 'text']
        table = table.convert(text_columns, unicode)

        table.appendsqlite3(partition.database.path, partition.table.name)

    return True
import csv
import os

import petl as etl
from petl import dateparser

# Value parsers, looked up by key where a field needs converting.
parsers = {'date': dateparser('%Y-%m-%d')}


def call():
    """Return a petl table built from ``data.csv`` next to this module.

    The ``DATE`` field is parsed with the ``%Y-%m-%d`` date parser and the
    ``CPIAUCSL`` field is converted to ``float``.

    :return: the petl table with both conversions applied
    """
    input_filename = os.path.abspath(
        os.path.join(os.path.dirname(__file__), 'data.csv'))
    return (
        etl
        .fromcsv(input_filename)
        # Pass the parser function itself. A dict given to convert() is used
        # as a value-translation table, so passing the whole ``parsers`` dict
        # left the DATE strings unparsed.
        .convert('DATE', parsers['date'])
        .convert('CPIAUCSL', float)
    )


if __name__ == '__main__':
    # No destination is passed to tocsv().
    etl.tocsv(call())
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division


# validate()
############

import petl as etl

# Expected header, plus the constraints to check: two field-level tests,
# one field-level assertion, and one row-level assertion (no 'field' key).
header = ('foo', 'bar', 'baz')
constraints = [
    {'name': 'foo_int', 'field': 'foo', 'test': int},
    {'name': 'bar_date', 'field': 'bar', 'test': etl.dateparser('%Y-%m-%d')},
    {'name': 'baz_enum', 'field': 'baz', 'assertion': lambda v: v in ['Y', 'N']},
    {'name': 'not_none', 'assertion': lambda row: None not in row},
]

# Sample table to validate. Its header differs from ``header`` above
# ('bazzz' vs 'baz'), and the last two rows are one value short and one
# value long respectively.
table = (
    ('foo', 'bar', 'bazzz'),
    (1, '2000-01-01', 'Y'),
    ('x', '2010-10-10', 'N'),
    (2, '2000/01/01', 'Y'),
    (3, '2015-12-12', 'x'),
    (4, None, 'N'),
    ('y', '1999-99-99', 'z'),
    (6, '2000-01-01'),
    (7, '2001-02-02', 'N', True),
)

# Run the validation and show every reported problem.
problems = etl.validate(table, constraints=constraints, header=header)
problems.lookall()
from petl import datetimeparser isodatetime = datetimeparser('%Y-%m-%dT%H:%M:%S') isodatetime('2002-12-25T00:00:00') try: isodatetime('2002-12-25T00:00:99') except ValueError as e: print(e) # dateparser() ############## from petl import dateparser isodate = dateparser('%Y-%m-%d') isodate('2002-12-25') try: isodate('2002-02-30') except ValueError as e: print(e) # timeparser() ############## from petl import timeparser isotime = timeparser('%H:%M:%S') isotime('00:00:00') isotime('13:00:00') try:
################## from petl import datetimeparser isodatetime = datetimeparser('%Y-%m-%dT%H:%M:%S') isodatetime('2002-12-25T00:00:00') try: isodatetime('2002-12-25T00:00:99') except ValueError as e: print(e) # dateparser() ############## from petl import dateparser isodate = dateparser('%Y-%m-%d') isodate('2002-12-25') try: isodate('2002-02-30') except ValueError as e: print(e) # timeparser() ############## from petl import timeparser isotime = timeparser('%H:%M:%S') isotime('00:00:00') isotime('13:00:00') try: