Beispiel #1
0
def test_constraints():
    """Check the problems table produced by ``validate`` for mixed constraints.

    Three field-level constraints (C1-C3) and one row-level assertion (C4)
    are applied to a small table containing a mix of valid and invalid
    values; the resulting problems table is compared with the expected one.
    """

    # C1/C2 use a ``test`` callable, C3/C4 use an ``assertion`` predicate.
    # C4 has no ``field`` key, so its assertion receives the whole row.
    rules = [
        {'name': 'C1', 'field': 'foo', 'test': int},
        {'name': 'C2', 'field': 'bar', 'test': etl.dateparser('%Y-%m-%d')},
        {'name': 'C3', 'field': 'baz', 'assertion': lambda v: v in ('Y', 'N')},
        {'name': 'C4', 'assertion': lambda row: None not in row},
    ]

    source = (
        ('foo', 'bar', 'baz'),
        (1, '2000-01-01', 'Y'),
        ('x', '2010-10-10', 'N'),
        (2, '2000/01/01', 'Y'),
        (3, '2015-12-12', 'x'),
        (4, None, 'N'),
        ('y', '1999-99-99', 'z'),
    )

    # One expected problem row per violated constraint.
    expected_problems = (
        ('name', 'row', 'field', 'value', 'error'),
        ('C1', 2, 'foo', 'x', 'ValueError'),
        ('C2', 3, 'bar', '2000/01/01', 'ValueError'),
        ('C3', 4, 'baz', 'x', 'AssertionError'),
        ('C2', 5, 'bar', None, 'AttributeError'),
        ('C4', 5, None, None, 'AssertionError'),
        ('C1', 6, 'foo', 'y', 'ValueError'),
        ('C2', 6, 'bar', '1999-99-99', 'ValueError'),
        ('C3', 6, 'baz', 'z', 'AssertionError'),
    )

    found_problems = validate(source, rules)
    debug(found_problems)

    # The comparison is run twice, as in the original test; presumably this
    # confirms the result can be iterated more than once (verify against ieq).
    for _ in range(2):
        ieq(expected_problems, found_problems)
Beispiel #2
0
def transform_to_petl(data):
    """Turn a DataFrame into a petl table whose ``Date`` column holds dates.

    The ``index`` and ``VALUE`` columns are renamed to ``Date`` and
    ``Value``; each ``Date`` value is cut down to its first ten characters
    and then parsed with the ``%Y-%m-%d`` format.

    :param data: the DataFrame handed to ``etl.fromdataframe``.
    :return: the resulting petl table.
    """
    parse_iso_date = etl.dateparser("%Y-%m-%d")

    table = etl.fromdataframe(data)
    table = etl.rename(table, {"index": "Date", "VALUE": "Value"})
    # Ten characters is the length of a YYYY-MM-DD date; anything after that
    # (presumably a time component -- confirm against the input) is dropped.
    table = etl.convert(table, "Date", lambda raw: raw[:10])
    # Kept as a second, separate pass so error handling per step is unchanged.
    table = etl.convert(table, "Date", parse_iso_date)
    return table
Beispiel #3
0
    def build(self):
        """Load every configured source CSV into the ``businesses`` partition.

        Each URL in ``self.sources`` is downloaded, reshaped to the column
        layout of the partition's table, type-converted, and appended to the
        partition's SQLite database.

        Returns:
            ``True`` once all sources have been processed.
        """
        import petl
        import petl.fluent as petlf

        partition = self.partitions.find_or_new(table='businesses')
        parse_date = petl.dateparser('%m/%d/%Y')

        # NOTE(review): the length checks below look like they expect ZIP to
        # be either 5 characters or a 10-character ZIP+4 -- confirm.
        def leading_zip5(row):
            zip_code = row['ZIP']
            if len(zip_code) >= 5:
                return zip_code[0:5]
            return None

        def trailing_zip4(row):
            zip_code = row['ZIP']
            if len(zip_code) == 10:
                return zip_code[-4:]
            return None

        for _name, source_url in self.sources.items():

            self.log("Converting: {}".format(source_url))

            local_file = self.filesystem.download(source_url)

            rows = petlf.fromcsv(local_file)

            # Insert the extra columns at fixed positions, then replace the
            # header with the destination table's column names.
            rows = rows.addfield('businesses_id', None, index=0)
            rows = rows.addfield('zip5', leading_zip5, index=7)
            rows = rows.addfield('zip4', trailing_zip4, index=8)
            rows = rows.setheader(
                [col.name for col in partition.table.columns])

            # Per-column type and case conversions.
            rows = rows.convert(('naics', 'acct_no'), int)
            rows = rows.convert(('created', 'started', 'expires'), parse_date)
            rows = rows.convert(
                ('dba', 'address', 'city', 'owner', 'desc'), str.title)
            rows = rows.convert('incorp_type', str.lower)
            rows = rows.convert(
                [col.name for col in partition.table.columns
                 if col.datatype == 'text'],
                unicode)

            rows.appendsqlite3(partition.database.path, partition.table.name)

        return True
import csv
import petl as etl
import os
from petl import dateparser

# Value parsers used when typing the CSV columns, keyed by kind of value.
parsers = {'date': dateparser('%Y-%m-%d')}


def call():
    """Load ``data.csv`` (located next to this module) as a typed petl table.

    Returns:
        The petl table read from the CSV, with the ``DATE`` column run
        through the ``%Y-%m-%d`` date parser and the ``CPIAUCSL`` column
        converted with ``float``.
    """
    input_filename = os.path.abspath(
        os.path.join(os.path.dirname(__file__), 'data.csv'))

    return (
        etl
        .fromcsv(input_filename)
        # Pass the parser callable itself. Handing ``convert`` the whole
        # ``parsers`` dict makes petl treat it as a value-translation
        # mapping, which would leave every date string unparsed.
        .convert('DATE', parsers['date'])
        .convert('CPIAUCSL', float)
    )


if __name__ == '__main__':
    # No destination is passed, so the CSV goes to petl's default target
    # (presumably stdout -- confirm against the installed petl version).
    etl.tocsv(call())
Beispiel #5
0
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function, division


# validate()
############

import petl as etl
# The header the table is expected to have, and the constraints applied to
# its rows: two ``test`` callables, one per-field assertion, and one
# assertion without a ``field`` key that receives the whole row.
header = ('foo', 'bar', 'baz')
constraints = [
    {'name': 'foo_int', 'field': 'foo', 'test': int},
    {'name': 'bar_date', 'field': 'bar', 'test': etl.dateparser('%Y-%m-%d')},
    {'name': 'baz_enum', 'field': 'baz',
     'assertion': lambda v: v in ('Y', 'N')},
    {'name': 'not_none', 'assertion': lambda row: None not in row},
]
# Table to validate. Note that its header ('bazzz') differs from ``header``
# above and that the last two rows have two and four values respectively.
table = (
    ('foo', 'bar', 'bazzz'),
    (1, '2000-01-01', 'Y'),
    ('x', '2010-10-10', 'N'),
    (2, '2000/01/01', 'Y'),
    (3, '2015-12-12', 'x'),
    (4, None, 'N'),
    ('y', '1999-99-99', 'z'),
    (6, '2000-01-01'),
    (7, '2001-02-02', 'N', True),
)
problems = etl.validate(table, constraints=constraints, header=header)
problems.lookall()


Beispiel #6
0
from petl import datetimeparser

# Build a parser for timestamps written as YYYY-MM-DDTHH:MM:SS.
isodatetime = datetimeparser('%Y-%m-%dT%H:%M:%S')
isodatetime('2002-12-25T00:00:00')
try:
    # 99 is not a valid seconds value; the error is printed, not propagated.
    isodatetime('2002-12-25T00:00:99')
except ValueError as exc:
    print(exc)

# dateparser()
##############

from petl import dateparser

# Build a parser for dates written as YYYY-MM-DD.
isodate = dateparser('%Y-%m-%d')
isodate('2002-12-25')
try:
    # February has no 30th day; the error is printed, not propagated.
    isodate('2002-02-30')
except ValueError as exc:
    print(exc)

# timeparser()
##############

from petl import timeparser

# Build a parser for times written as HH:MM:SS.
isotime = timeparser('%H:%M:%S')
isotime('00:00:00')
isotime('13:00:00')
try:
Beispiel #7
0
##################

from petl import datetimeparser
# Build a parser for timestamps written as YYYY-MM-DDTHH:MM:SS.
isodatetime = datetimeparser('%Y-%m-%dT%H:%M:%S')
isodatetime('2002-12-25T00:00:00')
try:
    # 99 is not a valid seconds value; the error is printed, not propagated.
    isodatetime('2002-12-25T00:00:99')
except ValueError as exc:
    print(exc)


# dateparser()
##############

from petl import dateparser
# Build a parser for dates written as YYYY-MM-DD.
isodate = dateparser('%Y-%m-%d')
isodate('2002-12-25')
try:
    # February has no 30th day; the error is printed, not propagated.
    isodate('2002-02-30')
except ValueError as exc:
    print(exc)


# timeparser()
##############

from petl import timeparser
# Build a parser for times written as HH:MM:SS.
isotime = timeparser('%H:%M:%S')
isotime('00:00:00')
isotime('13:00:00')
try: