def test_parse_with_dates():
    """Transforming an extracted GPC fixture yields records with typed dates."""
    index = open_collection('test', 's3', bucket_name='test.mapthemoney.org')
    package = index.create()
    source = extract.from_file(package, GPC_FIXTURE)
    table = transform.to_table(source, 'table')
    assert table.name == 'table'
    rows = list(table.records())
    # The fixture is known to contain exactly 23 rows.
    assert len(rows) == 23, len(rows)
    # Dates must be parsed into real ``date`` objects, not left as strings.
    assert isinstance(rows[0]['transaction_date'], date)
def test_parse_with_dates():
    """Running a table_extract pipeline produces one table artifact with typed dates."""
    index = open_collection('test', 's3', bucket_name='test.mapthemoney.org')
    package = index.create()
    extract.from_file(package, GPC_FIXTURE)
    config = {'process': {'table': {'operator': 'table_extract'}}}
    pipeline = Pipeline(index, 'foo', config)
    pipeline.process_package(package)
    tables = list(package.all(Table))
    # Exactly one table artifact should be generated by the pipeline run.
    assert len(tables) == 1, tables
    table = tables[0]
    assert table.name == 'table.json'
    rows = list(table.records())
    # The fixture is known to contain exactly 23 rows.
    assert len(rows) == 23, len(rows)
    # Dates must be parsed into real ``date`` objects, not left as strings.
    assert isinstance(rows[0]['transaction_date'], date)
def test_parse_with_dates():
    """Pipeline-driven table extraction emits a single artifact whose dates are parsed."""
    index = open_collection('test', 's3', bucket_name='test.mapthemoney.org')
    package = index.create()
    extract.from_file(package, GPC_FIXTURE)
    Pipeline(index, 'foo', {
        'process': {
            'table': {'operator': 'table_extract'},
        },
    }).process_package(package)
    artifacts = list(package.all(Table))
    # A single run must yield exactly one table artifact.
    assert len(artifacts) == 1, artifacts
    generated = artifacts[0]
    assert generated.name == 'table.json'
    records = list(generated.records())
    # The fixture is known to contain exactly 23 rows.
    assert len(records) == 23, len(records)
    # Dates must come back as ``date`` instances, not raw strings.
    assert isinstance(records[0]['transaction_date'], date)
def test_extract_file():
    """Extracting a local CSV registers one source and no ``Table`` artifacts."""
    index = open_collection('test', 's3', bucket_name='test.mapthemoney.org')
    package = index.create()
    source = extract.from_file(package, CSV_FIXTURE)
    assert source is not None, source
    registered = list(package.all(Source))
    # Exactly one source is recorded for the uploaded file.
    assert len(registered) == 1, registered
    tables = list(package.all(Table))
    # Extraction alone must not create any table artifacts.
    assert len(tables) == 0, tables
    # The source path should carry the original file name.
    assert 'barnet-2009.csv' in source.path, source
def test_extract_file():
    """Extracting a local CSV registers one source and no ``Artifact`` objects."""
    index = open_collection('test', 's3', bucket_name='test.mapthemoney.org')
    package = index.create()
    uploaded = extract.from_file(package, CSV_FIXTURE)
    assert uploaded is not None, uploaded
    # Exactly one source is recorded for the uploaded file.
    found_sources = list(package.all(Source))
    assert len(found_sources) == 1, found_sources
    # Extraction alone must not create any artifacts.
    found_artifacts = list(package.all(Artifact))
    assert len(found_artifacts) == 0, found_artifacts
    # The source path should carry the original file name.
    assert 'barnet-2009.csv' in uploaded.path, uploaded
"""Walk-through example: extract a local CSV into a package and transform it.

Demonstrates the basic loadkit workflow: open a collection, create a
package, load a source file, and transform it into a ``Table`` artifact.
"""
from loadkit.tests.util import get_bucket, CSV_FIXTURE, CSV_URL  # noqa
from loadkit import create, extract, transform

# Connect to a package index backed by the local file system.
# NOTE(review): the original comment claimed "an S3 bucket", but the code
# opens a 'file' collection at a local path.
collection = create('file', path='~/tmp/test')

# Create a new package within that index:
package = collection.create()

# Load a resource from the local file system.
# Python 3 requires the print() function; the original used the
# Python 2 print statement, which is a SyntaxError on Python 3.
source = extract.from_file(package, CSV_FIXTURE)
print('Source uploaded:', source)

# or:
# resource = extract.from_url(package, CSV_URL)

# Transform the uploaded file into a well-understood
# format (an ``Artifact``):
artifact = transform.to_table(source, 'table')
print('Artifact generated:', artifact)

# In your library: load the artifact into the table.