Example #1
0
def test_parse_with_dates():
    """Convert the GPC fixture to a table and check its dates are parsed.

    The fixture is extracted into a fresh package, transformed with
    ``transform.to_table`` and the resulting artifact must expose 23
    records whose ``transaction_date`` is a ``date`` instance.
    """
    collection = open_collection(
        'test', 's3', bucket_name='test.mapthemoney.org')
    pkg = collection.create()
    source = extract.from_file(pkg, GPC_FIXTURE)
    table = transform.to_table(source, 'table')

    # The artifact keeps the name it was created with.
    assert table.name == 'table'

    rows = list(table.records())
    row_count = len(rows)
    assert row_count == 23, row_count

    first_row = rows[0]
    assert isinstance(first_row['transaction_date'], date)
Example #2
0
def test_parse_with_dates():
    """Run the ``table_extract`` operator through a pipeline and check dates.

    After processing, the package must contain exactly one ``Table``
    artifact named ``table.json`` with 23 records, the first of which has
    a ``date`` instance under ``transaction_date``.
    """
    collection = open_collection(
        'test', 's3', bucket_name='test.mapthemoney.org')
    pkg = collection.create()
    extract.from_file(pkg, GPC_FIXTURE)

    # Single-step pipeline: one 'table' stage using the extract operator.
    table_step = {'operator': 'table_extract'}
    config = {'process': {'table': table_step}}
    runner = Pipeline(collection, 'foo', config)
    runner.process_package(pkg)

    tables = list(pkg.all(Table))
    assert len(tables) == 1, tables

    table = tables[0]
    assert table.name == 'table.json'

    rows = list(table.records())
    row_count = len(rows)
    assert row_count == 23, row_count
    assert isinstance(rows[0]['transaction_date'], date)
Example #3
0
def test_parse_with_dates():
    """Check that pipeline-driven table extraction yields parsed dates.

    The GPC fixture is loaded into a new package, a pipeline with a single
    ``table_extract`` stage is run over it, and the one resulting ``Table``
    artifact is inspected for its name, record count and date typing.
    """
    bucket = 'test.mapthemoney.org'
    collection = open_collection('test', 's3', bucket_name=bucket)
    pkg = collection.create()
    extract.from_file(pkg, GPC_FIXTURE)

    stages = {'table': {'operator': 'table_extract'}}
    Pipeline(collection, 'foo', {'process': stages}).process_package(pkg)

    produced = list(pkg.all(Table))
    assert len(produced) == 1, produced

    table = produced[0]
    assert table.name == 'table.json'

    records = list(table.records())
    total = len(records)
    assert total == 23, total

    head = records[0]
    assert isinstance(head['transaction_date'], date)
Example #4
0
def test_extract_file():
    """Extracting a local file registers one source and no tables.

    The returned source must not be ``None`` and its path must contain the
    fixture's file name.
    """
    collection = open_collection(
        'test', 's3', bucket_name='test.mapthemoney.org')
    pkg = collection.create()

    source = extract.from_file(pkg, CSV_FIXTURE)
    assert source is not None, source

    # Exactly one Source should now be attached to the package ...
    found_sources = list(pkg.all(Source))
    assert len(found_sources) == 1, found_sources

    # ... and extraction alone must not have produced any Table.
    tables = list(pkg.all(Table))
    assert not tables, tables

    assert 'barnet-2009.csv' in source.path, source
Example #5
0
def test_extract_file():
    """Extracting a local file registers one source and no artifacts.

    The returned source must not be ``None`` and its path must contain the
    fixture's file name.
    """
    bucket = 'test.mapthemoney.org'
    collection = open_collection('test', 's3', bucket_name=bucket)
    pkg = collection.create()

    source = extract.from_file(pkg, CSV_FIXTURE)
    assert source is not None, source

    source_list = list(pkg.all(Source))
    source_count = len(source_list)
    assert source_count == 1, source_list

    # Extraction alone must not have produced any Artifact.
    artifact_list = list(pkg.all(Artifact))
    assert not artifact_list, artifact_list

    assert 'barnet-2009.csv' in source.path, source
Example #6
0
from loadkit.tests.util import get_bucket, CSV_FIXTURE, CSV_URL # noqa
from loadkit import create, extract, transform


# Create a package collection using the 'file' backend, rooted at the
# given path (this example does not talk to S3):
# NOTE(review): confirm that ``create`` expands the leading '~' in the path.
collection = create('file', path='~/tmp/test')

# Create a new package within that collection:
package = collection.create()

# Load a source file from the local file system into the package:
source = extract.from_file(package, CSV_FIXTURE)
# A single pre-formatted string keeps the output identical on Python 2 and
# Python 3 (the bare ``print x, y`` statement is a SyntaxError on Python 3).
print('Source uploaded: %s' % (source,))

# or, to fetch the source from a URL instead:
# source = extract.from_url(package, CSV_URL)

# Transform the uploaded file into a well-understood
# format (an ``Artifact``):
artifact = transform.to_table(source, 'table')
print('Artifact generated: %s' % (artifact,))

# In your library: load the artifact into the table.