Python PlenarioETL Examples

Programming Language: Python

Namespace/Package Name: plenario.utils.etl

Class/Type: PlenarioETL

Examples at hotexamples.com: 9

Python PlenarioETL - 9 examples found. These are the top-rated real-world Python examples of plenario.utils.etl.PlenarioETL extracted from open-source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

PlenarioETL(4)

add(2)

update(2)

_add_weather_info(1)

_get_or_create_data_table(1)

Example #1

Show file

def update_dataset(self, source_url_hash, s3_path=None):
    """Log this run on the dataset's metadata row, then refresh the dataset.

    The ``MetaTable`` row is fetched by ``source_url_hash``; the id of the
    current request (``self.request.id``) is appended to the row's
    ``result_ids`` list, or becomes a new one-element list when the row has
    none. The list is written back with an explicit UPDATE, after which
    ``PlenarioETL.update`` is run for the row.

    NOTE(review): ``self.request`` suggests this is a bound task; the
    decorator is not visible here -- confirm.

    :param source_url_hash: key handed to ``session.query(MetaTable).get``.
    :param s3_path: passed through unchanged to ``PlenarioETL.update``.
    :returns: a completion message naming the dataset and its hash.
    """
    meta_row = session.query(MetaTable).get(source_url_hash)

    # A non-empty existing list is extended in place; otherwise start fresh.
    result_ids = meta_row.result_ids
    if not result_ids:
        result_ids = [self.request.id]
    else:
        result_ids.append(self.request.id)

    with engine.begin() as conn:
        stmt = MetaTable.__table__.update()
        stmt = stmt.where(MetaTable.source_url_hash == source_url_hash)
        stmt = stmt.values(result_ids=result_ids)
        conn.execute(stmt)

    PlenarioETL(meta_row.as_dict()).update(s3_path=s3_path)
    return 'Finished updating {0} ({1})'.format(
        meta_row.human_name, meta_row.source_url_hash)

Example #2

Show file

File: tasks.py Project: hectron/plenario

def update_dataset(self, source_url_hash, s3_path=None):
    """Append this request's id to the dataset's result ids and re-run its ETL.

    :param source_url_hash: key used to fetch the ``MetaTable`` row.
    :param s3_path: forwarded as-is to ``PlenarioETL.update``.
    :returns: a message containing the dataset's human name and hash.
    """
    record = session.query(MetaTable).get(source_url_hash)

    # A truthy list is reused (and therefore mutated in place); a missing or
    # empty one is replaced by a brand-new list.
    task_ids = record.result_ids or []
    task_ids.append(self.request.id)

    with engine.begin() as connection:
        meta_table = MetaTable.__table__
        same_hash = MetaTable.source_url_hash == source_url_hash
        connection.execute(
            meta_table.update().where(same_hash).values(result_ids=task_ids)
        )

    updater = PlenarioETL(record.as_dict())
    updater.update(s3_path=s3_path)
    message = "Finished updating {0} ({1})"
    return message.format(record.human_name, record.source_url_hash)

Example #3

Show file

def update_dataset(source_url_hash, s3_path=None):
    """Run ``PlenarioETL.update`` for the dataset keyed by ``source_url_hash``.

    :param source_url_hash: key handed to ``session.query(MetaTable).get``.
    :param s3_path: passed through unchanged to ``PlenarioETL.update``.
    :returns: a completion message containing the dataset's human name.
    """
    meta_row = session.query(MetaTable).get(source_url_hash)
    PlenarioETL(meta_row.as_dict()).update(s3_path=s3_path)
    return 'Finished updating %s' % meta_row.human_name

Example #4

Show file

def add_dataset(source_url_hash, s3_path=None, data_types=None):
    """Run ``PlenarioETL.add`` for the dataset keyed by ``source_url_hash``.

    :param source_url_hash: key handed to ``session.query(MetaTable).get``.
    :param s3_path: passed through unchanged to ``PlenarioETL.add``.
    :param data_types: passed through unchanged to the ``PlenarioETL``
        constructor.
    :returns: a completion message containing the dataset's human name.
    """
    meta_row = session.query(MetaTable).get(source_url_hash)
    PlenarioETL(meta_row.as_dict(), data_types=data_types).add(s3_path=s3_path)
    return 'Finished adding %s' % meta_row.human_name

Example #5

Show file

File: coltypes.py Project: EmilyWebber/plenario

from sqlalchemy import Table, MetaData
from sqlalchemy.sql.sqltypes import DATE
from plenario.database import session, app_engine
from plenario.models import MetaTable
from sqlalchemy.exc import NoSuchTableError
from plenario.utils.helpers import slugify
from plenario.utils.etl import PlenarioETL
import traceback

if __name__ == "__main__":
    # For every dataset registered in MetaTable, reflect its ``dat_<name>``
    # table and, when the observed-date column was reflected as DATE, run the
    # ETL's weather step for that dataset.
    them = session.query(MetaTable).all()
    meta = MetaData()
    for t in them:
        try:
            table = Table('dat_{0}'.format(t.dataset_name), meta,
                autoload=True, autoload_with=app_engine, keep_existing=True)
            try:
                date_col = getattr(table.c, slugify(t.observed_date))
            except AttributeError:
                # Name the offending column before propagating. The message
                # used to sit after ``raise e`` and could never be printed.
                # A bare ``raise`` keeps the original traceback and avoids
                # rebinding a name that is also used for the ETL instance.
                print('no col {0}'.format(t.observed_date))
                raise
            # Only the column lookup is guarded above, so an AttributeError
            # raised inside the ETL calls is not reported as a missing column.
            if type(date_col.type) == DATE:
                etl = PlenarioETL(t.as_dict())
                etl._get_or_create_data_table()
                etl._add_weather_info()
                print('added weather for {0}'.format(t.dataset_name))
        except NoSuchTableError:
            # Best effort: report the missing table and move on.
            print('no table {0}'.format(t.dataset_name))

Example #6

Show file

File: shape_tests.py Project: hectron/plenario

    def setUpClass(cls):
        """Reset the test database, ingest one point dataset, build a client.

        Steps, in order: drop the known tables, recreate the meta/master/user
        tables, ingest the census blocks, register a ``MetaTable`` row for
        Chicago's 2013 flu-shot-locations CSV, ingest that CSV with
        ``PlenarioETL.add``, and store a test client on ``cls.app``.
        """
        # Assumes a test database with postgis exists at the connection string
        # given in test_settings.py.
        drop_tables([
            "census_blocks",
            "dat_flu_shot_clinic_locations",
            "dat_master",
            "meta_master",
            "meta_shape",
            "plenario_user",
        ])

        # Meta, master and user tables first, then the census blocks.
        init_master_meta_user()
        init_census()

        # TODO: support local ingest of csv
        # Until then, pull Chicago's 2013 flu shot locations from the data
        # portal: a small Chicago dataset that is not expected to change.

        # Datasets are normally added to meta_table in view.py. Rather than
        # mocking a whole response object with form data, the row is built by
        # hand here.
        source_url = "https://data.cityofchicago.org/api/views/g5vx-5vqf/rows.csv?accessType=DOWNLOAD"
        source_url_hash = md5(source_url).hexdigest()

        meta_row = {
            "dataset_name": u"flu_shot_clinic_locations",
            "human_name": u"flu_shot_clinic_locations",
            "attribution": u"foo",
            "description": u"bar",
            "source_url": source_url,
            "source_url_hash": source_url_hash,
            "update_freq": "yearly",
            "business_key": u"Event",
            "observed_date": u"Date",
            "latitude": u"Latitude",
            "longitude": u"Longitude",
            "location": u"Location",
            "contributor_name": u"Frederick Mcgillicutty",
            "contributor_organization": u"StrexCorp",
            "contributor_email": u"*****@*****.**",
            "contributed_data_types": None,
            "approved_status": True,
            "is_socrata_source": False,
        }

        # Register the dataset in meta_master.
        session.add(MetaTable(**meta_row))
        session.commit()

        # The ETL gets only this subset of the row's fields.
        etl_fields = (
            "dataset_name",
            "source_url",
            "business_key",
            "observed_date",
            "latitude",
            "longitude",
            "location",
            "source_url_hash",
        )
        etl_meta = dict((field, meta_row[field]) for field in etl_fields)
        PlenarioETL(etl_meta).add()

        cls.app = create_app().test_client()

Example #7

Show file

File: tasks.py Project: jqnatividad/plenario

def update_dataset(source_url_hash, s3_path=None):
    """Refresh the dataset whose ``MetaTable`` key is ``source_url_hash``.

    :param source_url_hash: key used to fetch the ``MetaTable`` row.
    :param s3_path: forwarded as-is to ``PlenarioETL.update``.
    :returns: a message containing the dataset's human name.
    """
    record = session.query(MetaTable).get(source_url_hash)
    record_fields = record.as_dict()
    updater = PlenarioETL(record_fields)
    updater.update(s3_path=s3_path)
    message = 'Finished updating %s' % record.human_name
    return message

Example #8

Show file

File: tasks.py Project: jqnatividad/plenario

def add_dataset(source_url_hash, s3_path=None, data_types=None):
    """Ingest the dataset whose ``MetaTable`` key is ``source_url_hash``.

    :param source_url_hash: key used to fetch the ``MetaTable`` row.
    :param s3_path: forwarded as-is to ``PlenarioETL.add``.
    :param data_types: forwarded as-is to the ``PlenarioETL`` constructor.
    :returns: a message containing the dataset's human name.
    """
    record = session.query(MetaTable).get(source_url_hash)
    record_fields = record.as_dict()
    loader = PlenarioETL(record_fields, data_types=data_types)
    loader.add(s3_path=s3_path)
    message = 'Finished adding %s' % record.human_name
    return message

Example #9

Show file

File: coltypes.py Project: EzanLTD/plenario

from plenario.models import MetaTable
from sqlalchemy.exc import NoSuchTableError
from plenario.utils.helpers import slugify
from plenario.utils.etl import PlenarioETL
import traceback

if __name__ == "__main__":
    # Walk every dataset registered in MetaTable, reflect its ``dat_<name>``
    # table, and run the ETL's weather step when the observed-date column was
    # reflected as DATE.
    them = session.query(MetaTable).all()
    meta = MetaData()
    for t in them:
        try:
            table = Table('dat_{0}'.format(t.dataset_name),
                          meta,
                          autoload=True,
                          autoload_with=app_engine,
                          keep_existing=True)
            try:
                date_col = getattr(table.c, slugify(t.observed_date))
            except AttributeError:
                # Report which column is missing, then propagate. The message
                # was previously placed after ``raise e`` and was unreachable.
                # A bare ``raise`` preserves the original traceback and does
                # not rebind a name also used for the ETL instance.
                print('no col {0}'.format(t.observed_date))
                raise
            # The guard above covers only the column lookup, so an
            # AttributeError from the ETL calls is not misreported as a
            # missing column.
            if type(date_col.type) == DATE:
                etl = PlenarioETL(t.as_dict())
                etl._get_or_create_data_table()
                etl._add_weather_info()
                print('added weather for {0}'.format(t.dataset_name))
        except NoSuchTableError:
            # Best effort: report the missing table and continue.
            print('no table {0}'.format(t.dataset_name))