예제 #1
0
def update_dataset(self, source_url_hash, s3_path=None):
    """Record the current request id on a dataset's metadata, then update it.

    The id found at ``self.request.id`` is appended to the ``result_ids``
    stored on the ``MetaTable`` row for ``source_url_hash`` and written back
    with an explicit UPDATE; afterwards the ETL update is run.

    NOTE(review): ``self`` is presumably a bound task instance exposing
    ``request.id`` -- confirm against the decorator, which is not visible here.

    :param source_url_hash: key passed to ``session.query(MetaTable).get``.
    :param s3_path: forwarded unchanged to ``PlenarioETL.update``.
    :returns: a human-readable completion message.
    """
    meta_row = session.query(MetaTable).get(source_url_hash)

    # A non-empty stored list is extended in place; otherwise a fresh list
    # is started and the row attribute itself is left untouched.
    task_ids = meta_row.result_ids or []
    task_ids.append(self.request.id)

    with engine.begin() as conn:
        stmt = MetaTable.__table__.update()
        stmt = stmt.where(MetaTable.source_url_hash == source_url_hash)
        stmt = stmt.values(result_ids=task_ids)
        conn.execute(stmt)

    loader = PlenarioETL(meta_row.as_dict())
    loader.update(s3_path=s3_path)

    message = 'Finished updating {0} ({1})'
    return message.format(meta_row.human_name, meta_row.source_url_hash)
예제 #2
0
파일: tasks.py 프로젝트: hectron/plenario
def update_dataset(self, source_url_hash, s3_path=None):
    """Append the current request id to the dataset's ``result_ids`` and run the ETL update.

    NOTE(review): ``self.request.id`` suggests this runs as a bound task;
    the decorator is outside this view -- confirm.

    :param source_url_hash: key used to fetch the ``MetaTable`` row.
    :param s3_path: passed straight through to ``PlenarioETL.update``.
    :returns: a "Finished updating ..." status string.
    """
    dataset_meta = session.query(MetaTable).get(source_url_hash)

    # Reuse the stored list when it has entries (it is mutated in place);
    # fall back to a brand-new list when nothing is stored yet.
    recorded_ids = dataset_meta.result_ids
    if not recorded_ids:
        recorded_ids = []
    recorded_ids.append(self.request.id)

    with engine.begin() as connection:
        # Persist the id list with an explicit UPDATE on the underlying table.
        update_stmt = (
            MetaTable.__table__.update()
            .where(MetaTable.source_url_hash == source_url_hash)
            .values(result_ids=recorded_ids)
        )
        connection.execute(update_stmt)

    PlenarioETL(dataset_meta.as_dict()).update(s3_path=s3_path)
    return "Finished updating {0} ({1})".format(
        dataset_meta.human_name, dataset_meta.source_url_hash
    )
예제 #3
0
def update_dataset(source_url_hash, s3_path=None):
    """Run the ETL update for the dataset registered under ``source_url_hash``.

    :param source_url_hash: key used to fetch the ``MetaTable`` row.
    :param s3_path: forwarded unchanged to ``PlenarioETL.update``.
    :returns: a "Finished updating <human_name>" status string.
    """
    dataset_meta = session.query(MetaTable).get(source_url_hash)
    PlenarioETL(dataset_meta.as_dict()).update(s3_path=s3_path)
    # The name is read only after the update has run, as before.
    return 'Finished updating %s' % dataset_meta.human_name
예제 #4
0
def add_dataset(source_url_hash, s3_path=None, data_types=None):
    """Run the initial ETL load for the dataset registered under ``source_url_hash``.

    :param source_url_hash: key used to fetch the ``MetaTable`` row.
    :param s3_path: forwarded unchanged to ``PlenarioETL.add``.
    :param data_types: forwarded unchanged to the ``PlenarioETL`` constructor.
    :returns: a "Finished adding <human_name>" status string.
    """
    dataset_meta = session.query(MetaTable).get(source_url_hash)
    loader = PlenarioETL(dataset_meta.as_dict(), data_types=data_types)
    loader.add(s3_path=s3_path)
    # The name is read only after the load has run, as before.
    return 'Finished adding %s' % dataset_meta.human_name
예제 #5
0
from sqlalchemy import Table, MetaData
from sqlalchemy.sql.sqltypes import DATE
from plenario.database import session, app_engine
from plenario.models import MetaTable
from sqlalchemy.exc import NoSuchTableError
from plenario.utils.helpers import slugify
from plenario.utils.etl import PlenarioETL
import traceback

if __name__ == "__main__":
    # Maintenance script: for every dataset registered in MetaTable, reflect
    # its ``dat_<dataset_name>`` table and, when the observed-date column has
    # type DATE, run the ETL weather step for that dataset.
    them = session.query(MetaTable).all()
    meta = MetaData()
    for t in them:
        try:
            table = Table('dat_{0}'.format(t.dataset_name), meta,
                          autoload=True, autoload_with=app_engine,
                          keep_existing=True)
            # An AttributeError raised here (e.g. the observed-date column is
            # missing from the reflected table) or by the ETL calls below is
            # deliberately left to propagate and abort the run. The previous
            # handler only re-raised it, followed by unreachable statements;
            # letting it propagate naturally keeps the original traceback.
            date_col = getattr(table.c, slugify(t.observed_date))
            # Exact type comparison is kept on purpose: subclasses of DATE
            # are not treated as matches.
            if type(date_col.type) == DATE:
                etl = PlenarioETL(t.as_dict())
                etl._get_or_create_data_table()
                etl._add_weather_info()
                print('added weather for {0}'.format(t.dataset_name))
        except NoSuchTableError:
            # Skip this dataset and move on to the next one.
            print('no table {0}'.format(t.dataset_name))
예제 #6
0
    def setUpClass(cls):
        """Rebuild the database fixtures for this test class and create the test client.

        Assumes a PostGIS-enabled test database exists at the connection
        string specified in test_settings.py.
        """
        # Remove anything left over from earlier runs.
        stale_tables = [
            "census_blocks",
            "dat_flu_shot_clinic_locations",
            "dat_master",
            "meta_master",
            "meta_shape",
            "plenario_user",
        ]
        drop_tables(stale_tables)

        # Recreate the meta, master and user tables, then ingest census blocks.
        init_master_meta_user()
        init_census()

        # TODO: support local ingest of csv
        # Until then, ingest Chicago's csv of 2013 flu shot locations from the
        # data portal -- a small dataset that is not expected to change.
        #
        # Datasets are normally registered in meta_table through view.py;
        # rather than mocking a full response object with form data, the
        # metadata row is inserted by hand below.
        url = "https://data.cityofchicago.org/api/views/g5vx-5vqf/rows.csv?accessType=DOWNLOAD"
        url_hash = md5(url).hexdigest()

        dataset_fields = {
            "dataset_name": u"flu_shot_clinic_locations",
            "human_name": u"flu_shot_clinic_locations",
            "attribution": u"foo",
            "description": u"bar",
            "source_url": url,
            "source_url_hash": url_hash,
            "update_freq": "yearly",
            "business_key": u"Event",
            "observed_date": u"Date",
            "latitude": u"Latitude",
            "longitude": u"Longitude",
            "location": u"Location",
            "contributor_name": u"Frederick Mcgillicutty",
            "contributor_organization": u"StrexCorp",
            "contributor_email": u"*****@*****.**",
            "contributed_data_types": None,
            "approved_status": True,
            "is_socrata_source": False,
        }

        # Register the dataset in meta_master.
        session.add(MetaTable(**dataset_fields))
        session.commit()

        # The ETL only receives this subset of the metadata; the values are
        # the same ones stored on the row above.
        etl_keys = (
            "dataset_name",
            "source_url",
            "business_key",
            "observed_date",
            "latitude",
            "longitude",
            "location",
            "source_url_hash",
        )
        etl_meta = {key: dataset_fields[key] for key in etl_keys}
        PlenarioETL(etl_meta).add()

        cls.app = create_app().test_client()
예제 #7
0
def update_dataset(source_url_hash, s3_path=None):
    """Update an already-registered dataset through the ETL pipeline.

    :param source_url_hash: key used to fetch the ``MetaTable`` row.
    :param s3_path: passed straight through to ``PlenarioETL.update``.
    :returns: a "Finished updating <human_name>" status string.
    """
    meta_row = session.query(MetaTable).get(source_url_hash)
    pipeline = PlenarioETL(meta_row.as_dict())
    pipeline.update(s3_path=s3_path)
    # Build the message after the update so the name is read at the same
    # point as before.
    finished = 'Finished updating %s' % meta_row.human_name
    return finished
예제 #8
0
def add_dataset(source_url_hash, s3_path=None, data_types=None):
    """Load a registered dataset for the first time through the ETL pipeline.

    :param source_url_hash: key used to fetch the ``MetaTable`` row.
    :param s3_path: passed straight through to ``PlenarioETL.add``.
    :param data_types: passed straight through to the ``PlenarioETL`` constructor.
    :returns: a "Finished adding <human_name>" status string.
    """
    meta_row = session.query(MetaTable).get(source_url_hash)
    PlenarioETL(meta_row.as_dict(), data_types=data_types).add(s3_path=s3_path)
    # Build the message after the load so the name is read at the same
    # point as before.
    finished = 'Finished adding %s' % meta_row.human_name
    return finished
예제 #9
0
from plenario.models import MetaTable
from sqlalchemy.exc import NoSuchTableError
from plenario.utils.helpers import slugify
from plenario.utils.etl import PlenarioETL
import traceback

if __name__ == "__main__":
    # Maintenance script: walk every dataset registered in MetaTable, reflect
    # its ``dat_<dataset_name>`` table and run the ETL weather step whenever
    # the observed-date column has type DATE.
    them = session.query(MetaTable).all()
    meta = MetaData()
    for t in them:
        try:
            table = Table('dat_{0}'.format(t.dataset_name),
                          meta,
                          autoload=True,
                          autoload_with=app_engine,
                          keep_existing=True)
            # No AttributeError handler: the old one re-raised immediately
            # and the statements after its ``raise`` could never run. A
            # missing observed-date column (or an AttributeError from the ETL
            # calls) therefore still aborts the script, now with its original
            # traceback intact.
            date_col = getattr(table.c, slugify(t.observed_date))
            # Exact type comparison is intentional; subclasses of DATE are
            # not treated as matches.
            if type(date_col.type) == DATE:
                etl = PlenarioETL(t.as_dict())
                etl._get_or_create_data_table()
                etl._add_weather_info()
                print('added weather for {0}'.format(t.dataset_name))
        except NoSuchTableError:
            # Skip this dataset and continue with the next one.
            print('no table {0}'.format(t.dataset_name))