def update_dataset(self, source_url_hash, s3_path=None):
    """Record the current request id on a dataset's metadata row, then re-run its ETL.

    :param source_url_hash: primary key used to look up the ``MetaTable`` row.
    :param s3_path: forwarded unchanged to ``PlenarioETL.update``.
    :returns: a human-readable completion message naming the dataset and its hash.

    NOTE(review): ``self.request.id`` suggests this is a bound task object --
    confirm against the decorator, which is not visible here.
    """
    md = session.query(MetaTable).get(source_url_hash)

    # A non-empty stored list is extended in place; otherwise start a new one.
    task_ids = md.result_ids or []
    task_ids.append(self.request.id)

    record_task = (
        MetaTable.__table__.update()
        .where(MetaTable.source_url_hash == source_url_hash)
        .values(result_ids=task_ids)
    )
    # engine.begin() commits on success and rolls back if execute() raises.
    with engine.begin() as conn:
        conn.execute(record_task)

    PlenarioETL(md.as_dict()).update(s3_path=s3_path)

    return 'Finished updating {0} ({1})'.format(md.human_name, md.source_url_hash)
def update_dataset(self, source_url_hash, s3_path=None):
    """Append this request's id to the dataset's ``result_ids`` and update the dataset.

    :param source_url_hash: key of the ``MetaTable`` row to update.
    :param s3_path: passed straight through to ``PlenarioETL.update``.
    :returns: ``"Finished updating <human_name> (<source_url_hash>)"``.
    """
    dataset = session.query(MetaTable).get(source_url_hash)
    request_id = self.request.id

    # Keep the original semantics: mutate the existing list when it is
    # non-empty, otherwise build a brand-new single-element list.
    existing_ids = dataset.result_ids
    if not existing_ids:
        result_ids = [request_id]
    else:
        result_ids = existing_ids
        result_ids.append(request_id)

    meta_table = MetaTable.__table__
    statement = meta_table.update()
    statement = statement.where(MetaTable.source_url_hash == source_url_hash)
    statement = statement.values(result_ids=result_ids)

    # Persist the id list in its own transaction before the ETL starts.
    with engine.begin() as connection:
        connection.execute(statement)

    etl = PlenarioETL(dataset.as_dict())
    etl.update(s3_path=s3_path)

    return "Finished updating {0} ({1})".format(dataset.human_name, dataset.source_url_hash)
def update_dataset(source_url_hash, s3_path=None):
    """Run the ETL update for the dataset identified by ``source_url_hash``.

    :param source_url_hash: key of the ``MetaTable`` row describing the dataset.
    :param s3_path: forwarded unchanged to ``PlenarioETL.update``.
    :returns: ``'Finished updating <human_name>'``.
    """
    dataset = session.query(MetaTable).get(source_url_hash)
    PlenarioETL(dataset.as_dict()).update(s3_path=s3_path)
    return 'Finished updating %s' % dataset.human_name
def add_dataset(source_url_hash, s3_path=None, data_types=None):
    """Run the initial ETL load for the dataset identified by ``source_url_hash``.

    :param source_url_hash: key of the ``MetaTable`` row describing the dataset.
    :param s3_path: forwarded unchanged to ``PlenarioETL.add``.
    :param data_types: forwarded unchanged to the ``PlenarioETL`` constructor.
    :returns: ``'Finished adding <human_name>'``.
    """
    dataset = session.query(MetaTable).get(source_url_hash)
    loader = PlenarioETL(dataset.as_dict(), data_types=data_types)
    loader.add(s3_path=s3_path)
    return 'Finished adding %s' % dataset.human_name
from sqlalchemy import Table, MetaData
from sqlalchemy.sql.sqltypes import DATE
from plenario.database import session, app_engine
from plenario.models import MetaTable
from sqlalchemy.exc import NoSuchTableError
from plenario.utils.helpers import slugify
from plenario.utils.etl import PlenarioETL
import traceback

if __name__ == "__main__":
    # One-off script: for every dataset in MetaTable whose observed-date
    # column is reflected as a DATE, run the ETL's weather step on it.
    them = session.query(MetaTable).all()
    meta = MetaData()
    for t in them:
        try:
            # Reflect the dataset's table from the database.
            table = Table('dat_{0}'.format(t.dataset_name), meta,
                          autoload=True, autoload_with=app_engine,
                          keep_existing=True)

            # Only the column lookup is guarded. A leftover `raise e` used to
            # make the 'no col' report unreachable, so one dataset without its
            # date column aborted the whole run. AttributeErrors raised inside
            # the ETL calls below still propagate.
            try:
                date_col = getattr(table.c, slugify(t.observed_date))
            except AttributeError:
                print('no col {0}'.format(t.observed_date))
                continue

            if isinstance(date_col.type, DATE):
                etl = PlenarioETL(t.as_dict())
                etl._get_or_create_data_table()
                etl._add_weather_info()
                print('added weather for {0}'.format(t.dataset_name))
        except NoSuchTableError:
            # Report and move on to the next dataset.
            print('no table {0}'.format(t.dataset_name))
def setUpClass(cls):
    """Rebuild the test database and load the flu-shot fixture dataset.

    Assumes a test database with postgis exists at the connection string
    specified in test_settings.py. Stores a Flask-style test client on
    ``cls.app`` once the fixture data has been ingested.
    """
    # Start from a clean slate.
    drop_tables(
        [
            "census_blocks",
            "dat_flu_shot_clinic_locations",
            "dat_master",
            "meta_master",
            "meta_shape",
            "plenario_user",
        ]
    )

    # Recreate the meta, master and user tables, then ingest the census blocks.
    init_master_meta_user()
    init_census()

    # TODO: support local ingest of csv
    # Until then, pull Chicago's 2013 flu shot locations CSV from the data
    # portal -- a small dataset that is not expected to change.
    url = "https://data.cityofchicago.org/api/views/g5vx-5vqf/rows.csv?accessType=DOWNLOAD"
    url_hash = md5(url).hexdigest()

    # The fields the ETL needs.
    meta = {
        "dataset_name": u"flu_shot_clinic_locations",
        "source_url": url,
        "business_key": u"Event",
        "observed_date": u"Date",
        "latitude": u"Latitude",
        "longitude": u"Longitude",
        "location": u"Location",
        "source_url_hash": url_hash,
    }

    # Registering a dataset in meta_master normally happens in view.py.
    # Rather than mock a whole response object with form data, build the
    # row directly: the ETL fields plus the catalogue-only fields.
    d = dict(
        meta,
        human_name=u"flu_shot_clinic_locations",
        attribution=u"foo",
        description=u"bar",
        update_freq="yearly",
        contributor_name=u"Frederick Mcgillicutty",
        contributor_organization=u"StrexCorp",
        contributor_email=u"*****@*****.**",
        contributed_data_types=None,
        approved_status=True,
        is_socrata_source=False,
    )
    session.add(MetaTable(**d))
    session.commit()

    # Ingest the point dataset itself.
    PlenarioETL(meta).add()

    cls.app = create_app().test_client()
from plenario.models import MetaTable
from sqlalchemy.exc import NoSuchTableError
from plenario.utils.helpers import slugify
from plenario.utils.etl import PlenarioETL
import traceback

if __name__ == "__main__":
    # One-off script: for every dataset in MetaTable whose observed-date
    # column is reflected as a DATE, run the ETL's weather step on it.
    # NOTE(review): session, MetaData, Table, app_engine and DATE are not
    # imported in the visible lines -- presumably imported above; confirm.
    them = session.query(MetaTable).all()
    meta = MetaData()
    for t in them:
        try:
            # Reflect the dataset's table from the database.
            table = Table('dat_{0}'.format(t.dataset_name), meta,
                          autoload=True, autoload_with=app_engine,
                          keep_existing=True)

            # Only the column lookup is guarded. A leftover `raise e` used to
            # make the 'no col' report unreachable, so one dataset without its
            # date column aborted the whole run. AttributeErrors raised inside
            # the ETL calls below still propagate.
            try:
                date_col = getattr(table.c, slugify(t.observed_date))
            except AttributeError:
                print('no col {0}'.format(t.observed_date))
                continue

            if isinstance(date_col.type, DATE):
                etl = PlenarioETL(t.as_dict())
                etl._get_or_create_data_table()
                etl._add_weather_info()
                print('added weather for {0}'.format(t.dataset_name))
        except NoSuchTableError:
            # Report and move on to the next dataset.
            print('no table {0}'.format(t.dataset_name))