Example #1
def add_dataset(self, source_url_hash, s3_path=None, data_types=None):
    # Look up the dataset's metadata record by the md5 hash of its source URL.
    md = session.query(MetaTable).get(source_url_hash)
    # Track this task's id on the metadata row so the ingest can be monitored.
    if md.result_ids:
        ids = md.result_ids
        ids.append(self.request.id)
    else:
        ids = [self.request.id]
    with engine.begin() as c:
        c.execute(
            MetaTable.__table__.update()
            .where(MetaTable.source_url_hash == source_url_hash)
            .values(result_ids=ids)
        )
    # Run the ETL job that downloads and ingests the dataset.
    etl = PlenarioETL(md.as_dict(), data_types=data_types)
    etl.add(s3_path=s3_path)
    return "Finished adding {0} ({1})".format(md.human_name, md.source_url_hash)
Example #2
def add_dataset(self, source_url_hash, s3_path=None, data_types=None):
    md = session.query(MetaTable).get(source_url_hash)
    if md.result_ids:
        ids = md.result_ids
        ids.append(self.request.id)
    else:
        ids = [self.request.id]
    with engine.begin() as c:
        c.execute(
            MetaTable.__table__.update()
            .where(MetaTable.source_url_hash == source_url_hash)
            .values(result_ids=ids)
        )
    etl = PlenarioETL(md.as_dict(), data_types=data_types)
    etl.add(s3_path=s3_path)
    return 'Finished adding {0} ({1})'.format(md.human_name, md.source_url_hash)
Example #3
def add_dataset(source_url_hash, s3_path=None, data_types=None):
    # Simpler variant: look up the metadata record and run the ETL job
    # without recording any task result ids.
    md = session.query(MetaTable).get(source_url_hash)
    etl = PlenarioETL(md.as_dict(), data_types=data_types)
    etl.add(s3_path=s3_path)
    return 'Finished adding %s' % md.human_name
Example #4
    def setUpClass(cls):
        # Assume there exists a test database with postgis at the connection string specified in test_settings.py
        tables_to_drop = [
            "census_blocks",
            "dat_flu_shot_clinic_locations",
            "dat_master",
            "meta_master",
            "meta_shape",
            "plenario_user",
        ]
        drop_tables(tables_to_drop)

        # Create meta, master, user tables
        init_master_meta_user()

        # Ingest the census blocks
        init_census()

        # TODO: support local ingest of csv
        # For now, ingest Chicago's csv of 2013 flu shot locations from the data portal.
        # It's a nice little Chicago dataset that won't change.

        # Adding the dataset to meta_table normally happens in view.py.
        # Rather than mock out a whole response object with form data,
        # insert the metadata record directly (admittedly a bit janky).
        url = "https://data.cityofchicago.org/api/views/g5vx-5vqf/rows.csv?accessType=DOWNLOAD"
        url_hash = md5(url).hexdigest()

        d = {
            "dataset_name": u"flu_shot_clinic_locations",
            "human_name": u"flu_shot_clinic_locations",
            "attribution": u"foo",
            "description": u"bar",
            "source_url": url,
            "source_url_hash": url_hash,
            "update_freq": "yearly",
            "business_key": u"Event",
            "observed_date": u"Date",
            "latitude": u"Latitude",
            "longitude": u"Longitude",
            "location": u"Location",
            "contributor_name": u"Frederick Mcgillicutty",
            "contributor_organization": u"StrexCorp",
            "contributor_email": u"*****@*****.**",
            "contributed_data_types": None,
            "approved_status": True,
            "is_socrata_source": False,
        }

        # add this to meta_master
        md = MetaTable(**d)
        session.add(md)
        session.commit()

        meta = {
            "dataset_name": u"flu_shot_clinic_locations",
            "source_url": url,
            "business_key": u"Event",
            "observed_date": u"Date",
            "latitude": u"Latitude",
            "longitude": u"Longitude",
            "location": u"Location",
            "source_url_hash": url_hash,
        }
        point_etl = PlenarioETL(meta)
        point_etl.add()

        cls.app = create_app().test_client()
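For context, this setUpClass presumably lives inside a standard unittest.TestCase, decorated as a classmethod. A minimal sketch of the surrounding class is below; the class name and the test method are illustrative assumptions, not part of the example above.

import unittest

class FluShotViewTest(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        ...  # body as in Example #4 above

    def test_hypothetical_endpoint(self):
        # Illustrative only; the real Plenario API routes may differ.
        resp = self.app.get('/v1/api/datasets')
        self.assertEqual(resp.status_code, 200)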