コード例 #1
0
def add_unique_hash(table_name):
    """
    Rebuild a table without duplicate rows and with an md5 ``hash`` column.

    The rows are copied with ``SELECT DISTINCT`` into a scratch table named
    ``temp`` together with an md5 of the whole row cast to text. The original
    table is then dropped, ``temp`` is renamed into its place, and ``hash``
    becomes the primary key.

    :param table_name: Name of table to add hash to.
    :raises PlenarioETLError: if executing the SQL raises any exception.
    """
    logger.info('Begin (table_name: {})'.format(table_name))

    sql_template = '''
    DROP TABLE IF EXISTS temp;
    CREATE TABLE temp AS
      SELECT DISTINCT *,
             md5(CAST(("{table_name}".*)AS text))
                AS hash FROM "{table_name}";
    DROP TABLE "{table_name}";
    ALTER TABLE temp RENAME TO "{table_name}";
    ALTER TABLE "{table_name}" ADD PRIMARY KEY (hash);
    '''
    add_hash = sql_template.format(table_name=table_name)

    try:
        postgres_engine.execute(add_hash)
    except Exception as err:
        # Surface the failing SQL alongside the original error.
        failure = '{!r}\n Failed to deduplicate with {}'.format(err, add_hash)
        raise PlenarioETLError(failure)

    logger.info('End.')
コード例 #2
0
    def tearDownClass(cls):
        """Remove the roadworks table and its meta_master row.

        Each statement is attempted on its own so that a failure to drop the
        table does not prevent the metadata row from being deleted.
        """
        cleanup_statements = (
            "drop table roadworks",
            "delete from meta_master where dataset_name = 'roadworks'",
        )
        for statement in cleanup_statements:
            try:
                postgres_engine.execute(statement)
            except ProgrammingError:
                # Best-effort cleanup: a ProgrammingError here is ignored,
                # as it was before, but no longer skips the next statement.
                pass
コード例 #3
0
    def tearDownClass(cls):
        """Remove the roadworks table and its meta_master row.

        Each statement is attempted on its own so that a failure to drop the
        table does not prevent the metadata row from being deleted.
        """
        cleanup_statements = (
            "drop table roadworks",
            "delete from meta_master where dataset_name = 'roadworks'",
        )
        for statement in cleanup_statements:
            try:
                postgres_engine.execute(statement)
            except ProgrammingError:
                # Best-effort cleanup: a ProgrammingError here is ignored,
                # as it was before, but no longer skips the next statement.
                pass
コード例 #4
0
ファイル: test_point.py プロジェクト: vforgione/plenario
    def setUp(self):
        """Reset metadata rows and seed the database before each test.

        Registers three MetaTable records (radio events, dog park permits,
        opera performances) and creates a ``dog_park_permits`` table that
        already contains one row.
        """
        postgres_session.rollback()

        # Ensure we have metadata loaded into the database
        # to mimic the behavior of metadata ingestion preceding file ingestion.
        for stale_name in ('dog_park_permits',
                           'community_radio_events',
                           'public_opera_performances'):
            drop_meta(stale_name)

        # Make new MetaTable objects
        self.unloaded_meta = MetaTable(
            url='nightvale.gov/events.csv',
            human_name='Community Radio Events',
            business_key='Event Name',
            observed_date='Date',
            latitude='lat',
            longitude='lon',
            approved_status=True,
        )
        self.existing_meta = MetaTable(
            url='nightvale.gov/dogpark.csv',
            human_name='Dog Park Permits',
            business_key='Hooded Figure ID',
            observed_date='Date',
            latitude='lat',
            longitude='lon',
            approved_status=False,
        )
        self.opera_meta = MetaTable(
            url='nightvale.gov/opera.csv',
            human_name='Public Opera Performances',
            business_key='Event Name',
            observed_date='Date',
            location='Location',
            approved_status=False,
        )

        new_metas = [self.existing_meta, self.opera_meta, self.unloaded_meta]
        postgres_session.add_all(new_metas)
        postgres_session.commit()

        # Also, let's have one table pre-loaded...
        permit_columns = [
            Column('hooded_figure_id', Integer),
            Column('point_date', TIMESTAMP, nullable=False),
            Column('date', Date, nullable=True),
            Column('lat', Float, nullable=False),
            Column('lon', Float, nullable=False),
            Column('hash', String(32), primary_key=True),
            Column('geom', Geometry('POINT', srid=4326), nullable=True),
        ]
        self.existing_table = sa.Table('dog_park_permits', MetaData(),
                                       *permit_columns)
        drop_if_exists(self.existing_table.name)
        self.existing_table.create(bind=postgres_engine)

        # ... with some pre-existing data
        seed_row = {
            'hooded_figure_id': 1,
            'point_date': date(2015, 1, 2),
            'lon': -87.6495076896,
            'lat': 41.7915865543,
            'geom': None,
            'hash': 'addde9be7f59e95fc08e54e29b2a947f',
        }
        seed_insert = self.existing_table.insert().values(**seed_row)
        postgres_engine.execute(seed_insert)
コード例 #5
0
def delete_absent_hashes(staging_name, existing_name):
    """
    Delete rows from the existing table whose hash is not in the staging table.

    :param staging_name: name of the staging table to compare against
    :param existing_name: name of the table rows are deleted from
    :raises PlenarioETLError: if executing the DELETE raises any exception.
    """
    logger.info('Begin.')
    logger.info('staging_name: {}'.format(staging_name))
    logger.info('existing_name: {}'.format(existing_name))

    delete_template = """DELETE FROM "{existing}"
                  WHERE hash IN
                     (SELECT hash FROM "{existing}"
                        EXCEPT
                      SELECT hash FROM  "{staging}");"""
    del_ = delete_template.format(existing=existing_name,
                                  staging=staging_name)

    try:
        postgres_engine.execute(del_)
    except Exception as err:
        # Surface the failing SQL alongside the original error.
        failure = '{!r}\n Failed to execute{}'.format(err, del_)
        raise PlenarioETLError(failure)

    logger.info('End.')
コード例 #6
0
ファイル: test_point.py プロジェクト: vforgione/plenario
    def test_location_col_update(self):
        """Create the opera table and insert one row with a location but no geom."""
        opera_name = self.opera_meta.dataset_name
        drop_if_exists(opera_name)

        opera_columns = [
            Column('event_name', String, primary_key=True),
            Column('date', Date, nullable=True),
            Column('location', String, nullable=False),
            Column('geom', Geometry('POINT', srid=4326), nullable=True),
            Column('point_date', TIMESTAMP, nullable=False),
        ]
        self.opera_table = sa.Table(opera_name, MetaData(), *opera_columns)

        # NOTE(review): this drops the dog park table rather than the opera
        # table (already dropped above) -- confirm this is intentional.
        drop_if_exists(self.existing_table.name)
        self.opera_table.create(bind=postgres_engine)

        opera_row = {
            'event_name': 'quux',
            'date': None,
            'point_date': date(2015, 1, 2),
            'location': '(-87.6495076896,41.7915865543)',
            'geom': None,
        }
        postgres_engine.execute(self.opera_table.insert().values(**opera_row))
コード例 #7
0
def knn(lng, lat, k, network, sensors):
    """Execute a spatial query to select k nearest neighbors given some point.

    All caller-supplied values are sent to the database as bound parameters
    instead of being formatted into the SQL text, so quotes or SQL fragments
    in ``network`` or ``sensors`` cannot alter the statement.

    :param lng: (float) longitude
    :param lat: (float) latitude
    :param k: (int) number of results to return
    :param network: (str) value compared against the sensor_network column
    :param sensors: (iterable of str) sensor names; only nodes whose
        aggregated sensors overlap these names are returned
    :returns: (list) of nearest k neighbors
    """
    # Convert lng-lat to geojson point
    point = json.dumps({
        'type': 'Point',
        'coordinates': [lng, lat]
    })

    params = {
        'geojson': point,
        'network': network,
        'k': k,
        # How many to limit the initial bounding box query to
        'k_10': k * 10,
        # Postgres array literal, cast to VARCHAR[] inside the query
        'sensors': '{' + ','.join(sensors) + '}',
    }

    # Based off snippet provided on pg 253 of PostGIS In Action (2nd Edition)
    # NOTE(review): the %(name)s placeholders assume the engine's DBAPI uses
    # the pyformat paramstyle (as psycopg2 does) -- confirm for this engine.
    query = """
    WITH bbox_results AS (
      SELECT
        node,
        location,
        array_agg(sensor) AS sensors,
        (SELECT ST_SetSRID(ST_GeomFromGeoJSON(%(geojson)s), 4326)) AS ref_geom
      FROM
        sensor__node_metadata JOIN sensor__sensor_to_node
        ON id=node
      WHERE
          sensor_network = %(network)s
      GROUP BY
        node,
        location
      ORDER BY
        location <#> (SELECT ST_SetSRID(ST_GeomFromGeoJSON (%(geojson)s), 4326))
      LIMIT %(k_10)s
    )

    SELECT
      node,
      RANK() OVER(ORDER BY ST_Distance(location, ref_geom)) AS act_r
    FROM bbox_results
    WHERE
      sensors && %(sensors)s::VARCHAR[]
    ORDER BY act_r
    LIMIT %(k)s;
    """

    return postgres_engine.execute(query, params).fetchall()
コード例 #8
0
ファイル: shape.py プロジェクト: UrbanCCD-UChicago/plenario
    def add(self):
        """Ingest the source shapefile and install it as ``self.table_name``.

        The zipped shapefile is imported into a staging table, which is then
        deduplicated and hashed. Any existing table named ``self.table_name``
        is dropped, the staging table is renamed into its place, and the
        metadata record is updated and committed.
        """
        staging_name = 'staging_{}'.format(self.table_name)

        with ETLFile(self.source_path, self.source_url,
                     interpret_as='bytes') as file_helper:
            # ZipFile does not close a file object that was handed to it, so
            # the handle is opened in its own ``with`` to avoid leaking it.
            with open(file_helper.handle.name, "rb") as handle:
                with zipfile.ZipFile(handle) as shapefile_zip:
                    import_shapefile(shapefile_zip, staging_name)
                    add_unique_hash(staging_name)

        try:
            postgres_engine.execute('drop table {}'.format(self.table_name))
        except ProgrammingError:
            # Best effort: a ProgrammingError from the drop is ignored.
            pass

        rename_table = 'alter table {} rename to {}'
        rename_table = rename_table.format(staging_name, self.table_name)
        postgres_engine.execute(rename_table)

        self.meta.update_after_ingest()
        postgres_session.commit()
コード例 #9
0
    def __enter__(self):
        """
        Create and populate the table of new records (prefixed with n_).

        The table receives one record for each staging record whose hash
        is not present in the existing table.
        If there are no such records, the table is simply left empty.

        :returns: self
        :raises PlenarioETLError: if creating or populating the table fails.
        """
        staging = self.staging
        existing = self.existing
        dataset = self.dataset

        # Columns for the new table: hash, derived point_date, derived geom.
        point_dates = func.cast(staging.c[dataset.date],
                                TIMESTAMP).label('point_date')
        geoms = self._geom_col()
        cols_to_insert = [staging.c['hash'], point_dates, geoms]

        # Outer-join staging to existing on hash and keep only the staging
        # rows that found no match, i.e. hashes not yet in the existing table.
        hash_join = staging.outerjoin(
            existing, staging.c['hash'] == existing.c['hash'])
        sel = select(cols_to_insert).select_from(hash_join)
        sel = sel.where(existing.c['hash'] == None)  # noqa: E711

        # Drop the table first out of healthy paranoia
        self._drop()
        try:
            self.table.create(bind=postgres_engine)
        except Exception as err:
            raise PlenarioETLError(repr(err) +
                                   '\nCould not create table n_' +
                                   dataset.name)

        # Populate it with records from our select statement.
        populate = self.table.insert().from_select(cols_to_insert, sel)
        try:
            postgres_engine.execute(populate)
        except Exception as err:
            raise PlenarioETLError(repr(err) + '\n' + str(sel))

        # Would be nice to check if we have new records or not right here.
        return self
コード例 #10
0
def knn(lng, lat, k, network, sensors):
    """Execute a spatial query to select k nearest neighbors given some point.

    All caller-supplied values are sent to the database as bound parameters
    instead of being formatted into the SQL text, so quotes or SQL fragments
    in ``network`` or ``sensors`` cannot alter the statement.

    :param lng: (float) longitude
    :param lat: (float) latitude
    :param k: (int) number of results to return
    :param network: (str) value compared against the sensor_network column
    :param sensors: (iterable of str) sensor names; only nodes whose
        aggregated sensors overlap these names are returned
    :returns: (list) of nearest k neighbors
    """
    # Convert lng-lat to geojson point
    point = json.dumps({
        'type': 'Point',
        'coordinates': [lng, lat]
    })

    params = {
        'geojson': point,
        'network': network,
        'k': k,
        # How many to limit the initial bounding box query to
        'k_10': k * 10,
        # Postgres array literal, cast to VARCHAR[] inside the query
        'sensors': '{' + ','.join(sensors) + '}',
    }

    # Based off snippet provided on pg 253 of PostGIS In Action (2nd Edition)
    # NOTE(review): the %(name)s placeholders assume the engine's DBAPI uses
    # the pyformat paramstyle (as psycopg2 does) -- confirm for this engine.
    query = """
    WITH bbox_results AS (
      SELECT
        node,
        location,
        array_agg(sensor) AS sensors,
        (SELECT ST_SetSRID(ST_GeomFromGeoJSON(%(geojson)s), 4326)) AS ref_geom
      FROM
        sensor__node_metadata JOIN sensor__sensor_to_node
        ON id=node
      WHERE
          sensor_network = %(network)s
      GROUP BY
        node,
        location
      ORDER BY
        location <#> (SELECT ST_SetSRID(ST_GeomFromGeoJSON (%(geojson)s), 4326))
      LIMIT %(k_10)s
    )

    SELECT
      node,
      RANK() OVER(ORDER BY ST_Distance(location, ref_geom)) AS act_r
    FROM bbox_results
    WHERE
      sensors && %(sensors)s::VARCHAR[]
    ORDER BY act_r
    LIMIT %(k)s;
    """

    return postgres_engine.execute(query, params).fetchall()
コード例 #11
0
ファイル: weather_metar.py プロジェクト: vforgione/plenario
def wban2CallSign(wban_code):
    """Look up the call sign of the weather station with the given WBAN code.

    The code is passed to the database as a bound parameter rather than being
    interpolated into the SQL text, so a quote in the value cannot break or
    alter the statement.

    :param wban_code: WBAN code compared against weather_stations.wban_code
    :returns: call_sign of the first matching row, or None (after printing a
        message) when no row matches
    """
    # NOTE(review): the %(name)s placeholder assumes the engine's DBAPI uses
    # the pyformat paramstyle (as psycopg2 does) -- confirm for this engine.
    sql = ("SELECT call_sign FROM weather_stations "
           "where wban_code=%(wban_code)s")
    # str() keeps the value a string literal, as the quoted '%s' did before.
    result = engine.execute(sql, {'wban_code': str(wban_code)})
    x = result.first()
    cs = None
    if x:
        cs = x[0]
    else:
        print(("could not find wban:", wban_code))
    return cs
コード例 #12
0
ファイル: shape.py プロジェクト: vforgione/plenario
    def add(self):
        """Ingest the source shapefile and install it as ``self.table_name``.

        The zipped shapefile is imported into a staging table, which is then
        deduplicated and hashed. Any existing table named ``self.table_name``
        is dropped, the staging table is renamed into its place, and the
        metadata record is updated and committed.
        """
        staging_name = 'staging_{}'.format(self.table_name)

        with ETLFile(self.source_path, self.source_url,
                     interpret_as='bytes') as file_helper:
            # ZipFile does not close a file object that was handed to it, so
            # the handle is opened in its own ``with`` to avoid leaking it.
            with open(file_helper.handle.name, "rb") as handle:
                with zipfile.ZipFile(handle) as shapefile_zip:
                    import_shapefile(shapefile_zip, staging_name)
                    add_unique_hash(staging_name)

        try:
            postgres_engine.execute('drop table {}'.format(self.table_name))
        except ProgrammingError:
            # Best effort: a ProgrammingError from the drop is ignored.
            pass

        rename_table = 'alter table {} rename to {}'
        rename_table = rename_table.format(staging_name, self.table_name)
        postgres_engine.execute(rename_table)

        self.meta.update_after_ingest()
        postgres_session.commit()
コード例 #13
0
ファイル: test_point.py プロジェクト: vforgione/plenario
    def test_update_with_change(self):
        """Check that updating from a changed CSV changes the date of 'baz'."""
        drop_if_exists(self.unloaded_meta.dataset_name)

        # Initial ingest from the original fixture.
        table = PlenarioETL(self.unloaded_meta,
                            source_path=self.radio_path).add()

        # Update from the modified fixture.
        changed_path = os.path.join(fixtures_path,
                                    'community_radio_events_changed.csv')
        PlenarioETL(self.unloaded_meta, source_path=changed_path).update()

        baz_date_query = sa.select([table.c.date]).where(
            table.c.event_name == 'baz')
        baz_row = postgres_engine.execute(baz_date_query).fetchone()
        self.assertEqual(baz_row[0], date(1993, 11, 10))
コード例 #14
0
 def test_update(self):
     """Check that a ShapeETL update from a changed shapefile alters a row."""
     # Try to ingest slightly changed shape
     changed_fixture = shape_fixtures['changed_neighborhoods']
     # Add the fixture to the registry first
     meta = postgres_session.query(ShapeMetadata).get('chicago_neighborhoods')
     # Do a ShapeETL update
     etl = ShapeETL(meta=meta, source_path=changed_fixture.path)
     etl.update()

     shape_table = meta.shape_table
     englewood_query = shape_table.select().where(
         shape_table.c['sec_neigh'] == 'ENGLEWOOD')
     englewood_rows = engine.execute(englewood_query).fetchall()
     # I changed Englewood to Englerwood :P
     self.assertEqual(englewood_rows[0]['pri_neigh'], 'Englerwood')
コード例 #15
0
ファイル: test_shape.py プロジェクト: vforgione/plenario
 def test_update(self):
     """Check that a ShapeETL update from a changed shapefile alters a row."""
     # Try to ingest slightly changed shape
     changed_fixture = shape_fixtures['changed_neighborhoods']
     # Add the fixture to the registry first
     meta = postgres_session.query(ShapeMetadata).get('chicago_neighborhoods')
     # Do a ShapeETL update
     etl = ShapeETL(meta=meta, source_path=changed_fixture.path)
     etl.update()

     shape_table = meta.shape_table
     englewood_query = shape_table.select().where(
         shape_table.c['sec_neigh'] == 'ENGLEWOOD')
     englewood_rows = engine.execute(englewood_query).fetchall()
     # I changed Englewood to Englerwood :P
     self.assertEqual(englewood_rows[0]['pri_neigh'], 'Englerwood')
コード例 #16
0
    def insert(self):
        """
        Insert complete records into the existing table.

        Each record combines every staging column with the ``geom`` and
        ``point_date`` columns of this table, matched on hash. Afterwards,
        malformed geoms in the existing table are nulled out.

        :raises PlenarioETLError: if the insert or the geom cleanup fails.
        """
        derived_names = {'geom', 'point_date'}
        derived_cols = [col for col in self.table.c
                        if col.name in derived_names]
        sel_cols = list(self.staging.c) + derived_cols

        hashes_match = self.staging.c.hash == self.table.c.hash
        sel = select(sel_cols).where(hashes_match)
        ins = self.existing.insert().from_select(sel_cols, sel)

        try:
            postgres_engine.execute(ins)
        except Exception as err:
            raise PlenarioETLError(
                repr(err) + '\n Failed on statement: ' + str(ins))

        try:
            _null_malformed_geoms(self.existing)
        except Exception as err:
            raise PlenarioETLError(
                repr(err) + '\n Failed to null out geoms with (0,0) geocoding')
コード例 #17
0
def _null_malformed_geoms(existing):
    """Set geom to NULL on rows whose geom is the point (0, 0) in SRID 4326.

    :param existing: table whose ``geom`` column is updated
    """
    # We decide to set the geom to NULL when the given lon/lat is (0,0)
    # (off the coast of Africa).
    origin_point = func.ST_SetSRID(func.ST_MakePoint(0, 0), 4326)
    geom_is_origin = existing.c.geom == select([origin_point])
    upd = existing.update().values(geom=None).where(geom_is_origin)
    postgres_engine.execute(upd)
コード例 #18
0
 def _add_trigger(self):
     """Attach the audit_after trigger to the dataset's table.

     The trigger runs audit.if_modified() for each row after a DELETE or
     UPDATE on the table named ``self.dataset.name``.
     """
     trigger_template = """CREATE TRIGGER audit_after AFTER DELETE OR UPDATE
                      ON "{table}"
                      FOR EACH ROW EXECUTE PROCEDURE audit.if_modified()"""
     add_trigger = trigger_template.format(table=self.dataset.name)
     postgres_engine.execute(add_trigger)
コード例 #19
0
ファイル: weather_metar.py プロジェクト: vforgione/plenario
def all_callSigns():
    """Return every call_sign in weather_stations, ordered by call_sign.

    :returns: list holding the first column of each result row
    """
    rows = engine.execute(
        "SELECT call_sign FROM weather_stations ORDER BY call_sign"
    ).fetchall()
    call_signs = [row[0] for row in rows]
    return call_signs
コード例 #20
0
 def _drop(self):
     """Drop the table named ``self.name`` if it exists."""
     drop_statement = "DROP TABLE IF EXISTS {};".format(self.name)
     postgres_engine.execute(drop_statement)
コード例 #21
0
ファイル: test_point.py プロジェクト: vforgione/plenario
def drop_meta(table_name):
    """Delete the meta_master rows whose dataset_name equals ``table_name``.

    :param table_name: dataset name to remove from meta_master
    """
    statement_template = "DELETE FROM meta_master WHERE dataset_name = '{}';"
    postgres_engine.execute(statement_template.format(table_name))