def tearDownClass(cls):

        """Tear down: drop the 'roadworks' test table and its registry row.

        ProgrammingError (e.g. the table is already gone) is swallowed so
        teardown stays idempotent across runs.
        """
        # NOTE(review): body is indented as a classmethod fragment; the
        # @classmethod decorator is presumably outside this excerpt.
        try:
            app_engine.execute("drop table roadworks")
            app_engine.execute("delete from meta_master where dataset_name = 'roadworks'")
        except ProgrammingError:
            pass
Beispiel #2
0
def find_intersecting_shapes(geojson):
    """
    Respond with all shape datasets that intersect with the geojson provided.
    Also include how many geom rows of the dataset intersect.

    :param geojson: URL encoded geojson.
    :returns: Flask JSON response whose 'objects' list holds
              {'dataset_name', 'num_geoms'} entries.
    """
    # Normalize the request geojson down to a bare geometry fragment string.
    fragment = make_fragment_str(extract_first_geometry_fragment(geojson))

    try:
        # First, do a cheap bounding-box (&&) check on all shape datasets
        # to find candidate tables before the expensive intersection test.
        query = '''
        SELECT m.dataset_name
        FROM meta_shape as m
        WHERE m.bbox &&
            (SELECT ST_GeomFromGeoJSON('{geojson_fragment}'));
        '''.format(geojson_fragment=fragment)

        intersecting_datasets = engine.execute(query)
        bounding_box_intersection_names = [
            dataset.dataset_name for dataset in intersecting_datasets
        ]
    except Exception:
        # print() form is valid in both Python 2 and 3;
        # bare raise preserves the original traceback.
        print('Error finding candidates')
        raise

    try:
        # Then, for the candidates, get a count of the rows that intersect.
        # NOTE(review): dataset_name is interpolated into SQL as an
        # identifier; it comes from our own meta_shape table, but confirm
        # those names are trusted.
        response_objects = []
        for dataset_name in bounding_box_intersection_names:
            num_intersections_query = '''
            SELECT count(g.geom) as num_geoms
            FROM {dataset_name} as g
            WHERE ST_Intersects(g.geom, ST_GeomFromGeoJSON('{geojson_fragment}'))
            '''.format(dataset_name=dataset_name, geojson_fragment=fragment)

            num_intersections = engine.execute(num_intersections_query)\
                                      .first().num_geoms

            # Only report datasets with at least one truly intersecting row.
            if num_intersections > 0:
                response_objects.append({
                    'dataset_name': dataset_name,
                    'num_geoms': num_intersections
                })

    except Exception:
        print('Error narrowing candidates')
        raise

    response_skeleton = {
        'meta': {
            'status': 'ok',
            'message': '',
        },
        'objects': response_objects
    }

    resp = make_response(json.dumps(response_skeleton), 200)
    return resp
Beispiel #3
0
    def setUp(self):
        """Roll back any open transaction, register three MetaTable fixtures,
        and pre-create the dog_park_permits table with one seed row."""
        session.rollback()
        # Ensure we have metadata loaded into the database
        # to mimic the behavior of metadata ingestion preceding file ingestion.
        drop_meta('dog_park_permits')
        drop_meta('community_radio_events')
        drop_meta('public_opera_performances')

        # Make new MetaTable objects
        # (only this one is approved; the other two are pending).
        self.unloaded_meta = MetaTable(url='nightvale.gov/events.csv',
                                       human_name='Community Radio Events',
                                       business_key='Event Name',
                                       observed_date='Date',
                                       latitude='lat',
                                       longitude='lon',
                                       approved_status=True)

        self.existing_meta = MetaTable(url='nightvale.gov/dogpark.csv',
                                       human_name='Dog Park Permits',
                                       business_key='Hooded Figure ID',
                                       observed_date='Date',
                                       latitude='lat',
                                       longitude='lon',
                                       approved_status=False)

        # This one uses a single 'Location' column instead of lat/lon.
        self.opera_meta = MetaTable(url='nightvale.gov/opera.csv',
                                    human_name='Public Opera Performances',
                                    business_key='Event Name',
                                    observed_date='Date',
                                    location='Location',
                                    approved_status=False)
        session.add_all(
            [self.existing_meta, self.opera_meta, self.unloaded_meta])
        session.commit()

        # Also, let's have one table pre-loaded...
        self.existing_table = sa.Table(
            'dog_park_permits', MetaData(), Column('hooded_figure_id',
                                                   Integer),
            Column('point_date', TIMESTAMP, nullable=False),
            Column('date', Date, nullable=True),
            Column('lat', Float, nullable=False),
            Column('lon', Float, nullable=False),
            Column('hash', String(32), primary_key=True),
            Column('geom', Geometry('POINT', srid=4326), nullable=True))
        drop_if_exists(self.existing_table.name)
        self.existing_table.create(bind=app_engine)

        # ... with some pre-existing data
        ins = self.existing_table.insert().values(
            hooded_figure_id=1,
            point_date=date(2015, 1, 2),
            lon=-87.6495076896,
            lat=41.7915865543,
            geom=None,
            hash='addde9be7f59e95fc08e54e29b2a947f')
        app_engine.execute(ins)
Beispiel #4
0
def find_intersecting_shapes(geojson):
    """
    Respond with all shape datasets that intersect with the geojson provided.
    Also include how many geom rows of the dataset intersect.

    :param geojson: URL encoded geojson.
    :returns: Flask JSON response whose 'objects' list holds
              {'dataset_name', 'num_geoms'} entries.
    """
    # Normalize the request geojson down to a bare geometry fragment string.
    fragment = make_fragment_str(extract_first_geometry_fragment(geojson))

    try:
        # First, do a cheap bounding-box (&&) check on all shape datasets
        # to find candidate tables before the expensive intersection test.
        query = '''
        SELECT m.dataset_name
        FROM meta_shape as m
        WHERE m.bbox &&
            (SELECT ST_GeomFromGeoJSON('{geojson_fragment}'));
        '''.format(geojson_fragment=fragment)

        intersecting_datasets = engine.execute(query)
        bounding_box_intersection_names = [
            dataset.dataset_name for dataset in intersecting_datasets
        ]
    except Exception:
        # print() form is valid in both Python 2 and 3;
        # bare raise preserves the original traceback.
        print('Error finding candidates')
        raise

    try:
        # Then, for the candidates, get a count of the rows that intersect.
        # NOTE(review): dataset_name is interpolated into SQL as an
        # identifier; it comes from our own meta_shape table, but confirm
        # those names are trusted.
        response_objects = []
        for dataset_name in bounding_box_intersection_names:
            num_intersections_query = '''
            SELECT count(g.geom) as num_geoms
            FROM {dataset_name} as g
            WHERE ST_Intersects(g.geom, ST_GeomFromGeoJSON('{geojson_fragment}'))
            '''.format(dataset_name=dataset_name, geojson_fragment=fragment)

            num_intersections = engine.execute(num_intersections_query)\
                                      .first().num_geoms

            # Only report datasets with at least one truly intersecting row.
            if num_intersections > 0:
                response_objects.append({
                    'dataset_name': dataset_name,
                    'num_geoms': num_intersections
                })

    except Exception:
        print('Error narrowing candidates')
        raise

    response_skeleton = {
        'meta': {
            'status': 'ok',
            'message': '',
        },
        'objects': response_objects
    }

    resp = make_response(json.dumps(response_skeleton), 200)
    return resp
Beispiel #5
0
def delete_absent_hashes(staging_name, existing_name):
    """Remove rows of the existing table whose hash is absent from staging.

    :param staging_name: name of the freshly loaded staging table
    :param existing_name: name of the canonical table to prune
    :raises PlenarioETLError: if the DELETE cannot be executed
    """
    template = """DELETE FROM "{existing}"
                  WHERE hash IN
                     (SELECT hash FROM "{existing}"
                        EXCEPT
                      SELECT hash FROM  "{staging}");"""
    delete_stmt = template.format(existing=existing_name, staging=staging_name)

    try:
        engine.execute(delete_stmt)
    except Exception as e:
        raise PlenarioETLError(repr(e) + '\n Failed to execute' + delete_stmt)
def table_exists(table_name):
    """Check cheaply whether a table exists in the database.

    Casting the name to ``regclass`` makes Postgres raise a
    ProgrammingError when no such relation exists.

    :param table_name: (string) table name
    :returns: (bool) True if the table exists, False otherwise
    """
    probe = "select '{}'::regclass".format(table_name)
    try:
        app_engine.execute(probe)
    except ProgrammingError:
        return False
    return True
Beispiel #7
0
    def setUp(self):
        """Roll back any open transaction, register three MetaTable fixtures,
        and pre-create the dog_park_permits table with one seed row."""
        session.rollback()
        # Ensure we have metadata loaded into the database
        # to mimic the behavior of metadata ingestion preceding file ingestion.
        drop_meta('dog_park_permits')
        drop_meta('community_radio_events')
        drop_meta('public_opera_performances')

        # Make new MetaTable objects
        # (only this one is approved; the other two are pending).
        self.unloaded_meta = MetaTable(url='nightvale.gov/events.csv',
                                      human_name='Community Radio Events',
                                      business_key='Event Name',
                                      observed_date='Date',
                                      latitude='lat', longitude='lon',
                                      approved_status=True)

        self.existing_meta = MetaTable(url='nightvale.gov/dogpark.csv',
                                      human_name='Dog Park Permits',
                                      business_key='Hooded Figure ID',
                                      observed_date='Date',
                                      latitude='lat', longitude='lon',
                                      approved_status=False)

        # This one uses a single 'Location' column instead of lat/lon.
        self.opera_meta = MetaTable(url='nightvale.gov/opera.csv',
                                   human_name='Public Opera Performances',
                                   business_key='Event Name',
                                   observed_date='Date',
                                   location='Location',
                                   approved_status=False)
        session.add_all([self.existing_meta, self.opera_meta, self.unloaded_meta])
        session.commit()

        # Also, let's have one table pre-loaded...
        self.existing_table = sa.Table('dog_park_permits', MetaData(),
                                      Column('hooded_figure_id', Integer),
                                      Column('point_date', TIMESTAMP, nullable=False),
                                      Column('date', Date, nullable=True),
                                      Column('lat', Float, nullable=False),
                                      Column('lon', Float, nullable=False),
                                       Column('hash', String(32), primary_key=True),
                                      Column('geom', Geometry('POINT', srid=4326), nullable=True))
        drop_if_exists(self.existing_table.name)
        self.existing_table.create(bind=app_engine)

        # ... with some pre-existing data
        ins = self.existing_table.insert().values(hooded_figure_id=1,
                                                  point_date=date(2015, 1, 2),
                                                  lon=-87.6495076896,
                                                  lat=41.7915865543,
                                                  geom=None,
                                                  hash='addde9be7f59e95fc08e54e29b2a947f')
        app_engine.execute(ins)
Beispiel #8
0
    def _hash_update(staging, existing):
        """Sync existing with staging by hash: prune rows whose hash vanished,
        then insert staging rows whose hash is not yet present."""
        delete_absent_hashes(staging.name, existing.name)

        # Anti-join: staging rows with no matching hash in existing.
        # (== None compiles to SQL "IS NULL" under SQLAlchemy.)
        hash_match = staging.c['hash'] == existing.c['hash']
        new_rows = staging.select()\
            .select_from(staging.outerjoin(existing, hash_match))\
            .where(existing.c['hash'] == None)

        # Insert the missing rows into the existing table.
        column_names = [column.name for column in existing.columns]
        insert_stmt = existing.insert().from_select(column_names, new_rows)
        try:
            engine.execute(insert_stmt)
        except Exception as e:
            raise PlenarioETLError(repr(e) + '\n' + str(new_rows))
Beispiel #9
0
    def _hash_update(staging, existing):
        """Sync existing with staging by hash: prune rows whose hash vanished,
        then insert staging rows whose hash is not yet present."""
        delete_absent_hashes(staging.name, existing.name)

        # Anti-join: staging rows with no matching hash in existing.
        # (== None compiles to SQL "IS NULL" under SQLAlchemy.)
        hash_match = staging.c['hash'] == existing.c['hash']
        new_rows = staging.select()\
            .select_from(staging.outerjoin(existing, hash_match))\
            .where(existing.c['hash'] == None)

        # Insert the missing rows into the existing table.
        column_names = [column.name for column in existing.columns]
        insert_stmt = existing.insert().from_select(column_names, new_rows)
        try:
            engine.execute(insert_stmt)
        except Exception as e:
            raise PlenarioETLError(repr(e) + '\n' + str(new_rows))
Beispiel #10
0
    def test_location_col_update(self):
        """Recreate the opera table (location-column schema) and seed it."""
        drop_if_exists(self.opera_meta.dataset_name)

        opera_columns = [
            Column('event_name', String, primary_key=True),
            Column('date', Date, nullable=True),
            Column('location', String, nullable=False),
            Column('geom', Geometry('POINT', srid=4326), nullable=True),
            Column('point_date', TIMESTAMP, nullable=False),
        ]
        self.opera_table = sa.Table(self.opera_meta.dataset_name, MetaData(),
                                    *opera_columns)
        drop_if_exists(self.existing_table.name)
        self.opera_table.create(bind=app_engine)

        # Seed one record; geom left NULL.
        seed = self.opera_table.insert().values(
            event_name='quux',
            date=None,
            point_date=date(2015, 1, 2),
            location='(-87.6495076896,41.7915865543)',
            geom=None)
        app_engine.execute(seed)
Beispiel #11
0
    def test_location_col_update(self):
        """Recreate the opera table (location-column schema) and seed it."""
        drop_if_exists(self.opera_meta.dataset_name)

        self.opera_table = sa.Table(self.opera_meta.dataset_name,
                                    MetaData(),
                                    Column('event_name', String,
                                           primary_key=True),
                                    Column('date', Date, nullable=True),
                                    Column('location', String,
                                           nullable=False),
                                    Column('geom',
                                           Geometry('POINT', srid=4326),
                                           nullable=True),
                                    Column('point_date', TIMESTAMP,
                                           nullable=False))
        drop_if_exists(self.existing_table.name)
        self.opera_table.create(bind=app_engine)

        # Seed one record; geom left NULL.
        seed_row = dict(event_name='quux',
                        date=None,
                        point_date=date(2015, 1, 2),
                        location='(-87.6495076896,41.7915865543)',
                        geom=None)
        app_engine.execute(self.opera_table.insert().values(**seed_row))
Beispiel #12
0
    def __enter__(self):
        """
        Add a table (prefixed with n_) to the database
        with one record for each record found in the staging table
        with a hash not present in the existing table.
        If there are no such records, do nothing.

        :returns: self, so ``with`` binds this object.
        :raises PlenarioETLError: if the n_ table cannot be created
            or populated.
        """

        # create n_table with point_date, geom, and id columns
        s = self.staging
        e = self.existing
        d = self.dataset

        # Derive a typed timestamp column from the dataset's date column.
        derived_dates = func.cast(s.c[d.date], TIMESTAMP).label('point_date')
        derived_geoms = self._geom_col()
        cols_to_insert = [s.c['hash'], derived_dates, derived_geoms]

        # Select the hash and the columns we're deriving from the staging table.
        sel = select(cols_to_insert)
        # And limit our results to records
        # whose hashes aren't already present in the existing table.
        # (== None compiles to SQL "IS NULL" under SQLAlchemy.)
        sel = sel.select_from(s.outerjoin(e, s.c['hash'] == e.c['hash'])).\
            where(e.c['hash'] == None)

        # Drop the table first out of healthy paranoia
        self._drop()
        try:
            self.table.create(bind=engine)
        except Exception as e:
            # NOTE(review): 'e' now shadows the existing-table alias above.
            raise PlenarioETLError(
                repr(e) + '\nCould not create table n_' + d.name)

        ins = self.table.insert().from_select(cols_to_insert, sel)
        # Populate it with records from our select statement.
        try:
            engine.execute(ins)
        except Exception as e:
            raise PlenarioETLError(repr(e) + '\n' + str(sel))
        else:
            # Would be nice to check if we have new records or not right here.
            return self
Beispiel #13
0
    def __enter__(self):
        """
        Add a table (prefixed with n_) to the database
        with one record for each record found in the staging table
        with a hash not present in the existing table.
        If there are no such records, do nothing.

        :returns: self, so ``with`` binds this object.
        :raises PlenarioETLError: if the n_ table cannot be created
            or populated.
        """

        # create n_table with point_date, geom, and id columns
        s = self.staging
        e = self.existing
        d = self.dataset

        # Derive a typed timestamp column from the dataset's date column.
        derived_dates = func.cast(s.c[d.date], TIMESTAMP).label('point_date')
        derived_geoms = self._geom_col()
        cols_to_insert = [s.c['hash'], derived_dates, derived_geoms]

        # Select the hash and the columns we're deriving from the staging table.
        sel = select(cols_to_insert)
        # And limit our results to records
        # whose hashes aren't already present in the existing table.
        # (== None compiles to SQL "IS NULL" under SQLAlchemy.)
        sel = sel.select_from(s.outerjoin(e, s.c['hash'] == e.c['hash'])).\
            where(e.c['hash'] == None)

        # Drop the table first out of healthy paranoia
        self._drop()
        try:
            self.table.create(bind=engine)
        except Exception as e:
            # NOTE(review): 'e' now shadows the existing-table alias above.
            raise PlenarioETLError(repr(e) +
                                   '\nCould not create table n_' + d.name)

        ins = self.table.insert().from_select(cols_to_insert, sel)
        # Populate it with records from our select statement.
        try:
            engine.execute(ins)
        except Exception as e:
            raise PlenarioETLError(repr(e) + '\n' + str(sel))
        else:
            # Would be nice to check if we have new records or not right here.
            return self
def wban2CallSign(wban_code):
    """Look up the call sign for a weather station by WBAN code.

    :param wban_code: WBAN identifier of the station
    :returns: call sign string, or None if no station matches
    """
    # NOTE(review): wban_code is interpolated directly into SQL; fine for
    # trusted internal codes, but not safe for untrusted input.
    sql = "SELECT call_sign FROM weather_stations where wban_code='%s'" % wban_code
    result = engine.execute(sql)
    x = result.first()
    cs = None
    if x:
        cs = x[0]
    else:
        # %s-formatted print() is valid in both Python 2 and 3 and keeps
        # the original "could not find wban: <code>" output.
        print("could not find wban: %s" % wban_code)
    return cs
Beispiel #15
0
def wban2CallSign(wban_code):
    """Look up the call sign for a weather station by WBAN code.

    :param wban_code: WBAN identifier of the station
    :returns: call sign string, or None if no station matches
    """
    # NOTE(review): wban_code is interpolated directly into SQL; fine for
    # trusted internal codes, but not safe for untrusted input.
    sql = "SELECT call_sign FROM weather_stations where wban_code='%s'" % wban_code
    result = engine.execute(sql)
    x = result.first()
    cs = None
    if x:
        cs = x[0]
    else:
        # %s-formatted print() is valid in both Python 2 and 3 and keeps
        # the original "could not find wban: <code>" output.
        print("could not find wban: %s" % wban_code)
    return cs
Beispiel #16
0
    def test_update_with_change(self):
        """Updating from a changed CSV should overwrite the altered row."""
        drop_if_exists(self.unloaded_meta.dataset_name)

        # Initial load.
        etl = PlenarioETL(self.unloaded_meta, source_path=self.radio_path)
        table = etl.add()

        # Re-ingest a fixture where one record's date was changed.
        changed_path = os.path.join(
            fixtures_path, 'community_radio_events_changed.csv')
        PlenarioETL(self.unloaded_meta, source_path=changed_path).update()

        query = sa.select([table.c.date]).where(table.c.event_name == 'baz')
        updated_date = app_engine.execute(query).fetchone()[0]
        self.assertEqual(updated_date, date(1993, 11, 10))
Beispiel #17
0
 def test_update(self):
     """Re-ingesting a changed shapefile should update altered attributes."""
     # Try to ingest slightly changed shape
     fixture = fixtures['changed_neighborhoods']
     # Add the fixture to the registry first
     shape_meta = session.query(ShapeMetadata).get('chicago_neighborhoods')
     # Do a ShapeETL update
     ShapeETL(meta=shape_meta, source_path=fixture.path).update()
     t = shape_meta.shape_table
     sel = t.select().where(t.c['sec_neigh'] == 'ENGLEWOOD')
     res = engine.execute(sel).fetchall()
     altered_value = res[0]['pri_neigh']
     # I changed Englewood to Englerwood :P
     self.assertEqual(altered_value, 'Englerwood')
Beispiel #18
0
def add_unique_hash(table_name):
    """Add an md5 ``hash`` column over the existing columns of a table and
    drop duplicate rows, making ``hash`` the primary key.

    :param table_name: Name of table to add hash to.
    :raises PlenarioETLError: if the rewrite SQL fails.
    """
    # Rebuild the table via a temp copy: SELECT DISTINCT removes dupes,
    # md5 over the whole row tuple becomes the new primary key.
    template = '''
    DROP TABLE IF EXISTS temp;
    CREATE TABLE temp AS
      SELECT DISTINCT *,
             md5(CAST(("{table_name}".*)AS text))
                AS hash FROM "{table_name}";
    DROP TABLE "{table_name}";
    ALTER TABLE temp RENAME TO "{table_name}";
    ALTER TABLE "{table_name}" ADD PRIMARY KEY (hash);
    '''
    dedupe_sql = template.format(table_name=table_name)

    try:
        engine.execute(dedupe_sql)
    except Exception as e:
        raise PlenarioETLError(repr(e) +
                               '\n Failed to deduplicate with ' + dedupe_sql)
Beispiel #19
0
 def test_update(self):
     """Re-ingesting a changed shapefile should update altered attributes."""
     # Try to ingest slightly changed shape
     fixture = fixtures['changed_neighborhoods']
     # Add the fixture to the registry first
     shape_meta = session.query(ShapeMetadata).get('chicago_neighborhoods')
     # Do a ShapeETL update
     ShapeETL(meta=shape_meta, source_path=fixture.path).update()
     t = shape_meta.shape_table
     sel = t.select().where(t.c['sec_neigh'] == 'ENGLEWOOD')
     res = engine.execute(sel).fetchall()
     altered_value = res[0]['pri_neigh']
     # I changed Englewood to Englerwood :P
     self.assertEqual(altered_value, 'Englerwood')
Beispiel #20
0
    def test_update_with_change(self):
        """Updating from a changed CSV should overwrite the altered row."""
        drop_if_exists(self.unloaded_meta.dataset_name)

        # Initial load.
        etl = PlenarioETL(self.unloaded_meta, source_path=self.radio_path)
        table = etl.add()

        # Re-ingest a fixture where one record's date was changed.
        changed_path = os.path.join(
            fixtures_path, 'community_radio_events_changed.csv')
        PlenarioETL(self.unloaded_meta, source_path=changed_path).update()

        query = sa.select([table.c.date]).where(table.c.event_name == 'baz')
        updated_date = app_engine.execute(query).fetchone()[0]
        self.assertEqual(updated_date, date(1993, 11, 10))
Beispiel #21
0
def callSign2Wban(call_sign):
    """Return the WBAN code for a station call sign, or None if unknown."""
    sql = "SELECT wban_code FROM weather_stations where call_sign='%s'" % call_sign
    row = engine.execute(sql).first()
    if row:
        return row[0]
    return None
Beispiel #22
0
    def insert(self):
        """
        Join with the staging table
        to insert complete records into existing table.

        :raises PlenarioETLError: if the insert or the geom cleanup fails.
        """
        # Derived columns from the n_ table, appended after every staging
        # column.
        derived = [col for col in self.table.c
                   if col.name in {'geom', 'point_date'}]
        combined_cols = list(self.staging.c) + derived

        selection = select(combined_cols).where(
            self.staging.c.hash == self.table.c.hash)
        insert_stmt = self.existing.insert().from_select(combined_cols,
                                                         selection)

        try:
            engine.execute(insert_stmt)
        except Exception as e:
            raise PlenarioETLError(repr(e) +
                                   '\n Failed on statement: ' + str(insert_stmt))
        try:
            _null_malformed_geoms(self.existing)
        except Exception as e:
            raise PlenarioETLError(repr(e) +
                        '\n Failed to null out geoms with (0,0) geocoding')
Beispiel #23
0
    def insert(self):
        """
        Join with the staging table
        to insert complete records into existing table.

        :raises PlenarioETLError: if the insert or the geom cleanup fails.
        """
        # Derived columns from the n_ table, appended after every staging
        # column.
        derived = [col for col in self.table.c
                   if col.name in {'geom', 'point_date'}]
        combined_cols = list(self.staging.c) + derived

        selection = select(combined_cols).where(
            self.staging.c.hash == self.table.c.hash)
        insert_stmt = self.existing.insert().from_select(combined_cols,
                                                         selection)

        try:
            engine.execute(insert_stmt)
        except Exception as e:
            raise PlenarioETLError(repr(e) +
                                   '\n Failed on statement: ' + str(insert_stmt))
        try:
            _null_malformed_geoms(self.existing)
        except Exception as e:
            raise PlenarioETLError(repr(e) +
                        '\n Failed to null out geoms with (0,0) geocoding')
Beispiel #24
0
def callSign2Wban(call_sign):
    """Return the WBAN code for a station call sign, or None if unknown."""
    sql = "SELECT wban_code FROM weather_stations where call_sign='%s'" % call_sign
    row = engine.execute(sql).first()
    if row:
        return row[0]
    return None
Beispiel #25
0
def drop_meta(table_name):
    """Remove the meta_master registry row for the given dataset name."""
    delete_sql = ("DELETE FROM meta_master WHERE dataset_name = '{}';"
                  .format(table_name))
    app_engine.execute(delete_sql)
Beispiel #26
0
def all_callSigns():
    """Return every weather-station call sign, ordered alphabetically."""
    rows = engine.execute(
        "SELECT call_sign FROM weather_stations ORDER by call_sign")
    return [row[0] for row in rows.fetchall()]
Beispiel #27
0
 def _add_trigger(self):
     # Attach an AFTER DELETE/UPDATE trigger so row modifications on the
     # dataset table are recorded via audit.if_modified().
     add_trigger = """CREATE TRIGGER audit_after AFTER DELETE OR UPDATE
                      ON "{table}"
                      FOR EACH ROW EXECUTE PROCEDURE audit.if_modified()""".\
                   format(table=self.dataset.name)
     engine.execute(add_trigger)
Beispiel #28
0
def drop_tables(table_names):
    """Drop each named table if it exists, batched into one statement."""
    statements = ("DROP TABLE IF EXISTS {};".format(name)
                  for name in table_names)
    engine.execute("".join(statements))
Beispiel #29
0
def drop_meta(table_name):
    """Remove the meta_master registry row for the given dataset name."""
    delete_sql = ("DELETE FROM meta_master WHERE dataset_name = '{}';"
                  .format(table_name))
    app_engine.execute(delete_sql)
Beispiel #30
0
 def _drop(self):
     # Remove this object's backing table; a no-op when it doesn't exist.
     engine.execute("DROP TABLE IF EXISTS {};".format(self.name))
Beispiel #31
0
 def _drop(self):
     # Drop the staging table ("s_" + dataset name) if present.
     engine.execute("DROP TABLE IF EXISTS {};".format('s_' +
                                                      self.dataset.name))
def all_callSigns():
    """Return every weather-station call sign, ordered alphabetically."""
    rows = engine.execute(
        "SELECT call_sign FROM weather_stations ORDER by call_sign")
    return [row[0] for row in rows.fetchall()]
Beispiel #33
0
def _null_malformed_geoms(existing):
    """Null out geoms geocoded to (0, 0) -- off the coast of Africa --
    which indicates a failed geocode rather than a real location."""
    null_island = select([func.ST_SetSRID(func.ST_MakePoint(0, 0), 4326)])
    update_stmt = (existing.update()
                   .values(geom=None)
                   .where(existing.c.geom == null_island))
    engine.execute(update_stmt)
Beispiel #34
0
 def _drop(self):
     # Remove this object's backing table; a no-op when it doesn't exist.
     engine.execute("DROP TABLE IF EXISTS {};".format(self.name))
Beispiel #35
0
 def _add_trigger(self):
     # Attach an AFTER DELETE/UPDATE trigger so row modifications on the
     # dataset table are recorded via audit.if_modified().
     add_trigger = """CREATE TRIGGER audit_after AFTER DELETE OR UPDATE
                      ON "{table}"
                      FOR EACH ROW EXECUTE PROCEDURE audit.if_modified()""".\
                   format(table=self.dataset.name)
     engine.execute(add_trigger)
Beispiel #36
0
def _null_malformed_geoms(existing):
    """Null out geoms geocoded to (0, 0) -- off the coast of Africa --
    which indicates a failed geocode rather than a real location."""
    null_island = select([func.ST_SetSRID(func.ST_MakePoint(0, 0), 4326)])
    update_stmt = (existing.update()
                   .values(geom=None)
                   .where(existing.c.geom == null_island))
    engine.execute(update_stmt)
Beispiel #37
0
 def _drop(self):
     # Drop the staging table ("s_" + dataset name) if present.
     engine.execute("DROP TABLE IF EXISTS {};"
                    .format('s_' + self.dataset.name))