Example 1
    def timeseries_all(cls, table_names, agg_unit, start, end, geom=None):
        # For each table in table_names, generate a query to be unioned
        selects = []
        for name in table_names:
            table = cls.get_by_dataset_name(name)
            ts_select = table.timeseries(agg_unit, start, end, geom)
            selects.append(ts_select)

        # Union the time series selects to get a panel
        panel_query = sa.union(*selects)\
                        .order_by('dataset_name')\
                        .order_by('time_bucket')
        panel_vals = session.execute(panel_query)

        panel = []
        for dataset_name, ts in groupby(panel_vals,
                                        lambda row: row.dataset_name):

            # Each groupby group is a one-shot iterator that is invalidated
            # once we advance past it, so materialize the rows up front
            # in order to iterate over them twice.
            rows = list(ts)
            # If no records were found, don't include this dataset.
            if all(row.count == 0 for row in rows):
                continue

            ts_dict = {'dataset_name': dataset_name, 'items': []}

            for row in rows:
                ts_dict['items'].append({
                    'datetime': row.time_bucket.date(),  # UTC time
                    'count': row.count
                })
            panel.append(ts_dict)

        return panel
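Note that itertools.groupby only groups consecutive rows that share a key, which is why the panel query orders by dataset_name before grouping. A minimal, self-contained sketch of the pattern with made-up row data:

    from itertools import groupby

    rows = [('crimes', 3), ('crimes', 1), ('potholes', 0)]

    # groupby merges only *adjacent* equal keys, so the input must be
    # sorted (or, as above, ORDER BYed) on the grouping key first.
    for name, group in groupby(rows, key=lambda r: r[0]):
        items = list(group)  # each group is a one-shot iterator
        print(name, sum(count for _, count in items))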
Example 2
    def make_grid(self, resolution, geom=None, conditions=None):
        """
        :param resolution: length of side of grid square in meters
        :type resolution: int
        :param geom: string representation of geojson fragment
        :type geom: str
        :param conditions: conditions on columns to filter on
        :type conditions: list of SQLAlchemy binary operations
                          (e.g. col > value)
        :return: grid: result proxy with all result rows
                 size_x, size_y: the horizontal and vertical size
                                    of the grid squares in degrees
        """
        if conditions is None:
            conditions = []

        # We need to convert resolution (given in meters) to degrees
        # - which is the unit of measure for EPSG 4326 -
        # - in order to generate our grid.
        center = self.get_bbox_center()
        # center[1] is latitude (get_bbox_center returns [lon, lat])
        size_x, size_y = get_size_in_degrees(resolution, center[1])

        # Generate a count for each resolution by resolution square
        t = self.point_table
        q = session.query(func.count(t.c.hash),
                          func.ST_SnapToGrid(t.c.geom, size_x, size_y)
                          .label('squares'))\
            .filter(*conditions)\
            .group_by('squares')

        if geom:
            q = q.filter(t.c.geom.ST_Within(func.ST_GeomFromGeoJSON(geom)))

        return session.execute(q), size_x, size_y
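get_size_in_degrees is not shown in this example; a plausible sketch of the conversion it performs, assuming the usual spherical approximation (about 111,320 meters per degree of latitude, with degrees of longitude shrinking by the cosine of the latitude):

    import math

    METERS_PER_DEGREE = 111320.0  # approximate length of one degree of latitude

    def get_size_in_degrees_sketch(meters, latitude):
        # Hypothetical stand-in for get_size_in_degrees, for illustration only.
        size_y = meters / METERS_PER_DEGREE
        size_x = meters / (METERS_PER_DEGREE * math.cos(math.radians(latitude)))
        return size_x, size_y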
Example 3
    def test_insert_data(self):
        etl = PlenarioETL(self.existing_meta, source_path=self.dog_path)
        etl.update()

        existing = self.existing_table
        all_rows = session.execute(existing.select()).fetchall()
        self.assertEqual(len(all_rows), 5)
Example 4
    def _get_num_shapes(self):
        table = self.shape_table
        # Count against an arbitrary column of the table (here, geom).
        count_query = select([func.count(table.c.geom)])
        # The query returns a single row, and we want the 0th and only
        # attribute of that row (the count).
        return session.execute(count_query).fetchone()[0]
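Because the query yields a single one-column row, SQLAlchemy's scalar() shortcut expresses the same intent more compactly; an equivalent sketch:

    def _get_num_shapes(self):
        # scalar() returns the first column of the first row (or None).
        count_query = select([func.count(self.shape_table.c.geom)])
        return session.execute(count_query).scalar()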
Example 5
    def test_staging_new_table(self):
        # For the entry in MetaTable without a table, create a staging table.
        # We'll need to read from a fixture CSV.
        with Staging(self.unloaded_meta,
                     source_path=self.radio_path) as s_table:
            all_rows = session.execute(s_table.table.select()).fetchall()
        self.assertEqual(len(all_rows), 5)
Example 6
    def timeseries_one(self, agg_unit, start, end, geom=None, column_filters=None):
        ts_select = self.timeseries(agg_unit, start, end, geom, column_filters)
        rows = session.execute(ts_select.order_by('time_bucket'))

        header = [['count', 'datetime']]
        # Discard the name attribute.
        rows = [[count, time_bucket.date()] for _, time_bucket, count in rows]
        return header + rows
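The header-plus-rows layout returned here drops straight into the csv module; a usage sketch with hypothetical arguments:

    import csv

    # 'crimes', start, and end are placeholders for illustration.
    table = MetaTable.get_by_dataset_name('crimes')
    timeseries = table.timeseries_one('week', start, end)

    with open('timeseries.csv', 'w') as f:
        csv.writer(f).writerows(timeseries)  # first row is the header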
Example 7
    def get_metadata_with_etl_result(cls, table_name):
        query = '''
            SELECT meta.*, celery.status, celery.traceback, celery.date_done
            FROM meta_shape as meta
            LEFT JOIN celery_taskmeta as celery
            ON celery.task_id = meta.celery_task_id
            WHERE meta.dataset_name='{}';
        '''.format(table_name)

        metadata = session.execute(query).first()
        return metadata
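Formatting table_name directly into the SQL string leaves this query open to injection. A safer sketch of the same lookup using a bound parameter with sqlalchemy.text:

    from sqlalchemy import text

    query = text('''
        SELECT meta.*, celery.status, celery.traceback, celery.date_done
        FROM meta_shape AS meta
        LEFT JOIN celery_taskmeta AS celery
        ON celery.task_id = meta.celery_task_id
        WHERE meta.dataset_name = :name;
    ''')
    metadata = session.execute(query, {'name': table_name}).first()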
Example 8
    def test_update_with_delete(self):
        etl = PlenarioETL(self.existing_meta, source_path=self.dog_path)
        etl.update()

        # The same source CSV, but with one less record
        deleted_path = os.path.join(fixtures_path, 'dog_park_permits_deleted.csv')
        etl = PlenarioETL(self.existing_meta, source_path=deleted_path)
        etl.update()

        all_rows = session.execute(self.existing_table.select()).fetchall()
        self.assertEqual(len(all_rows), 4)
Example 9
    def get_all_with_etl_status(cls):
        """
        :return: Every row of meta_shape joined with celery task status.
        """
        shape_query = '''
            SELECT meta.*, celery.status
            FROM meta_shape as meta
            LEFT JOIN celery_taskmeta as celery
            ON celery.task_id = meta.celery_task_id
            WHERE meta.approved_status = TRUE;
        '''

        return list(session.execute(shape_query))
Example 10
    def test_location_col_add(self):
        drop_if_exists(self.opera_meta.dataset_name)

        etl = PlenarioETL(self.opera_meta, source_path=self.opera_path)
        new_table = etl.add()

        all_rows = session.execute(new_table.select()).fetchall()
        self.assertEqual(len(all_rows), 5)
        session.close()
        new_table.drop(app_engine, checkfirst=True)

        # Did we add a bbox?
        bbox = MetaTable.get_by_dataset_name('public_opera_performances').bbox
        self.assertIsNotNone(bbox)
Example 11
    def test_new_table(self):
        drop_if_exists(self.unloaded_meta.dataset_name)

        etl = PlenarioETL(self.unloaded_meta, source_path=self.radio_path)
        new_table = etl.add()

        all_rows = session.execute(new_table.select()).fetchall()
        self.assertEqual(len(all_rows), 5)
        session.close()
        new_table.drop(app_engine, checkfirst=True)

        # Did we add a bbox?
        bbox = MetaTable.get_by_dataset_name('community_radio_events').bbox
        self.assertIsNotNone(bbox)
Example 12
    def add_intersections_to_index(listing, geom):
        # For each dataset_name in the listing, count the shapes that
        # intersect the given geometry and store it as num_shapes.

        for row in listing:
            name = row['dataset_name']
            num_intersections_query = '''
            SELECT count(g.geom) as num_geoms
            FROM "{dataset_name}" as g
            WHERE ST_Intersects(g.geom, ST_GeomFromGeoJSON('{geojson_fragment}'))
            '''.format(dataset_name=name, geojson_fragment=geom)

            num_intersections = session.execute(num_intersections_query)\
                                       .first().num_geoms
            row['num_shapes'] = num_intersections

        intersecting_rows = [row for row in listing if row['num_shapes'] > 0]
        return intersecting_rows
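The geojson_fragment is interpolated into raw SQL here; when the shape table is available as a reflected SQLAlchemy Table, the same count can be built with the expression language, which binds the GeoJSON as a parameter. A rough sketch under that assumption:

    from sqlalchemy import func, select

    def count_intersections(shape_table, geom):
        # shape_table: a reflected Table with a 'geom' column (assumption).
        query = select([func.count(shape_table.c.geom)]).where(
            func.ST_Intersects(shape_table.c.geom,
                               func.ST_GeomFromGeoJSON(geom)))
        return session.execute(query).scalar()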
Example 13
    def timeseries_all(cls, table_names, agg_unit, start, end, geom=None, ctrees=None):
        # For each table in table_names, generate a query to be unioned
        selects = []
        for name in table_names:

            # If we have condition trees specified, apply them.
            # .get will return None for those datasets who don't have filters
            ctree = ctrees.get(name) if ctrees else None
            table = cls.get_by_dataset_name(name)
            ts_select = table.timeseries(agg_unit, start, end, geom, ctree)
            selects.append(ts_select)

        # Union the time series selects to get a panel
        panel_query = sa.union(*selects)\
                        .order_by('dataset_name')\
                        .order_by('time_bucket')
        panel_vals = session.execute(panel_query)

        panel = []
        for dataset_name, ts in groupby(panel_vals, lambda row: row.dataset_name):

            # Each groupby group is a one-shot iterator that is invalidated
            # once we advance past it, so materialize the rows up front
            # in order to iterate over them twice.
            rows = list(ts)
            # If no records were found, don't include this dataset.
            if all(row.count == 0 for row in rows):
                continue

            ts_dict = {'dataset_name': dataset_name,
                       'items': []}

            for row in rows:
                ts_dict['items'].append({
                    'datetime': row.time_bucket.date(),  # UTC time
                    'count': row.count
                })
            # Aggregate the top-level count across all time slices once,
            # after the loop, rather than recomputing it on every row.
            ts_dict['count'] = sum(i['count'] for i in ts_dict['items'])
            panel.append(ts_dict)

        return panel
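For reference, the panel this returns is a list with one dict per dataset that had any records, shaped like this (illustrative values; datetime holds a datetime.date):

    [{'dataset_name': 'crimes',
      'count': 4,
      'items': [{'datetime': date(2016, 1, 4), 'count': 3},
                {'datetime': date(2016, 1, 11), 'count': 1}]}]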
Example 14
    def test_staging_existing_table(self):
        # With a fixture CSV whose columns match the existing dataset,
        # create a staging table.
        with Staging(self.existing_meta, source_path=self.dog_path) as s_table:
            all_rows = session.execute(s_table.table.select()).fetchall()
        self.assertEqual(len(all_rows), 5)
Example 15
    def _make_bbox(self):
        bbox_query = 'SELECT ST_Envelope(ST_Union(geom)) FROM {};'.\
            format(self.dataset_name)
        # PostgreSQL names the unlabeled result column after the function,
        # hence the st_envelope attribute on the returned row.
        box = session.execute(bbox_query).first().st_envelope
        return box
Example 16
    def remove_table(self):
        if self.is_ingested:
            drop = "DROP TABLE {};".format(self.dataset_name)
            session.execute(drop)
        session.delete(self)
Example 17
    def get_bbox_center(self):
        sel = select([func.ST_AsGeoJSON(func.ST_Centroid(self.bbox))])
        result = session.execute(sel)
        # GeoJSON coordinates are ordered [lon, lat]
        return json.loads(result.first()[0])['coordinates']
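GeoJSON points always store coordinates in [longitude, latitude] order, which is why the comment above flags it; a quick self-contained check:

    import json

    point = '{"type": "Point", "coordinates": [-87.6298, 41.8781]}'
    lon, lat = json.loads(point)['coordinates']  # lon first, lat second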
Example 18
def drop_tables(table_names):
    drop_template = 'DROP TABLE IF EXISTS {};'
    command = ''.join(
        [drop_template.format(table_name) for table_name in table_names])
    session.execute(command)
    session.commit()
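A usage sketch with hypothetical staging-table names; note that the names are formatted into raw SQL, so only trusted identifiers should be passed:

    drop_tables(['staging_dog_park_permits', 'staging_community_radio_events'])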