def edit_shape(dataset_name):
    form = EditShapeForm()
    meta = session.query(ShapeMetadata).get(dataset_name)

    if form.validate_on_submit():
        upd = {
            'human_name': form.human_name.data,
            'description': form.description.data,
            'attribution': form.attribution.data,
            'update_freq': form.update_freq.data,
        }
        session.query(ShapeMetadata)\
            .filter(ShapeMetadata.dataset_name == meta.dataset_name)\
            .update(upd)
        session.commit()

        if not meta.approved_status:
            approve_shape(dataset_name)

        flash('%s updated successfully!' % meta.human_name, 'success')
        return redirect(url_for('views.view_datasets'))

    context = {
        'form': form,
        'meta': meta,
    }
    return render_template('admin/edit-shape.html', **context)
def update_meta(metatable, table):
    """
    After ingest/update, update the metatable registry
    to reflect table information.

    :param metatable: MetaTable instance to update.
    :param table: Table instance to update from.

    :returns: None
    """
    metatable.update_date_added()

    metatable.obs_from, metatable.obs_to = session.query(
        func.min(table.c.point_date),
        func.max(table.c.point_date)
    ).first()

    metatable.bbox = session.query(
        func.ST_SetSRID(
            func.ST_Envelope(func.ST_Union(table.c.geom)),
            4326
        )
    ).first()[0]

    session.add(metatable)
    try:
        session.commit()
    except:
        session.rollback()
        raise
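# Illustrative usage sketch (not part of the original module): update_meta is
# intended to run at the end of an ingest/update task, with a MetaTable row
# and its reflected point table. The helper name and its `source_url_hash`
# argument are assumptions for illustration only.
def _example_refresh_metadata(source_url_hash):
    meta = session.query(MetaTable).get(source_url_hash)
    update_meta(meta, meta.point_table)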
def test_delete_shape(self):
    # Can we remove a shape that's fully ingested?
    city_meta = session.query(ShapeMetadata).get(fixtures['city'].table_name)
    self.assertIsNotNone(city_meta)
    city_meta.remove_table()
    session.commit()
    city_meta = session.query(ShapeMetadata).get(fixtures['city'].table_name)
    self.assertIsNone(city_meta)

    # Can we remove a shape that's only in the metadata?
    dummy_meta = session.query(ShapeMetadata).get(self.dummy_name)
    self.assertIsNotNone(dummy_meta)
    dummy_meta.remove_table()
    session.commit()
    dummy_meta = session.query(ShapeMetadata).get(self.dummy_name)
    self.assertIsNone(dummy_meta)

    # Add them back to return to original test state
    ShapeTests.ingest_fixture(fixtures['city'])
    ShapeMetadata.add(human_name=u'Dummy Name',
                      source_url=None,
                      update_freq='yearly',
                      approved_status=False)
    session.commit()
def view_datasets():
    datasets_pending = session.query(MetaTable)\
        .filter(MetaTable.approved_status != 'true')\
        .all()

    counts = {
        'master_row_count': table_row_estimate('dat_master'),
        'weather_daily_row_count': table_row_estimate('dat_weather_observations_daily'),
        'weather_hourly_row_count': table_row_estimate('dat_weather_observations_hourly'),
        'census_block_row_count': table_row_estimate('census_blocks'),
    }

    try:
        celery_table = Table('celery_taskmeta', Base.metadata,
                             autoload=True, autoload_with=engine)
        q = text('''
            SELECT m.*, c.status, c.task_id
            FROM meta_master AS m
            LEFT JOIN celery_taskmeta AS c
              ON c.id = (
                SELECT id FROM celery_taskmeta
                WHERE task_id = ANY(m.result_ids)
                ORDER BY date_done DESC
                LIMIT 1
              )
            WHERE m.approved_status = 'true'
        ''')
        datasets = []
        with engine.begin() as c:
            datasets = list(c.execute(q))
    except NoSuchTableError, e:
        datasets = session.query(MetaTable)\
            .filter(MetaTable.approved_status == 'true')\
            .all()
def meta():
    status_code = 200
    resp = {
        'meta': {
            'status': 'ok',
            'message': '',
        },
        'objects': []
    }

    dataset_name = request.args.get('dataset_name')
    if dataset_name:
        metas = session.query(MetaTable)\
            .filter(MetaTable.dataset_name == dataset_name)
    else:
        metas = session.query(MetaTable)
    metas = metas.filter(MetaTable.approved_status == 'true')

    for m in metas.all():
        keys = m.as_dict()
        for e in METATABLE_KEYS_TO_EXCLUDE:
            del keys[e]
        resp['objects'].append(keys)

    resp['meta']['total'] = len(resp['objects'])
    resp = make_response(json.dumps(resp, default=dthandler), status_code)
    resp.headers['Content-Type'] = 'application/json'
    return resp
def edit_dataset(source_url_hash):
    form = EditDatasetForm()
    meta = session.query(MetaTable).get(source_url_hash)
    table = Table('dat_%s' % meta.dataset_name, Base.metadata,
                  autoload=True, autoload_with=engine)
    fieldnames = table.columns.keys()

    if form.validate_on_submit():
        upd = {
            'human_name': form.human_name.data,
            'description': form.description.data,
            'attribution': form.attribution.data,
            'obs_from': form.obs_from.data,
            'obs_to': form.obs_to.data,
            'update_freq': form.update_freq.data,
            'business_key': form.business_key.data,
            'latitude': form.latitude.data,
            'longitude': form.longitude.data,
            'location': form.location.data,
            'observed_date': form.observed_date.data,
        }
        session.query(MetaTable)\
            .filter(MetaTable.source_url_hash == meta.source_url_hash)\
            .update(upd)
        session.commit()

        flash('%s updated successfully!' % meta.human_name, 'success')
        return redirect(url_for('views.view_datasets'))

    context = {
        'form': form,
        'meta': meta,
        'fieldnames': fieldnames,
    }
    return render_template('edit-dataset.html', **context)
def edit_dataset(source_url_hash):
    form = EditDatasetForm()
    meta = session.query(MetaTable).get(source_url_hash)
    fieldnames = meta.column_names
    num_rows = 0

    if meta.approved_status:
        try:
            table_name = meta.dataset_name
            table = Table(table_name, Base.metadata,
                          autoload=True, autoload_with=engine)
            # Would prefer to just get the names from the metadata
            # without needing to reflect.
            fieldnames = table.columns.keys()
            pk_name = [p.name for p in table.primary_key][0]
            pk = table.c[pk_name]
            num_rows = session.query(pk).count()
        except sqlalchemy.exc.NoSuchTableError:
            # dataset has been approved, but perhaps still processing.
            pass

    if form.validate_on_submit():
        upd = {
            'human_name': form.human_name.data,
            'description': form.description.data,
            'attribution': form.attribution.data,
            'update_freq': form.update_freq.data,
            'latitude': form.latitude.data,
            'longitude': form.longitude.data,
            'location': form.location.data,
            'observed_date': form.observed_date.data,
        }
        session.query(MetaTable)\
            .filter(MetaTable.source_url_hash == meta.source_url_hash)\
            .update(upd)
        session.commit()

        if not meta.approved_status:
            approve_dataset(source_url_hash)

        flash('%s updated successfully!' % meta.human_name, 'success')
        return redirect(url_for('views.view_datasets'))

    context = {
        'form': form,
        'meta': meta,
        'fieldnames': fieldnames,
        'num_rows': num_rows,
    }
    return render_template('admin/edit-dataset.html', **context)
def add_dataset():
    dataset_info = {}
    errors = []
    socrata_source = False

    url = ""
    dataset_id = None
    md = None

    if request.args.get('dataset_url'):
        url = request.args.get('dataset_url')
        (dataset_info, errors, socrata_source) = get_context_for_new_dataset(url)

        # populate contributor info from session
        user = session.query(User).get(flask_session['user_id'])
        dataset_info['contributor_name'] = user.name
        dataset_info['contributor_organization'] = 'Plenario Admin'
        dataset_info['contributor_email'] = user.email

        # check if dataset with the same URL has already been loaded
        dataset_id = md5(url).hexdigest()
        md = session.query(MetaTable).get(dataset_id)
        if md:
            errors.append("A dataset with that URL has already been loaded: '%s'" % md.human_name)

    if request.method == 'POST' and not md:
        md = add_dataset_to_metatable(request, url, dataset_id, dataset_info,
                                      socrata_source, approved_status=True)

        json_data_types = None
        if ((not md.is_socrata_source) and md.contributed_data_types):
            json_data_types = json.loads(md.contributed_data_types)

        add_dataset_task.delay(md.source_url_hash, data_types=json_data_types)

        flash('%s added successfully!' % md.human_name, 'success')
        return redirect(url_for('views.view_datasets'))

    context = {
        'dataset_info': dataset_info,
        'errors': errors,
        'socrata_source': socrata_source
    }
    return render_template('admin/add-dataset.html', **context)
def frequency_update(frequency):
    # hourly, daily, weekly, monthly, yearly
    md = session.query(MetaTable)\
        .filter(MetaTable.update_freq == frequency).all()
    for m in md:
        update_dataset.delay(m.source_url_hash)

    md = session.query(ShapeMetadata)\
        .filter(ShapeMetadata.update_freq == frequency)\
        .filter(ShapeMetadata.is_ingested == True)\
        .all()
    for m in md:
        update_shape.delay(m.dataset_name)

    return '%s update complete' % frequency
def index(cls, geom=None):
    # The attributes that we want to pass along as-is
    as_is_attr_names = [
        'dataset_name', 'human_name', 'date_added',
        'attribution', 'description', 'update_freq',
        'view_url', 'source_url', 'num_shapes',
        'contributor_name', 'contributor_email', 'contributor_organization'
    ]
    as_is_attrs = [getattr(cls, name) for name in as_is_attr_names]

    # We need to apply some processing to the bounding box
    bbox = func.ST_AsGeoJSON(cls.bbox)
    attr_names = as_is_attr_names + ['bbox']
    attrs = as_is_attrs + [bbox]

    result = session.query(*attrs).filter(cls.is_ingested)
    listing = [dict(zip(attr_names, row)) for row in result]

    for dataset in listing:
        dataset['date_added'] = str(dataset['date_added'])

    if geom:
        listing = cls.add_intersections_to_index(listing, geom)

    return listing
def form_detail_sql_query(validator, aggregate_points=False):
    dset = validator.dataset
    try:
        q = session.query(dset)
        if validator.conditions:
            q = q.filter(*validator.conditions)
    except Exception as e:
        return internal_error('Failed to construct column filters.', e)

    try:
        # Add time filters
        maker = FilterMaker(validator.vals, dataset=dset)
        q = q.filter(*maker.time_filters())

        # Add geom filter, if provided
        geom = validator.get_geom()
        if geom is not None:
            geom_filter = maker.geom_filter(geom)
            q = q.filter(geom_filter)
    except Exception as e:
        return internal_error('Failed to construct time and geometry filters.', e)

    # If the query specified a shape dataset, add a join to the SQL query with that dataset
    shape_table = validator.vals.get('shape')
    if shape_table is not None:
        shape_columns = ['{}.{} as {}'.format(shape_table.name, col.name, col.name)
                         for col in shape_table.c]
        if aggregate_points:
            q = q.from_self(shape_table)\
                .filter(dset.c.geom.ST_Intersects(shape_table.c.geom))\
                .group_by(shape_table)
        else:
            q = q.join(shape_table, dset.c.geom.ST_Within(shape_table.c.geom))
            # add columns from shape dataset to the select statement
            q = q.add_columns(*shape_columns)

    return q
def weather_stations():
    # print "weather_stations()"
    raw_query_params = request.args.copy()
    # print "weather_stations(): raw_query_params=", raw_query_params

    stations_table = Table('weather_stations', Base.metadata,
                           autoload=True, autoload_with=engine,
                           extend_existing=True)
    valid_query, query_clauses, resp, status_code = make_query(stations_table, raw_query_params)

    if valid_query:
        resp['meta']['status'] = 'ok'
        base_query = session.query(stations_table)
        for clause in query_clauses:
            print "weather_stations(): filtering on clause", clause
            base_query = base_query.filter(clause)
        values = [r for r in base_query.all()]
        fieldnames = [f for f in stations_table.columns.keys()]
        for value in values:
            d = {f: getattr(value, f) for f in fieldnames}
            loc = str(value.location)
            d['location'] = loads(loc.decode('hex')).__geo_interface__
            resp['objects'].append(d)

    resp['meta']['query'] = raw_query_params
    resp = make_response(json.dumps(resp, default=dthandler), status_code)
    resp.headers['Content-Type'] = 'application/json'
    return resp
def make_grid(self, resolution, geom=None, conditions=None):
    """
    :param resolution: length of side of grid square in meters
    :type resolution: int
    :param geom: string representation of geojson fragment
    :type geom: str
    :param conditions: conditions on columns to filter on
    :type conditions: list of SQLAlchemy binary operations
                      (e.g. col > value)
    :return: grid: result proxy with all result rows
             size_x, size_y: the horizontal and vertical size
                             of the grid squares in degrees
    """
    if conditions is None:
        conditions = []

    # We need to convert resolution (given in meters) to degrees
    # - which is the unit of measure for EPSG 4326 -
    # - in order to generate our grid.
    center = self.get_bbox_center()
    # center[1] is longitude
    size_x, size_y = get_size_in_degrees(resolution, center[1])

    # Generate a count for each resolution by resolution square
    t = self.point_table
    q = session.query(func.count(t.c.hash),
                      func.ST_SnapToGrid(t.c.geom, size_x, size_y)
                      .label('squares'))\
        .filter(*conditions)\
        .group_by('squares')

    if geom:
        q = q.filter(t.c.geom.ST_Within(func.ST_GeomFromGeoJSON(geom)))

    return session.execute(q), size_x, size_y
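# Illustrative usage sketch (not part of the original module): make_grid is an
# instance method on the point-dataset metadata class, so it is called on the
# metadata row of an ingested dataset. The helper name, its `meta` argument,
# and the 500 m resolution are assumptions for illustration only.
def _example_grid_counts(meta):
    grid, size_x, size_y = meta.make_grid(resolution=500)
    # Each result row pairs a point count with its snapped grid-cell geometry.
    for count, cell in grid:
        print count, cell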
def index(cls, geom=None):
    # The attributes that we want to pass along as-is
    as_is_attr_names = ['dataset_name', 'human_name', 'date_added',
                        'attribution', 'description', 'update_freq',
                        'view_url', 'source_url', 'num_shapes']
    as_is_attrs = [getattr(cls, name) for name in as_is_attr_names]

    # We need to apply some processing to the bounding box
    bbox = func.ST_AsGeoJSON(cls.bbox)
    attr_names = as_is_attr_names + ['bbox']
    attrs = as_is_attrs + [bbox]

    result = session.query(*attrs).filter(cls.is_ingested)
    listing = [dict(zip(attr_names, row)) for row in result]

    for dataset in listing:
        dataset['date_added'] = str(dataset['date_added'])

    if geom:
        listing = cls.add_intersections_to_index(listing, geom)

    listing = cls._add_fields_to_index(listing)
    return listing
def approve_dataset(source_url_hash):
    # get the MetaTable row and change the approved_status
    # and bounce back to view-datasets.
    meta = session.query(MetaTable).get(source_url_hash)

    json_data_types = None
    if ((not meta.is_socrata_source) and meta.contributed_data_types):
        json_data_types = json.loads(meta.contributed_data_types)

    add_dataset_task.delay(source_url_hash, data_types=json_data_types)

    upd = {'approved_status': 'true'}
    meta.approved_status = 'true'
    session.commit()

    # Email the user who submitted that their dataset has been approved.
    # email the response to somebody
    msg_body = """Hello %s,\r\n \r\n Your dataset has been approved and added to Plenar.io:\r\n \r\n %s\r\n \r\n It should appear on http://plenar.io within 24 hours.\r\n \r\n Thank you!\r\n The Plenario Team\r\n http://plenar.io""" % (meta.contributor_name, meta.human_name)

    send_mail(subject="Your dataset has been added to Plenar.io",
              recipient=meta.contributor_email, body=msg_body)
def dataset_fields(dataset_name):
    try:
        resp = json_response_base(None, [], query={'dataset_name': dataset_name})
        status_code = 200

        # get json and convert it to a dictionary
        columns = session.query(MetaTable.column_names)\
            .filter(MetaTable.dataset_name == dataset_name)\
            .first()[0]

        # return formatted list of column information
        resp['objects'] = [{'field_name': key, 'field_type': value}
                           for key, value in columns.items()]

        resp = make_response(json.dumps(resp), status_code)
    except NoSuchTableError:
        error_msg = "'%s' is not a valid table name" % dataset_name
        resp = bad_request(error_msg)

    resp.headers['Content-Type'] = 'application/json'
    return resp
def admin_add_dataset():
    user = session.query(User).get(flask_session['user_id'])

    context = {'is_admin': True,
               'contributor_name': user.name,
               'contributor_organization': 'Plenario Admin',
               'contributor_email': user.email}

    return add(context)
def contrib_view():
    dataset_info = {}
    errors = []
    socrata_source = False

    url = ""
    dataset_id = None
    md = None

    if request.args.get('dataset_url'):
        url = request.args.get('dataset_url')
        (dataset_info, errors, socrata_source) = get_context_for_new_dataset(url)

        # check if dataset with the same URL has already been loaded
        dataset_id = md5(url).hexdigest()
        md = session.query(MetaTable).get(dataset_id)
        if md:
            errors.append("A dataset with that URL has already been loaded: '%s'" % md.human_name)

    if request.method == 'POST' and not md:
        md = add_dataset_to_metatable(request, url, dataset_id, dataset_info,
                                      socrata_source, approved_status=False)

        # email a confirmation to the submitter
        msg_body = """Hello %s,\r\n\r\n We received your recent dataset submission to Plenar.io:\r\n\r\n%s\r\n\r\n After we review it, we'll notify you when your data is loaded and available.\r\n\r\n Thank you!\r\nThe Plenario Team\r\nhttp://plenar.io""" % (request.form.get('contributor_name'), md.human_name)

        send_mail(subject="Your dataset has been submitted to Plenar.io",
                  recipient=request.form.get('contributor_email'), body=msg_body)

        return redirect(url_for('views.contrib_thankyou'))

    context = {'dataset_info': dataset_info,
               'form': request.form,
               'errors': errors,
               'socrata_source': socrata_source}

    return render_template('contribute.html', **context)
def meta():
    # Doesn't require a table lookup,
    # so no params passed on construction
    validator = ParamValidator()
    validator.set_optional('dataset_name', no_op_validator, None)\
             .set_optional('location_geom__within', geom_validator, None)\
             .set_optional('obs_date__ge', date_validator, None)\
             .set_optional('obs_date__le', date_validator, None)

    err = validator.validate(request.args)
    if err:
        return bad_request(err)

    # Set up base select statement
    cols_to_return = [
        'human_name', 'dataset_name', 'source_url', 'view_url',
        'obs_from', 'obs_to', 'date_added', 'last_update',
        'update_freq', 'attribution', 'description', 'column_names'
    ]
    col_objects = [getattr(MetaTable, col) for col in cols_to_return]
    q = session.query(*col_objects)

    # What params did the user provide?
    dataset_name = validator.vals['dataset_name']
    geom = validator.get_geom()
    start_date = validator.vals['obs_date__ge']
    end_date = validator.vals['obs_date__le']

    # Filter over datasets if user provides full date range or geom
    should_filter = geom or (start_date and end_date)

    if dataset_name:
        # If the user specified a name, don't try any filtering.
        # Just spit back that dataset's metadata.
        q = q.filter(MetaTable.dataset_name == dataset_name)
    elif should_filter:
        if geom:
            intersects = sa.func.ST_Intersects(
                sa.func.ST_GeomFromGeoJSON(geom),
                MetaTable.bbox)
            q = q.filter(intersects)
        if start_date and end_date:
            q = q.filter(
                sa.and_(MetaTable.obs_from < end_date,
                        MetaTable.obs_to > start_date))

    # Otherwise, just send back all the datasets
    metadata_records = [dict(zip(cols_to_return, row)) for row in q.all()]
    resp = json_response_base(validator, metadata_records)
    resp['meta']['total'] = len(resp['objects'])

    status_code = 200
    resp = make_response(json.dumps(resp, default=dthandler), status_code)
    resp.headers['Content-Type'] = 'application/json'
    return resp
def approve_dataset(source_url_hash):
    # Approve it
    meta = session.query(MetaTable).get(source_url_hash)
    meta.approved_status = True
    session.commit()

    # Ingest it
    add_dataset_task.delay(source_url_hash)

    send_approval_email(meta.human_name, meta.contributor_name,
                        meta.contributor_email)
def validate(self):
    rv = Form.validate(self)
    if not rv:
        return False

    existing_name = db_session.query(User)\
        .filter(User.name == self.name.data).first()
    if existing_name:
        self.name.errors.append('Name is already registered')
        return False

    existing_email = db_session.query(User)\
        .filter(User.email == self.email.data).first()
    if existing_email:
        self.email.errors.append('Email address is already registered')
        return False

    return True
def approve_shape(dataset_name):
    # Approve it
    meta = session.query(ShapeMetadata).get(dataset_name)
    meta.approved_status = True
    session.commit()

    # Ingest it
    add_shape_task.delay(dataset_name)

    send_approval_email(meta.human_name, meta.contributor_name,
                        meta.contributor_email)
def detail_query(args, aggregate=False):
    meta_params = ('dataset', 'shapeset', 'data_type', 'geom', 'offset', 'limit')
    meta_vals = (args.data.get(k) for k in meta_params)
    dataset, shapeset, data_type, geom, offset, limit = meta_vals

    # If there aren't tree filters provided, a little formatting is needed
    # to make the general filters into an 'and' tree.
    if not has_tree_filters(args.data):
        # Creates an AND condition tree and adds it to args.
        args.data[dataset.name + '__filter'] = request_args_to_condition_tree(
            request_args=args.data,
            ignore=['shapeset']
        )

    # Sort out the filter conditions from the rest of the user arguments.
    filters = {k: v for k, v in args.data.items() if 'filter' in k}

    # Get upset if they specify more than a dataset and shapeset filter.
    if len(filters) > 2:
        return bad_request("Too many table filters provided.")

    # Query the point dataset.
    q = session.query(dataset)

    # If the user specified a geom, filter results to those within its shape.
    if geom:
        q = q.filter(dataset.c.geom.ST_Within(
            sqlalchemy.func.ST_GeomFromGeoJSON(geom)
        ))

    # Retrieve the filters and build conditions from them if they exist.
    point_ctree = filters.get(dataset.name + '__filter')

    # If the user specified point dataset filters, parse and apply them.
    if point_ctree:
        point_conditions = parse_tree(dataset, point_ctree)
        q = q.filter(point_conditions)

    # If a user specified a shape dataset, it was either through the /shapes
    # endpoint, which uses the aggregate result, or through the /detail endpoint
    # which uses the joined result.
    if shapeset is not None:
        if aggregate:
            q = q.from_self(shapeset)\
                .filter(dataset.c.geom.ST_Intersects(shapeset.c.geom))\
                .group_by(shapeset)
        else:
            shape_columns = ['{}.{} as {}'.format(shapeset.name, col.name, col.name)
                             for col in shapeset.c]
            q = q.join(shapeset, dataset.c.geom.ST_Within(shapeset.c.geom))
            q = q.add_columns(*shape_columns)

        # If there's a filter specified for the shape dataset, apply those conditions.
        shape_ctree = filters.get(shapeset.name + '__filter')
        if shape_ctree:
            shape_conditions = parse_tree(shapeset, shape_ctree)
            q = q.filter(shape_conditions)

    return q
def set_shape(self, shape_dataset_name):
    shape_table_meta = session.query(ShapeMetadata).get(shape_dataset_name)
    if shape_table_meta:
        shape_table = shape_table_meta.shape_table
        self.cols += ['{}.{}'.format(shape_table.name, key)
                      for key in shape_table.columns.keys()]
        self.vals['shape'] = shape_table
def edit_dataset(source_url_hash):
    form = EditDatasetForm()
    meta = session.query(MetaTable).get(source_url_hash)
    fieldnames = None
    num_rows = 0
    num_weather_observations = 0
    num_rows_w_censusblocks = 0

    if (meta.approved_status == 'true'):
        try:
            table_name = 'dat_%s' % meta.dataset_name
            table = Table(table_name, Base.metadata,
                          autoload=True, autoload_with=engine)
            fieldnames = table.columns.keys()
            pk_name = [p.name for p in table.primary_key][0]
            pk = table.c[pk_name]
            num_rows = session.query(pk).count()

            dat_master = Table('dat_master', Base.metadata,
                               autoload=True, autoload_with=engine)
            sel = session.query(func.count(dat_master.c.master_row_id)).filter(
                and_(dat_master.c.dataset_name == meta.dataset_name,
                     dat_master.c.dataset_row_id == pk,
                     dat_master.c.weather_observation_id.isnot(None)))
            num_weather_observations = sel.first()[0]

            sel = session.query(func.count(dat_master.c.master_row_id)).filter(
                and_(dat_master.c.dataset_name == meta.dataset_name,
                     dat_master.c.dataset_row_id == pk,
                     dat_master.c.census_block.isnot(None)))
            num_rows_w_censusblocks = sel.first()[0]
        except sqlalchemy.exc.NoSuchTableError, e:
            # dataset has been approved, but perhaps still processing.
            pass
def weather(table):
    raw_query_params = request.args.copy()

    weather_table = Table('dat_weather_observations_%s' % table, Base.metadata,
                          autoload=True, autoload_with=engine,
                          extend_existing=True)
    stations_table = Table('weather_stations', Base.metadata,
                           autoload=True, autoload_with=engine,
                           extend_existing=True)

    valid_query, query_clauses, resp, status_code = make_query(weather_table, raw_query_params)

    if valid_query:
        resp['meta']['status'] = 'ok'
        base_query = session.query(weather_table, stations_table)\
            .join(stations_table,
                  weather_table.c.wban_code == stations_table.c.wban_code)
        for clause in query_clauses:
            base_query = base_query.filter(clause)

        base_query = base_query.order_by(weather_table.c.id.asc())
        base_query = base_query.limit(RESPONSE_LIMIT)  # returning the top 1000 records
        if raw_query_params.get('offset'):
            offset = raw_query_params['offset']
            base_query = base_query.offset(int(offset))

        values = [r for r in base_query.all()]
        weather_fields = weather_table.columns.keys()
        station_fields = stations_table.columns.keys()
        weather_data = {}
        station_data = {}

        for value in values:
            wd = {f: getattr(value, f) for f in weather_fields}
            sd = {f: getattr(value, f) for f in station_fields}
            if weather_data.get(value.wban_code):
                weather_data[value.wban_code].append(wd)
            else:
                weather_data[value.wban_code] = [wd]
            loc = str(value.location)
            sd['location'] = loads(loc.decode('hex')).__geo_interface__
            station_data[value.wban_code] = sd

        for station_id in weather_data.keys():
            d = {
                'station_info': station_data[station_id],
                'observations': weather_data[station_id],
            }
            resp['objects'].append(d)

        resp['meta']['total'] = sum([len(r['observations']) for r in resp['objects']])

    resp['meta']['query'] = raw_query_params
    resp = make_response(json.dumps(resp, default=dthandler), status_code)
    resp.headers['Content-Type'] = 'application/json'
    return resp
def meta():
    status_code = 200
    resp = {
        'meta': {
            'status': 'ok',
            'message': '',
        },
        'objects': []
    }

    dataset_name = request.args.get('dataset_name')
    if dataset_name:
        metas = session.query(MetaTable)\
            .filter(MetaTable.dataset_name == dataset_name).all()
    else:
        metas = session.query(MetaTable).all()

    resp['objects'].extend([m.as_dict() for m in metas])
    resp['meta']['total'] = len(resp['objects'])

    resp = make_response(json.dumps(resp, default=dthandler), status_code)
    resp.headers['Content-Type'] = 'application/json'
    return resp
def export_dataset_to_response(dataset_name, query=None):
    """
    :param dataset_name: Name of shape dataset.
                         Expected to be found in meta_shape table.
    :param query: Optional SQL query to be executed on shape dataset
                  to filter results

    Expected query parameter: `data_type`. We expect it to be one of
    'json', 'kml', or 'shapefile'. If none of these (or unspecified),
    return JSON.

    :return: response with geoJSON data and response code
    """
    # Do we have this shape?
    shape_dataset = session.query(ShapeMetadata).get(dataset_name)
    if not (shape_dataset and shape_dataset.is_ingested):
        error_message = 'Could not find shape dataset {}'.format(dataset_name)
        return make_response(error_message, 404)

    # What file format does the user want it in?
    export_format = request.args.get('data_type')
    # json is default export type
    if not export_format:
        export_format = u'json'
    export_format = unicode.lower(export_format)

    # Make a filename that we are reasonably sure to be unique
    # and not occupied by anyone else.
    sacrifice_file = tempfile.NamedTemporaryFile()
    export_path = sacrifice_file.name
    sacrifice_file.close()  # Removes file from system.

    try:
        # Write to that filename
        OgrExport(export_format=export_format, table_name=dataset_name,
                  export_path=export_path, query=query).write_file()
        # Dump it in the response
        with open(export_path, 'r') as to_export:
            resp = make_response(to_export.read(), 200)

        # Make the downloaded filename look nice
        resp.headers['Content-Type'] = _shape_format_to_content_header(export_format)
        disp_header = 'attachment; filename={name}.{ext}'.format(
            name=shape_dataset.human_name,
            ext=_shape_format_to_file_extension(export_format))
        resp.headers['Content-Disposition'] = disp_header
        return resp
    except Exception as e:
        error_message = 'Failed to export shape dataset {}'.format(dataset_name)
        print repr(e)
        return make_response(error_message, 500)
    finally:
        # Don't leave that file hanging around.
        if os.path.isfile(export_path):
            os.remove(export_path)
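# Illustrative usage sketch (not part of the original module): this view reads
# `data_type` from the Flask request, so calling it outside a request requires
# a test request context. The helper name, the `app` argument, and the
# 'chicago_neighborhoods' dataset name are assumptions for illustration only.
def _example_export_kml(app):
    with app.test_request_context('/?data_type=kml'):
        resp = export_dataset_to_response('chicago_neighborhoods')
        return resp.headers.get('Content-Disposition')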
def update_shape(self, table_name):
    # Associate the dataset with this celery task
    # so we can check on the task's status
    meta = session.query(ShapeMetadata).get(table_name)
    meta.celery_task_id = self.request.id
    session.commit()

    # Update the shapefile
    ShapeETL(meta=meta).update()
    return 'Finished updating shape dataset {} from {}.'.\
        format(meta.dataset_name, meta.source_url)
def test_delete_shape(self):
    # Can we remove a shape that's fully ingested?
    city_meta = session.query(ShapeMetadata).get(fixtures["city"].table_name)
    self.assertIsNotNone(city_meta)
    city_meta.remove_table(caller_session=session)
    session.commit()
    city_meta = session.query(ShapeMetadata).get(fixtures["city"].table_name)
    self.assertIsNone(city_meta)

    # Can we remove a shape that's only in the metadata?
    dummy_meta = session.query(ShapeMetadata).get(self.dummy_name)
    self.assertIsNotNone(dummy_meta)
    dummy_meta.remove_table(caller_session=session)
    session.commit()
    dummy_meta = session.query(ShapeMetadata).get(self.dummy_name)
    self.assertIsNone(dummy_meta)

    # Add them back to return to original test state
    ShapeTests.ingest_fixture(fixtures["city"])
    ShapeMetadata.add(caller_session=session,
                      human_name=u"Dummy Name",
                      source_url=None)
    session.commit()
def update_meta(metadata, table):
    """
    After ingest/update, update the metadata registry to reflect
    the ingested table.

    :param metadata: MetaTable instance to update.
    :param table: reflected point table to update from.
    """
    metadata.update_date_added()
    metadata.obs_from, metadata.obs_to =\
        session.query(func.min(table.c.point_date),
                      func.max(table.c.point_date)).first()

    bbox = session.query(
        func.ST_SetSRID(func.ST_Envelope(func.ST_Union(table.c.geom)),
                        4326)).first()[0]
    metadata.bbox = bbox

    session.add(metadata)
    try:
        session.commit()
    except:
        session.rollback()
        raise
def view_datasets():
    datasets_pending = session.query(MetaTable).\
        filter(MetaTable.approved_status != True).\
        all()

    shapes_pending = session.query(ShapeMetadata).\
        filter(ShapeMetadata.approved_status != True).\
        all()

    try:
        q = text('''
            SELECT m.*, c.status, c.task_id
            FROM meta_master AS m
            LEFT JOIN celery_taskmeta AS c
              ON c.id = (
                SELECT id FROM celery_taskmeta
                WHERE task_id = ANY(m.result_ids)
                ORDER BY date_done DESC
                LIMIT 1
              )
            WHERE m.approved_status = 'true'
        ''')
        with engine.begin() as c:
            datasets = list(c.execute(q))
    except NoSuchTableError:
        datasets = session.query(MetaTable)\
            .filter(MetaTable.approved_status == True)\
            .all()

    try:
        shape_datasets = ShapeMetadata.get_all_with_etl_status()
    except NoSuchTableError:
        # If we can't find shape metadata, soldier on.
        shape_datasets = None

    return render_template('admin/view-datasets.html',
                           datasets_pending=datasets_pending,
                           shapes_pending=shapes_pending,
                           datasets=datasets,
                           shape_datasets=shape_datasets)
def test_update(self):
    # Try to ingest slightly changed shape
    fixture = fixtures['changed_neighborhoods']
    # Add the fixture to the registry first
    shape_meta = session.query(ShapeMetadata).get('chicago_neighborhoods')
    # Do a ShapeETL update
    ShapeETL(meta=shape_meta, source_path=fixture.path).update()

    t = shape_meta.shape_table
    sel = t.select().where(t.c['sec_neigh'] == 'ENGLEWOOD')
    res = engine.execute(sel).fetchall()
    altered_value = res[0]['pri_neigh']
    # I changed Englewood to Englerwood :P
    self.assertEqual(altered_value, 'Englerwood')
def delete_dataset(self, source_url_hash):
    md = session.query(MetaTable).get(source_url_hash)
    try:
        dat_table = md.point_table
        dat_table.drop(engine, checkfirst=True)
    except NoSuchTableError:
        # Move on so we can get rid of the metadata
        pass
    session.delete(md)
    try:
        session.commit()
    except InternalError, e:
        raise delete_dataset.retry(exc=e)
def edit_dataset(source_url_hash):
    form = EditDatasetForm()
    meta = session.query(MetaTable).get(source_url_hash)
    fieldnames = None
    if (meta.approved_status == 'true'):
        try:
            table = Table('dat_%s' % meta.dataset_name, Base.metadata,
                          autoload=True, autoload_with=engine)
            fieldnames = table.columns.keys()
        except sqlalchemy.exc.NoSuchTableError, e:
            # dataset has been approved, but perhaps still processing.
            pass
def dataset_status():
    source_url_hash = request.args.get("source_url_hash")

    q = '''
        SELECT
          m.human_name,
          m.source_url_hash,
          c.status,
          c.date_done,
          c.traceback,
          c.task_id
        FROM meta_master AS m,
        UNNEST(m.result_ids) AS ids
        LEFT JOIN celery_taskmeta AS c
          ON c.task_id = ids
        WHERE c.date_done IS NOT NULL
    '''

    if source_url_hash:
        name = session.query(MetaTable).get(source_url_hash).dataset_name
        q = q + "AND m.source_url_hash = :source_url_hash"
    else:
        name = None

    q = q + " ORDER BY c.id DESC"

    with engine.begin() as c:
        results = list(c.execute(text(q), source_url_hash=source_url_hash))

    r = []
    for result in results:
        tb = None
        if result.traceback:
            tb = result.traceback\
                .replace('\r\n', '<br />')\
                .replace('\n\r', '<br />')\
                .replace('\n', '<br />')\
                .replace('\r', '<br />')
        d = {
            'human_name': result.human_name,
            'source_url_hash': result.source_url_hash,
            'status': result.status,
            'task_id': result.task_id,
            'traceback': tb,
            'date_done': None,
        }
        if result.date_done:
            d['date_done'] = result.date_done.strftime('%B %d, %Y %H:%M')
        r.append(d)

    return render_template('admin/dataset-status.html', results=r, name=name)
def update_dataset(self, source_url_hash):
    md = session.query(MetaTable).get(source_url_hash)
    if md.result_ids:
        ids = md.result_ids
        ids.append(self.request.id)
    else:
        ids = [self.request.id]

    with engine.begin() as c:
        c.execute(MetaTable.__table__.update()
                  .where(MetaTable.source_url_hash == source_url_hash)
                  .values(result_ids=ids))

    etl = PlenarioETL(md)
    etl.update()

    return 'Finished updating {0} ({1})'.format(md.human_name, md.source_url_hash)