def _detail_aggregate(args):
    """Returns a record for every row in the specified dataset with brief
    temporal and spatial information about the row. This can give a user of
    the platform a quick overview about what is available within their
    constraints.

    :param args: dictionary of request arguments
    :returns: csv or json response object"""
    meta_params = ('obs_date__ge', 'obs_date__le', 'agg', 'geom', 'dataset')
    meta_vals = (args.data.get(k) for k in meta_params)
    start_date, end_date, agg, geom, dataset = meta_vals

    time_counts = []

    if not has_tree_filters(args.data):
        # The obs_date arguments set the bounds of all the aggregates.
        # We don't want to create a condition tree that has point_date filters.
        args.data[dataset.name + '__filter'] = request_args_to_condition_tree(
            args.data, ignore=['obs_date__ge', 'obs_date__le']
        )

    # Only keys containing 'filter' carry condition trees, one per dataset.
    dataset_conditions = {k: v for k, v in args.data.items() if 'filter' in k}
    for tablename, condition_tree in dataset_conditions.items():
        # This pattern matches the last occurrence of the '__' pattern.
        # Prevents an error that is caused by dataset names with trailing
        # underscores.
        tablename = re.split(r'__(?!_)', tablename)[0]
        table = MetaTable.get_by_dataset_name(tablename).point_table

        try:
            conditions = parse_tree(table, condition_tree)
        except ValueError:  # Catches empty condition tree.
            conditions = None

        try:
            ts = MetaTable.get_by_dataset_name(table.name).timeseries_one(
                agg, start_date, end_date, geom, conditions
            )
        except Exception as e:
            return internal_error('Failed to construct timeseries', e)

        # ts[0] is presumably a header row; only data rows are kept.
        # TODO confirm against MetaTable.timeseries_one.
        time_counts += [{'count': c, 'datetime': d} for c, d in ts[1:]]

    resp = None

    # NOTE(review): if data_type is neither 'json' nor 'csv', this returns
    # None — presumably the validator restricts data_type upstream; confirm.
    datatype = args.data['data_type']
    if datatype == 'json':
        resp = json_response_base(args, time_counts, request.args)
        resp['count'] = sum([c['count'] for c in time_counts])
        resp = make_response(json.dumps(resp, default=unknown_object_json_handler), 200)
        resp.headers['Content-Type'] = 'application/json'

    elif datatype == 'csv':
        resp = form_csv_detail_response(['point_date', 'hash'], time_counts)
        resp.headers['Content-Type'] = 'text/csv'
        filedate = datetime.now().strftime('%Y-%m-%d')
        resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate

    return resp
def _grid(args):
    """Aggregate a dataset's points into grid cells and return a GeoJSON
    response with one feature per cell.

    :param args: validated request arguments (reads args.data)
    :returns: JSON response of GeoJSON features, or an error response if
        the grid aggregation fails
    """
    meta_params = ('dataset', 'geom', 'resolution', 'buffer', 'obs_date__ge', 'obs_date__le')
    meta_vals = (args.data.get(k) for k in meta_params)
    point_table, geom, resolution, buffer_, obs_date__ge, obs_date__le = meta_vals

    result_rows = []

    if not has_tree_filters(args.data):
        tname = point_table.name
        args.data[tname + '__filter'] = request_args_to_condition_tree(
            request_args=args.data, ignore=['buffer', 'resolution']
        )

    # We only build conditions from values with a key containing 'filter'.
    # Therefore we only build dataset conditions from condition trees.
    dataset_conditions = {k: v for k, v in args.data.items() if 'filter' in k}
    for tablename, condition_tree in dataset_conditions.items():
        # Bug fix: split on the LAST occurrence of '__' so dataset names with
        # trailing underscores survive (plain split('__')[0] truncated them).
        # Matches the pattern the other condition-tree handlers use.
        tablename = re.split(r'__(?!_)', tablename)[0]
        metatable = MetaTable.get_by_dataset_name(tablename)
        table = metatable.point_table
        conditions = parse_tree(table, condition_tree)

        try:
            registry_row = MetaTable.get_by_dataset_name(table.name)
            # make_grid expects conditions to be iterable.
            grid_rows, size_x, size_y = registry_row.make_grid(
                resolution, geom, [conditions],
                {'upper': obs_date__le, 'lower': obs_date__ge}
            )
            result_rows += grid_rows
        except Exception as e:
            return internal_error('Could not make grid aggregation.', e)

    resp = geojson_response_base()
    for value in result_rows:
        if value[1]:
            # Bug fix: str.decode('hex') is Python 2 only. codecs.decode
            # turns the hex-encoded WKB into bytes on Python 3 as well.
            pt = shapely.wkb.loads(codecs.decode(value[1], 'hex'))
            south, west = (pt.x - (size_x / 2)), (pt.y - (size_y / 2))
            north, east = (pt.x + (size_x / 2)), (pt.y + (size_y / 2))
            new_geom = shapely.geometry.box(south, west, north, east).__geo_interface__
        else:
            new_geom = None
        new_property = {'count': value[0], }
        add_geojson_feature(resp, new_geom, new_property)

    resp = make_response(json.dumps(resp, default=date_json_handler), 200)
    resp.headers['Content-Type'] = 'application/json'
    return resp
def _grid(args):
    """Aggregate a dataset's points into grid cells and return a GeoJSON
    response base with one feature per cell.

    :param args: validated request arguments (reads args.data)
    :returns: geojson response base dict, or a raw error response if the
        grid aggregation fails
    """
    meta_params = ('dataset', 'geom', 'resolution', 'buffer', 'obs_date__ge', 'obs_date__le')
    meta_vals = (args.data.get(k) for k in meta_params)
    point_table, geom, resolution, buffer_, obs_date__ge, obs_date__le = meta_vals

    result_rows = []

    if not has_tree_filters(args.data):
        tname = point_table.name
        args.data[tname + '__filter'] = request_args_to_condition_tree(
            request_args=args.data, ignore=['buffer', 'resolution'])

    # We only build conditions from values with a key containing 'filter'.
    # Therefore we only build dataset conditions from condition trees.
    dataset_conditions = {k: v for k, v in args.data.items() if 'filter' in k}
    for tablename, condition_tree in dataset_conditions.items():
        # Bug fix: rsplit('__')[0] (no maxsplit) returns the FIRST segment,
        # exactly like split('__')[0], so it truncated dataset names with
        # trailing underscores. Split on the last '__' occurrence instead,
        # consistent with the other condition-tree handlers.
        tablename = re.split(r'__(?!_)', tablename)[0]
        metatable = MetaTable.get_by_dataset_name(tablename)
        table = metatable.point_table
        conditions = parse_tree(table, condition_tree)

        try:
            registry_row = MetaTable.get_by_dataset_name(table.name)
            # make_grid expects conditions to be iterable.
            grid_rows, size_x, size_y = registry_row.make_grid(
                resolution, geom, [conditions], {
                    'upper': obs_date__le,
                    'lower': obs_date__ge
                })
            result_rows += grid_rows
        except Exception as e:
            msg = 'Could not make grid aggregation.'
            return api_response.make_raw_error('{}: {}'.format(msg, e))

    resp = api_response.geojson_response_base()
    for value in result_rows:
        if value[1]:
            # value[1] is hex-encoded WKB; decode to bytes for shapely.
            pt = shapely.wkb.loads(codecs.decode(value[1], 'hex'))
            south, west = (pt.x - (size_x / 2)), (pt.y - (size_y / 2))
            north, east = (pt.x + (size_x / 2)), (pt.y + (size_y / 2))
            new_geom = shapely.geometry.box(south, west, north, east).__geo_interface__
        else:
            new_geom = None
        new_property = {'count': value[0], }
        api_response.add_geojson_feature(resp, new_geom, new_property)

    return resp
def detail_aggregate(): raw_query_params = request.args.copy() # First, make sure name of dataset was provided... try: dataset_name = raw_query_params.pop('dataset_name') except KeyError: return bad_request("'dataset_name' is required") # and that we have that dataset. try: validator = ParamValidator(dataset_name) except NoSuchTableError: return bad_request("Cannot find dataset named {}".format(dataset_name)) validator\ .set_optional('obs_date__ge', date_validator, datetime.now() - timedelta(days=90))\ .set_optional('obs_date__le', date_validator, datetime.now())\ .set_optional('location_geom__within', geom_validator, None)\ .set_optional('data_type', make_format_validator(['json', 'csv']), 'json')\ .set_optional('agg', agg_validator, 'week') # If any optional parameters are malformed, we're better off bailing and telling the user # than using a default and confusing them. err = validator.validate(raw_query_params) if err: return bad_request(err) start_date = validator.vals['obs_date__ge'] end_date = validator.vals['obs_date__le'] agg = validator.vals['agg'] geom = validator.get_geom() dataset = MetaTable.get_by_dataset_name(dataset_name) try: ts = dataset.timeseries_one(agg_unit=agg, start=start_date, end=end_date, geom=geom, column_filters=validator.conditions) except Exception as e: return internal_error('Failed to construct timeseries', e) datatype = validator.vals['data_type'] if datatype == 'json': time_counts = [{'count': c, 'datetime': d} for c, d in ts[1:]] resp = json_response_base(validator, time_counts) resp['count'] = sum([c['count'] for c in time_counts]) resp = make_response(json.dumps(resp, default=dthandler), 200) resp.headers['Content-Type'] = 'application/json' elif datatype == 'csv': resp = make_csv(ts) resp.headers['Content-Type'] = 'text/csv' filedate = datetime.now().strftime('%Y-%m-%d') resp.headers[ 'Content-Disposition'] = 'attachment; filename=%s.csv' % filedate return resp
def _detail_aggregate(args):
    """Returns a record for every row in the specified dataset with brief
    temporal and spatial information about the row. This can give a user of
    the platform a quick overview about what is available within their
    constraints.

    :param args: dictionary of request arguments
    :returns: list of {'count': ..., 'datetime': ...} records, or a raw
        error response if the timeseries query fails
    """
    meta_params = ('obs_date__ge', 'obs_date__le', 'agg', 'geom', 'dataset')
    meta_vals = (args.data.get(k) for k in meta_params)
    start_date, end_date, agg, geom, dataset = meta_vals

    time_counts = []

    if not has_tree_filters(args.data):
        # The obs_date arguments set the bounds of all the aggregates.
        # We don't want to create a condition tree that has point_date filters.
        args.data[dataset.name + '__filter'] = request_args_to_condition_tree(
            args.data, ignore=['obs_date__ge', 'obs_date__le'])

    # Only keys containing 'filter' carry condition trees, one per dataset.
    dataset_conditions = {
        k: v for k, v in list(args.data.items()) if 'filter' in k
    }
    for tablename, condition_tree in list(dataset_conditions.items()):
        # This pattern matches the last occurrence of the '__' pattern.
        # Prevents an error that is caused by dataset names with trailing
        # underscores.
        tablename = re.split(r'__(?!_)', tablename)[0]
        table = MetaTable.get_by_dataset_name(tablename).point_table

        try:
            conditions = parse_tree(table, condition_tree)
        except ValueError:  # Catches empty condition tree.
            conditions = None

        try:
            ts = MetaTable.get_by_dataset_name(table.name).timeseries_one(
                agg, start_date, end_date, geom, conditions)
        except Exception as e:
            msg = 'Failed to construct timeseries'
            return api_response.make_raw_error('{}: {}'.format(msg, e))

        # ts[0] is presumably a header row; keep only data rows.
        # TODO confirm against timeseries_one.
        time_counts += [{'count': c, 'datetime': d} for c, d in ts[1:]]

    return time_counts
def grid():
    """Endpoint: aggregate a single dataset's points into a grid and
    return the cells as GeoJSON.

    Reads query parameters from the Flask request, validates them, builds
    time/geometry filters, and responds with one feature per grid cell.
    """
    raw_query_params = request.args.copy()

    # First, make sure name of dataset was provided...
    try:
        dataset_name = raw_query_params.pop('dataset_name')
    except KeyError:
        return bad_request("'dataset_name' is required")

    try:
        validator = ParamValidator(dataset_name)
    except NoSuchTableError:
        return bad_request("Could not find dataset named {}.".format(dataset_name))

    # Bug fix: this chain previously ended with a stray '\' line
    # continuation that glued it onto the following statement.
    validator.set_optional('buffer', int_validator, 100)\
        .set_optional('resolution', int_validator, 500)\
        .set_optional('location_geom__within', geom_validator, None)\
        .set_optional('obs_date__ge', date_validator, datetime.now() - timedelta(days=90))\
        .set_optional('obs_date__le', date_validator, datetime.now())

    err = validator.validate(raw_query_params)
    if err:
        return bad_request(err)

    # Part 2: Construct SQL query
    try:
        dset = validator.dataset
        maker = FilterMaker(validator.vals, dataset=dset)
        # Get time filters
        time_filters = maker.time_filters()
        # From user params, either get None or requested geometry
        geom = validator.get_geom()
    except Exception as e:
        return internal_error('Could not make time and geometry filters.', e)

    resolution = validator.vals['resolution']
    try:
        registry_row = MetaTable.get_by_dataset_name(dataset_name)
        grid_rows, size_x, size_y = registry_row.make_grid(
            resolution, geom, validator.conditions + time_filters)
    except Exception as e:
        return internal_error('Could not make grid aggregation.', e)

    resp = geojson_response_base()
    for value in grid_rows:
        if value[1]:
            # Bug fix: str.decode('hex') is Python 2 only. codecs.decode
            # turns the hex-encoded WKB into bytes on Python 3 as well.
            pt = shapely.wkb.loads(codecs.decode(value[1], 'hex'))
            south, west = (pt.x - (size_x / 2)), (pt.y - (size_y / 2))
            north, east = (pt.x + (size_x / 2)), (pt.y + (size_y / 2))
            new_geom = shapely.geometry.box(south, west, north, east).__geo_interface__
        else:
            new_geom = None
        new_property = {'count': value[0], }
        add_geojson_feature(resp, new_geom, new_property)

    resp = make_response(json.dumps(resp, default=dthandler), 200)
    resp.headers['Content-Type'] = 'application/json'
    return resp
def _detail_aggregate(args):
    """Return brief temporal aggregates for every dataset referenced by the
    request's condition-tree filters.

    :param args: dictionary of request arguments
    :returns: list of {'count': ..., 'datetime': ...} records, or a raw
        error response if the timeseries query fails
    """
    params = ('obs_date__ge', 'obs_date__le', 'agg', 'geom', 'dataset')
    start_date, end_date, agg, geom, dataset = (args.data.get(p) for p in params)

    if not has_tree_filters(args.data):
        # The obs_date arguments already bound every aggregate, so the
        # generated condition tree must not repeat them as point_date filters.
        tree = request_args_to_condition_tree(
            args.data, ignore=['obs_date__ge', 'obs_date__le']
        )
        args.data[dataset.name + '__filter'] = tree

    time_counts = []
    filter_items = [(k, v) for k, v in args.data.items() if 'filter' in k]
    for key, condition_tree in filter_items:
        # Split on the last occurrence of '__' so dataset names that end in
        # underscores are preserved intact.
        dataset_name = re.split(r'__(?!_)', key)[0]
        point_table = MetaTable.get_by_dataset_name(dataset_name).point_table

        try:
            conditions = parse_tree(point_table, condition_tree)
        except ValueError:
            # An empty condition tree means no extra filtering.
            conditions = None

        try:
            ts = MetaTable.get_by_dataset_name(point_table.name).timeseries_one(
                agg, start_date, end_date, geom, conditions
            )
        except Exception as e:
            msg = 'Failed to construct timeseries'
            return api_response.make_raw_error('{}: {}'.format(msg, e))

        time_counts.extend({'count': c, 'datetime': d} for c, d in ts[1:])

    return time_counts
def detail_aggregate(): raw_query_params = request.args.copy() # First, make sure name of dataset was provided... try: dataset_name = raw_query_params.pop('dataset_name') except KeyError: return bad_request("'dataset_name' is required") # and that we have that dataset. try: validator = ParamValidator(dataset_name) except NoSuchTableError: return bad_request("Cannot find dataset named {}".format(dataset_name)) validator\ .set_optional('obs_date__ge', date_validator, datetime.now() - timedelta(days=90))\ .set_optional('obs_date__le', date_validator, datetime.now())\ .set_optional('location_geom__within', geom_validator, None)\ .set_optional('data_type', make_format_validator(['json', 'csv']), 'json')\ .set_optional('agg', agg_validator, 'week') # If any optional parameters are malformed, we're better off bailing and telling the user # than using a default and confusing them. err = validator.validate(raw_query_params) if err: return bad_request(err) start_date = validator.vals['obs_date__ge'] end_date = validator.vals['obs_date__le'] agg = validator.vals['agg'] geom = validator.get_geom() dataset = MetaTable.get_by_dataset_name(dataset_name) try: ts = dataset.timeseries_one(agg_unit=agg, start=start_date, end=end_date, geom=geom, column_filters=validator.conditions) except Exception as e: return internal_error('Failed to construct timeseries', e) datatype = validator.vals['data_type'] if datatype == 'json': time_counts = [{'count': c, 'datetime': d} for c, d in ts[1:]] resp = json_response_base(validator, time_counts) resp['count'] = sum([c['count'] for c in time_counts]) resp = make_response(json.dumps(resp, default=dthandler), 200) resp.headers['Content-Type'] = 'application/json' elif datatype == 'csv': resp = make_csv(ts) resp.headers['Content-Type'] = 'text/csv' filedate = datetime.now().strftime('%Y-%m-%d') resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate return resp
def test_new_table(self):
    """Loading an unseen dataset should insert all rows and record a bbox."""
    drop_if_exists(self.unloaded_meta.dataset_name)

    loader = PlenarioETL(self.unloaded_meta, source_path=self.radio_path)
    table = loader.add()
    rows = postgres_session.execute(table.select()).fetchall()
    self.assertEqual(len(rows), 5)

    postgres_session.close()
    table.drop(postgres_engine, checkfirst=True)

    # The ETL should also have computed a bounding box for the dataset.
    self.assertIsNotNone(
        MetaTable.get_by_dataset_name('community_radio_events').bbox
    )
def test_location_col_add(self):
    """A dataset with a location column should load fully and get a bbox."""
    drop_if_exists(self.opera_meta.dataset_name)

    etl = PlenarioETL(self.opera_meta, source_path=self.opera_path)
    opera_table = etl.add()
    fetched = session.execute(opera_table.select()).fetchall()
    self.assertEqual(len(fetched), 5)

    session.close()
    opera_table.drop(app_engine, checkfirst=True)

    # Was a bounding box recorded for the new dataset?
    bbox = MetaTable.get_by_dataset_name('public_opera_performances').bbox
    self.assertIsNotNone(bbox)
def test_location_col_add(self):
    """A dataset with a location column loads all rows and gets a bbox."""
    drop_if_exists(self.opera_meta.dataset_name)

    loader = PlenarioETL(self.opera_meta, source_path=self.opera_path)
    created = loader.add()
    results = postgres_session.execute(created.select()).fetchall()
    self.assertEqual(len(results), 5)

    postgres_session.close()
    created.drop(postgres_engine, checkfirst=True)

    # A bounding box should have been computed during the load.
    meta_bbox = MetaTable.get_by_dataset_name('public_opera_performances').bbox
    self.assertIsNotNone(meta_bbox)
def test_new_table(self):
    """Loading an unseen dataset creates its table and records a bbox."""
    drop_if_exists(self.unloaded_meta.dataset_name)

    table = PlenarioETL(self.unloaded_meta, source_path=self.radio_path).add()
    row_count = len(session.execute(table.select()).fetchall())
    self.assertEqual(row_count, 5)

    session.close()
    table.drop(app_engine, checkfirst=True)

    # Did we add a bbox?
    self.assertIsNotNone(
        MetaTable.get_by_dataset_name('community_radio_events').bbox)
def test_updates_index_and_validates_correctly(self):
    """POSTing a new dataset makes it queryable through /v1/api/detail."""
    # Register a MetaTable record for the roadworks dataset.
    self.test_client.post('/add?is_shapefile=false', data=roadworks_post_data)

    # Materialize the backing table.
    meta = MetaTable.get_by_dataset_name('roadworks')
    PlenarioETL(meta).add()

    # Query the fresh dataset; the validator should let this through.
    url = '/v1/api/detail?dataset_name=roadworks&obs_date__ge=2000'
    payload = json.loads(self.test_client.get(url).data)
    self.assertGreaterEqual(len(payload['objects']), 100)
def _deserialize(self, value, attr, data):
    """Resolve a dataset name to its point table.

    Raises ValidationError when no dataset with that name exists.
    """
    try:
        return MetaTable.get_by_dataset_name(value).point_table
    except AttributeError:
        # get_by_dataset_name found nothing to take .point_table from.
        message = '{} is not a valid dataset'.format(value)
        raise ValidationError(message)
# ValidatorResult # =============== # Many methods in response.py rely on information that used to be provided # by the old ParamValidator attributes. This namedtuple carries that same # info around, and allows me to not have to rewrite any response code. ValidatorResult = namedtuple('ValidatorResult', 'data errors warnings') # converters # ========== # Callables which are used to convert request arguments to their correct types. converters = { 'buffer': int, 'dataset': lambda x: MetaTable.get_by_dataset_name(x), 'shape': lambda x: ShapeMetadata.get_by_dataset_name(x).shape_table, 'dataset_name__in': lambda x: x.split(','), 'date__time_of_day_ge': int, 'date__time_of_day_le': int, 'obs_date__ge': lambda x: parser.parse(x).date(), 'obs_date__le': lambda x: parser.parse(x).date(), 'offset': int, 'resolution': int, 'geom': lambda x: make_fragment_str(extract_first_geometry_fragment(x)), } def convert(request_args): """Convert a dictionary of arguments from strings to their types. How the values are converted are specified by the converters dictionary defined
def _deserialize(self, value, attr, data):
    """Convert a dataset name into the matching point table.

    :raises ValidationError: when no dataset with that name is registered.
    """
    try:
        return MetaTable.get_by_dataset_name(value).point_table
    except AttributeError:
        # Lookup returned an object without .point_table — presumably None
        # for an unknown dataset name.
        raise ValidationError('{} is not a valid dataset'.format(value))
def _timeseries(args):
    """Build timeseries aggregates across one or many datasets and return
    them as a JSON or CSV response.

    :param args: validated request arguments (reads and mutates args.data)
    :returns: JSON/CSV response object, or an error response
    """
    meta_params = ['geom', 'dataset', 'dataset_name__in', 'obs_date__ge', 'obs_date__le', 'agg']
    meta_vals = [args.data.get(k) for k in meta_params]
    geom, dataset, table_names, start_date, end_date, agg = meta_vals

    ctrees = {}

    if has_tree_filters(args.data):
        # Timeseries is a little tricky. If there aren't filters,
        # it would be ridiculous to build a condition tree for every one.
        for field, value in args.data.items():
            if 'filter' in field:
                # This pattern matches the last occurrence of the '__' pattern.
                # Prevents an error that is caused by dataset names with trailing
                # underscores.
                tablename = re.split(r'__(?!_)', field)[0]
                metarecord = MetaTable.get_by_dataset_name(tablename)
                pt = metarecord.point_table
                ctrees[pt.name] = parse_tree(pt, value)
        # Just cleanliness, since we don't use this argument. Doesn't have
        # to show up in the JSON response.
        del args.data['dataset']

    # If no dataset_name__in list was provided, have to fill it in by invoking
    # MetaTable.index() here! Not in the validator. This way the list stays up
    # to date.
    if table_names is None:
        table_names = MetaTable.index()
        args.data['dataset_name__in'] = table_names

    # If a single dataset was provided, it's the only thing we need to consider.
    if dataset is not None:
        table_names = [dataset.name]
        del args.data['dataset_name__in']

    # remove table names which wouldn't return anything for the query, given
    # the time and geom constraints
    try:
        table_names = MetaTable.narrow_candidates(table_names, start_date, end_date, geom)
    except Exception as e:
        msg = 'Failed to gather candidate tables.'
        return internal_error(msg, e)

    # If there aren't any table names, it causes an error down the code. Better
    # to return and inform them that the request wouldn't have found anything.
    if not table_names:
        return bad_request("Your request doesn't return any results. Try "
                           "adjusting your time constraint or location "
                           "parameters.")

    try:
        panel = MetaTable.timeseries_all(
            table_names, agg, start_date, end_date, geom, ctrees
        )
    except Exception as e:
        msg = 'Failed to construct timeseries.'
        return internal_error(msg, e)

    panel = MetaTable.attach_metadata(panel)
    resp = json_response_base(args, panel, args.data)

    datatype = args.data['data_type']
    if datatype == 'json':
        resp = make_response(json.dumps(resp, default=unknown_object_json_handler), 200)
        resp.headers['Content-Type'] = 'application/json'

    elif datatype == 'csv':
        # response format
        # temporal_group,dataset_name_1,dataset_name_2
        # 2014-02-24 00:00:00,235,653
        # 2014-03-03 00:00:00,156,624

        # Column headers: one column per dataset after the temporal group.
        fields = ['temporal_group']
        for o in resp['objects']:
            fields.append(o['dataset_name'])

        csv_resp = []
        i = 0
        # NOTE(review): groupby assumes resp['objects'] is already ordered
        # by dataset_name — presumably guaranteed upstream; confirm.
        for k, g in groupby(resp['objects'], key=itemgetter('dataset_name')):
            l_g = list(g)[0]

            j = 0
            for row in l_g['items']:
                # first iteration, populate the first column with temporal_groups
                if i == 0:
                    csv_resp.append([row['datetime']])
                csv_resp[j].append(row['count'])
                j += 1
            i += 1

        csv_resp.insert(0, fields)
        csv_resp = make_csv(csv_resp)
        resp = make_response(csv_resp, 200)
        resp.headers['Content-Type'] = 'text/csv'
        filedate = datetime.now().strftime('%Y-%m-%d')
        resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate

    return resp
def validate_dataset(dataset_name):
    """Raise ValidationError unless a MetaTable record exists for the name."""
    record = MetaTable.get_by_dataset_name(dataset_name)
    if record:
        return
    raise ValidationError('Invalid table name: {}.'.format(dataset_name))
# =============== # Many methods in response.py rely on information that used to be provided # by the old ParamValidator attributes. This namedtuple carries that same # info around, and allows me to not have to rewrite any response code. ValidatorResult = namedtuple('ValidatorResult', 'data errors warnings') # converters # ========== # Callables which are used to convert request arguments to their correct types. converters = { 'agg': str, 'buffer': int, 'dataset': lambda x: MetaTable.get_by_dataset_name(x).point_table, 'shapeset': lambda x: ShapeMetadata.get_by_dataset_name(x).shape_table, 'data_type': str, 'shape': lambda x: ShapeMetadata.get_by_dataset_name(x).shape_table, 'dataset_name__in': lambda x: x.split(','), 'date__time_of_day_ge': int, 'date__time_of_day_le': int, 'obs_date__ge': lambda x: parser.parse(x).date(), 'obs_date__le': lambda x: parser.parse(x).date(), 'date': lambda x: parser.parse(x).date(), 'point_date': lambda x: parser.parse(x), 'offset': int, 'resolution': int, 'geom': lambda x: make_fragment_str(extract_first_geometry_fragment(x)), }
def validate(validator, request_args):
    """Validate a dictionary of arguments. Substitute all missing fields
    with defaults if not explicitly told to do otherwise.

    :param validator: what kind of validator to use
    :param request_args: dictionary of arguments from a request object
    :returns: ValidatorResult namedtuple"""
    args = request_args.copy()

    # For validator dataset_name__in... need to find a better way to
    # make it play nice with the validator.
    if args.get('dataset_name__in'):
        args['dataset_name__in'] = args['dataset_name__in'].split(',')

    # This first validation step covers conditions that are dataset
    # agnostic. These are values can be used to apply to all datasets
    # (ex. obs_date), or concern the format of the response (ex. limit,
    # datatype, offset).

    # If there are errors, fail quickly and return.
    result = validator.load(args)
    if result.errors:
        return result

    # If all arguments are valid, fill in validator defaults.
    result = validator.dump(result.data)

    # Certain values will be dumped as strings. This conversion
    # makes them into their corresponding type. (ex. Table)
    convert(result.data)

    # Holds messages concerning unnecessary parameters. These can be either
    # junk parameters, or redundant column parameters if a tree filter was
    # used.
    warnings = []

    # At this point validation splits. We can either validate tree-style column
    # arguments or validate them individually. We don't do both.

    # Determine unchecked parameters provided in the request.
    unchecked = set(args.keys()) - set(validator.fields.keys())

    # If tree filters were provided, ignore ALL unchecked parameters that are
    # not tree filters or response format information.
    if has_tree_filters(request_args):

        for key in request_args:
            value = args[key]
            if 'filter' in key:
                # This pattern matches the last occurrence of the '__' pattern.
                # Prevents an error that is caused by dataset names with trailing
                # underscores.
                t_name = re.split(r'__(?!_)', key)[0]

                # Report a filter which specifies a non-existent tree.
                try:
                    table = MetaTable.get_by_dataset_name(t_name).point_table
                except (AttributeError, NoSuchTableError):
                    try:
                        table = ShapeMetadata.get_by_dataset_name(t_name).shape_table
                    except (AttributeError, NoSuchTableError):
                        result.errors[t_name] = "Table name {} could not be found.".format(t_name)
                        return result

                # Report a tree which causes the JSON parser to fail.
                # Or a tree whose value is not valid.
                try:
                    cond_tree = json.loads(value)
                    if valid_tree(table, cond_tree):
                        result.data[key] = cond_tree
                except (ValueError, KeyError) as err:
                    result.errors[t_name] = "Bad tree: {} -- causes error {}.".format(value, err)
                    return result

            # These keys just have to do with the formatting of the JSON response.
            # We keep these values around even if they have no effect on a condition
            # tree.
            elif key in {'geom', 'offset', 'limit', 'agg', 'obs_date__le', 'obs_date__ge'}:
                pass

            # These keys are also ones that should be passed over when searching for
            # unused params. They are used, just in different forms later on, so no need
            # to report them.
            elif key in {'shape', 'dataset_name', 'dataset_name__in'}:
                pass

            # If the key is not a filter, and not used to format JSON, report
            # that we ignored it.
            else:
                warnings.append("Unused parameter {}, you cannot specify both "
                                "column and filter arguments.".format(key))

    # If no tree filters were provided, see if any of the unchecked parameters
    # are usable as column conditions.
    else:
        try:
            table = result.data['dataset']
        except KeyError:
            table = result.data.get('shapeset')
        for param in unchecked:
            field = param.split('__')[0]
            if table is not None:
                try:
                    valid_column_condition(table, field, args[param])
                    result.data[param] = args[param]
                except KeyError:
                    warnings.append('Unused parameter value "{}={}"'.format(param, args[param]))
                    warnings.append('{} is not a valid column for {}'.format(param, table))
                except ValueError:
                    warnings.append('Unused parameter value "{}={}"'.format(param, args[param]))
                    warnings.append('{} is not a valid value for {}'.format(args[param], param))

    # ValidatorResult(dict, dict, list)
    return ValidatorResult(result.data, result.errors, warnings)
def grid():
    """Endpoint: aggregate a single dataset's points into a grid and
    return the cells as GeoJSON.

    Validates request parameters, builds time/geometry filters, and
    responds with one GeoJSON feature per grid cell.
    """
    raw_query_params = request.args.copy()

    # First, make sure name of dataset was provided...
    try:
        dataset_name = raw_query_params.pop('dataset_name')
    except KeyError:
        return bad_request("'dataset_name' is required")

    try:
        validator = ParamValidator(dataset_name)
    except NoSuchTableError:
        return bad_request(
            "Could not find dataset named {}.".format(dataset_name))

    # Bug fix: this chain previously ended with a stray '\' line
    # continuation that glued it onto the following statement.
    validator.set_optional('buffer', int_validator, 100)\
        .set_optional('resolution', int_validator, 500)\
        .set_optional('location_geom__within', geom_validator, None)\
        .set_optional('obs_date__ge', date_validator, datetime.now() - timedelta(days=90))\
        .set_optional('obs_date__le', date_validator, datetime.now())

    err = validator.validate(raw_query_params)
    if err:
        return bad_request(err)

    # Part 2: Construct SQL query
    try:
        dset = validator.dataset
        maker = FilterMaker(validator.vals, dataset=dset)
        # Get time filters
        time_filters = maker.time_filters()
        # From user params, either get None or requested geometry
        geom = validator.get_geom()
    except Exception as e:
        return internal_error('Could not make time and geometry filters.', e)

    resolution = validator.vals['resolution']
    try:
        registry_row = MetaTable.get_by_dataset_name(dataset_name)
        grid_rows, size_x, size_y = registry_row.make_grid(
            resolution, geom, validator.conditions + time_filters)
    except Exception as e:
        return internal_error('Could not make grid aggregation.', e)

    resp = geojson_response_base()
    for value in grid_rows:
        if value[1]:
            # Bug fix: str.decode('hex') is Python 2 only. codecs.decode
            # turns the hex-encoded WKB into bytes on Python 3 as well.
            pt = shapely.wkb.loads(codecs.decode(value[1], 'hex'))
            south, west = (pt.x - (size_x / 2)), (pt.y - (size_y / 2))
            north, east = (pt.x + (size_x / 2)), (pt.y + (size_y / 2))
            new_geom = shapely.geometry.box(south, west, north, east).__geo_interface__
        else:
            new_geom = None
        new_property = {
            'count': value[0],
        }
        add_geojson_feature(resp, new_geom, new_property)

    resp = make_response(json.dumps(resp, default=dthandler), 200)
    resp.headers['Content-Type'] = 'application/json'
    return resp
def timeseries():
    """Endpoint: timeseries aggregates across one or many point datasets.

    Validates the request with TimeseriesValidator, optionally applies
    per-dataset condition trees, and returns JSON or CSV.
    """
    validator = TimeseriesValidator()

    deserialized_arguments = validator.load(request.args)
    # Round-trip through dumps/loads yields a plain-JSON copy of the
    # arguments for echoing back in the response meta.
    serialized_arguments = json.loads(validator.dumps(deserialized_arguments.data).data)

    if deserialized_arguments.errors:
        return make_error(deserialized_arguments.errors, 400, serialized_arguments)

    qargs = deserialized_arguments.data

    agg = qargs['agg']
    data_type = qargs['data_type']
    geom = qargs['location_geom__within']
    pointset = qargs['dataset_name']
    pointsets = qargs['dataset_name__in']
    start_date = qargs['obs_date__ge']
    end_date = qargs['obs_date__le']

    ctrees = {}       # parsed condition trees, keyed by point table name
    raw_ctrees = {}   # raw JSON trees, echoed back in the response meta

    if has_tree_filters(request.args):
        # Timeseries is a little tricky. If there aren't filters,
        # it would be ridiculous to build a condition tree for every one.
        for field, value in list(request.args.items()):
            if 'filter' in field:
                # This pattern matches the last occurrence of the '__' pattern.
                # Prevents an error that is caused by dataset names with trailing
                # underscores.
                tablename = re.split(r'__(?!_)', field)[0]
                metarecord = MetaTable.get_by_dataset_name(tablename)
                pt = metarecord.point_table
                ctrees[pt.name] = parse_tree(pt, json.loads(value))
                raw_ctrees[pt.name] = json.loads(value)

    # Merge the single-dataset and multi-dataset arguments; fall back to
    # every registered dataset when neither was supplied.
    point_set_names = [p.name for p in pointsets + [pointset] if p is not None]
    if not point_set_names:
        point_set_names = MetaTable.index()

    results = MetaTable.timeseries_all(point_set_names, agg, start_date, end_date, geom, ctrees)

    payload = {
        'meta': {
            'message': [],
            'query': serialized_arguments,
            'status': 'ok',
            'total': len(results)
        },
        'objects': results
    }

    if ctrees:
        payload['meta']['query']['filters'] = raw_ctrees

    if data_type == 'json':
        return jsonify(payload)

    elif data_type == 'csv':
        # response format
        # temporal_group,dataset_name_1,dataset_name_2
        # 2014-02-24 00:00:00,235,653
        # 2014-03-03 00:00:00,156,624

        # Column headers: one column per dataset after the temporal group.
        fields = ['temporal_group']
        for o in payload['objects']:
            fields.append(o['dataset_name'])

        csv_resp = []
        i = 0
        # NOTE(review): groupby assumes payload['objects'] is ordered by
        # dataset_name — presumably guaranteed by timeseries_all; confirm.
        for k, g in groupby(payload['objects'], key=itemgetter('dataset_name')):
            l_g = list(g)[0]

            j = 0
            for row in l_g['items']:
                # first iteration, populate the first column with temporal_groups
                if i == 0:
                    csv_resp.append([row['datetime']])
                csv_resp[j].append(row['count'])
                j += 1
            i += 1

        csv_resp.insert(0, fields)
        csv_resp = make_csv(csv_resp)
        resp = make_response(csv_resp, 200)
        resp.headers['Content-Type'] = 'text/csv'
        filedate = datetime.now().strftime('%Y-%m-%d')
        resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate
        return resp