Example #1
def _detail_aggregate(args):
    """Returns a record for every row in the specified dataset with brief
    temporal and spatial information about the row. This can give a user of the
    platform a quick overview of what is available within their constraints.

    :param args: dictionary of request arguments

    :returns: csv or json response object"""

    meta_params = ('obs_date__ge', 'obs_date__le', 'agg', 'geom', 'dataset')
    meta_vals = (args.data.get(k) for k in meta_params)
    start_date, end_date, agg, geom, dataset = meta_vals

    time_counts = []

    if not has_tree_filters(args.data):
        # The obs_date arguments set the bounds of all the aggregates.
        # We don't want to create a condition tree that has point_date filters.
        args.data[dataset.name + '__filter'] = request_args_to_condition_tree(
            args.data, ignore=['obs_date__ge', 'obs_date__le']
        )

    dataset_conditions = {k: v for k, v in args.data.items() if 'filter' in k}
    for tablename, condition_tree in dataset_conditions.items():
        # This pattern matches the last occurrence of the '__' pattern.
        # Prevents an error that is caused by dataset names with trailing
        # underscores.
        tablename = re.split(r'__(?!_)', tablename)[0]
        table = MetaTable.get_by_dataset_name(tablename).point_table
        try:
            conditions = parse_tree(table, condition_tree)
        except ValueError:  # Catches empty condition tree.
            conditions = None

        try:
            ts = MetaTable.get_by_dataset_name(table.name).timeseries_one(
                agg, start_date, end_date, geom, conditions
            )
        except Exception as e:
            return internal_error('Failed to construct timeseries', e)

        time_counts += [{'count': c, 'datetime': d} for c, d in ts[1:]]

    resp = None

    datatype = args.data['data_type']
    if datatype == 'json':
        resp = json_response_base(args, time_counts, request.args)
        resp['count'] = sum([c['count'] for c in time_counts])
        resp = make_response(json.dumps(resp, default=unknown_object_json_handler), 200)
        resp.headers['Content-Type'] = 'application/json'

    elif datatype == 'csv':
        resp = form_csv_detail_response(['point_date', 'hash'], time_counts)
        resp.headers['Content-Type'] = 'text/csv'
        filedate = datetime.now().strftime('%Y-%m-%d')
        resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate

    return resp
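A note on the re.split(r'__(?!_)', ...) call above: it splits on the last '__' rather than the first, which matters when a dataset name itself ends in an underscore. A small self-contained check (the dataset name here is made up):

import re

key = 'road_closures___filter'  # dataset 'road_closures_' plus the '__filter' suffix

# Naive splitting eats the trailing underscore of the dataset name...
assert key.split('__')[0] == 'road_closures'

# ...while splitting on a '__' that is not followed by another '_' (i.e. the
# last one) preserves it, which is what the condition-tree lookup relies on.
assert re.split(r'__(?!_)', key)[0] == 'road_closures_'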
Example #2
def _grid(args):

    meta_params = ('dataset', 'geom', 'resolution', 'buffer', 'obs_date__ge',
                   'obs_date__le')
    meta_vals = (args.data.get(k) for k in meta_params)
    point_table, geom, resolution, buffer_, obs_date__ge, obs_date__le = meta_vals

    result_rows = []

    if not has_tree_filters(args.data):
        tname = point_table.name
        args.data[tname + '__filter'] = request_args_to_condition_tree(
            request_args=args.data,
            ignore=['buffer', 'resolution']
        )

    # We only build conditions from values with a key containing 'filter'.
    # Therefore we only build dataset conditions from condition trees.
    dataset_conditions = {k: v for k, v in args.data.items() if 'filter' in k}
    for tablename, condition_tree in dataset_conditions.items():

        tablename = tablename.split('__')[0]

        metatable = MetaTable.get_by_dataset_name(tablename)
        table = metatable.point_table
        conditions = parse_tree(table, condition_tree)

        try:
            registry_row = MetaTable.get_by_dataset_name(table.name)
            # make_grid expects conditions to be iterable.
            grid_rows, size_x, size_y = registry_row.make_grid(
                resolution,
                geom,
                [conditions],
                {'upper': obs_date__le, 'lower': obs_date__ge}
            )
            result_rows += grid_rows
        except Exception as e:
            return internal_error('Could not make grid aggregation.', e)

    resp = geojson_response_base()
    for value in result_rows:
        if value[1]:
            pt = shapely.wkb.loads(value[1].decode('hex'))
            south, west = (pt.x - (size_x / 2)), (pt.y - (size_y / 2))
            north, east = (pt.x + (size_x / 2)), (pt.y + (size_y / 2))
            new_geom = shapely.geometry.box(south, west, north, east).__geo_interface__
        else:
            new_geom = None
        new_property = {'count': value[0], }
        add_geojson_feature(resp, new_geom, new_property)

    resp = make_response(json.dumps(resp, default=date_json_handler), 200)
    resp.headers['Content-Type'] = 'application/json'
    return resp
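For reference, shapely.geometry.box takes (minx, miny, maxx, maxy), so the loop above builds a rectangular cell around each grid centroid; the south/west/north/east names are simply offsets computed from pt.x and pt.y. A standalone illustration with made-up coordinates:

import shapely.geometry

cx, cy = -87.63, 41.88        # made-up cell centroid
size_x, size_y = 0.01, 0.01   # made-up cell dimensions

cell = shapely.geometry.box(cx - size_x / 2, cy - size_y / 2,
                            cx + size_x / 2, cy + size_y / 2)
assert cell.__geo_interface__['type'] == 'Polygon'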
Example #3
def _grid(args):
    meta_params = ('dataset', 'geom', 'resolution', 'buffer', 'obs_date__ge',
                   'obs_date__le')
    meta_vals = (args.data.get(k) for k in meta_params)
    point_table, geom, resolution, buffer_, obs_date__ge, obs_date__le = meta_vals

    result_rows = []

    if not has_tree_filters(args.data):
        tname = point_table.name
        args.data[tname + '__filter'] = request_args_to_condition_tree(
            request_args=args.data, ignore=['buffer', 'resolution'])

    # We only build conditions from values with a key containing 'filter'.
    # Therefore we only build dataset conditions from condition trees.
    dataset_conditions = {k: v for k, v in args.data.items() if 'filter' in k}
    for tablename, condition_tree in dataset_conditions.items():

        tablename = tablename.rsplit('__')[0]

        metatable = MetaTable.get_by_dataset_name(tablename)
        table = metatable.point_table
        conditions = parse_tree(table, condition_tree)

        try:
            registry_row = MetaTable.get_by_dataset_name(table.name)
            # make_grid expects conditions to be iterable.
            grid_rows, size_x, size_y = registry_row.make_grid(
                resolution, geom, [conditions], {
                    'upper': obs_date__le,
                    'lower': obs_date__ge
                })
            result_rows += grid_rows
        except Exception as e:
            msg = 'Could not make grid aggregation.'
            return api_response.make_raw_error('{}: {}'.format(msg, e))

    resp = api_response.geojson_response_base()
    for value in result_rows:
        if value[1]:
            pt = shapely.wkb.loads(codecs.decode(value[1], 'hex'))
            south, west = (pt.x - (size_x / 2)), (pt.y - (size_y / 2))
            north, east = (pt.x + (size_x / 2)), (pt.y + (size_y / 2))
            new_geom = shapely.geometry.box(south, west, north,
                                            east).__geo_interface__
        else:
            new_geom = None
        new_property = {
            'count': value[0],
        }
        api_response.add_geojson_feature(resp, new_geom, new_property)

    return resp
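Examples 2 and 3 differ mainly in how the hex-encoded WKB value is decoded: str.decode('hex') exists only on Python 2, while the codecs call used here also works on Python 3. A quick illustration with a throwaway hex string (not a real WKB geometry):

import codecs

hex_value = '48656c6c6f'  # throwaway payload, not a real geometry

# Python 2 only:  raw = hex_value.decode('hex')
# Python 3: go through codecs (as Example 3 does) or bytes.fromhex().
raw = codecs.decode(hex_value.encode('ascii'), 'hex')
assert raw == bytes.fromhex(hex_value) == b'Hello'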
Example #4
def detail_aggregate():
    raw_query_params = request.args.copy()
    # First, make sure name of dataset was provided...
    try:
        dataset_name = raw_query_params.pop('dataset_name')
    except KeyError:
        return bad_request("'dataset_name' is required")

    # and that we have that dataset.
    try:
        validator = ParamValidator(dataset_name)
    except NoSuchTableError:
        return bad_request("Cannot find dataset named {}".format(dataset_name))

    validator\
        .set_optional('obs_date__ge', date_validator, datetime.now() - timedelta(days=90))\
        .set_optional('obs_date__le', date_validator, datetime.now())\
        .set_optional('location_geom__within', geom_validator, None)\
        .set_optional('data_type', make_format_validator(['json', 'csv']), 'json')\
        .set_optional('agg', agg_validator, 'week')

    # If any optional parameters are malformed, we're better off bailing and telling the user
    # than using a default and confusing them.
    err = validator.validate(raw_query_params)
    if err:
        return bad_request(err)

    start_date = validator.vals['obs_date__ge']
    end_date = validator.vals['obs_date__le']
    agg = validator.vals['agg']
    geom = validator.get_geom()
    dataset = MetaTable.get_by_dataset_name(dataset_name)

    try:
        ts = dataset.timeseries_one(agg_unit=agg,
                                    start=start_date,
                                    end=end_date,
                                    geom=geom,
                                    column_filters=validator.conditions)
    except Exception as e:
        return internal_error('Failed to construct timeseries', e)

    datatype = validator.vals['data_type']
    if datatype == 'json':
        time_counts = [{'count': c, 'datetime': d} for c, d in ts[1:]]
        resp = json_response_base(validator, time_counts)
        resp['count'] = sum([c['count'] for c in time_counts])
        resp = make_response(json.dumps(resp, default=dthandler), 200)
        resp.headers['Content-Type'] = 'application/json'

    elif datatype == 'csv':
        resp = make_csv(ts)
        resp.headers['Content-Type'] = 'text/csv'
        filedate = datetime.now().strftime('%Y-%m-%d')
        resp.headers[
            'Content-Disposition'] = 'attachment; filename=%s.csv' % filedate

    return resp
Example #5
def _detail_aggregate(args):
    """Returns a record for every row in the specified dataset with brief
    temporal and spatial information about the row. This can give a user of the
    platform a quick overview of what is available within their constraints.

    :param args: dictionary of request arguments
    :returns: list of count/datetime dictionaries
    """
    meta_params = ('obs_date__ge', 'obs_date__le', 'agg', 'geom', 'dataset')
    meta_vals = (args.data.get(k) for k in meta_params)
    start_date, end_date, agg, geom, dataset = meta_vals

    time_counts = []

    if not has_tree_filters(args.data):
        # The obs_date arguments set the bounds of all the aggregates.
        # We don't want to create a condition tree that has point_date filters.
        args.data[dataset.name + '__filter'] = request_args_to_condition_tree(
            args.data, ignore=['obs_date__ge', 'obs_date__le'])

    dataset_conditions = {
        k: v
        for k, v in list(args.data.items()) if 'filter' in k
    }
    for tablename, condition_tree in list(dataset_conditions.items()):
        # This pattern matches the last occurrence of the '__' pattern.
        # Prevents an error that is caused by dataset names with trailing
        # underscores.
        tablename = re.split(r'__(?!_)', tablename)[0]
        table = MetaTable.get_by_dataset_name(tablename).point_table
        try:
            conditions = parse_tree(table, condition_tree)
        except ValueError:  # Catches empty condition tree.
            conditions = None

        try:
            ts = MetaTable.get_by_dataset_name(table.name).timeseries_one(
                agg, start_date, end_date, geom, conditions)
        except Exception as e:
            msg = 'Failed to construct timeseries'
            return api_response.make_raw_error('{}: {}'.format(msg, e))

        time_counts += [{'count': c, 'datetime': d} for c, d in ts[1:]]

    return time_counts
Example #6
def grid():
    raw_query_params = request.args.copy()

    # First, make sure name of dataset was provided...
    try:
        dataset_name = raw_query_params.pop('dataset_name')
    except KeyError:
        return bad_request("'dataset_name' is required")

    try:
        validator = ParamValidator(dataset_name)
    except NoSuchTableError:
        return bad_request("Could not find dataset named {}.".format(dataset_name))

    validator.set_optional('buffer', int_validator, 100)\
             .set_optional('resolution', int_validator, 500)\
             .set_optional('location_geom__within', geom_validator, None)\
             .set_optional('obs_date__ge', date_validator, datetime.now() - timedelta(days=90))\
             .set_optional('obs_date__le', date_validator, datetime.now())

    err = validator.validate(raw_query_params)
    if err:
        return bad_request(err)

    # Part 2: Construct SQL query
    try:
        dset = validator.dataset
        maker = FilterMaker(validator.vals, dataset=dset)
        # Get time filters
        time_filters = maker.time_filters()
        # From user params, either get None or the requested geometry
        geom = validator.get_geom()
    except Exception as e:
        return internal_error('Could not make time and geometry filters.', e)

    resolution = validator.vals['resolution']
    try:
        registry_row = MetaTable.get_by_dataset_name(dataset_name)
        grid_rows, size_x, size_y = registry_row.make_grid(resolution, geom, validator.conditions + time_filters)
    except Exception as e:
        return internal_error('Could not make grid aggregation.', e)

    resp = geojson_response_base()
    for value in grid_rows:
        if value[1]:
            pt = shapely.wkb.loads(value[1].decode('hex'))
            south, west = (pt.x - (size_x / 2)), (pt.y - (size_y / 2))
            north, east = (pt.x + (size_x / 2)), (pt.y + (size_y / 2))
            new_geom = shapely.geometry.box(south, west, north, east).__geo_interface__
        else:
            new_geom = None
        new_property = {'count': value[0],}
        add_geojson_feature(resp, new_geom, new_property)

    resp = make_response(json.dumps(resp, default=dthandler), 200)
    resp.headers['Content-Type'] = 'application/json'
    return resp
Example #7
def _detail_aggregate(args):
    """Returns a record for every row in the specified dataset with brief
    temporal and spatial information about the row. This can give a user of the
    platform a quick overview of what is available within their constraints.

    :param args: dictionary of request arguments
    :returns: list of count/datetime dictionaries
    """
    meta_params = ('obs_date__ge', 'obs_date__le', 'agg', 'geom', 'dataset')
    meta_vals = (args.data.get(k) for k in meta_params)
    start_date, end_date, agg, geom, dataset = meta_vals

    time_counts = []

    if not has_tree_filters(args.data):
        # The obs_date arguments set the bounds of all the aggregates.
        # We don't want to create a condition tree that has point_date filters.
        args.data[dataset.name + '__filter'] = request_args_to_condition_tree(
            args.data, ignore=['obs_date__ge', 'obs_date__le']
        )

    dataset_conditions = {k: v for k, v in list(args.data.items()) if 'filter' in k}
    for tablename, condition_tree in list(dataset_conditions.items()):
        # This pattern matches the last occurrence of the '__' pattern.
        # Prevents an error that is caused by dataset names with trailing
        # underscores.
        tablename = re.split(r'__(?!_)', tablename)[0]
        table = MetaTable.get_by_dataset_name(tablename).point_table
        try:
            conditions = parse_tree(table, condition_tree)
        except ValueError:  # Catches empty condition tree.
            conditions = None

        try:
            ts = MetaTable.get_by_dataset_name(table.name).timeseries_one(
                agg, start_date, end_date, geom, conditions
            )
        except Exception as e:
            msg = 'Failed to construct timeseries'
            return api_response.make_raw_error('{}: {}'.format(msg, e))

        time_counts += [{'count': c, 'datetime': d} for c, d in ts[1:]]

    return time_counts
Example #8
def detail_aggregate():
    raw_query_params = request.args.copy()
    # First, make sure name of dataset was provided...
    try:
        dataset_name = raw_query_params.pop('dataset_name')
    except KeyError:
        return bad_request("'dataset_name' is required")

    # and that we have that dataset.
    try:
        validator = ParamValidator(dataset_name)
    except NoSuchTableError:
        return bad_request("Cannot find dataset named {}".format(dataset_name))

    validator\
        .set_optional('obs_date__ge', date_validator, datetime.now() - timedelta(days=90))\
        .set_optional('obs_date__le', date_validator, datetime.now())\
        .set_optional('location_geom__within', geom_validator, None)\
        .set_optional('data_type', make_format_validator(['json', 'csv']), 'json')\
        .set_optional('agg', agg_validator, 'week')

    # If any optional parameters are malformed, we're better off bailing and telling the user
    # than using a default and confusing them.
    err = validator.validate(raw_query_params)
    if err:
        return bad_request(err)

    start_date = validator.vals['obs_date__ge']
    end_date = validator.vals['obs_date__le']
    agg = validator.vals['agg']
    geom = validator.get_geom()
    dataset = MetaTable.get_by_dataset_name(dataset_name)

    try:
        ts = dataset.timeseries_one(agg_unit=agg, start=start_date,
                                    end=end_date, geom=geom,
                                    column_filters=validator.conditions)
    except Exception as e:
        return internal_error('Failed to construct timeseries', e)

    datatype = validator.vals['data_type']
    if datatype == 'json':
        time_counts = [{'count': c, 'datetime': d} for c, d in ts[1:]]
        resp = json_response_base(validator, time_counts)
        resp['count'] = sum([c['count'] for c in time_counts])
        resp = make_response(json.dumps(resp, default=dthandler), 200)
        resp.headers['Content-Type'] = 'application/json'

    elif datatype == 'csv':
        resp = make_csv(ts)
        resp.headers['Content-Type'] = 'text/csv'
        filedate = datetime.now().strftime('%Y-%m-%d')
        resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate

    return resp
Example #9
    def test_new_table(self):
        drop_if_exists(self.unloaded_meta.dataset_name)

        etl = PlenarioETL(self.unloaded_meta, source_path=self.radio_path)
        new_table = etl.add()

        all_rows = postgres_session.execute(new_table.select()).fetchall()
        self.assertEqual(len(all_rows), 5)
        postgres_session.close()
        new_table.drop(postgres_engine, checkfirst=True)

        # Did we add a bbox?
        bbox = MetaTable.get_by_dataset_name('community_radio_events').bbox
        self.assertIsNotNone(bbox)
Example #10
    def test_location_col_add(self):
        drop_if_exists(self.opera_meta.dataset_name)

        etl = PlenarioETL(self.opera_meta, source_path=self.opera_path)
        new_table = etl.add()

        all_rows = session.execute(new_table.select()).fetchall()
        self.assertEqual(len(all_rows), 5)
        session.close()
        new_table.drop(app_engine, checkfirst=True)

        # Did we add a bbox?
        bbox = MetaTable.get_by_dataset_name('public_opera_performances').bbox
        self.assertIsNotNone(bbox)
Example #11
    def test_location_col_add(self):
        drop_if_exists(self.opera_meta.dataset_name)

        etl = PlenarioETL(self.opera_meta, source_path=self.opera_path)
        new_table = etl.add()

        all_rows = postgres_session.execute(new_table.select()).fetchall()
        self.assertEqual(len(all_rows), 5)
        postgres_session.close()
        new_table.drop(postgres_engine, checkfirst=True)

        # Did we add a bbox?
        bbox = MetaTable.get_by_dataset_name('public_opera_performances').bbox
        self.assertIsNotNone(bbox)
Example #12
    def test_new_table(self):
        drop_if_exists(self.unloaded_meta.dataset_name)

        etl = PlenarioETL(self.unloaded_meta, source_path=self.radio_path)
        new_table = etl.add()

        all_rows = session.execute(new_table.select()).fetchall()
        self.assertEqual(len(all_rows), 5)
        session.close()
        new_table.drop(app_engine, checkfirst=True)

        # Did we add a bbox?
        bbox = MetaTable.get_by_dataset_name('community_radio_events').bbox
        self.assertIsNotNone(bbox)
Example #13
    def test_updates_index_and_validates_correctly(self):

        # Adds a MetaTable record.
        self.test_client.post('/add?is_shapefile=false', data=roadworks_post_data)
        meta = MetaTable.get_by_dataset_name('roadworks')
        # Creates the table.
        PlenarioETL(meta).add()

        # Perform a query on the newly added dataset (to check if the
        # validator allows the query through).
        query = '/v1/api/detail?dataset_name=roadworks&obs_date__ge=2000'
        response = self.test_client.get(query)
        data = json.loads(response.data)

        self.assertGreaterEqual(len(data['objects']), 100)
Example #14
 def _deserialize(self, value, attr, data):
     try:
         return MetaTable.get_by_dataset_name(value).point_table
     except AttributeError:
         raise ValidationError('{} is not a valid dataset'.format(value))
Example #15
# ValidatorResult
# ===============
# Many methods in response.py rely on information that used to be provided
# by the old ParamValidator attributes. This namedtuple carries that same
# info around, and allows me to not have to rewrite any response code.

ValidatorResult = namedtuple('ValidatorResult', 'data errors warnings')


# converters
# ==========
# Callables which are used to convert request arguments to their correct types.

converters = {
    'buffer': int,
    'dataset': lambda x: MetaTable.get_by_dataset_name(x),
    'shape': lambda x: ShapeMetadata.get_by_dataset_name(x).shape_table,
    'dataset_name__in': lambda x: x.split(','),
    'date__time_of_day_ge': int,
    'date__time_of_day_le': int,
    'obs_date__ge': lambda x: parser.parse(x).date(),
    'obs_date__le': lambda x: parser.parse(x).date(),
    'offset': int,
    'resolution': int,
    'geom': lambda x: make_fragment_str(extract_first_geometry_fragment(x)),
}


def convert(request_args):
    """Convert a dictionary of arguments from strings to their types. How the
    values are converted is specified by the converters dictionary defined
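The snippet above is cut off before the body of convert. Judging from how it is called elsewhere (convert(result.data) in the validate example further down), it most likely applies the converters mapping in place; the body below is a sketch under that assumption, not the project's actual code:

def convert(request_args):
    # Sketch only: apply each known converter in place, leaving values
    # untouched when no converter exists or when conversion fails.
    for key, value in list(request_args.items()):
        converter = converters.get(key)
        if converter is None:
            continue
        try:
            request_args[key] = converter(value)
        except (TypeError, ValueError):
            pass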
Example #16
def _timeseries(args):

    meta_params = ['geom', 'dataset', 'dataset_name__in', 'obs_date__ge', 'obs_date__le', 'agg']
    meta_vals = [args.data.get(k) for k in meta_params]
    geom, dataset, table_names, start_date, end_date, agg = meta_vals

    ctrees = {}

    if has_tree_filters(args.data):
        # Timeseries is a little tricky. If there aren't filters,
        # it would be ridiculous to build a condition tree for every one.
        for field, value in args.data.items():
            if 'filter' in field:
                # This pattern matches the last occurrence of the '__' pattern.
                # Prevents an error that is caused by dataset names with trailing
                # underscores.
                tablename = re.split(r'__(?!_)', field)[0]
                metarecord = MetaTable.get_by_dataset_name(tablename)
                pt = metarecord.point_table
                ctrees[pt.name] = parse_tree(pt, value)
        # Just cleanliness, since we don't use this argument. Doesn't have
        # to show up in the JSON response.
        del args.data['dataset']

    # If no dataset_name__in list was provided, have to fill it in by invoking
    # MetaTable.index() here! Not in the validator. This way the list stays up
    # to date.
    if table_names is None:
        table_names = MetaTable.index()
        args.data['dataset_name__in'] = table_names

    # If a single dataset was provided, it's the only thing we need to consider.
    if dataset is not None:
        table_names = [dataset.name]
        del args.data['dataset_name__in']

    # remove table names which wouldn't return anything for the query, given
    # the time and geom constraints
    try:
        table_names = MetaTable.narrow_candidates(table_names, start_date, end_date, geom)
    except Exception as e:
        msg = 'Failed to gather candidate tables.'
        return internal_error(msg, e)

    # If there aren't any table names, it causes an error further down. Better
    # to return and inform them that the request wouldn't have found anything.
    if not table_names:
        return bad_request("Your request doesn't return any results. Try "
                           "adjusting your time constraint or location "
                           "parameters.")

    try:
        panel = MetaTable.timeseries_all(
            table_names, agg, start_date, end_date, geom, ctrees
        )
    except Exception as e:
        msg = 'Failed to construct timeseries.'
        return internal_error(msg, e)

    panel = MetaTable.attach_metadata(panel)
    resp = json_response_base(args, panel, args.data)

    datatype = args.data['data_type']
    if datatype == 'json':
        resp = make_response(json.dumps(resp, default=unknown_object_json_handler), 200)
        resp.headers['Content-Type'] = 'application/json'
    elif datatype == 'csv':

        # response format
        # temporal_group,dataset_name_1,dataset_name_2
        # 2014-02-24 00:00:00,235,653
        # 2014-03-03 00:00:00,156,624

        fields = ['temporal_group']
        for o in resp['objects']:
            fields.append(o['dataset_name'])

        csv_resp = []
        i = 0
        for k, g in groupby(resp['objects'], key=itemgetter('dataset_name')):
            l_g = list(g)[0]

            j = 0
            for row in l_g['items']:
                # first iteration, populate the first column with temporal_groups
                if i == 0:
                    csv_resp.append([row['datetime']])
                csv_resp[j].append(row['count'])
                j += 1
            i += 1

        csv_resp.insert(0, fields)
        csv_resp = make_csv(csv_resp)
        resp = make_response(csv_resp, 200)
        resp.headers['Content-Type'] = 'text/csv'
        filedate = datetime.now().strftime('%Y-%m-%d')
        resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate

    return resp
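The i/j counters in the CSV branch above pivot the per-dataset series into rows of the shape shown in the comment. A similar, arguably clearer formulation using zip (a sketch, not the project's code; like the original loop, it assumes every dataset reports the same time buckets):

def pivot_timeseries(objects):
    # objects: [{'dataset_name': ..., 'items': [{'datetime': ..., 'count': ...}, ...]}, ...]
    if not objects:
        return [['temporal_group']]
    header = ['temporal_group'] + [o['dataset_name'] for o in objects]
    columns = [[row['datetime'] for row in objects[0]['items']]]
    columns += [[row['count'] for row in o['items']] for o in objects]
    return [header] + [list(r) for r in zip(*columns)]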
Example #17
def validate_dataset(dataset_name):
    if not MetaTable.get_by_dataset_name(dataset_name):
        raise ValidationError('Invalid table name: {}.'.format(dataset_name))
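Because validate_dataset rejects values by raising ValidationError, it can be attached directly to a marshmallow field's validate argument. A hypothetical schema (the class and field layout are illustrative, not taken from the project):

from marshmallow import Schema, fields

class DatasetQuerySchema(Schema):
    # Hypothetical field: unknown dataset names are rejected because
    # validate_dataset raises ValidationError.
    dataset_name = fields.Str(required=True, validate=validate_dataset)

# DatasetQuerySchema().validate({'dataset_name': 'roadworks'}) would then
# return an error dict for any name MetaTable does not know about.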
Example #18
# ValidatorResult
# ===============
# Many methods in response.py rely on information that used to be provided
# by the old ParamValidator attributes. This namedtuple carries that same
# info around, and allows me to not have to rewrite any response code.

ValidatorResult = namedtuple('ValidatorResult', 'data errors warnings')


# converters
# ==========
# Callables which are used to convert request arguments to their correct types.

converters = {
    'agg': str,
    'buffer': int,
    'dataset': lambda x: MetaTable.get_by_dataset_name(x).point_table,
    'shapeset': lambda x: ShapeMetadata.get_by_dataset_name(x).shape_table,
    'data_type': str,
    'shape': lambda x: ShapeMetadata.get_by_dataset_name(x).shape_table,
    'dataset_name__in': lambda x: x.split(','),
    'date__time_of_day_ge': int,
    'date__time_of_day_le': int,
    'obs_date__ge': lambda x: parser.parse(x).date(),
    'obs_date__le': lambda x: parser.parse(x).date(),
    'date': lambda x: parser.parse(x).date(),
    'point_date': lambda x: parser.parse(x),
    'offset': int,
    'resolution': int,
    'geom': lambda x: make_fragment_str(extract_first_geometry_fragment(x)),
}
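Each entry maps a request parameter to a plain callable, so converting a raw query-string value is a dictionary lookup plus a call (parser here is dateutil's parser, judging by the parse calls). Assuming the mapping above is in scope:

from dateutil import parser

assert converters['obs_date__ge']('2016-01-01') == parser.parse('2016-01-01').date()
assert converters['dataset_name__in']('foo,bar') == ['foo', 'bar']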
Example #19
def validate(validator, request_args):
    """Validate a dictionary of arguments. Substitute all missing fields with
    defaults if not explicitly told to do otherwise.

    :param validator: what kind of validator to use
    :param request_args: dictionary of arguments from a request object

    :returns: ValidatorResult namedtuple"""

    args = request_args.copy()

    # For validator dataset_name__in... need to find a better way to
    # make it play nice with the validator.
    if args.get('dataset_name__in'):
        args['dataset_name__in'] = args['dataset_name__in'].split(',')

    # This first validation step covers conditions that are dataset
    # agnostic. These are values that apply to all datasets
    # (ex. obs_date) or that concern the format of the response
    # (ex. limit, datatype, offset).

    # If there are errors, fail quickly and return.
    result = validator.load(args)
    if result.errors:
        return result

    # If all arguments are valid, fill in validator defaults.
    result = validator.dump(result.data)

    # Certain values will be dumped as strings. This conversion
    # makes them into their corresponding type. (ex. Table)
    convert(result.data)

    # Holds messages concerning unnecessary parameters. These can be either
    # junk parameters, or redundant column parameters if a tree filter was
    # used.
    warnings = []

    # At this point validation splits. We can either validate tree-style column
    # arguments or validate them individually. We don't do both.

    # Determine unchecked parameters provided in the request.
    unchecked = set(args.keys()) - set(validator.fields.keys())

    # If tree filters were provided, ignore ALL unchecked parameters that are
    # not tree filters or response format information.
    if has_tree_filters(request_args):

        for key in request_args:
            value = args[key]
            if 'filter' in key:
                # This pattern matches the last occurrence of the '__' pattern.
                # Prevents an error that is caused by dataset names with trailing
                # underscores.
                t_name = re.split(r'__(?!_)', key)[0]

                # Report a filter which specifies a non-existent tree.
                try:
                    table = MetaTable.get_by_dataset_name(t_name).point_table
                except (AttributeError, NoSuchTableError):
                    try:
                        table = ShapeMetadata.get_by_dataset_name(t_name).shape_table
                    except (AttributeError, NoSuchTableError):
                        result.errors[t_name] = "Table name {} could not be found.".format(t_name)
                        return result

                # Report a tree which causes the JSON parser to fail.
                # Or a tree whose value is not valid.
                try:
                    cond_tree = json.loads(value)
                    if valid_tree(table, cond_tree):
                        result.data[key] = cond_tree
                except (ValueError, KeyError) as err:
                    result.errors[t_name] = "Bad tree: {} -- causes error {}.".format(value, err)
                    return result

            # These keys just have to do with the formatting of the JSON response.
            # We keep these values around even if they have no effect on a condition
            # tree.
            elif key in {'geom', 'offset', 'limit', 'agg', 'obs_date__le', 'obs_date__ge'}:
                pass

            # These keys are also ones that should be passed over when searching for
            # unused params. They are used, just in different forms later on, so no need
            # to report them.
            elif key in {'shape', 'dataset_name', 'dataset_name__in'}:
                pass

            # If the key is not a filter, and not used to format JSON, report
            # that we ignored it.
            else:
                warnings.append("Unused parameter {}, you cannot specify both "
                                "column and filter arguments.".format(key))

    # If no tree filters were provided, see if any of the unchecked parameters
    # are usable as column conditions.
    else:
        try:
            table = result.data['dataset']
        except KeyError:
            table = result.data.get('shapeset')
        for param in unchecked:
            field = param.split('__')[0]
            if table is not None:
                try:
                    valid_column_condition(table, field, args[param])
                    result.data[param] = args[param]
                except KeyError:
                    warnings.append('Unused parameter value "{}={}"'.format(param, args[param]))
                    warnings.append('{} is not a valid column for {}'.format(param, table))
                except ValueError:
                    warnings.append('Unused parameter value "{}={}"'.format(param, args[param]))
                    warnings.append('{} is not a valid value for {}'.format(args[param], param))

    # ValidatorResult(dict, dict, list)
    return ValidatorResult(result.data, result.errors, warnings)
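A sketch of how a caller might consume the ValidatorResult this returns (validator is assumed to be one of the marshmallow-style schemas used elsewhere in the module, and bad_request is the helper seen in the view examples above):

def run_query(validator, request_args):
    # Sketch only: the three ValidatorResult fields and how they tend to be used.
    result = validate(validator, request_args)
    if result.errors:
        return bad_request(result.errors)  # malformed request: stop here
    query_args = result.data               # converted values (tables, dates, geometries)
    warnings = result.warnings             # ignored or redundant parameters to report back
    return query_args, warnings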
Example #20
def grid():
    raw_query_params = request.args.copy()

    # First, make sure name of dataset was provided...
    try:
        dataset_name = raw_query_params.pop('dataset_name')
    except KeyError:
        return bad_request("'dataset_name' is required")

    try:
        validator = ParamValidator(dataset_name)
    except NoSuchTableError:
        return bad_request(
            "Could not find dataset named {}.".format(dataset_name))

    validator.set_optional('buffer', int_validator, 100)\
             .set_optional('resolution', int_validator, 500)\
             .set_optional('location_geom__within', geom_validator, None)\
             .set_optional('obs_date__ge', date_validator, datetime.now() - timedelta(days=90))\
             .set_optional('obs_date__le', date_validator, datetime.now())

    err = validator.validate(raw_query_params)
    if err:
        return bad_request(err)

    # Part 2: Construct SQL query
    try:
        dset = validator.dataset
        maker = FilterMaker(validator.vals, dataset=dset)
        # Get time filters
        time_filters = maker.time_filters()
        # From user params, either get None or the requested geometry
        geom = validator.get_geom()
    except Exception as e:
        return internal_error('Could not make time and geometry filters.', e)

    resolution = validator.vals['resolution']
    try:
        registry_row = MetaTable.get_by_dataset_name(dataset_name)
        grid_rows, size_x, size_y = registry_row.make_grid(
            resolution, geom, validator.conditions + time_filters)
    except Exception as e:
        return internal_error('Could not make grid aggregation.', e)

    resp = geojson_response_base()
    for value in grid_rows:
        if value[1]:
            pt = shapely.wkb.loads(value[1].decode('hex'))
            south, west = (pt.x - (size_x / 2)), (pt.y - (size_y / 2))
            north, east = (pt.x + (size_x / 2)), (pt.y + (size_y / 2))
            new_geom = shapely.geometry.box(south, west, north,
                                            east).__geo_interface__
        else:
            new_geom = None
        new_property = {
            'count': value[0],
        }
        add_geojson_feature(resp, new_geom, new_property)

    resp = make_response(json.dumps(resp, default=dthandler), 200)
    resp.headers['Content-Type'] = 'application/json'
    return resp
Example #21
def timeseries():
    validator = TimeseriesValidator()

    deserialized_arguments = validator.load(request.args)
    serialized_arguments = json.loads(validator.dumps(deserialized_arguments.data).data)

    if deserialized_arguments.errors:
        return make_error(deserialized_arguments.errors, 400, serialized_arguments)

    qargs = deserialized_arguments.data

    agg = qargs['agg']
    data_type = qargs['data_type']
    geom = qargs['location_geom__within']
    pointset = qargs['dataset_name']
    pointsets = qargs['dataset_name__in']
    start_date = qargs['obs_date__ge']
    end_date = qargs['obs_date__le']

    ctrees = {}
    raw_ctrees = {}

    if has_tree_filters(request.args):
        # Timeseries is a little tricky. If there aren't filters,
        # it would be ridiculous to build a condition tree for every one.
        for field, value in list(request.args.items()):
            if 'filter' in field:
                # This pattern matches the last occurrence of the '__' pattern.
                # Prevents an error that is caused by dataset names with trailing
                # underscores.
                tablename = re.split(r'__(?!_)', field)[0]
                metarecord = MetaTable.get_by_dataset_name(tablename)
                pt = metarecord.point_table
                ctrees[pt.name] = parse_tree(pt, json.loads(value))
                raw_ctrees[pt.name] = json.loads(value)

    point_set_names = [p.name for p in pointsets + [pointset] if p is not None]
    if not point_set_names:
        point_set_names = MetaTable.index()

    results = MetaTable.timeseries_all(point_set_names, agg, start_date, end_date, geom, ctrees)

    payload = {
        'meta': {
            'message': [],
            'query': serialized_arguments,
            'status': 'ok',
            'total': len(results)
        },
        'objects': results
    }

    if ctrees:
        payload['meta']['query']['filters'] = raw_ctrees

    if data_type == 'json':
        return jsonify(payload)

    elif data_type == 'csv':

        # response format
        # temporal_group,dataset_name_1,dataset_name_2
        # 2014-02-24 00:00:00,235,653
        # 2014-03-03 00:00:00,156,624

        fields = ['temporal_group']
        for o in payload['objects']:
            fields.append(o['dataset_name'])

        csv_resp = []
        i = 0
        for k, g in groupby(payload['objects'], key=itemgetter('dataset_name')):
            l_g = list(g)[0]

            j = 0
            for row in l_g['items']:
                # first iteration, populate the first column with temporal_groups
                if i == 0:
                    csv_resp.append([row['datetime']])
                csv_resp[j].append(row['count'])
                j += 1
            i += 1

        csv_resp.insert(0, fields)
        csv_resp = make_csv(csv_resp)
        resp = make_response(csv_resp, 200)
        resp.headers['Content-Type'] = 'text/csv'
        filedate = datetime.now().strftime('%Y-%m-%d')
        resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate

        return resp