Example 1
def timeseries():
    validator = ParamValidator()\
        .set_optional('agg', agg_validator, 'week')\
        .set_optional('data_type', make_format_validator(['json', 'csv']), 'json')\
        .set_optional('dataset_name__in', list_of_datasets_validator, MetaTable.index)\
        .set_optional('obs_date__ge', date_validator, datetime.now() - timedelta(days=90))\
        .set_optional('obs_date__le', date_validator, datetime.now())\
        .set_optional('location_geom__within', geom_validator, None)\
        .set_optional('buffer', int_validator, 100)

    err = validator.validate(request.args)
    if err:
        return bad_request(err)

    geom = validator.get_geom()
    table_names = validator.vals['dataset_name__in']
    start_date = validator.vals['obs_date__ge']
    end_date = validator.vals['obs_date__le']
    agg = validator.vals['agg']

    # Only examine tables that have a chance of containing records within the date and space boundaries.
    try:
        table_names = MetaTable.narrow_candidates(table_names, start_date,
                                                  end_date, geom)
    except Exception as e:
        msg = 'Failed to gather candidate tables.'
        return internal_error(msg, e)

    try:
        panel = MetaTable.timeseries_all(table_names=table_names,
                                         agg_unit=agg,
                                         start=start_date,
                                         end=end_date,
                                         geom=geom)
    except Exception as e:
        msg = 'Failed to construct timeseries.'
        return internal_error(msg, e)

    panel = MetaTable.attach_metadata(panel)
    resp = json_response_base(validator, panel)

    datatype = validator.vals['data_type']
    if datatype == 'json':
        resp = make_response(json.dumps(resp, default=dthandler), 200)
        resp.headers['Content-Type'] = 'application/json'
    elif datatype == 'csv':

        # response format
        # temporal_group,dataset_name_1,dataset_name_2
        # 2014-02-24 00:00:00,235,653
        # 2014-03-03 00:00:00,156,624

        fields = ['temporal_group']
        for o in resp['objects']:
            fields.append(o['dataset_name'])

        csv_resp = []
        i = 0
        for k, g in groupby(resp['objects'], key=itemgetter('dataset_name')):
            # groupby yields one group per dataset here; take its single entry
            l_g = list(g)[0]

            j = 0
            for row in l_g['items']:
                # first iteration, populate the first column with temporal_groups
                if i == 0:
                    csv_resp.append([row['datetime']])
                csv_resp[j].append(row['count'])
                j += 1
            i += 1

        csv_resp.insert(0, fields)
        csv_resp = make_csv(csv_resp)
        resp = make_response(csv_resp, 200)
        resp.headers['Content-Type'] = 'text/csv'
        filedate = datetime.now().strftime('%Y-%m-%d')
        resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate
    return resp
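
ParamValidator's internals aren't shown on this page, but the chained set_optional calls in Example 1 imply a builder-style API. Below is a minimal, hypothetical sketch of that pattern; the class name, the (value, error) convention for the validator callables, and the per-request evaluation of callable defaults (note that MetaTable.index is passed uncalled above) are all assumptions, not the project's actual implementation.

class ChainableValidator:
    """Hypothetical sketch of a chainable validator, not the real ParamValidator."""

    def __init__(self):
        self.vals = {}
        self._specs = {}

    def set_optional(self, name, check, default):
        # Record the check and default, then return self so calls can chain.
        self._specs[name] = (check, default)
        return self

    def validate(self, params):
        for name, (check, default) in self._specs.items():
            raw = params.get(name)
            if raw is None:
                # Callable defaults (e.g. MetaTable.index) are invoked per
                # request instead of being frozen when the chain is built.
                self.vals[name] = default() if callable(default) else default
                continue
            value, err = check(raw)  # assumed (value, error) return convention
            if err:
                return err  # first failure aborts, mirroring the bad_request path
            self.vals[name] = value
        return None

Returning self from set_optional is what makes the backslash-continued chain at the top of Example 1 read as a single fluent expression.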
Example 2
def _timeseries(args):

    meta_params = ['geom', 'dataset', 'dataset_name__in', 'obs_date__ge', 'obs_date__le', 'agg']
    meta_vals = [args.data.get(k) for k in meta_params]
    geom, dataset, table_names, start_date, end_date, agg = meta_vals

    ctrees = {}

    if has_tree_filters(args.data):
        # Timeseries is a little tricky: only build a condition tree for
        # datasets that actually have filters; building one for every
        # dataset would be wasteful.
        for field, value in args.data.items():
            if 'filter' in field:
                # Split on the last '__' that isn't followed by another
                # underscore, so dataset names with trailing underscores
                # stay intact.
                tablename = re.split(r'__(?!_)', field)[0]
                metarecord = MetaTable.get_by_dataset_name(tablename)
                pt = metarecord.point_table
                ctrees[pt.name] = parse_tree(pt, value)
        # Purely cosmetic: this argument is unused, so it doesn't need
        # to show up in the JSON response.
        del args.data['dataset']

    # If no dataset_name__in list was provided, fill it in by invoking
    # MetaTable.index() here rather than in the validator, so the list
    # stays up to date.
    if table_names is None:
        table_names = MetaTable.index()
        args.data['dataset_name__in'] = table_names

    # If a single dataset was provided, it's the only thing we need to consider.
    if dataset is not None:
        table_names = [dataset.name]
        del args.data['dataset_name__in']

    # remove table names which wouldn't return anything for the query, given
    # the time and geom constraints
    try:
        table_names = MetaTable.narrow_candidates(table_names, start_date, end_date, geom)
    except Exception as e:
        msg = 'Failed to gather candidate tables.'
        return internal_error(msg, e)

    # If no table names survive, code further down would error. Better to
    # return early and tell the caller the request wouldn't find anything.
    if not table_names:
        return bad_request("Your request doesn't return any results. Try "
                           "adjusting your time constraint or location "
                           "parameters.")

    try:
        panel = MetaTable.timeseries_all(
            table_names, agg, start_date, end_date, geom, ctrees
        )
    except Exception as e:
        msg = 'Failed to construct timeseries.'
        return internal_error(msg, e)

    panel = MetaTable.attach_metadata(panel)
    resp = json_response_base(args, panel, args.data)

    datatype = args.data['data_type']
    if datatype == 'json':
        resp = make_response(json.dumps(resp, default=unknown_object_json_handler), 200)
        resp.headers['Content-Type'] = 'application/json'
    elif datatype == 'csv':

        # response format
        # temporal_group,dataset_name_1,dataset_name_2
        # 2014-02-24 00:00:00,235,653
        # 2014-03-03 00:00:00,156,624

        fields = ['temporal_group']
        for o in resp['objects']:
            fields.append(o['dataset_name'])

        csv_resp = []
        i = 0
        for k, g in groupby(resp['objects'], key=itemgetter('dataset_name')):
            l_g = list(g)[0]

            j = 0
            for row in l_g['items']:
                # first iteration, populate the first column with temporal_groups
                if i == 0:
                    csv_resp.append([row['datetime']])
                csv_resp[j].append(row['count'])
                j += 1
            i += 1

        csv_resp.insert(0, fields)
        csv_resp = make_csv(csv_resp)
        resp = make_response(csv_resp, 200)
        resp.headers['Content-Type'] = 'text/csv'
        filedate = datetime.now().strftime('%Y-%m-%d')
        resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate

    return resp
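
The re.split(r'__(?!_)', field) idiom in Example 2 rewards a standalone look. The negative lookahead refuses to match a '__' that is followed by yet another underscore, which is what protects dataset names ending in an underscore; the field names below are made up for illustration.

import re

# '__' is the separator between dataset name and filter suffix. The (?!_)
# lookahead skips any '__' followed by a further underscore, so a trailing
# underscore in the dataset name is preserved.
print(re.split(r'__(?!_)', 'crimes__filter')[0])         # crimes
print(re.split(r'__(?!_)', 'crimes_2001___filter')[0])   # crimes_2001_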
Example 3
def _timeseries(args):

    geom = args.data['geom']
    dataset = args.data.get('dataset')
    table_names = args.data['dataset_name__in']
    start_date = args.data['obs_date__ge']
    end_date = args.data['obs_date__le']
    agg = args.data['agg']

    # if a single dataset was provided, it's the only thing we need to consider
    if dataset is not None:
        table_names = [dataset.name]
        # drop it from the query's meta information so the response
        # doesn't echo the full dataset index
        del args.data['dataset_name__in']

    # remove table names which wouldn't return anything for the query, given
    # the time and geom constraints
    try:
        table_names = MetaTable.narrow_candidates(table_names, start_date, end_date, geom)
    except Exception as e:
        msg = 'Failed to gather candidate tables.'
        return internal_error(msg, e)

    # If no table names survive, code further down would error. Better to
    # return early and tell the caller the request wouldn't find anything.
    if not table_names:
        return bad_request("Your request doesn't return any results. Try "
                           "adjusting your time constraint or location "
                           "parameters.")

    try:
        panel = MetaTable.timeseries_all(
            table_names, agg, start_date, end_date, geom
        )
    except Exception as e:
        msg = 'Failed to construct timeseries.'
        return internal_error(msg, e)

    panel = MetaTable.attach_metadata(panel)
    resp = json_response_base(args, panel, args.data)

    datatype = args.data['data_type']
    if datatype == 'json':
        resp = make_response(json.dumps(resp, default=unknown_object_json_handler), 200)
        resp.headers['Content-Type'] = 'application/json'
    elif datatype == 'csv':

        # response format
        # temporal_group,dataset_name_1,dataset_name_2
        # 2014-02-24 00:00:00,235,653
        # 2014-03-03 00:00:00,156,624

        fields = ['temporal_group']
        for o in resp['objects']:
            fields.append(o['dataset_name'])

        csv_resp = []
        i = 0
        for k, g in groupby(resp['objects'], key=itemgetter('dataset_name')):
            l_g = list(g)[0]

            j = 0
            for row in l_g['items']:
                # first iteration, populate the first column with temporal_groups
                if i == 0:
                    csv_resp.append([row['datetime']])
                csv_resp[j].append(row['count'])
                j += 1
            i += 1

        csv_resp.insert(0, fields)
        csv_resp = make_csv(csv_resp)
        resp = make_response(csv_resp, 200)
        resp.headers['Content-Type'] = 'text/csv'
        filedate = datetime.now().strftime('%Y-%m-%d')
        resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate

    return resp
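
Every example on this page shares the same CSV branch, which pivots one time series per dataset into rows keyed by temporal group. Here is a self-contained sketch of that pivot; the dataset names and counts are invented, standing in for the resp['objects'] payload.

from itertools import groupby
from operator import itemgetter

# Invented stand-in for resp['objects']: one entry per dataset, with items
# aligned on the same temporal groups.
objects = [
    {'dataset_name': 'crimes', 'items': [
        {'datetime': '2014-02-24 00:00:00', 'count': 235},
        {'datetime': '2014-03-03 00:00:00', 'count': 156},
    ]},
    {'dataset_name': 'permits', 'items': [
        {'datetime': '2014-02-24 00:00:00', 'count': 653},
        {'datetime': '2014-03-03 00:00:00', 'count': 624},
    ]},
]

fields = ['temporal_group'] + [o['dataset_name'] for o in objects]

rows = []
for i, (_, group) in enumerate(groupby(objects, key=itemgetter('dataset_name'))):
    series = list(group)[0]
    for j, item in enumerate(series['items']):
        if i == 0:
            # The first dataset seeds each row with its temporal group.
            rows.append([item['datetime']])
        rows[j].append(item['count'])

rows.insert(0, fields)
# rows is now:
# [['temporal_group', 'crimes', 'permits'],
#  ['2014-02-24 00:00:00', 235, 653],
#  ['2014-03-03 00:00:00', 156, 624]]

Note that the pivot assumes every dataset reports the same temporal groups in the same order; rows would silently misalign otherwise.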
Example 4
def timeseries():
    validator = TimeseriesValidator()

    deserialized_arguments = validator.load(request.args)
    serialized_arguments = json.loads(validator.dumps(deserialized_arguments.data).data)

    if deserialized_arguments.errors:
        return make_error(deserialized_arguments.errors, 400, serialized_arguments)

    qargs = deserialized_arguments.data

    agg = qargs['agg']
    data_type = qargs['data_type']
    geom = qargs['location_geom__within']
    pointset = qargs['dataset_name']
    pointsets = qargs['dataset_name__in']
    start_date = qargs['obs_date__ge']
    end_date = qargs['obs_date__le']

    ctrees = {}
    raw_ctrees = {}

    if has_tree_filters(request.args):
        # Timeseries is a little tricky: only build a condition tree for
        # datasets that actually have filters; building one for every
        # dataset would be wasteful.
        for field, value in list(request.args.items()):
            if 'filter' in field:
                # Split on the last '__' that isn't followed by another
                # underscore, so dataset names with trailing underscores
                # stay intact.
                tablename = re.split(r'__(?!_)', field)[0]
                metarecord = MetaTable.get_by_dataset_name(tablename)
                pt = metarecord.point_table
                ctrees[pt.name] = parse_tree(pt, json.loads(value))
                raw_ctrees[pt.name] = json.loads(value)

    point_set_names = [p.name for p in pointsets + [pointset] if p is not None]
    if not point_set_names:
        point_set_names = MetaTable.index()

    results = MetaTable.timeseries_all(point_set_names, agg, start_date, end_date, geom, ctrees)

    payload = {
        'meta': {
            'message': [],
            'query': serialized_arguments,
            'status': 'ok',
            'total': len(results)
        },
        'objects': results
    }

    if ctrees:
        payload['meta']['query']['filters'] = raw_ctrees

    if data_type == 'json':
        return jsonify(payload)

    elif data_type == 'csv':

        # response format
        # temporal_group,dataset_name_1,dataset_name_2
        # 2014-02-24 00:00:00,235,653
        # 2014-03-03 00:00:00,156,624

        fields = ['temporal_group']
        for o in payload['objects']:
            fields.append(o['dataset_name'])

        csv_resp = []
        i = 0
        for k, g in groupby(payload['objects'], key=itemgetter('dataset_name')):
            l_g = list(g)[0]

            j = 0
            for row in l_g['items']:
                # first iteration, populate the first column with temporal_groups
                if i == 0:
                    csv_resp.append([row['datetime']])
                csv_resp[j].append(row['count'])
                j += 1
            i += 1

        csv_resp.insert(0, fields)
        csv_resp = make_csv(csv_resp)
        resp = make_response(csv_resp, 200)
        resp.headers['Content-Type'] = 'text/csv'
        filedate = datetime.now().strftime('%Y-%m-%d')
        resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate

        return resp
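
The load/dumps calls with .data and .errors attributes in Example 4 look like a marshmallow 2.x schema (marshmallow 3 raises ValidationError instead of returning errors). Assuming that, here is a toy schema illustrating the pattern; TimeseriesValidator's real fields aren't shown on this page, so the ones below are invented.

from marshmallow import Schema, fields

class ToyTimeseriesSchema(Schema):
    # 'missing' supplies the deserialization default in marshmallow 2.x.
    agg = fields.Str(missing='week')
    data_type = fields.Str(missing='json')

result = ToyTimeseriesSchema().load({'agg': 'day'})
print(result.data)    # {'agg': 'day', 'data_type': 'json'}
print(result.errors)  # {}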