def detail_aggregate():
    """Return time-bucketed row counts for one dataset joined to the master table.

    Reads Flask request args: requires ``dataset_name``; honors optional
    ``agg`` (temporal bucket, default ``'day'``), ``obs_date__ge`` /
    ``obs_date__le`` (defaulting to the last 90 days through today),
    ``data_type`` (``'json'`` or ``'csv'``), and any filter params understood
    by ``make_query``.

    Returns a Flask response: a JSON or CSV time series on success, or a JSON
    error payload with HTTP 400 for an invalid query.
    """
    raw_query_params = request.args.copy()
    agg, datatype, queries = parse_join_query(raw_query_params)
    if not agg:
        agg = 'day'
    # if no obs_date given, default to >= 90 days ago
    if not raw_query_params.get('obs_date__ge'):
        ninety_days_ago = datetime.now() - timedelta(days=90)
        raw_query_params['obs_date__ge'] = ninety_days_ago.strftime('%Y-%m-%d')
    if not raw_query_params.get('obs_date__le'):
        raw_query_params['obs_date__le'] = datetime.now().strftime('%Y-%m-%d')
    mt = MasterTable.__table__
    valid_query, base_clauses, resp, status_code = make_query(mt, queries['base'])
    # check for valid output format
    if datatype not in VALID_DATA_TYPE:
        valid_query = False
        resp['meta']['status'] = 'error'
        resp['meta']['message'] = "'%s' is an invalid output format" % datatype
        resp = make_response(json.dumps(resp, default=dthandler), 400)
        resp.headers['Content-Type'] = 'application/json'
    # check for valid temporal aggregate
    if agg not in VALID_AGG:
        valid_query = False
        resp['meta']['status'] = 'error'
        resp['meta']['message'] = "'%s' is an invalid temporal aggregation" % agg
        resp = make_response(json.dumps(resp, default=dthandler), 400)
        resp.headers['Content-Type'] = 'application/json'
    if valid_query:
        time_agg = func.date_trunc(agg, mt.c['obs_date'])
        base_query = session.query(time_agg, func.count(mt.c.dataset_row_id))
        dname = raw_query_params.get('dataset_name')
        try:
            # Reflect the detail table for the requested dataset.
            dataset = Table('dat_%s' % dname, Base.metadata,
                            autoload=True, autoload_with=engine,
                            extend_existing=True)
            valid_query, detail_clauses, resp, status_code = make_query(
                dataset, queries['detail'])
        except Exception:  # was a bare except:; narrowed so ^C etc. propagate
            valid_query = False
            resp['meta']['status'] = 'error'
            if not dname:
                # fixed: message previously read "dataset_name' is required"
                # (missing opening quote)
                resp['meta']['message'] = "'dataset_name' is required"
            else:
                resp['meta']['message'] = "unable to find dataset '%s'" % dname
            resp = make_response(json.dumps(resp, default=dthandler), 400)
            resp.headers['Content-Type'] = 'application/json'
        if valid_query:
            pk = [p.name for p in dataset.primary_key][0]
            base_query = base_query.join(
                dataset, mt.c.dataset_row_id == dataset.c[pk])
            for clause in base_clauses:
                base_query = base_query.filter(clause)
            for clause in detail_clauses:
                base_query = base_query.filter(clause)
            # .all() already returns a list; no need to re-wrap it
            values = base_query.group_by(time_agg).order_by(time_agg).all()
            # init from and to dates as python datetimes
            from_date = truncate(parse(raw_query_params['obs_date__ge']), agg)
            if 'obs_date__le' in raw_query_params:
                to_date = parse(raw_query_params['obs_date__le'])
            else:
                to_date = datetime.now()
            # Densify: walk every bucket in [from_date, to_date], filling
            # buckets with no DB row with a zero count.
            items = []
            dense_matrix = []
            cursor = from_date
            v_index = 0
            while cursor <= to_date:
                if v_index < len(values) and \
                        values[v_index][0].replace(tzinfo=None) == cursor:
                    dense_matrix.append((cursor, values[v_index][1]))
                    v_index += 1
                else:
                    dense_matrix.append((cursor, 0))
                cursor = increment_datetime_aggregate(cursor, agg)
            dense_matrix = OrderedDict(dense_matrix)
            for bucket, count in dense_matrix.items():
                items.append({
                    'datetime': bucket,
                    'count': count,
                })
            if datatype == 'json':
                resp['objects'] = items
                resp['meta']['status'] = 'ok'
                resp['meta']['query'] = raw_query_params
                loc = resp['meta']['query'].get('location_geom__within')
                if loc:
                    # echo the geometry back as parsed GeoJSON, not a string
                    resp['meta']['query']['location_geom__within'] = json.loads(loc)
                resp['meta']['query']['agg'] = agg
                resp = make_response(
                    json.dumps(resp, default=dthandler), status_code)
                resp.headers['Content-Type'] = 'application/json'
            elif datatype == 'csv':
                outp = StringIO()
                # fixed: fieldnames=items[0].keys() raised IndexError when the
                # requested date range produced zero buckets
                writer = csv.DictWriter(outp, fieldnames=['datetime', 'count'])
                writer.writeheader()
                writer.writerows(items)
                resp = make_response(outp.getvalue(), status_code)
                resp.headers['Content-Type'] = 'text/csv'
                filedate = datetime.now().strftime('%Y-%m-%d')
                resp.headers['Content-Disposition'] = \
                    'attachment; filename=%s.csv' % (filedate)
    return resp
def detail_aggregate():
    """Aggregate counts of a single dataset's rows over time buckets.

    Request args: ``dataset_name`` (required); ``agg`` temporal bucket
    (default ``'day'``); ``obs_date__ge``/``obs_date__le`` bounds (default:
    last 90 days through today); ``data_type`` output format (``'json'`` or
    ``'csv'``); plus any ``make_query`` filter clauses.

    Returns a Flask response — JSON/CSV time series, or a JSON error body
    with HTTP 400 when validation fails.

    NOTE(review): this function is defined twice in this module; the later
    definition wins at import time — confirm which copy is intended.
    """
    raw_query_params = request.args.copy()
    agg, datatype, queries = parse_join_query(raw_query_params)
    if not agg:
        agg = 'day'
    # if no obs_date given, default to >= 90 days ago
    if not raw_query_params.get('obs_date__ge'):
        ninety_days_ago = datetime.now() - timedelta(days=90)
        raw_query_params['obs_date__ge'] = ninety_days_ago.strftime('%Y-%m-%d')
    if not raw_query_params.get('obs_date__le'):
        raw_query_params['obs_date__le'] = datetime.now().strftime('%Y-%m-%d')
    mt = MasterTable.__table__
    valid_query, base_clauses, resp, status_code = make_query(mt, queries['base'])
    # check for valid output format
    if datatype not in VALID_DATA_TYPE:
        valid_query = False
        resp['meta']['status'] = 'error'
        resp['meta']['message'] = "'%s' is an invalid output format" % datatype
        resp = make_response(json.dumps(resp, default=dthandler), 400)
        resp.headers['Content-Type'] = 'application/json'
    # check for valid temporal aggregate
    if agg not in VALID_AGG:
        valid_query = False
        resp['meta']['status'] = 'error'
        resp['meta']['message'] = "'%s' is an invalid temporal aggregation" % agg
        resp = make_response(json.dumps(resp, default=dthandler), 400)
        resp.headers['Content-Type'] = 'application/json'
    if valid_query:
        time_agg = func.date_trunc(agg, mt.c['obs_date'])
        base_query = session.query(time_agg, func.count(mt.c.dataset_row_id))
        dname = raw_query_params.get('dataset_name')
        try:
            # Reflect the per-dataset detail table.
            dataset = Table('dat_%s' % dname, Base.metadata,
                            autoload=True, autoload_with=engine,
                            extend_existing=True)
            valid_query, detail_clauses, resp, status_code = make_query(
                dataset, queries['detail'])
        except Exception:  # was a bare except:; keep system exits uncaught
            valid_query = False
            resp['meta']['status'] = 'error'
            if not dname:
                # fixed typo: the message was missing its opening quote
                resp['meta']['message'] = "'dataset_name' is required"
            else:
                resp['meta']['message'] = "unable to find dataset '%s'" % dname
            resp = make_response(json.dumps(resp, default=dthandler), 400)
            resp.headers['Content-Type'] = 'application/json'
        if valid_query:
            pk = [p.name for p in dataset.primary_key][0]
            base_query = base_query.join(
                dataset, mt.c.dataset_row_id == dataset.c[pk])
            for clause in base_clauses:
                base_query = base_query.filter(clause)
            for clause in detail_clauses:
                base_query = base_query.filter(clause)
            # .all() already yields a list
            values = base_query.group_by(time_agg).order_by(time_agg).all()
            # init from and to dates as python datetimes
            from_date = truncate(parse(raw_query_params['obs_date__ge']), agg)
            if 'obs_date__le' in raw_query_params:
                to_date = parse(raw_query_params['obs_date__le'])
            else:
                to_date = datetime.now()
            # Densify the series: every bucket between from_date and to_date
            # appears, with 0 where the query returned no row.
            items = []
            dense_matrix = []
            cursor = from_date
            v_index = 0
            while cursor <= to_date:
                if v_index < len(values) and \
                        values[v_index][0].replace(tzinfo=None) == cursor:
                    dense_matrix.append((cursor, values[v_index][1]))
                    v_index += 1
                else:
                    dense_matrix.append((cursor, 0))
                cursor = increment_datetime_aggregate(cursor, agg)
            dense_matrix = OrderedDict(dense_matrix)
            for bucket, count in dense_matrix.items():
                items.append({
                    'datetime': bucket,
                    'count': count,
                })
            if datatype == 'json':
                resp['objects'] = items
                resp['meta']['status'] = 'ok'
                resp['meta']['query'] = raw_query_params
                loc = resp['meta']['query'].get('location_geom__within')
                if loc:
                    # return the geometry as parsed GeoJSON rather than a string
                    resp['meta']['query']['location_geom__within'] = json.loads(loc)
                resp['meta']['query']['agg'] = agg
                resp = make_response(
                    json.dumps(resp, default=dthandler), status_code)
                resp.headers['Content-Type'] = 'application/json'
            elif datatype == 'csv':
                outp = StringIO()
                # fixed: items[0].keys() raised IndexError on an empty series
                writer = csv.DictWriter(outp, fieldnames=['datetime', 'count'])
                writer.writeheader()
                writer.writerows(items)
                resp = make_response(outp.getvalue(), status_code)
                resp.headers['Content-Type'] = 'text/csv'
                filedate = datetime.now().strftime('%Y-%m-%d')
                resp.headers['Content-Disposition'] = \
                    'attachment; filename=%s.csv' % (filedate)
    return resp
def dataset():
    """Return a per-dataset, time-bucketed count series across the master table.

    Request args: optional ``agg`` temporal bucket (default ``'day'``),
    ``obs_date__ge``/``obs_date__le`` bounds (default: last 90 days through
    today), ``data_type`` (``'json'`` or ``'csv'``), plus any filter params
    understood by ``make_query``. Results are restricted to approved datasets
    whose most recent ETL task succeeded.

    Returns a Flask response: JSON objects (one per dataset, each with a
    dense ``items`` time series), a CSV matrix with one column per dataset,
    or a JSON error payload with HTTP 400 for an invalid query.
    """
    raw_query_params = request.args.copy()
    # set default value for temporal aggregation
    agg = raw_query_params.get('agg')
    if not agg:
        agg = 'day'
    else:
        del raw_query_params['agg']
    # if no obs_date given, default to >= 90 days ago
    if not raw_query_params.get('obs_date__ge'):
        ninety_days_ago = datetime.now() - timedelta(days=90)
        raw_query_params['obs_date__ge'] = ninety_days_ago.strftime('%Y-%m-%d')
    if not raw_query_params.get('obs_date__le'):
        raw_query_params['obs_date__le'] = datetime.now().strftime('%Y-%m-%d')
    # set datatype
    datatype = 'json'
    if raw_query_params.get('data_type'):
        datatype = raw_query_params['data_type']
        del raw_query_params['data_type']
    # Only expose approved datasets whose latest celery ETL task (the most
    # recently finished task among m.result_ids) ended in SUCCESS.
    q = '''
        SELECT m.dataset_name
        FROM meta_master AS m
        LEFT JOIN celery_taskmeta AS c
          ON c.id = (
            SELECT id FROM celery_taskmeta
            WHERE task_id = ANY(m.result_ids)
            ORDER BY date_done DESC
            LIMIT 1
          )
        WHERE m.approved_status = 'true'
          AND c.status = 'SUCCESS'
    '''
    with engine.begin() as c:
        dataset_names = [d[0] for d in c.execute(q)]
    raw_query_params['dataset_name__in'] = ','.join(dataset_names)
    mt = MasterTable.__table__
    valid_query, query_clauses, resp, status_code = make_query(
        mt, raw_query_params)
    # check for valid output format
    if datatype not in VALID_DATA_TYPE:
        valid_query = False
        resp['meta']['status'] = 'error'
        resp['meta']['message'] = "'%s' is an invalid output format" % datatype
        resp = make_response(json.dumps(resp, default=dthandler), 400)
        resp.headers['Content-Type'] = 'application/json'
    # check for valid temporal aggregate
    if agg not in VALID_AGG:
        valid_query = False
        resp['meta']['status'] = 'error'
        resp['meta']['message'] = "'%s' is an invalid temporal aggregation" % agg
        resp = make_response(json.dumps(resp, default=dthandler), 400)
        resp.headers['Content-Type'] = 'application/json'
    if valid_query:
        time_agg = func.date_trunc(agg, mt.c['obs_date'])
        base_query = session.query(time_agg,
                                   func.count(mt.c['obs_date']),
                                   mt.c['dataset_name'])
        base_query = base_query.filter(mt.c['current_flag'] == True)
        for clause in query_clauses:
            base_query = base_query.filter(clause)
        base_query = base_query.group_by(mt.c['dataset_name'])\
            .group_by(time_agg)\
            .order_by(time_agg)
        values = base_query.all()
        # init from and to dates as python datetimes
        from_date = truncate(parse(raw_query_params['obs_date__ge']), agg)
        if 'obs_date__le' in raw_query_params:
            to_date = parse(raw_query_params['obs_date__le'])
        else:
            to_date = datetime.now()
        # build the response: one object per dataset_name (row index 2),
        # each carrying a dense time series
        results = sorted(values, key=itemgetter(2))
        for dataset_name, group in groupby(results, key=itemgetter(2)):
            d = {'dataset_name': dataset_name}
            items = []
            dense_matrix = []
            cursor = from_date
            v_index = 0
            dataset_values = list(group)
            while cursor <= to_date:
                if v_index < len(dataset_values) and \
                        dataset_values[v_index][0].replace(tzinfo=None) == cursor:
                    dense_matrix.append((cursor, dataset_values[v_index][1]))
                    v_index += 1
                else:
                    # fill buckets with no observations with a zero count
                    dense_matrix.append((cursor, 0))
                cursor = increment_datetime_aggregate(cursor, agg)
            dense_matrix = OrderedDict(dense_matrix)
            # renamed loop variable: the original reused `k` and shadowed
            # the dataset_name bound by the enclosing groupby loop
            for bucket, count in dense_matrix.items():
                items.append({
                    'datetime': bucket,
                    'count': count,
                })
            d['items'] = items
            resp['objects'].append(d)
        resp['meta']['query'] = raw_query_params
        loc = resp['meta']['query'].get('location_geom__within')
        if loc:
            # echo the geometry back as parsed GeoJSON, not a string
            resp['meta']['query']['location_geom__within'] = json.loads(loc)
        resp['meta']['query']['agg'] = agg
        resp['meta']['status'] = 'ok'
        if datatype == 'json':
            resp = make_response(
                json.dumps(resp, default=dthandler), status_code)
            resp.headers['Content-Type'] = 'application/json'
        elif datatype == 'csv':
            # response format
            # temporal_group,dataset_name_1,dataset_name_2
            # 2014-02-24 00:00:00,235,653
            # 2014-03-03 00:00:00,156,624
            fields = ['temporal_group']
            for o in resp['objects']:
                fields.append(o['dataset_name'])
            csv_resp = []
            # enumerate replaces the original hand-rolled i/j counters
            for col_index, (name, grp) in enumerate(
                    groupby(resp['objects'], key=itemgetter('dataset_name'))):
                obj = list(grp)[0]
                for row_index, row in enumerate(obj['items']):
                    # first dataset populates the temporal_group column
                    if col_index == 0:
                        csv_resp.append([row['datetime']])
                    csv_resp[row_index].append(row['count'])
            csv_resp.insert(0, fields)
            csv_resp = make_csv(csv_resp)
            resp = make_response(csv_resp, 200)
            resp.headers['Content-Type'] = 'text/csv'
            filedate = datetime.now().strftime('%Y-%m-%d')
            resp.headers['Content-Disposition'] = \
                'attachment; filename=%s.csv' % (filedate)
    return resp
def dataset():
    """Return per-dataset, time-bucketed row counts from the master table.

    Request args: optional ``agg`` temporal bucket (default ``'day'``),
    ``obs_date__ge``/``obs_date__le`` bounds (default: last 90 days through
    today), ``data_type`` (``'json'`` or ``'csv'``), plus any filter params
    understood by ``make_query``.

    Returns a Flask response: JSON objects (one per dataset with a dense
    ``items`` series), a CSV matrix with one column per dataset, or a JSON
    error payload with HTTP 400 for an invalid query.

    NOTE(review): this function is defined twice in this module (the other
    copy also filters by ETL task status); the later definition wins at
    import time — confirm which copy is intended.
    """
    raw_query_params = request.args.copy()
    # set default value for temporal aggregation
    agg = raw_query_params.get('agg')
    if not agg:
        agg = 'day'
    else:
        del raw_query_params['agg']
    # if no obs_date given, default to >= 90 days ago
    if not raw_query_params.get('obs_date__ge'):
        ninety_days_ago = datetime.now() - timedelta(days=90)
        raw_query_params['obs_date__ge'] = ninety_days_ago.strftime('%Y-%m-%d')
    if not raw_query_params.get('obs_date__le'):
        raw_query_params['obs_date__le'] = datetime.now().strftime('%Y-%m-%d')
    # set datatype
    datatype = 'json'
    if raw_query_params.get('data_type'):
        datatype = raw_query_params['data_type']
        del raw_query_params['data_type']
    mt = MasterTable.__table__
    valid_query, query_clauses, resp, status_code = make_query(
        mt, raw_query_params)
    # check for valid output format
    if datatype not in VALID_DATA_TYPE:
        valid_query = False
        resp['meta']['status'] = 'error'
        resp['meta']['message'] = "'%s' is an invalid output format" % datatype
        resp = make_response(json.dumps(resp, default=dthandler), 400)
        resp.headers['Content-Type'] = 'application/json'
    # check for valid temporal aggregate
    if agg not in VALID_AGG:
        valid_query = False
        resp['meta']['status'] = 'error'
        resp['meta']['message'] = "'%s' is an invalid temporal aggregation" % agg
        resp = make_response(json.dumps(resp, default=dthandler), 400)
        resp.headers['Content-Type'] = 'application/json'
    if valid_query:
        time_agg = func.date_trunc(agg, mt.c['obs_date'])
        base_query = session.query(time_agg,
                                   func.count(mt.c['obs_date']),
                                   mt.c['dataset_name'])
        base_query = base_query.filter(mt.c['current_flag'] == True)
        for clause in query_clauses:
            base_query = base_query.filter(clause)
        base_query = base_query.group_by(mt.c['dataset_name'])\
            .group_by(time_agg)\
            .order_by(time_agg)
        values = base_query.all()
        # init from and to dates as python datetimes
        from_date = truncate(parse(raw_query_params['obs_date__ge']), agg)
        if 'obs_date__le' in raw_query_params:
            to_date = parse(raw_query_params['obs_date__le'])
        else:
            to_date = datetime.now()
        # build the response: group query rows by dataset_name (column 2)
        results = sorted(values, key=itemgetter(2))
        for dataset_name, group in groupby(results, key=itemgetter(2)):
            d = {'dataset_name': dataset_name}
            items = []
            dense_matrix = []
            cursor = from_date
            v_index = 0
            dataset_values = list(group)
            while cursor <= to_date:
                if v_index < len(dataset_values) and \
                        dataset_values[v_index][0].replace(tzinfo=None) == cursor:
                    dense_matrix.append((cursor, dataset_values[v_index][1]))
                    v_index += 1
                else:
                    # zero-fill buckets with no observations
                    dense_matrix.append((cursor, 0))
                cursor = increment_datetime_aggregate(cursor, agg)
            dense_matrix = OrderedDict(dense_matrix)
            # renamed loop variable: the original reused `k`, shadowing the
            # dataset_name bound by the enclosing groupby loop
            for bucket, count in dense_matrix.items():
                items.append({
                    'datetime': bucket,
                    'count': count,
                })
            d['items'] = items
            resp['objects'].append(d)
        resp['meta']['query'] = raw_query_params
        loc = resp['meta']['query'].get('location_geom__within')
        if loc:
            # return the geometry as parsed GeoJSON rather than a string
            resp['meta']['query']['location_geom__within'] = json.loads(loc)
        resp['meta']['query']['agg'] = agg
        resp['meta']['status'] = 'ok'
        if datatype == 'json':
            resp = make_response(
                json.dumps(resp, default=dthandler), status_code)
            resp.headers['Content-Type'] = 'application/json'
        elif datatype == 'csv':
            # response format
            # temporal_group,dataset_name_1,dataset_name_2
            # 2014-02-24 00:00:00,235,653
            # 2014-03-03 00:00:00,156,624
            fields = ['temporal_group']
            for o in resp['objects']:
                fields.append(o['dataset_name'])
            csv_resp = []
            # enumerate replaces the original hand-rolled i/j counters
            for col_index, (name, grp) in enumerate(
                    groupby(resp['objects'], key=itemgetter('dataset_name'))):
                obj = list(grp)[0]
                for row_index, row in enumerate(obj['items']):
                    # first dataset populates the temporal_group column
                    if col_index == 0:
                        csv_resp.append([row['datetime']])
                    csv_resp[row_index].append(row['count'])
            csv_resp.insert(0, fields)
            csv_resp = make_csv(csv_resp)
            resp = make_response(csv_resp, 200)
            resp.headers['Content-Type'] = 'text/csv'
            filedate = datetime.now().strftime('%Y-%m-%d')
            resp.headers['Content-Disposition'] = \
                'attachment; filename=%s.csv' % (filedate)
    return resp