def date_starting_periods(user, period_len):
    start_end = date_start_end(user)
    start = start_end['start']
    end = start_end['end']
    start_trunc = truncate(dt.datetime.combine(start, dt.datetime.min.time()),
                           PY_TRUNC[period_len])
    end_trunc = truncate(dt.datetime.combine(end, dt.datetime.min.time()),
                         PY_TRUNC[period_len])
    date_index = pd.period_range(start=start_trunc, end=end_trunc,
                                 freq=period_len.upper()).to_timestamp()
    return date_index.date
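# Hedged usage sketch for the helper above, assuming PY_TRUNC maps a pandas
# frequency string such as 'w' to a datetime_truncate unit ('week'); the real
# mapping and date_start_end live outside this snippet.
import datetime as dt
import pandas as pd
from datetime_truncate import truncate

start_trunc = truncate(dt.datetime(2021, 3, 10), 'week')  # Monday 2021-03-08
end_trunc = truncate(dt.datetime(2021, 4, 2), 'week')     # Monday 2021-03-29
weeks = pd.period_range(start=start_trunc, end=end_trunc, freq='W').to_timestamp()
print(weeks.date)  # one entry per week in the range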
def emit_geo_file(relpath, year):
    category_patterns = (
        'VEHICLE - STOLEN',
        'THEFT FROM MOTOR VEHICLE - PETTY ($950.01 & OVER)',
        'ROBBERY',
        'THEFT',
        'BURGLARY',
        'BIKE - STOLEN',
        'THEFT-GRAND',
    )
    for d in DictReader(open(relpath, 'r'), delimiter=',', quotechar='"'):
        description = d['Crm Cd Desc']
        if not any(p in description for p in category_patterns):
            continue
        if 'ATTEMPT' in description:
            continue
        day = truncate(parse_date(d['DATE OCC']), 'day')
        if day.year not in (year, year - 1):
            continue
        latlon = d['Location 1'].strip('()').split(',')
        if len(latlon) != 2:
            continue
        if not eagle_rock.contains(*latlon):
            continue
        emit_crime(day, description)
def net_flows_emissions(
    region: str,
    data: List[RegionFlowEmissionsResult],
    interval: TimeInterval,
) -> Dict[str, List[DataQueryResult]]:
    """
    Calculates net region flow emissions for a region from a RegionFlowResult
    """
    output_set = {}

    for k, v in groupby(data, lambda x: truncate(x.interval, interval.trunc)):
        values = list(v)

        if k not in output_set:
            output_set[k] = {
                "imports": 0.0,
                "exports": 0.0,
            }

        export_emissions_sum = 0.0
        import_emissions_sum = 0.0

        # Sum up emissions for flows leaving and entering the region
        for es in values:
            if not es.flow_from:
                continue

            if es.flow_from == region:
                if es.energy > 0:
                    if es.flow_from_emissions:
                        export_emissions_sum += abs(es.flow_from_emissions)
                else:
                    if es.flow_to_emissions:
                        import_emissions_sum += abs(es.flow_to_emissions)

            if es.flow_to == region:
                if es.energy < 0:
                    if es.flow_from_emissions:
                        export_emissions_sum += abs(es.flow_from_emissions)
                else:
                    if es.flow_to_emissions:
                        import_emissions_sum += abs(es.flow_to_emissions)

        output_set[k]["imports"] = import_emissions_sum
        output_set[k]["exports"] = export_emissions_sum

    imports_list = []
    exports_list = []

    for trunc_interval, flow_sums in output_set.items():
        imports_list.append(
            DataQueryResult(interval=trunc_interval, group_by="imports",
                            result=flow_sums["imports"]))
        exports_list.append(
            DataQueryResult(interval=trunc_interval, group_by="exports",
                            result=flow_sums["exports"]))

    return {"imports": imports_list, "exports": exports_list}
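# A minimal way to exercise net_flows_emissions above, with simple stand-ins
# for the result/interval types (only the attributes the function reads);
# DataQueryResult is assumed to be imported as in the original module.
from collections import namedtuple
from datetime import datetime

FlowStub = namedtuple(
    "FlowStub",
    "interval flow_from flow_to energy flow_from_intensity "
    "flow_from_emissions flow_to_emissions")
IntervalStub = namedtuple("IntervalStub", "trunc")

rows = [
    FlowStub(datetime(2021, 1, 1, 0, 5), "NSW1", "QLD1", 50.0, 0.9, 45.0, 40.0),
    FlowStub(datetime(2021, 1, 1, 0, 35), "NSW1", "QLD1", -20.0, 0.9, 18.0, 16.0),
]
result = net_flows_emissions("NSW1", rows, IntervalStub(trunc="hour"))
# result["exports"][0].result == 45.0, result["imports"][0].result == 16.0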
def update_type3(coll, doc, bksz, request):
    coll.update_one(
        {
            "tag_group_nm": doc["group_nm"],
            "tag_nm": doc["tag_nm"],
            "date": truncate(doc["ts"], str(bksz) + '_minute').strftime("%Y-%m-%d %H%M")
        },
        {
            "$min": {"first": doc["ts"]},
            "$max": {"last": doc["ts"]},
            "$inc": {"nsamples": 1, "total": doc["temp"]},
            "$set": {"val": doc["temp"]},
            "$addToSet": {"tag": {"ts": doc["ts"], "val": doc["temp"]}}
            # "$addToSet": {"tag": {doc["ts"]: doc["temp"]}}
        },
        upsert=True)
def get_interval(flow_result: RegionFlowResult) -> datetime:
    value = flow_result.interval

    # `interval` is defined in the enclosing scope in the original module
    if interval:
        value = truncate(value, interval.trunc)

    return value
def update_type4(coll, doc, bulkopt, request):
    coll.update_one(
        {
            "tag.group": doc["group_nm"],
            "tag.nm": doc["tag_nm"],
            "tag.unit": doc["unit"],
            "date": truncate(doc["ts"], '10_minute').strftime("%Y-%m-%d %H%M")
        },
        {
            "$min": {"first": doc["ts"], "val.min": doc["temp"]},
            "$max": {"last": doc["ts"], "val.max": doc["temp"]},
            "$inc": {"nsamples": 1, "total": doc["temp"]},
            "$set": {"val.last": doc["temp"]},
            "$addToSet": {"trend": {"ts": doc["ts"], "val": doc["temp"]}}
            # "$addToSet": {"tag": {doc["ts"]: doc["temp"]}}
        },
        upsert=True)
def truncate_values(values: Iterator[IndicatorValue], unit: str = 'day') -> Dict[datetime, list]:
    # keys are truncated datetimes, values are lists of the .value attributes
    truncated = collections.defaultdict(list)

    for v in values:
        truncated[truncate(v.timestamp, unit)].append(v.value)

    return truncated
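# Quick illustration of truncate_values with a stand-in IndicatorValue
# (just the two attributes the function touches); real callers pass ORM rows.
from collections import namedtuple
from datetime import datetime

IV = namedtuple("IV", "timestamp value")
vals = [IV(datetime(2021, 5, 1, 9, 15), 1.0), IV(datetime(2021, 5, 1, 18, 0), 3.0)]
print(truncate_values(iter(vals)))
# defaultdict(<class 'list'>, {datetime.datetime(2021, 5, 1, 0, 0): [1.0, 3.0]})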
def test_truncate_to_month(self):
    self.assertEqual(
        truncate(self.default_dt, 'month'),
        self.default_dt.replace(day=1, hour=0, minute=0, second=0, microsecond=0))
def test_truncate_to_week(self):
    self.assertEqual(
        truncate(self.default_dt, 'week'),
        self.default_dt.replace(day=9, hour=0, minute=0, second=0, microsecond=0))
    self.assertEqual(
        truncate(self.default_dt.replace(day=9), 'week'),
        self.default_dt.replace(day=9, hour=0, minute=0, second=0, microsecond=0))
    self.assertEqual(
        truncate(self.default_dt.replace(day=16), 'week'),
        self.default_dt.replace(day=16, hour=0, minute=0, second=0, microsecond=0))
    self.assertEqual(
        truncate_week(self.default_dt),
        self.default_dt.replace(day=9, hour=0, minute=0, second=0, microsecond=0))
    self.assertEqual(
        truncate_week(self.default_dt.replace(day=9)),
        self.default_dt.replace(day=9, hour=0, minute=0, second=0, microsecond=0))
    self.assertEqual(
        truncate_week(self.default_dt.replace(day=16)),
        self.default_dt.replace(day=16, hour=0, minute=0, second=0, microsecond=0))
def datetime_truncate(d, truncate_to='day'):
    # promote plain dates to midnight datetimes (datetime subclasses date,
    # so guard against re-combining an actual datetime)
    if isinstance(d, datetime.date) and not isinstance(d, datetime.datetime):
        d = datetime.datetime.combine(d, datetime.datetime.min.time())
    try:
        return truncate(d, truncate_to)
    except ValueError:
        # fall back for '<n>_day' style units that truncate() does not support
        num, unit = truncate_to.split('_')
        if unit == 'day':
            return add_days(d, int(num))
        raise
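# Behaviour sketch for the wrapper above (the '<n>_day' fallback also needs
# the external add_days helper, which is not shown in this snippet):
print(datetime_truncate(datetime.datetime(2021, 7, 15, 13, 45)))
# 2021-07-15 00:00:00
print(datetime_truncate(datetime.date(2021, 7, 15), 'month'))
# 2021-07-01 00:00:00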
def test_truncate_to_half_year(self):
    self.assertEqual(
        truncate(self.default_dt.replace(month=6), 'half_year'),
        self.default_dt.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0))
    self.assertEqual(
        truncate_half_year(self.default_dt.replace(month=6)),
        self.default_dt.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0))
    self.assertEqual(
        truncate(self.default_dt, 'half_year'),
        self.default_dt.replace(month=7, day=1, hour=0, minute=0, second=0, microsecond=0))
    self.assertEqual(
        truncate_half_year(self.default_dt),
        self.default_dt.replace(month=7, day=1, hour=0, minute=0, second=0, microsecond=0))
def emit_normalized_file(relpath, year):
    cat_whitelist = ('LARCENY THEFT', 'GRAND THEFT AUTO', 'BURGLARY', 'ROBBERY',)
    for d in DictReader(open(relpath, 'r'), delimiter=','):
        cat = d['CATEGORY']
        if cat not in cat_whitelist:
            continue
        day = truncate(parse_date(d['INCIDENT_DATE']), 'day')
        # allow for late reporting
        if day.year not in (year, year - 1):
            continue
        if d['ZIP'] != '90041':
            continue
        if d['DELETED'] != 'N':
            continue
        emit_crime(day, cat)
def update_type1(coll, doc, bulkopt, request):
    '''
    request.append(
        UpdateOne(
            {"tag_group_nm": doc["group_nm"], "date": doc["ts"].strftime("%Y-%m-%d %H")},
            {
                "$min": {"first": doc["ts"]},
                "$max": {"last": doc["ts"]},
                "$inc": {"nsamples": 1},
                "$addToSet": {"tag": {"ts": doc["ts"], "tag_nm": doc["tag_nm"], "val": doc["temp"]}}
            },
            upsert=True
        )
    )
    if bulkopt % 32 == 0:
        coll.bulk_write(request)
        request = []
    '''
    coll.update_one(
        {
            "tag_group_nm": doc["group_nm"],
            "date": truncate(doc["ts"], '10_minute').strftime("%Y-%m-%d %H%M")
        },
        {
            "$min": {"first": doc["ts"]},
            "$max": {"last": doc["ts"]},
            "$inc": {"nsamples": 1},
            "$addToSet": {"tag": {"ts": doc["ts"], "tag_nm": doc["tag_nm"], "val": doc["temp"]}}
        },
        upsert=True)
async def create_meter(db: Session, metric: schemas.Metric,
                       meter: schemas.MeterBase) -> models.Meter:
    if metric is None:
        raise Exception("Need a metric before meter")  # TODO: nicer error

    if metric.truncate_reading_to is not None:
        # FIXME: this can raise an exception for invalid truncate_reading_to values
        meter.reading_at = truncate(meter.reading_at,
                                    truncate_to=metric.truncate_reading_to)

    db_rec = (db.query(models.Meter)
              .filter(models.Meter.metric_id == metric.id)
              .filter(models.Meter.reading_at == meter.reading_at)
              .first())

    is_reading_changed = False
    if db_rec is not None:
        if db_rec.reading != meter.reading:
            db_rec.reading = meter.reading
            db_rec.recorded_at = datetime.datetime.now()
            is_reading_changed = True
            logger.info(f"{metric.code}@{metric.org} reading overwritten")
    else:
        db_rec = models.Meter(
            metric_id=metric.id,
            reading_at=meter.reading_at,
            reading=meter.reading,
            recorded_at=datetime.datetime.now(),
        )
        db.add(db_rec)
        is_reading_changed = True

    # update last metric values too
    db_metric = await get_metric(db, org=metric.org, id=metric.id)
    if is_reading_changed:
        db_metric.last_reading = db_metric.current_reading
        db_metric.last_reading_at = db_metric.current_reading_at
        db_metric.current_reading = db_rec.reading
        db_metric.current_reading_at = db_rec.reading_at

    db.commit()
    db.refresh(db_rec)
    return db_rec
def test_truncate_to_nth_minute(self):
    self.assertEqual(
        truncate(self.default_dt, '5_minute'),
        self.default_dt.replace(minute=10, second=0, microsecond=0))
    self.assertEqual(
        truncate(self.default_dt, '2_minute'),
        self.default_dt.replace(minute=14, second=0, microsecond=0))
    self.assertEqual(
        truncate(self.default_dt.replace(minute=40), '13_minute'),
        self.default_dt.replace(minute=39, second=0, microsecond=0))
    self.assertEqual(
        truncate(self.default_dt.replace(minute=20), '9_minute'),
        self.default_dt.replace(minute=18, second=0, microsecond=0))

    with self.assertRaises(ValueError) as cm:
        truncate(self.default_dt, '60_minute')
    assert cm.exception.args[0] == (
        '`nth_minute` must be >= 0 and < 60, was 60')
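# The nth-minute behaviour pinned down by the test above reduces to flooring
# the minute to a multiple of n; a minimal sketch of that rule (not the
# library's actual implementation):
def to_nth_minute(value, n):
    if not 0 < n < 60:
        raise ValueError('`nth_minute` must be >= 0 and < 60, was %d' % n)
    return value.replace(minute=value.minute - value.minute % n,
                         second=0, microsecond=0)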
def period_beginning_day(datetime):
    ''' Sugar for :py:func:`datetime_truncate.truncate(datetime, 'day')` '''
    return truncate(datetime, 'day')
def dataset():
    raw_query_params = request.args.copy()

    # set default value for temporal aggregation
    agg = raw_query_params.get('agg')
    if not agg:
        agg = 'day'
    else:
        del raw_query_params['agg']

    # if no obs_date given, default to >= 90 days ago
    if not raw_query_params.get('obs_date__ge'):
        ninety_days_ago = datetime.now() - timedelta(days=90)
        raw_query_params['obs_date__ge'] = ninety_days_ago.strftime('%Y-%m-%d')
    if not raw_query_params.get('obs_date__le'):
        raw_query_params['obs_date__le'] = datetime.now().strftime('%Y-%m-%d')

    # set datatype
    datatype = 'json'
    if raw_query_params.get('data_type'):
        datatype = raw_query_params['data_type']
        del raw_query_params['data_type']

    mt = MasterTable.__table__
    valid_query, query_clauses, resp, status_code = make_query(mt, raw_query_params)

    # check for valid output format
    if datatype not in VALID_DATA_TYPE:
        valid_query = False
        resp['meta']['status'] = 'error'
        resp['meta']['message'] = "'%s' is an invalid output format" % datatype
        resp = make_response(json.dumps(resp, default=dthandler), 400)
        resp.headers['Content-Type'] = 'application/json'

    # check for valid temporal aggregate
    if agg not in VALID_AGG:
        valid_query = False
        resp['meta']['status'] = 'error'
        resp['meta']['message'] = "'%s' is an invalid temporal aggregation" % agg
        resp = make_response(json.dumps(resp, default=dthandler), 400)
        resp.headers['Content-Type'] = 'application/json'

    if valid_query:
        time_agg = func.date_trunc(agg, mt.c['obs_date'])
        base_query = session.query(time_agg,
                                   func.count(mt.c['obs_date']),
                                   mt.c['dataset_name'])
        base_query = base_query.filter(mt.c['current_flag'] == True)
        for clause in query_clauses:
            base_query = base_query.filter(clause)
        base_query = base_query.group_by(mt.c['dataset_name'])\
                               .group_by(time_agg)\
                               .order_by(time_agg)
        values = [o for o in base_query.all()]

        # init from and to dates as python datetimes
        from_date = truncate(parse(raw_query_params['obs_date__ge']), agg)
        if 'obs_date__le' in raw_query_params.keys():
            to_date = parse(raw_query_params['obs_date__le'])
        else:
            to_date = datetime.now()

        # build the response
        results = sorted(values, key=itemgetter(2))
        for k, g in groupby(results, key=itemgetter(2)):
            d = {'dataset_name': k}
            items = []
            dense_matrix = []
            cursor = from_date
            v_index = 0
            dataset_values = list(g)
            while cursor <= to_date:
                if v_index < len(dataset_values) and \
                        dataset_values[v_index][0].replace(tzinfo=None) == cursor:
                    dense_matrix.append((cursor, dataset_values[v_index][1]))
                    v_index += 1
                else:
                    dense_matrix.append((cursor, 0))
                cursor = increment_datetime_aggregate(cursor, agg)

            dense_matrix = OrderedDict(dense_matrix)
            for k in dense_matrix:
                items.append({
                    'datetime': k,
                    'count': dense_matrix[k],
                })

            d['items'] = items
            resp['objects'].append(d)

        resp['meta']['query'] = raw_query_params
        loc = resp['meta']['query'].get('location_geom__within')
        if loc:
            resp['meta']['query']['location_geom__within'] = json.loads(loc)
        resp['meta']['query']['agg'] = agg
        resp['meta']['status'] = 'ok'

        if datatype == 'json':
            resp = make_response(json.dumps(resp, default=dthandler), status_code)
            resp.headers['Content-Type'] = 'application/json'
        elif datatype == 'csv':
            # response format:
            # temporal_group,dataset_name_1,dataset_name_2
            # 2014-02-24 00:00:00,235,653
            # 2014-03-03 00:00:00,156,624
            fields = ['temporal_group']
            for o in resp['objects']:
                fields.append(o['dataset_name'])

            csv_resp = []
            i = 0
            for k, g in groupby(resp['objects'], key=itemgetter('dataset_name')):
                l_g = list(g)[0]
                j = 0
                for row in l_g['items']:
                    # on the first iteration, populate the first column with temporal_groups
                    if i == 0:
                        csv_resp.append([row['datetime']])
                    csv_resp[j].append(row['count'])
                    j += 1
                i += 1

            csv_resp.insert(0, fields)
            csv_resp = make_csv(csv_resp)
            resp = make_response(csv_resp, 200)
            resp.headers['Content-Type'] = 'text/csv'
            filedate = datetime.now().strftime('%Y-%m-%d')
            resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate
    return resp
def test_truncate_to_hour(self):
    self.assertEqual(
        truncate(self.default_dt, 'hour'),
        self.default_dt.replace(minute=0, second=0, microsecond=0))
def dataset():
    raw_query_params = request.args.copy()

    # set default value for temporal aggregation
    agg = raw_query_params.get('agg')
    if not agg:
        agg = 'day'
    else:
        del raw_query_params['agg']

    # if no obs_date given, default to >= 90 days ago
    if not raw_query_params.get('obs_date__ge'):
        ninety_days_ago = datetime.now() - timedelta(days=90)
        raw_query_params['obs_date__ge'] = ninety_days_ago.strftime('%Y-%m-%d')
    if not raw_query_params.get('obs_date__le'):
        raw_query_params['obs_date__le'] = datetime.now().strftime('%Y-%m-%d')

    # set datatype
    datatype = 'json'
    if raw_query_params.get('data_type'):
        datatype = raw_query_params['data_type']
        del raw_query_params['data_type']

    # limit to approved datasets whose most recent import task succeeded
    q = '''
        SELECT m.dataset_name
        FROM meta_master AS m
        LEFT JOIN celery_taskmeta AS c
          ON c.id = (
            SELECT id FROM celery_taskmeta
            WHERE task_id = ANY(m.result_ids)
            ORDER BY date_done DESC
            LIMIT 1
          )
        WHERE m.approved_status = 'true'
          AND c.status = 'SUCCESS'
    '''
    with engine.begin() as c:
        dataset_names = [d[0] for d in c.execute(q)]

    raw_query_params['dataset_name__in'] = ','.join(dataset_names)

    mt = MasterTable.__table__
    valid_query, query_clauses, resp, status_code = make_query(mt, raw_query_params)

    # check for valid output format
    if datatype not in VALID_DATA_TYPE:
        valid_query = False
        resp['meta']['status'] = 'error'
        resp['meta']['message'] = "'%s' is an invalid output format" % datatype
        resp = make_response(json.dumps(resp, default=dthandler), 400)
        resp.headers['Content-Type'] = 'application/json'

    # check for valid temporal aggregate
    if agg not in VALID_AGG:
        valid_query = False
        resp['meta']['status'] = 'error'
        resp['meta']['message'] = "'%s' is an invalid temporal aggregation" % agg
        resp = make_response(json.dumps(resp, default=dthandler), 400)
        resp.headers['Content-Type'] = 'application/json'

    if valid_query:
        time_agg = func.date_trunc(agg, mt.c['obs_date'])
        base_query = session.query(time_agg,
                                   func.count(mt.c['obs_date']),
                                   mt.c['dataset_name'])
        base_query = base_query.filter(mt.c['current_flag'] == True)
        for clause in query_clauses:
            base_query = base_query.filter(clause)
        base_query = base_query.group_by(mt.c['dataset_name'])\
                               .group_by(time_agg)\
                               .order_by(time_agg)
        values = [o for o in base_query.all()]

        # init from and to dates as python datetimes
        from_date = truncate(parse(raw_query_params['obs_date__ge']), agg)
        if 'obs_date__le' in raw_query_params.keys():
            to_date = parse(raw_query_params['obs_date__le'])
        else:
            to_date = datetime.now()

        # build the response
        results = sorted(values, key=itemgetter(2))
        for k, g in groupby(results, key=itemgetter(2)):
            d = {'dataset_name': k}
            items = []
            dense_matrix = []
            cursor = from_date
            v_index = 0
            dataset_values = list(g)
            while cursor <= to_date:
                if v_index < len(dataset_values) and \
                        dataset_values[v_index][0].replace(tzinfo=None) == cursor:
                    dense_matrix.append((cursor, dataset_values[v_index][1]))
                    v_index += 1
                else:
                    dense_matrix.append((cursor, 0))
                cursor = increment_datetime_aggregate(cursor, agg)

            dense_matrix = OrderedDict(dense_matrix)
            for k in dense_matrix:
                items.append({
                    'datetime': k,
                    'count': dense_matrix[k],
                })

            d['items'] = items
            resp['objects'].append(d)

        resp['meta']['query'] = raw_query_params
        loc = resp['meta']['query'].get('location_geom__within')
        if loc:
            resp['meta']['query']['location_geom__within'] = json.loads(loc)
        resp['meta']['query']['agg'] = agg
        resp['meta']['status'] = 'ok'

        if datatype == 'json':
            resp = make_response(json.dumps(resp, default=dthandler), status_code)
            resp.headers['Content-Type'] = 'application/json'
        elif datatype == 'csv':
            # response format:
            # temporal_group,dataset_name_1,dataset_name_2
            # 2014-02-24 00:00:00,235,653
            # 2014-03-03 00:00:00,156,624
            fields = ['temporal_group']
            for o in resp['objects']:
                fields.append(o['dataset_name'])

            csv_resp = []
            i = 0
            for k, g in groupby(resp['objects'], key=itemgetter('dataset_name')):
                l_g = list(g)[0]
                j = 0
                for row in l_g['items']:
                    # on the first iteration, populate the first column with temporal_groups
                    if i == 0:
                        csv_resp.append([row['datetime']])
                    csv_resp[j].append(row['count'])
                    j += 1
                i += 1

            csv_resp.insert(0, fields)
            csv_resp = make_csv(csv_resp)
            resp = make_response(csv_resp, 200)
            resp.headers['Content-Type'] = 'text/csv'
            filedate = datetime.now().strftime('%Y-%m-%d')
            resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate
    return resp
def period_beginning_year(datetime):
    ''' Sugar for :py:func:`datetime_truncate.truncate(datetime, 'year')` '''
    return truncate(datetime, 'year')
def get_report_summary(objectives: Iterator[Objective], unit: str, start: datetime,
                       end: datetime, current_span: opentracing.Span) -> List[dict]:
    summary = []

    start = truncate(start)

    for objective in objectives:
        days = collections.defaultdict(dict)

        if not objective.targets:
            current_span.log_kv({
                'objective_skipped': True,
                'objective': objective.id
            })
            continue

        current_span.log_kv({
            'objective_target_count': len(objective.targets),
            'objective_id': objective.id
        })

        # Instrument objective summary!
        objective_summary_span = opentracing.tracer.start_span(
            operation_name='report_objective_summary', child_of=current_span)
        objective_summary_span.set_tag('objective_id', objective.id)

        with objective_summary_span:
            for target in objective.targets:
                objective_summary_span.log_kv({
                    'target_id': target.id,
                    'indicator_id': target.indicator_id
                })

                ivs = (IndicatorValue.query
                       .filter(IndicatorValue.indicator_id == target.indicator_id,
                               IndicatorValue.timestamp >= start,
                               IndicatorValue.timestamp < end)
                       .order_by(IndicatorValue.timestamp))

                target_values_truncated = truncate_values(
                    ivs, parent_span=objective_summary_span)

                for truncated_date, target_values in target_values_truncated.items():
                    target_from = target.target_from or float('-inf')
                    target_to = target.target_to or float('inf')
                    target_count = len(target_values)
                    target_sum = sum(target_values)
                    breaches = target_count - len([
                        v for v in target_values
                        if v >= target_from and v <= target_to
                    ])

                    days[truncated_date.isoformat()][target.indicator.name] = {
                        'aggregation': target.indicator.aggregation,
                        'avg': target_sum / target_count,
                        'breaches': breaches,
                        'count': target_count,
                        'max': max(target_values),
                        'min': min(target_values),
                        'sum': target_sum,
                    }

        summary.append({
            'title': objective.title,
            'description': objective.description,
            'id': objective.id,
            'targets': [{
                'from': t.target_from,
                'to': t.target_to,
                'sli_name': t.indicator.name,
                'unit': t.indicator.unit,
                'aggregation': t.indicator.aggregation
            } for t in objective.targets],
            'days': days
        })

    return summary
def test_truncate_to_second(self):
    self.assertEqual(
        truncate(self.default_dt, 'second'),
        self.default_dt.replace(microsecond=0))
# Create the final output file
output_df = pd.merge(
    looker_combined_df,
    chartable_total_df,
    on=[
        'Show Name', 'Host Name', 'Network', 'Chartable Tracking Y/N', 'Genre',
        'Age Demo', 'Ad Type', 'Content Type', 'Test/Core', 'Placement',
        'Format', 'Personally Endorsed', 'Downloads', 'Client Rate',
        'Broadcast Week', 'Actual Drop Day', 'Percent Male', 'Percent Female',
        'next_drop_date'
    ],
    how='inner')

### VIEWS: Monthly Calendar View ###
df_budget['budget_spend_month'] = df_budget['Actual Drop Day'].apply(
    lambda x: truncate(x, 'month'))
df_budget_grouped = df_budget[
    df_budget['Broadcast Week'] <= cutoff_date].groupby(
        ['Show Name', 'budget_spend_month', 'UTM']).sum()[['Client Rate']].reset_index()

# Process leads dataframe
df_leads['created_week'] = df_leads['Lead Created Date'].apply(
    lambda x: truncate(x, 'week'))
df_leads['created_month'] = df_leads['Lead Created Date'].apply(
    lambda x: truncate(x, 'month'))
df_leads.rename({'Lead Created Date': 'event_date'}, axis=1, inplace=True)

# Process purchases dataframe
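# Stand-alone illustration of the apply/truncate pattern used above, on a
# tiny made-up frame (the column names here are illustrative only):
import pandas as pd
from datetime_truncate import truncate

df = pd.DataFrame({'drop_day': pd.to_datetime(['2021-03-05', '2021-03-19'])})
df['spend_month'] = df['drop_day'].apply(lambda x: truncate(x, 'month'))
# both rows map to Timestamp('2021-03-01 00:00:00')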
def test_truncate_to_day(self):
    self.assertEqual(
        truncate(self.default_dt, 'day'),
        self.default_dt.replace(hour=0, minute=0, second=0, microsecond=0))
def period_beginning_week(datetime):
    ''' Sugar for :py:func:`datetime_truncate.truncate(datetime, 'week')` '''
    return truncate(datetime, 'week')
def period_beginning_month(datetime):
    ''' Sugar for :py:func:`datetime_truncate.truncate(datetime, 'month')` '''
    return truncate(datetime, 'month')
def process_file(self, file):
    file.instant.datetime = truncate(file.instant.datetime, self.truncate_to)
    return file
def period_beginning_second(datetime):
    ''' Sugar for :py:func:`datetime_truncate.truncate(datetime, 'second')` '''
    return truncate(datetime, 'second')
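# The period_beginning_* helpers above are thin wrappers; for example:
from datetime import datetime

dt_ = datetime(2021, 8, 14, 10, 30, 45, 123456)
print(period_beginning_week(dt_))    # 2021-08-09 00:00:00 (the Monday)
print(period_beginning_second(dt_))  # 2021-08-14 10:30:45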
def test_truncate_to_quarter(self):
    self.assertEqual(
        truncate(self.default_dt.replace(month=2), 'quarter'),
        self.default_dt.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0))
    self.assertEqual(
        truncate(self.default_dt.replace(month=6), 'quarter'),
        self.default_dt.replace(month=4, day=1, hour=0, minute=0, second=0, microsecond=0))
    self.assertEqual(
        truncate(self.default_dt, 'quarter'),
        self.default_dt.replace(month=7, day=1, hour=0, minute=0, second=0, microsecond=0))
    self.assertEqual(
        truncate(self.default_dt.replace(month=10), 'quarter'),
        self.default_dt.replace(month=10, day=1, hour=0, minute=0, second=0, microsecond=0))
    self.assertEqual(
        truncate_quarter(self.default_dt.replace(month=2)),
        self.default_dt.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0))
    self.assertEqual(
        truncate_quarter(self.default_dt.replace(month=6)),
        self.default_dt.replace(month=4, day=1, hour=0, minute=0, second=0, microsecond=0))
    self.assertEqual(
        truncate_quarter(self.default_dt),
        self.default_dt.replace(month=7, day=1, hour=0, minute=0, second=0, microsecond=0))
    self.assertEqual(
        truncate_quarter(self.default_dt.replace(month=10)),
        self.default_dt.replace(month=10, day=1, hour=0, minute=0, second=0, microsecond=0))
def detail_aggregate():
    raw_query_params = request.args.copy()
    agg, datatype, queries = parse_join_query(raw_query_params)
    if not agg:
        agg = 'day'

    # if no obs_date given, default to >= 90 days ago
    if not raw_query_params.get('obs_date__ge'):
        ninety_days_ago = datetime.now() - timedelta(days=90)
        raw_query_params['obs_date__ge'] = ninety_days_ago.strftime('%Y-%m-%d')
    if not raw_query_params.get('obs_date__le'):
        raw_query_params['obs_date__le'] = datetime.now().strftime('%Y-%m-%d')

    mt = MasterTable.__table__
    valid_query, base_clauses, resp, status_code = make_query(mt, queries['base'])

    # check for valid output format
    if datatype not in VALID_DATA_TYPE:
        valid_query = False
        resp['meta']['status'] = 'error'
        resp['meta']['message'] = "'%s' is an invalid output format" % datatype
        resp = make_response(json.dumps(resp, default=dthandler), 400)
        resp.headers['Content-Type'] = 'application/json'

    # check for valid temporal aggregate
    if agg not in VALID_AGG:
        valid_query = False
        resp['meta']['status'] = 'error'
        resp['meta']['message'] = "'%s' is an invalid temporal aggregation" % agg
        resp = make_response(json.dumps(resp, default=dthandler), 400)
        resp.headers['Content-Type'] = 'application/json'

    if valid_query:
        time_agg = func.date_trunc(agg, mt.c['obs_date'])
        base_query = session.query(time_agg, func.count(mt.c.dataset_row_id))
        dname = raw_query_params.get('dataset_name')
        try:
            dataset = Table('dat_%s' % dname, Base.metadata,
                            autoload=True, autoload_with=engine,
                            extend_existing=True)
            valid_query, detail_clauses, resp, status_code = make_query(
                dataset, queries['detail'])
        except Exception:
            valid_query = False
            resp['meta']['status'] = 'error'
            if not dname:
                resp['meta']['message'] = "'dataset_name' is required"
            else:
                resp['meta']['message'] = "unable to find dataset '%s'" % dname
            resp = make_response(json.dumps(resp, default=dthandler), 400)
            resp.headers['Content-Type'] = 'application/json'

        if valid_query:
            pk = [p.name for p in dataset.primary_key][0]
            base_query = base_query.join(dataset,
                                         mt.c.dataset_row_id == dataset.c[pk])
            for clause in base_clauses:
                base_query = base_query.filter(clause)
            for clause in detail_clauses:
                base_query = base_query.filter(clause)
            values = [r for r in base_query.group_by(time_agg)
                                           .order_by(time_agg).all()]

            # init from and to dates as python datetimes
            from_date = truncate(parse(raw_query_params['obs_date__ge']), agg)
            if 'obs_date__le' in raw_query_params.keys():
                to_date = parse(raw_query_params['obs_date__le'])
            else:
                to_date = datetime.now()

            items = []
            dense_matrix = []
            cursor = from_date
            v_index = 0
            while cursor <= to_date:
                if v_index < len(values) and \
                        values[v_index][0].replace(tzinfo=None) == cursor:
                    dense_matrix.append((cursor, values[v_index][1]))
                    v_index += 1
                else:
                    dense_matrix.append((cursor, 0))
                cursor = increment_datetime_aggregate(cursor, agg)

            dense_matrix = OrderedDict(dense_matrix)
            for k in dense_matrix:
                items.append({
                    'datetime': k,
                    'count': dense_matrix[k],
                })

            if datatype == 'json':
                resp['objects'] = items
                resp['meta']['status'] = 'ok'
                resp['meta']['query'] = raw_query_params
                loc = resp['meta']['query'].get('location_geom__within')
                if loc:
                    resp['meta']['query']['location_geom__within'] = json.loads(loc)
                resp['meta']['query']['agg'] = agg
                resp = make_response(json.dumps(resp, default=dthandler), status_code)
                resp.headers['Content-Type'] = 'application/json'
            elif datatype == 'csv':
                outp = StringIO()
                writer = csv.DictWriter(outp, fieldnames=items[0].keys())
                writer.writeheader()
                writer.writerows(items)
                resp = make_response(outp.getvalue(), status_code)
                resp.headers['Content-Type'] = 'text/csv'
                filedate = datetime.now().strftime('%Y-%m-%d')
                resp.headers['Content-Disposition'] = 'attachment; filename=%s.csv' % filedate
    return resp
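# The zero-fill walk used in detail_aggregate, isolated: given sparse
# (timestamp, count) pairs sorted by time, emit one row per step between
# from_date and to_date. increment_datetime_aggregate is the project's own
# helper; a fixed daily step stands in for it here.
from datetime import datetime, timedelta

def dense_fill(values, from_date, to_date, step=timedelta(days=1)):
    out, cursor, i = [], from_date, 0
    while cursor <= to_date:
        if i < len(values) and values[i][0] == cursor:
            out.append((cursor, values[i][1]))
            i += 1
        else:
            out.append((cursor, 0))
        cursor += step
    return out

print(dense_fill([(datetime(2021, 1, 2), 7)],
                 datetime(2021, 1, 1), datetime(2021, 1, 3)))
# -> counts of 0, 7, 0 for Jan 1, 2, 3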
def query(self, plenario_session_state):
    dataset = plenario_session_state.get_dataset()
    if dataset != "dat_master":
        raise Exception("Querying currently only supported on dat_master!")

    ngon = plenario_session_state.get_ngon()
    if ngon is None:
        raise Exception("You must have an N-gon selected!")
    if len(ngon) != 5:
        raise Exception("Querying currently only supported for a 5-gon!")

    start_date = plenario_session_state.get_start_date()
    end_date = plenario_session_state.get_end_date()
    date_aggr = plenario_session_state.get_date_aggr()

    p0 = ngon[0]
    p1 = ngon[1]
    p2 = ngon[2]
    p3 = ngon[3]
    p4 = ngon[4]

    # hardcoded test points that override the selected n-gon
    p0 = (41.88, -87.64)
    p1 = (41.89, -87.64)
    p2 = (41.89, -87.63)
    p3 = (41.88, -87.63)
    p4 = (41.88, -87.635)

    min_gh = geohash.encode(-89.9, -179.9)
    max_gh = geohash.encode(89.9, 179.9)

    cells = []
    for cell in self._conn.batch_scan("dat_master", numthreads=10,
                                      scanranges=[Range(srow=min_gh, erow=max_gh)]):
        cells.append(cell)

    # Group key-value pairs that belong to the same entry
    rows = {}
    for cell in cells:
        if cell.cq not in rows:
            rows[cell.cq] = {}
            rows[cell.cq]['ghash'] = cell.row
        rows[cell.cq][cell.cf] = cell.val

    # Filter out entries that are not in the temporal range
    start_date = datetime.strptime(start_date, "%m/%d/%Y")
    end_date = datetime.strptime(end_date, "%m/%d/%Y")
    rows_filtered = {}
    for key, val in rows.iteritems():
        obs_date = datetime.strptime(val['obs_date'], "%Y-%m-%d %H:%M:%S")
        if start_date <= obs_date and obs_date <= end_date:
            rows_filtered[key] = val

    # Filter out entries that are not in the spatial range, i.e. within the polygon
    rows = rows_filtered
    rows_filtered = {}
    # poly = Polygon([p0, p1, p2, p3, p4])
    # note: the whole-world polygon below disables the n-gon filter
    poly = Polygon([(-90, -180), (90, -180), (90, 180), (-90, 180)])
    for key, val in rows.iteritems():
        ghash = val['ghash']
        pt = Point(geohash.decode(ghash))
        if poly.contains(pt):
            rows_filtered[key] = val

    # Truncate date as specified by date_aggr and count group sizes
    rows = rows_filtered
    rows_filtered = {}
    for key, val in rows.iteritems():
        date = truncate(datetime.strptime(val['obs_date'], "%Y-%m-%d %H:%M:%S"),
                        date_aggr)
        tup = (val['dataset_name'], date.isoformat())
        if tup not in rows_filtered:
            rows_filtered[tup] = 1
        else:
            rows_filtered[tup] += 1

    return rows_filtered
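# Minimal sketch of the geohash + point-in-polygon filtering idea above,
# assuming the python-geohash package (geohash.encode/decode) and shapely:
import geohash
from shapely.geometry import Point, Polygon

poly = Polygon([(41.88, -87.64), (41.89, -87.64), (41.89, -87.63), (41.88, -87.63)])
gh = geohash.encode(41.885, -87.635)             # geohash of a point inside the box
print(poly.contains(Point(geohash.decode(gh))))  # True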
import os
from csv import DictReader
from datetime import datetime, timedelta
from datetime_truncate import truncate
from collections import defaultdict
from operator import itemgetter
from numpy import array, zeros
# parse_date is assumed to come from dateutil, e.g.:
# from dateutil.parser import parse as parse_date

dates = defaultdict(int)
years = set()
for fn in os.listdir('data'):
    for d in DictReader(open('data/' + fn, 'r'), delimiter=','):
        if d['CATEGORY'] != 'LARCENY THEFT':
            continue
        d = truncate(parse_date(d['INCIDENT_DATE']), 'day')
        if d.year < 2009 or d.year > 2013:
            continue
        dates[d] += 1
        years.add(d.year)

print '\t'.join(['date'] + map(str, list(sorted(years))))

d = datetime(2013, 1, 1)
while d <= datetime(2014, 1, 1):
    week_begin = d
    stats = zeros(len(years))
    for _ in range(7):
        stats += array([
def rebin(input_generator,
          start_time=str(datetime.datetime(1970, 1, 1)),
          stop_time=str(datetime.datetime(2020, 1, 1)),
          binning_unit='hours',
          n_binning_unit=1,
          **kwargs):
    """
    This function must be passed the following positional argument:
        input_generator

    Optional keyword arguments are:
        binning_unit
        n_binning_unit
        stop_time
        start_time

    The 'input_generator' object must yield tuples like:
        [interval start time], [interval duration in sec], [interval count]

    The function returns a list of tuples like:
        [new interval start time], [new interval duration in sec], [new interval count]
    """
    logger = logging.getLogger("rebin")

    start_time = dt_parser(start_time)
    stop_time = dt_parser(stop_time)

    # these are just for keeping track of what range of date/times we observe in the data
    max_stop_time = datetime.datetime(1970, 1, 1)
    min_start_time = datetime.datetime(2020, 1, 1)

    input_data = []

    # put the data into a list of (TimeBucket, count) tuples
    for line in input_generator:
        try:
            this_start_time = dt_parser(line[0])
        except ValueError:
            continue
        dt = datetime.timedelta(seconds=int(float(line[1])))
        this_stop_time = this_start_time + dt
        if this_stop_time > stop_time:
            continue
        if this_start_time < start_time:
            continue
        time_bucket = TimeBucket(this_start_time, this_stop_time)
        count = line[2]
        input_data.append((time_bucket, count))

        if this_stop_time > max_stop_time:
            max_stop_time = this_stop_time
        if this_start_time < min_start_time:
            min_start_time = this_start_time

    input_data_sorted = sorted(input_data)

    # make a grid with appropriate bin size
    grid_start_time = datetime_truncate.truncate(min_start_time, binning_unit.rstrip('s'))
    grid_stop_time = datetime_truncate.truncate(max_stop_time, binning_unit.rstrip('s'))
    grid_dt = datetime.timedelta(**{binning_unit: int(n_binning_unit)})

    tb_stop_time = grid_start_time + grid_dt
    tb = TimeBucket(grid_start_time, tb_stop_time)

    # make list of TimeBuckets for bins
    grid = []
    while tb.stop_time <= grid_stop_time:
        # logger.debug("{}".format(tb))
        grid.append(tb)
        tb_start_time = tb.stop_time
        tb_stop_time = tb_start_time + grid_dt
        tb = TimeBucket(tb_start_time, tb_stop_time)
    grid.append(tb)

    # add data to a dictionary with keys mapped to the grid indices
    output_data = collections.defaultdict(float)
    for input_tb, input_count in input_data_sorted:
        logger.debug("input. TB: {}, count: {}".format(input_tb, input_count))
        for grid_tb in grid:
            if input_tb in grid_tb:
                idx = grid.index(grid_tb)
                output_data[idx] += float(input_count)
                break
            elif input_tb.intersects(grid_tb):
                # assign partial count of input_tb to grid_tb
                idx_lower = grid.index(grid_tb)
                frac_lower = input_tb.get_fraction_overlapped_by(grid_tb)
                output_data[idx_lower] += (float(input_count) * frac_lower)
                try:
                    idx = idx_lower + 1
                    frac = input_tb.get_fraction_overlapped_by(grid[idx])
                    while frac > 0:
                        output_data[idx] += (frac * float(input_count))
                        idx += 1
                        frac = input_tb.get_fraction_overlapped_by(grid[idx])
                except IndexError:
                    pass
                break

    # put data back into a sorted list of tuples
    sorted_output_data = []

    # use these to strip off leading and trailing zero-count entries
    prev_count = 0
    last_non_zero_ct_idx = -1

    # the grid is already time ordered, and the output_data are indexed
    for idx, dt in enumerate(grid):
        if idx in output_data:
            count = output_data[idx]
            last_non_zero_ct_idx = idx
        else:
            count = 0
        if count != 0 or prev_count != 0:
            if count > 0:
                trimmed_count = int(count)
                # trimmed_count = round(count, -int(floor(log10(count))) + 1)
            else:
                trimmed_count = 0
            sorted_output_data.append(
                (str(dt.start_time), dt.size().total_seconds(), trimmed_count))
        prev_count = count
    sorted_output_data = sorted_output_data[:last_non_zero_ct_idx + 1]

    # return the data structure
    return sorted_output_data