def get(self, request, pk, format=None):
    """Return the full text dump of timeseries *pk* as a text/plain response."""
    series = Timeseries(id=int(pk))
    # Check permissions before touching the database contents.
    self.check_object_permissions(request, series)
    series.read_from_db(connection)
    buf = StringIO()
    series.write(buf)
    return HttpResponse(buf.getvalue(), content_type="text/plain")
def handle(self, *args, **options):
    # Management command: export one user's 15-minute period timeseries to
    # data/timeseries_<username>.csv, with a running 90th percentile column.
    try:
        username = args[0]
    except IndexError:
        print "I need a username!"
        return -1
    try:
        if username:
            user = User.objects.get(username=username)
            out = []
            print "output for {x}".format(x=username)
            household = Household.objects.get(user=user)
            timeseries = household \
                .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                                variable__id=VAR_PERIOD)
            series = TSeries(id=timeseries.id)
            series.read_from_db(db.connection)
            timestamps = sorted(series.keys())
            values = np.array([])
            for ts in timestamps:
                val = series[ts]
                # Skip missing and zero records entirely.
                if isnan(val) or val == 0:
                    continue
                # NOTE(review): np.append copies the array and the percentile
                # is recomputed over all values-so-far on every iteration, so
                # this loop is quadratic; the running percentile appears to be
                # intentional (each row sees only history up to that point).
                values = np.append(values, val)
                perc = np.percentile(values, 90)
                out.append([ts, val, perc])
            _outfile = "timeseries_%s.csv" % username
            _path = "data/"
            with open(path.join(_path, _outfile), 'w') as of:
                a = csv.writer(of, delimiter=',', quotechar='"',
                               quoting=csv.QUOTE_ALL)
                a.writerows(out)
    except Exception as e:
        # Broad catch: report and swallow any failure (command-line tool).
        print "failed with %s" % repr(e)
def update_ts_temp_file(cache_dir, connection, id):
    # Maintain a cached on-disk text dump (<id>.hts) of a timeseries.
    # Appends only the new records when possible; otherwise rewrites the
    # whole file atomically via a tempfile + move.
    # NOTE: parameter `id` shadows the builtin of the same name.
    full_rewrite = False
    afilename = os.path.join(cache_dir, '%d.hts'%(id,))
    if os.path.exists(afilename):
        # A file smaller than 3 bytes cannot hold a record; force a rewrite.
        if os.path.getsize(afilename)<3:
            full_rewrite = True
    #Update the file in the case of logged data, if this is possible
    if os.path.exists(afilename) and not full_rewrite:
        with open(afilename, 'r') as fileobject:
            # Read the last line of the cache to learn the newest cached date.
            xr = xreverse(fileobject, 2048)
            line = xr.next()
        lastdate = datetime_from_iso(line.split(',')[0])
        ts = Timeseries(id)
        # bottom_only: only the tail of the series is needed for comparison.
        ts.read_from_db(connection, bottom_only=True)
        if len(ts)>0:
            db_start, db_end = ts.bounding_dates()
            if db_start>lastdate:
                # The cached tail predates the DB data entirely; start over.
                full_rewrite = True
            elif db_end>lastdate:
                # DB has newer records: append everything after lastdate.
                lastindex = ts.index(lastdate)
                with open(afilename, 'a') as fileobject:
                    ts.write(fileobject, start=ts.keys()[lastindex+1])
    #Check for tmmp file or else create it
    if not os.path.exists(afilename) or full_rewrite:
        ts = Timeseries(id)
        ts.read_from_db(connection)
        if not os.path.exists(cache_dir):
            os.mkdir(cache_dir)
        # Write to a tempfile in the same directory, then move into place so
        # readers never observe a partially written cache file.
        tempfile_handle, tempfile_name = tempfile.mkstemp(dir=cache_dir)
        with os.fdopen(tempfile_handle, 'w') as afile:
            ts.write(afile)
        shutil.move(tempfile_name, afilename)
def update_ts_temp_file(cache_dir, connection, id):
    # Maintain a cached on-disk text dump (<id>.hts) of a timeseries:
    # append new records when possible, else rewrite atomically via a
    # tempfile + move.  Variant using ropen() for reverse reading.
    # NOTE: parameter `id` shadows the builtin of the same name.
    full_rewrite = False
    afilename = os.path.join(cache_dir, '%d.hts' % (id, ))
    if os.path.exists(afilename):
        # Files under 3 bytes cannot contain a record; force full rewrite.
        if os.path.getsize(afilename) < 3:
            full_rewrite = True
    #Update the file in the case of logged data, if this is possible
    if os.path.exists(afilename) and not full_rewrite:
        with ropen(afilename) as fileobject:
            # ropen yields lines last-first; first readline() is the newest.
            line = fileobject.readline()
        lastdate = datetime_from_iso(line.split(',')[0])
        ts = Timeseries(id)
        # bottom_only: only the series tail is needed for the comparison.
        ts.read_from_db(connection, bottom_only=True)
        if len(ts) > 0:
            db_start, db_end = ts.bounding_dates()
            if db_start > lastdate:
                # The cached tail predates all DB data; start over.
                full_rewrite = True
            elif db_end > lastdate:
                # Newer DB records exist: append everything after lastdate.
                lastindex = ts.index(lastdate)
                with open(afilename, 'a') as fileobject:
                    ts.write(fileobject, start=ts.keys()[lastindex + 1])
    #Check for tmmp file or else create it
    if not os.path.exists(afilename) or full_rewrite:
        ts = Timeseries(id)
        ts.read_from_db(connection)
        if not os.path.exists(cache_dir):
            os.mkdir(cache_dir)
        # Atomic replace: write beside the target, then move into place.
        tempfile_handle, tempfile_name = tempfile.mkstemp(dir=cache_dir)
        with os.fdopen(tempfile_handle, 'w') as afile:
            ts.write(afile)
        shutil.move(tempfile_name, afilename)
def process_dma(dma, bounds):
    """Process DMA timeseries by aggregating all the contained households
    in the DMA.

    For every DMA series (hourly/daily/monthly), sums the matching household
    series over [start, end]; series whose name contains 'capita' are divided
    by household occupancy and averaged over contributing households, then the
    result is written back to the database.
    """
    print "Process DMA %s" % (dma,)
    for dma_series in dma.timeseries.all():
        print "Process series %s" % (dma_series,)
        per_capita = dma_series.name.find('capita') > -1
        variable = dma_series.variable.id
        if dma_series.time_step.id == TSTEP_FIFTEEN_MINUTES:
            start = bounds[variable]['fifteen_start']
            end = bounds[variable]['fifteen_end']
            # Fifteen minutes process is DEACTIVATED!
            # We don't process fifteen minutes, it takes too long,
            # maybe we reactivate later after we optimize the
            # algorithm to process only new records
            continue
        elif dma_series.time_step.id == TSTEP_HOURLY:
            start = bounds[variable]['hourly_start']
            end = bounds[variable]['hourly_end']
        elif dma_series.time_step.id == TSTEP_DAILY:
            start = bounds[variable]['daily_start']
            end = bounds[variable]['daily_end']
        elif dma_series.time_step.id == TSTEP_MONTHLY:
            start = bounds[variable]['monthly_start']
            end = bounds[variable]['monthly_end']
        # NOTE(review): there is no else branch above — an unexpected
        # time_step.id would reuse start/end from the previous iteration
        # (or raise NameError on the first) — confirm all steps are covered.
        time_step = ReadTimeStep(dma_series.id, dma_series)
        tseries = TSeries(time_step = time_step, id=dma_series.id)
        nhseries = TSeries(time_step = time_step)  # per-step household count
        # Pass 1: zero-initialise accumulator and counter over [start, end].
        pointer = start
        while pointer<=end:
            tseries[pointer] = 0
            nhseries[pointer] = 0
            pointer = tseries.time_step.next(pointer)
        # Pass 2: accumulate each household's series into the DMA series.
        for household in dma.households.all():
            for h_series_db in household.timeseries.filter(
                    time_step__id=dma_series.time_step.id,
                    variable__id=variable):
                hseries = TSeries(id=h_series_db.id)
                hseries.read_from_db(db.connection)
                pointer = start
                while pointer<=end:
                    try:
                        v = hseries[pointer]
                        if math.isnan(v):
                            pointer = tseries.time_step.next(pointer)
                            continue
                        if per_capita:
                            v = v/float(household.num_of_occupants)
                        tseries[pointer] += v
                        nhseries[pointer] += 1
                    except KeyError:
                        # Household has no record at this step; skip it.
                        v = 0
                    pointer = tseries.time_step.next(pointer)
        # Pass 3: per-capita series become the mean over households present.
        pointer = start
        while pointer<=end:
            if per_capita and nhseries[pointer]>0:
                tseries[pointer] = tseries[pointer] / nhseries[pointer]
            pointer = tseries.time_step.next(pointer)
        tseries.write_to_db(db.connection, commit=True)#False)
def process_dma(dma, bounds):
    """Process DMA timeseries by aggregating all the contained households
    in the DMA.

    For each DMA series (hourly/daily/monthly) the matching household series
    are summed over [start, end]; 'capita' series are divided by occupancy
    and averaged over the households that contributed, then written to db.
    """
    print "Process DMA %s" % (dma, )
    for dma_series in dma.timeseries.all():
        print "Process series %s" % (dma_series, )
        per_capita = dma_series.name.find('capita') > -1
        variable = dma_series.variable.id
        if dma_series.time_step.id == TSTEP_FIFTEEN_MINUTES:
            start = bounds[variable]['fifteen_start']
            end = bounds[variable]['fifteen_end']
            # Fifteen minutes process is DEACTIVATED!
            # We don't process fifteen minutes, it takes too long,
            # maybe we reactivate later after we optimize the
            # algorithm to process only new records
            continue
        elif dma_series.time_step.id == TSTEP_HOURLY:
            start = bounds[variable]['hourly_start']
            end = bounds[variable]['hourly_end']
        elif dma_series.time_step.id == TSTEP_DAILY:
            start = bounds[variable]['daily_start']
            end = bounds[variable]['daily_end']
        elif dma_series.time_step.id == TSTEP_MONTHLY:
            start = bounds[variable]['monthly_start']
            end = bounds[variable]['monthly_end']
        # NOTE(review): no else branch — an unrecognised time step would
        # reuse start/end from the previous loop iteration (or NameError).
        time_step = ReadTimeStep(dma_series.id, dma_series)
        tseries = TSeries(time_step=time_step, id=dma_series.id)
        nhseries = TSeries(time_step=time_step)  # per-step household count
        # Zero-initialise accumulator and counter over the window.
        pointer = start
        while pointer <= end:
            tseries[pointer] = 0
            nhseries[pointer] = 0
            pointer = tseries.time_step.next(pointer)
        # Accumulate every matching household series into the DMA series.
        for household in dma.households.all():
            for h_series_db in household.timeseries.filter(
                    time_step__id=dma_series.time_step.id,
                    variable__id=variable):
                hseries = TSeries(id=h_series_db.id)
                hseries.read_from_db(db.connection)
                pointer = start
                while pointer <= end:
                    try:
                        v = hseries[pointer]
                        if math.isnan(v):
                            pointer = tseries.time_step.next(pointer)
                            continue
                        if per_capita:
                            v = v / float(household.num_of_occupants)
                        tseries[pointer] += v
                        nhseries[pointer] += 1
                    except KeyError:
                        # No record at this timestamp for this household.
                        v = 0
                    pointer = tseries.time_step.next(pointer)
        # Per-capita series: convert the sum into a mean over households.
        pointer = start
        while pointer <= end:
            if per_capita and nhseries[pointer] > 0:
                tseries[pointer] = tseries[pointer] / nhseries[pointer]
            pointer = tseries.time_step.next(pointer)
        tseries.write_to_db(db.connection, commit=True) #False)
def testUploadTsDataUnauthenticated(self):
    """An anonymous PUT of timeseries records is denied (403) and stores nothing."""
    payload = encode_multipart(
        BOUNDARY, {'timeseries_records': '2012-11-06 18:17,20,\n'})
    response = self.client.put(
        "/api/tsdata/1/", payload, content_type=MULTIPART_CONTENT)
    stored = Timeseries(1)
    stored.read_from_db(connection)
    self.assertEqual(response.status_code, 403)
    self.assertEqual(len(stored), 0)
def testUploadTsDataUnauthenticated(self):
    """Uploading timeseries data without logging in must be rejected with 403
    and must leave the target series empty."""
    body = encode_multipart(BOUNDARY,
                            {"timeseries_records": "2012-11-06 18:17,20,\n"})
    response = self.client.put("/api/tsdata/1/", body,
                               content_type=MULTIPART_CONTENT)
    series = Timeseries(1)
    series.read_from_db(connection)
    self.assertEqual(response.status_code, 403)
    self.assertEqual(len(series), 0)
def testUploadTsDataGarbage(self):
    """A record with an unparsable date yields 400 and stores nothing."""
    self.assert_(self.client.login(username='******', password='******'))
    garbage = encode_multipart(
        BOUNDARY, {'timeseries_records': '2012-aa-06 18:17,20,\n'})
    response = self.client.put(
        "/api/tsdata/1/", garbage, content_type=MULTIPART_CONTENT)
    stored = Timeseries(1)
    stored.read_from_db(connection)
    self.assertEqual(response.status_code, 400)
    self.assertEqual(len(stored), 0)
    self.client.logout()
def testUploadTsDataGarbage(self):
    """Garbage timestamps in the upload are rejected with 400; the series
    stays empty."""
    self.assert_(self.client.login(username='******', password='******'))
    url = "/api/tsdata/{}/".format(self.timeseries1.id)
    payload = encode_multipart(
        BOUNDARY, {'timeseries_records': '2012-aa-06 18:17,20,\n'})
    response = self.client.put(url, payload,
                               content_type=MULTIPART_CONTENT)
    self.assertEqual(response.status_code, 400)
    stored = Timeseries(self.timeseries1.id)
    stored.read_from_db(connection)
    self.assertEqual(len(stored), 0)
    self.client.logout()
def testUploadTsDataGarbage(self):
    """Uploading a record whose date cannot be parsed must return 400 and
    write no data."""
    self.assert_(self.client.login(username="******", password="******"))
    bad_body = encode_multipart(
        BOUNDARY, {"timeseries_records": "2012-aa-06 18:17,20,\n"})
    response = self.client.put("/api/tsdata/1/", bad_body,
                               content_type=MULTIPART_CONTENT)
    series = Timeseries(1)
    series.read_from_db(connection)
    self.assertEqual(response.status_code, 400)
    self.assertEqual(len(series), 0)
    self.client.logout()
def testUploadTsDataAsWrongUser(self):
    """Uploading to someone else's timeseries is denied (403); nothing is stored."""
    self.assert_(self.client.login(username="******", password="******"))
    body = encode_multipart(
        BOUNDARY, {"timeseries_records": "2012-11-06 18:17,20,\n"})
    response = self.client.put("/api/tsdata/1/", body,
                               content_type=MULTIPART_CONTENT)
    series = Timeseries(1)
    series.read_from_db(connection)
    self.assertEqual(response.status_code, 403)
    self.assertEqual(len(series), 0)
    self.client.logout()
def testUploadTsDataAsWrongUser(self):
    """A logged-in user who does not own the series gets 403 and the series
    stays empty."""
    self.assert_(self.client.login(username='******', password='******'))
    url = "/api/tsdata/{}/".format(self.timeseries1.id)
    payload = encode_multipart(
        BOUNDARY, {'timeseries_records': '2012-11-06 18:17,20,\n'})
    response = self.client.put(url, payload,
                               content_type=MULTIPART_CONTENT)
    stored = Timeseries(self.timeseries1.id)
    stored.read_from_db(connection)
    self.assertEqual(response.status_code, 403)
    self.assertEqual(len(stored), 0)
    self.client.logout()
def testUploadTsDataAsWrongUser(self):
    """PUTting records as a non-owner must be refused with 403 and must not
    modify the timeseries."""
    self.assert_(self.client.login(username='******', password='******'))
    upload = encode_multipart(
        BOUNDARY, {'timeseries_records': '2012-11-06 18:17,20,\n'})
    response = self.client.put(
        "/api/tsdata/1/", upload, content_type=MULTIPART_CONTENT)
    stored = Timeseries(1)
    stored.read_from_db(connection)
    self.assertEqual(response.status_code, 403)
    self.assertEqual(len(stored), 0)
    self.client.logout()
def process():
    # Estimate each household's occupancy from its average daily water
    # consumption and persist it on the Household record.
    for household in Household.objects.all():
        daily_series_db = household.timeseries.get(time_step__id=TSTEP_DAILY)
        series = TSeries(id=daily_series_db.id)
        series.read_from_db(db.connection)
        # presumably converts m3 to litres before dividing by the per-person
        # average — TODO confirm the units of series.average()
        m = 1000.000 * series.average()
        if math.isnan(m):
            continue
        # At least one occupant, otherwise round to the nearest integer.
        num_of_occupants = max(1, int(round(m / AVERAGE_UNIT_WATER_CONSUMPTION)))
        print 'Household with id=%s, average daily consumption %.1f, '\
            'number of occupants set to %s'%(household.id, m, num_of_occupants,)
        household.num_of_occupants = num_of_occupants
        household.save()
def process():
    # Derive each household's number of occupants from its mean daily water
    # consumption and save the estimate on the Household row.
    for household in Household.objects.all():
        daily_series_db = household.timeseries.get(
            time_step__id=TSTEP_DAILY)
        series = TSeries(id=daily_series_db.id)
        series.read_from_db(db.connection)
        # presumably m3 -> litres conversion — TODO confirm series units
        m = 1000.000*series.average()
        if math.isnan(m):
            continue
        # Clamp to a minimum of one occupant.
        num_of_occupants = max(1, int(round(m/AVERAGE_UNIT_WATER_CONSUMPTION)))
        print 'Household with id=%s, average daily consumption %.1f, '\
            'number of occupants set to %s'%(household.id, m, num_of_occupants,)
        household.num_of_occupants = num_of_occupants
        household.save()
def has_burst(household):
    """Detect a possible burst in a household's 15-minute water series.

    Builds per-day maxima from all but the last ~100 records, then flags any
    of the recent (~last 100) non-zero values that exceed the 90th percentile
    of those daily maxima.  Returns (consumption, timestamp-string) for the
    first such value, or (0, 0) when none is found or the household is not
    a 'GR' one.

    NOTE(review): the original docstring said "We won't be using this
    algorithm any more" — apparently copy-pasted from has_burst_old; confirm
    whether this variant is actually live.
    """
    name = household.user.username
    if not name.startswith('GR'):
        return 0, 0
    timeseries = household \
        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                        variable__id=VAR_PERIOD)
    series = TSeries(id=timeseries.id)
    series.read_from_db(db.connection)
    timestamps = sorted(series.keys())
    today = []  # all today's values
    daily_maxes = {}  # date -> max non-zero value seen that day
    for i in range(1, len(timestamps)):
        ts = timestamps[i]
        prev_ts = timestamps[i - 1]
        date = ts.date()
        # if previous value is NaN we don't take this value into consideration
        # Because it might have all consumption of all the previous NaN times
        val = series[ts]
        prev_val = series[prev_ts]
        if isnan(prev_val):
            continue
        if i < len(timestamps) - 100:
            # Historical part: accumulate the daily maxima.
            if not isnan(val) and not val == 0:
                daily_max = daily_maxes.get(date, 0)
                if val > daily_max:
                    daily_maxes[date] = val
        else:
            # Recent part (last ~100 records): candidates for a burst.
            tm = "%s-%s-%s %s:%s" % (ts.year, ts.month, ts.day,
                                     ts.time().hour, ts.time().minute)
            if not isnan(val) and not val == 0:
                today.append((val, tm))
    if daily_maxes and today:
        # Python 2: dict.values() returns a list, so np.array() is fine here.
        maxes = np.array(daily_maxes.values())
        p = np.percentile(maxes, 90)
        for cons, tm in today:
            if cons > p:
                return cons, tm
    return 0, 0
def has_burst_old(household):
    """Deprecated burst detector (superseded; kept for reference).

    Compares the most recent ~100 non-zero 15-minute values against the
    95th percentile of all earlier non-zero values and returns the first
    (consumption, "HH:MM") pair that exceeds it, else (0, 0).
    """
    name = household.user.username
    if not name.startswith('GR'):
        return 0, 0
    timeseries = household \
        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                        variable__id=VAR_PERIOD)
    series = TSeries(id=timeseries.id)
    series.read_from_db(db.connection)
    timestamps = sorted(series.keys())
    today = []  # all today's values
    _all = []   # every historical non-zero value
    for i in range(1, len(timestamps)):
        ts = timestamps[i]
        # Leftover debugging hook for a specific household/timestamp; no-op.
        if household.user.username == "GR006047" \
                and ts.year == 2015 and ts.month == 2 and ts.day == 9 \
                and ts.hour == 17:
            pass
        prev_ts = timestamps[i - 1]
        # if previous value is NaN we don't take this value into consideration
        # Because it might have all consumption of all the previous NaN times
        val = series[ts]
        prev_val = series[prev_ts]
        if isnan(prev_val):
            continue
        if i < len(timestamps) - 100:
            if not isnan(val) and not val == 0:
                _all.append(series[ts])
        else:
            tm = "%s:%s" % (ts.time().hour, ts.time().minute)
            if not isnan(val) and not val == 0:
                today.append((val, tm))
    if _all and today:
        all1 = np.array(_all)
        p = np.percentile(all1, 95)
        for cons, tm in today:
            if cons > p:
                return cons, tm
    return 0, 0
def has_burst_old(household):
    """Deprecated burst detector (no longer used; kept for reference).

    Flags any of the last ~100 non-zero 15-minute values that exceed the
    95th percentile of all earlier non-zero values; returns the first
    (consumption, "HH:MM") match, otherwise (0, 0).
    """
    name = household.user.username
    if not name.startswith('GR'):
        return 0, 0
    timeseries = household \
        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                        variable__id=VAR_PERIOD)
    series = TSeries(id=timeseries.id)
    series.read_from_db(db.connection)
    timestamps = sorted(series.keys())
    today = []  # all today's values
    _all = []   # every historical non-zero value
    for i in range(1, len(timestamps)):
        ts = timestamps[i]
        # Leftover debugging hook for one specific household/timestamp; no-op.
        if household.user.username == "GR006047" \
                and ts.year == 2015 and ts.month == 2 and ts.day == 9 \
                and ts.hour == 17:
            pass
        prev_ts = timestamps[i-1]
        # if previous value is NaN we don't take this value into consideration
        # Because it might have all consumption of all the previous NaN times
        val = series[ts]
        prev_val = series[prev_ts]
        if isnan(prev_val):
            continue
        if i < len(timestamps) - 100:
            if not isnan(val) and not val == 0:
                _all.append(series[ts])
        else:
            tm = "%s:%s" % (ts.time().hour, ts.time().minute)
            if not isnan(val) and not val == 0:
                today.append((val, tm))
    if _all and today:
        all1 = np.array(_all)
        p = np.percentile(all1, 95)
        for cons, tm in today:
            if cons > p:
                return cons, tm
    return 0, 0
def has_burst(household):
    """Detect a possible burst in a household's 15-minute water series.

    Collects per-day maxima over all but the last ~100 records, then flags
    recent non-zero values above the 90th percentile of those daily maxima.
    Returns (consumption, timestamp-string) for the first match, or (0, 0)
    when nothing is found or the household is not a 'GR' one.

    NOTE(review): the original docstring claimed this algorithm is unused —
    likely copy-pasted from has_burst_old; verify before relying on it.
    """
    name = household.user.username
    if not name.startswith('GR'):
        return 0, 0
    timeseries = household \
        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                        variable__id=VAR_PERIOD)
    series = TSeries(id=timeseries.id)
    series.read_from_db(db.connection)
    timestamps = sorted(series.keys())
    today = []  # all today's values
    daily_maxes = {}  # date -> max non-zero value of that day
    for i in range(1, len(timestamps)):
        ts = timestamps[i]
        prev_ts = timestamps[i-1]
        date = ts.date()
        # if previous value is NaN we don't take this value into consideration
        # Because it might have all consumption of all the previous NaN times
        val = series[ts]
        prev_val = series[prev_ts]
        if isnan(prev_val):
            continue
        if i < len(timestamps) - 100:
            # Historical records feed the daily maxima.
            if not isnan(val) and not val == 0:
                daily_max = daily_maxes.get(date, 0)
                if val > daily_max:
                    daily_maxes[date] = val
        else:
            # Recent records are burst candidates.
            tm = "%s-%s-%s %s:%s" % (ts.year, ts.month, ts.day,
                                     ts.time().hour, ts.time().minute)
            if not isnan(val) and not val == 0:
                today.append((val, tm))
    if daily_maxes and today:
        # Python 2: dict.values() is a list, directly usable by np.array.
        maxes = np.array(daily_maxes.values())
        p = np.percentile(maxes, 90)
        for cons, tm in today:
            if cons > p:
                return cons, tm
    return 0, 0
def get_values_after(household, dt, variable):
    """Return [(timestamp, value), ...] from the household's 15-minute series
    for the given variable ("WaterCold" or "Electricity"), keeping only the
    records strictly after *dt*.  Unknown variables yield an empty list."""
    variable_ids = {
        "WaterCold": VAR_PERIOD,
        "Electricity": VAR_ENERGY_PERIOD,
    }
    collected = []
    var_id = variable_ids.get(variable)
    if var_id is None:
        return collected
    ts_db = household.timeseries.get(
        time_step__id=TSTEP_FIFTEEN_MINUTES, variable__id=var_id)
    series = TSeries(id=ts_db.id)
    series.read_from_db(db.connection)
    for stamp in sorted(series.keys()):
        if stamp > dt:
            collected.append((stamp, series[stamp]))
    return collected
def MultiTimeseriesProcessDb(method, timeseries_arg, out_timeseries_id, db,
                             read_tstep_func, transaction=None, commit=True,
                             options={}):
    # Run a multi-timeseries processing `method` over the DB series given in
    # `timeseries_arg` (mapping key -> timeseries id) and store the result in
    # `out_timeseries_id`.  With options['append_only'], only records newer
    # than the output's current end are computed and appended.
    # NOTE: the mutable default options={} is safe here because it is
    # deep-copied immediately and never mutated in place.
    out_timeseries = Timeseries(id = out_timeseries_id)
    opts = copy.deepcopy(options)
    if 'append_only' in opts and opts['append_only']:
        # Start from the output series' last date (exclusive).
        bounds = timeseries_bounding_dates_from_db(db, id = out_timeseries_id)
        opts['start_date'] = bounds[1] if bounds else None;
        opts['interval_exclusive'] = True
    tseries_arg={}
    for key in timeseries_arg:
        ts = Timeseries(id=timeseries_arg[key])
        if ('append_only' in opts and opts['append_only']) \
                and opts['start_date'] is not None:
            # Try the cheap tail-only read first; fall back to a full read
            # when the tail does not reach back to start_date.
            ts.read_from_db(db, bottom_only=True)
            if ts.bounding_dates()[0]>opts['start_date']:
                ts.read_from_db(db)
        else:
            ts.read_from_db(db)
        ts.time_step = read_tstep_func(ts.id)
        tseries_arg[key] = ts
    MultiTimeseriesProcess(method, tseries_arg, out_timeseries, opts)
    if 'append_only' in opts and opts['append_only']:
        out_timeseries.append_to_db(db=db, transaction=transaction,
                                    commit=commit)
    else:
        out_timeseries.write_to_db(db=db, transaction=transaction,
                                   commit=commit)
def handle(self, *args, **options):
    # Management command: export the cumulative consumption series of one
    # user (exact username) or of all users with a given country prefix
    # ("GR", "GB", "PT", "GBA") to data/timeseries_cumulative_<user>.csv.
    try:
        username = args[0]
    except IndexError:
        print "I need a username!"
        return -1
    try:
        if username not in ["GR", "GB", "PT", "GBA"]:
            users = User.objects.filter(username=username)
        else:
            # Argument is a country prefix: export every matching user.
            users = User.objects.filter(username__startswith=username)
        for user in users:
            out = []
            # NOTE(review): prints the command argument, not user.username —
            # in prefix mode this prints e.g. "GR" for every user; confirm
            # whether user.username was intended.
            print "output for {x}".format(x=username)
            household = Household.objects.get(user=user)
            # ts_raw = household.timeseries.filter(time_step__isnull=True,
            #                                      variable__id=VAR_CUMULATIVE)[0]
            # series = TSeries(id=ts_raw.id)
            timeseries = household \
                .timeseries.get(variable__id=VAR_CUMULATIVE)
            series = TSeries(id=timeseries.id)
            series.read_from_db(db.connection)
            timestamps = sorted(series.keys())
            values = np.array([])
            for ts in timestamps:
                val = series[ts]
                # Skip missing and zero records.
                if isnan(val) or val == 0:
                    continue
                values = np.append(values, val)
                #perc = np.percentile(values, 90)
                out.append([ts, val])
            _outfile = "timeseries_cumulative_%s.csv" % user.username
            _path = "data/"
            with open(path.join(_path, _outfile), 'w') as of:
                a = csv.writer(of, delimiter=',', quotechar='"',
                               quoting=csv.QUOTE_ALL)
                a.writerows(out)
    except Exception as e:
        # Broad catch: report and swallow (command-line tool).
        print "failed with %s" % repr(e)
def AggregateDbTimeseries(source_id, dest_id, db, read_tstep_func,
                          transaction=None, commit=True, missing_allowed=0.0,
                          missing_flag='MISSING', append_only=False,
                          last_incomplete=False, all_incomplete=False):
    # Aggregate DB series `source_id` to the time step of `dest_id` and store
    # the result.  With append_only, records up to the destination's current
    # end date are dropped from the aggregate and the rest is appended.
    source = Timeseries(id=source_id, time_step=read_tstep_func(source_id))
    dest_step = read_tstep_func(dest_id)
    if append_only:
        bounds = timeseries_bounding_dates_from_db(db = db, id = dest_id)
        end_date = bounds[1] if bounds else None
    source.read_from_db(db)
    # aggregate() returns a tuple; element [0] is the aggregated series.
    dest = source.aggregate(target_step=dest_step,
                            missing_allowed=missing_allowed,
                            missing_flag=missing_flag,
                            last_incomplete=last_incomplete,
                            all_incomplete=all_incomplete)[0]
    dest.id = dest_id
    if append_only:
        # Trim from the front every record already present in the DB
        # (i.e. anything at or before the destination's current end).
        d=dest.bounding_dates()
        while (d is not None) and (end_date is not None) and d[0]<=end_date:
            del dest[d[0]]
            d=dest.bounding_dates()
        dest.append_to_db(db=db, transaction=transaction, commit=commit)
    else:
        dest.write_to_db(db=db, transaction=transaction, commit=commit)
def get_consumption_totals(household, dt, variable):
    """
    Return the last cumulative consumption value recorded at or before *dt*
    for the household's "WaterCold" or "Electricity" cumulative series.
    Returns None for any other *variable* (matching the original fallthrough).

    Not needed. read_timeseries_tail_from_db does the same thing, faster.
    :param household: Household model instance
    :param dt: cutoff timestamp (inclusive)
    :param variable: "WaterCold" or "Electricity"
    :return: last cumulative value <= dt, 0 if none, None for unknown variable
    """
    # The two original branches were identical except for the variable id;
    # dispatch through a table instead of duplicating the loop.
    variable_ids = {
        "WaterCold": VAR_CUMULATIVE,
        "Electricity": VAR_ENERGY_CUMULATIVE,
    }
    if variable not in variable_ids:
        return None
    timeseries = household.timeseries.get(variable__id=variable_ids[variable])
    raw_series = TSeries(id=timeseries.id)
    raw_series.read_from_db(db.connection)
    total = 0
    for ts in sorted(raw_series.keys()):
        val = raw_series[ts]
        # NaN records are skipped (checked before the cutoff, as originally).
        if isnan(val):
            continue
        if ts > dt:
            break
        # Cumulative series: the latest value <= dt is the running total.
        total = val
    return total
def testUploadTsData(self):
    # Happy path: upload one record, then append two more, then verify that
    # appending an out-of-order (earlier) record is rejected with 409.
    # NOTE: the 06/07 day literals are Python 2 octal-style ints (== 6, 7).
    self.assert_(self.client.login(username='******', password='******'))
    response = self.client.put(
        "/api/tsdata/1/",
        encode_multipart(BOUNDARY,
                         {'timeseries_records': '2012-11-06 18:17,20,\n'}),
        content_type=MULTIPART_CONTENT)
    t = Timeseries(1)
    t.read_from_db(connection)
    self.assertEqual(response.status_code, 200)
    # The response body is the number of records inserted, as a string.
    self.assertEqual(response.content, '1')
    self.assertEqual(len(t), 1)
    self.assertEqual(t.items(0)[0], datetime(2012, 11, 06, 18, 17, 0))
    self.assertEqual(t.items(0)[1], 20)
    self.client.logout()
    # Append two more records
    self.assert_(self.client.login(username='******', password='******'))
    response = self.client.put(
        "/api/tsdata/1/",
        encode_multipart(
            BOUNDARY, {
                'timeseries_records':
                '2012-11-06 18:18,21,\n2012-11-07 18:18,23,\n'
            }),
        content_type=MULTIPART_CONTENT)
    t = Timeseries(1)
    t.read_from_db(connection)
    self.assertEqual(response.status_code, 200)
    self.assertEqual(response.content, '2')
    self.assertEqual(len(t), 3)
    self.assertEqual(t.items(0)[0], datetime(2012, 11, 06, 18, 17, 0))
    self.assertEqual(t.items(0)[1], 20)
    self.assertEqual(t.items(1)[0], datetime(2012, 11, 06, 18, 18, 0))
    self.assertEqual(t.items(1)[1], 21)
    self.assertEqual(t.items(2)[0], datetime(2012, 11, 07, 18, 18, 0))
    self.assertEqual(t.items(2)[1], 23)
    self.client.logout()
    # Try to append an earlier record; should fail
    self.assert_(self.client.login(username='******', password='******'))
    response = self.client.put(
        "/api/tsdata/1/",
        encode_multipart(BOUNDARY,
                         {'timeseries_records': '2012-11-05 18:18,21,\n'}),
        content_type=MULTIPART_CONTENT)
    self.client.logout()
    t = Timeseries(1)
    t.read_from_db(connection)
    # 409 Conflict and the stored series is unchanged.
    self.assertEqual(response.status_code, 409)
    self.assertEqual(len(t), 3)
    self.client.logout()
def testUploadTsData(self):
    # Happy path: one record uploaded, two appended, then an out-of-order
    # (earlier) record must be rejected with 409 leaving data unchanged.
    # NOTE: the 06/07 day literals are Python 2 octal-style ints (== 6, 7).
    self.assert_(self.client.login(username='******', password='******'))
    response = self.client.put(
        "/api/tsdata/1/",
        encode_multipart(BOUNDARY,
                         {'timeseries_records': '2012-11-06 18:17,20,\n'}),
        content_type=MULTIPART_CONTENT)
    t = Timeseries(1)
    t.read_from_db(connection)
    self.assertEqual(response.status_code, 200)
    # Response body carries the number of inserted records, as text.
    self.assertEqual(response.content, '1')
    self.assertEqual(len(t), 1)
    self.assertEqual(t.items(0)[0], datetime(2012, 11, 06, 18, 17, 0))
    self.assertEqual(t.items(0)[1], 20)
    self.client.logout()
    # Append two more records
    self.assert_(self.client.login(username='******', password='******'))
    response = self.client.put(
        "/api/tsdata/1/",
        encode_multipart(BOUNDARY,
                         {'timeseries_records':
                          '2012-11-06 18:18,21,\n2012-11-07 18:18,23,\n'}),
        content_type=MULTIPART_CONTENT)
    t = Timeseries(1)
    t.read_from_db(connection)
    self.assertEqual(response.status_code, 200)
    self.assertEqual(response.content, '2')
    self.assertEqual(len(t), 3)
    self.assertEqual(t.items(0)[0], datetime(2012, 11, 06, 18, 17, 0))
    self.assertEqual(t.items(0)[1], 20)
    self.assertEqual(t.items(1)[0], datetime(2012, 11, 06, 18, 18, 0))
    self.assertEqual(t.items(1)[1], 21)
    self.assertEqual(t.items(2)[0], datetime(2012, 11, 07, 18, 18, 0))
    self.assertEqual(t.items(2)[1], 23)
    self.client.logout()
    # Try to append an earlier record; should fail
    self.assert_(self.client.login(username='******', password='******'))
    response = self.client.put(
        "/api/tsdata/1/",
        encode_multipart(BOUNDARY,
                         {'timeseries_records': '2012-11-05 18:18,21,\n'}),
        content_type=MULTIPART_CONTENT)
    self.client.logout()
    t = Timeseries(1)
    t.read_from_db(connection)
    # 409 Conflict; stored series must be unchanged.
    self.assertEqual(response.status_code, 409)
    self.assertEqual(len(t), 3)
    self.client.logout()
def InterpolateDbTimeseries(source_id, dest_id, curve_type, curve_data, db,
                            data_columns=(0,1), logarithmic=False, offset=0,
                            append_only=False, transaction=None, commit=True):
    # Interpolate DB series `source_id` through a rating curve (single curve
    # or multi stage-discharge) and store the result as `dest_id`.  With
    # append_only, only source records after the destination's current end
    # are interpolated and appended.
    if append_only:
        bounds = timeseries_bounding_dates_from_db(db, id = dest_id)
        start_date = bounds[1] if bounds else None;
    ts = Timeseries(id=source_id)
    if append_only and start_date is not None:
        # Cheap tail-only read; fall back to full read when the tail does
        # not reach back to start_date.
        ts.read_from_db(db, bottom_only=True)
        if ts.bounding_dates()[0]>start_date:
            ts.read_from_db(db)
        # Drop everything at or before the destination's current end.
        while ts.bounding_dates()[0]<=start_date:
            del(ts[ts.bounding_dates()[0]])
        if len(ts)==0:
            # Nothing new to interpolate.
            return
    else:
        ts.read_from_db(db)
    curve_list = TransientCurveList()
    if curve_type=='SingleCurve':
        curve_list.add(logarithmic=logarithmic,
                       offset=CurvePoint(offset, 0))
        # Bypass TransientCurve.read_fp and use the base-class parser, which
        # accepts plain (x, y) columns without transient metadata.
        super(TransientCurve,
              curve_list[0]).read_fp(StringIO(curve_data), data_columns)
    elif curve_type=='StageDischargeMulti':
        curve_list.read_fp(StringIO(curve_data))
    else:
        # Unreachable by contract; note that assert is stripped under -O.
        assert(False)
    out_timeseries = curve_list.interpolate_ts(ts)
    out_timeseries.id = dest_id
    if append_only:
        out_timeseries.append_to_db(db=db, transaction=transaction,
                                    commit=commit)
    else:
        out_timeseries.write_to_db(db=db, transaction=transaction,
                                   commit=commit)
Assuming that "dir" is the openmeteo directory, run as follows: export PYTHONPATH=dir:dir/enhydris export DJANGO_SETTINGS=settings ./oldopenmeteo2enhydris.sql """ import sys from datetime import timedelta from django.db import connection, transaction from enhydris.hcore import models from pthelma.timeseries import Timeseries transaction.enter_transaction_management() tms = models.Timeseries.objects.filter(time_step__id__in=[4,5]) for tm in tms: sys.stderr.write("Doing timeseries %d..." % (tm.id,)) t = Timeseries(id=tm.id) nt = Timeseries(id=tm.id) t.read_from_db(connection) for (d, value) in t.items(): d += timedelta(hours=1) assert(not d.minute and not d.hour and not d.second and d.day==1, "Invalid date "+str(d)) nt[d] = value nt.write_to_db(connection, transaction=transaction, commit=False) sys.stderr.write(" Done\n") transaction.commit()
def readseries(self, timeseries):
    """Load the database records of *timeseries* and return them as items."""
    step = ReadTimeStep(timeseries.id, timeseries)
    loaded = Timeseries(time_step=step, id=timeseries.id)
    loaded.read_from_db(connection)
    return loaded.items()
def _load_daily_series_data(ts_daily):
    """Read the daily series described by *ts_daily* from the database and
    return the populated Timeseries object."""
    step = ReadTimeStep(ts_daily.id, ts_daily)
    series = Timeseries(time_step=step, id=ts_daily.id)
    series.read_from_db(connection)
    return series
def create_objects(data, usernames, force, z_names, z_dict):
    """
    Create users, households and timeseries placeholders for parsed meter
    data, then load the parsed values into the raw (cumulative) series.

    :param data: meter_id -> consumption_type -> [timestamp, volume]
    :param usernames: meter_id -> username
    :param force: True to overwrite records older than the stored series' end
    :param z_names: list of DMA zone names; z_names[0] is the fallback zone
    :param z_dict: username -> DMA zone name
    :return: list of Household objects created/found
             (NOTE(review): the original docstring said "True for success")
    """
    households = []
    # Create user (household owner), household, database series placeholders
    hh_ids = sorted(data.keys())
    found = False
    for hh_id in hh_ids:
        username = usernames[hh_id]
        # Leftover debugging hook for one meter; no-op.
        if username == "PT94993":
            pass
        try:
            zone_name = z_dict[username]
        except KeyError:
            # Unknown username: fall back to the first zone.
            zone_name = z_names[0]
        zone = DMA.objects.get(name=zone_name)
        user, created = create_user(username, hh_id)
        household, found = create_household(hh_id, user, zone.id)
        households.append(household)
        db_series = create_raw_timeseries(household)
        create_processed_timeseries(household)
        # NOTE(review): timeseries_data is never populated below, so the
        # calc_occupancy call at the end of the loop can never run.
        timeseries_data = {}
        # Now we will create timeseries.Timeseries() and we will add
        # parsed values
        for variable in db_series:
            if variable not in ('WaterCold', 'Electricity'):
                continue
            exists = False
            s, e = timeseries_bounding_dates_from_db(db.connection,
                                                     db_series[variable].id)
            latest_ts = e  # newest timestamp already stored (or None)
            ts_id = db_series[variable].id
            # checking to see if timeseries records already exist in order
            # to append
            # d = read_timeseries_tail_from_db(db.connection, ts_id)
            total = 0.0
            # if s or e:
            #     exists = True
            #     timeseries = TSeries(ts_id)
            #     timeseries.read_from_db(db.connection)
            # else:
            #     timeseries = TSeries()
            #     timeseries.id = ts_id
            _dict = data[hh_id]
            arr = _dict[variable]
            series = arr  # list of (timestamp, volume) pairs, oldest first
            if not series:
                continue
            earlier = []  # parsed records older than what is stored
            if (not latest_ts) or (latest_ts < series[0][0]):
                # append
                timeseries = TSeries()
                timeseries.id = ts_id
                try:
                    tail = read_timeseries_tail_from_db(db.connection, ts_id)
                    total = float(tail[1])  # keep up from last value
                except Exception as e:
                    # No tail (empty series) or read failure: start at zero.
                    # NOTE(review): `e` shadows the bounding-date variable
                    # above; latest_ts was captured beforehand so it is safe.
                    log.debug(repr(e))
                    total = 0
                for timestamp, value in series:
                    if (not latest_ts) or (timestamp > latest_ts):
                        if not isnan(value):
                            # Volumes are accumulated into a cumulative total.
                            total += value
                            timeseries[timestamp] = total
                        else:
                            timeseries[timestamp] = float('NaN')
                    elif timestamp < latest_ts:
                        earlier.append((timestamp, value))
                timeseries.append_to_db(db=db.connection,
                                        transaction=transaction, commit=True)
            elif latest_ts >= series[0][0]:
                if not force:
                    # ignore
                    continue
                else:
                    # insert
                    for timestamp, value in series:
                        if timestamp < latest_ts:
                            earlier.append((timestamp, value))
            if earlier and ("GR" in username or "GBA" in username):
                # insert (only for athens)
                # print "appending %s items for %s" % (len(earlier), username)
                if variable == "WaterCold":
                    ts15 = household \
                        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                                        variable__id=VAR_PERIOD)
                    series15 = TSeries(id=ts15.id)
                elif variable == "Electricity":
                    ts15 = household \
                        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                                        variable__id=VAR_ENERGY_PERIOD)
                    series15 = TSeries(id=ts15.id)
                # Backfill the 15-minute period series with the raw volumes.
                series15.read_from_db(db.connection)
                for ts, value in earlier:
                    series15[ts] = value
                series15.write_to_db(db=db.connection,
                                     transaction=transaction, commit=True)
                raw_ts = TSeries(ts_id)
                # read existing ts raw data
                raw_ts.read_from_db(db.connection)
                # Rebuild the cumulative series from the point of insertion.
                total = get_consumption_totals(household, earlier[0][0],
                                               variable)
                init = total
                for timestamp, value in earlier:
                    if not isnan(value):
                        total += value
                        raw_ts[timestamp] = total
                    else:
                        raw_ts[timestamp] = float('NaN')
                # correct later values, too
                diff = total - init
                all_ts = sorted(raw_ts.keys())
                for ts in all_ts:
                    if ts <= timestamp:
                        continue
                    curr = raw_ts[ts]
                    raw_ts[ts] = curr + diff
                raw_ts.write_to_db(db=db.connection, transaction=transaction,
                                   commit=True)
        if 'WaterCold' in timeseries_data and not found:
            # only for new HH
            calc_occupancy(timeseries_data['WaterCold'], household)
    return households
def regularize_raw_series(raw_series_db, proc_series_db, rs, re, ps, pe):
    """
    Regularize raw_series_db object from database and write a processed
    proc_series_db in database.

    Raw series is a continuously increasing values time series,
    aggregating the water consumption. The resulting processed timeseries
    contains the water consumption for each of its intervals, i.e. if the
    timeseries is of 15 minutes time step, then each record contains the
    water consumption for that record's period.

    :param raw_series_db: database object of the cumulative raw series
    :param proc_series_db: database object of the processed series
    :param rs: raw series start timestamp
    :param re: raw series end timestamp
    :param ps: unused; kept for interface compatibility
    :param pe: unused (only referenced by the commented-out incremental
        variant below); kept for interface compatibility
    :return: the full processed TSeries
    """
    raw_series = TSeries(id=raw_series_db.id)
    raw_series.read_from_db(db.connection)
    # We keep the last raw value for cross-checking reasons; see the
    # commented-out debug print near the end.
    test_value = raw_series[raw_series.bounding_dates()[1]]
    time_step = ReadTimeStep(proc_series_db.id, proc_series_db)
    proc_series = TSeries(id=proc_series_db.id, time_step=time_step)
    # The following code can be used in real conditions to append only
    # new records to db, in a next version
    #if not pe:
    #    start = proc_series.time_step.down(rs)
    #else:
    #    start = proc_series.time_step.up(pe)
    # Instead of the above we use now:
    start = proc_series.time_step.down(rs)
    end = proc_series.time_step.up(re)
    pointer = start
    # Pass 1: Initialize proc_series with NaN on every step in [start, end]
    while pointer <= end:
        proc_series[pointer] = float('nan')
        pointer = proc_series.time_step.next(pointer)
    # Pass 2: Transfer cumulative raw series to differences series
    prev_s = 0
    for i in xrange(len(raw_series)):
        dat, value = raw_series.items(pos=i)
        if not math.isnan(value):
            raw_series[dat] = value - prev_s
            prev_s = value
    # Pass 3: Regularize step: loop over raw series records and distribute
    # floating point values to the processed series
    for i in xrange(len(raw_series)):
        dat, value = raw_series.items(pos=i)
        if not math.isnan(value):
            # find previous, next timestamp of the proc time series
            d1 = proc_series.time_step.down(dat)
            d2 = proc_series.time_step.up(dat)
            if math.isnan(proc_series[d1]):
                proc_series[d1] = 0
            if math.isnan(proc_series[d2]):
                proc_series[d2] = 0
            if d1 == d2:
                # dat falls exactly on a proc step (d1 == d2): assign whole
                proc_series[d1] += value
                continue
            dif1 = _dif_in_secs(d1, dat)
            dif2 = _dif_in_secs(dat, d2)
            dif = dif1 + dif2
            # Distribute value to d1, d2 proportionally to proximity
            proc_series[d1] += (dif2 / dif) * value
            proc_series[d2] += (dif1 / dif) * value
    # Uncomment the following line in order to show debug information.
    # Usually the three sums are consistent by equality; if equality is
    # not satisfied there is a likelihood of an algorithm error.
    # (Fixed: this debug print was left enabled, spamming stdout on every
    # call; the sibling regularize() has its debug line commented out.)
    # print raw_series.sum(), proc_series.sum(), test_value
    proc_series.write_to_db(db=db.connection, commit=True)
    # return the full timeseries
    return proc_series
def has_leakage(household):
    """
    Check a household for leakages.

    Opens the hourly timeseries, buckets consumption per calendar date into
    night (03:00-05:00) and total dictionaries, and compares the last day's
    night/total ratio against the 90th percentile of all previous days.

    NOTE(review): an identical has_leakage is defined again later in this
    module and shadows this one at import time — the duplicate should be
    removed.

    :param household: Household model instance (hourly series is read
        from the database)
    :return: (ratio, "Y-M-D H:M" timestamp) when the last day's
        night-consumption ratio exceeds the 90th percentile of history,
        otherwise (0, 0)
    """
    name = household.user.username
    if name.startswith('GB'):
        # not UK because they send daily data
        return 0, 0
    timeseries = household \
        .timeseries.get(time_step__id=TSTEP_HOURLY,
                        variable__id=VAR_PERIOD)
    series = TSeries(id=timeseries.id)
    series.read_from_db(db.connection)
    timestamps = sorted(series.keys())
    night_dict = {}
    total_dict = {}
    # Defaults in case the series is empty (loop never runs)
    _t = datetime.now().time()
    _d = datetime.today().date()
    for ts in timestamps:
        _d = ts.date()
        _t = ts.time()
        val = series[ts]
        if 3 <= _t.hour <= 5:
            if val == 0:
                night_dict[_d] = 0  # make all night 0 if one 0
            else:
                try:
                    night_dict[_d] += val
                except KeyError:
                    night_dict[_d] = val
        try:
            total_dict[_d] += val
        except KeyError:
            total_dict[_d] = val
    # remove last day if not a whole day (_t < 24:00)
    if _t.hour < 23:
        try:
            del total_dict[_d]
            del night_dict[_d]
        except (KeyError, IndexError):
            pass
    _all = []    # night/total ratios for all full previous days
    _today = []  # last day's ratio(s)
    _dates = sorted(total_dict.keys())[:-1]  # all except last day
    for _d in _dates:
        total = total_dict[_d]
        # there can be a case when I don't get data for 01:00 -> 04:00
        # so night[_d] might not exist. in this case let it be zero
        try:
            night = night_dict[_d]
        except KeyError:
            night = 0
        if total > 0 and night > 0 and not isnan(total) and not isnan(night):
            _all.append(float(night) / float(total))
    # Now we need only the last day. However sometimes we have some
    # timestamps from the next day because the file has all data from the
    # previous day and one entry from today.
    _dates = sorted(total_dict.keys())[-1:]  # only last day's
    for _d in _dates:
        total = total_dict[_d]
        # Fixed: night samples may be missing for the last day too, which
        # previously raised an unhandled KeyError here
        night = night_dict.get(_d, 0)
        if total > 0 and night > 0 and not isnan(total) and not isnan(night):
            _today.append(float(night) / float(total))
    if _all and _today:
        ts = timestamps[-1]
        tm = "%s-%s-%s %s:%s" % (ts.year, ts.month, ts.day,
                                 ts.time().hour, ts.time().minute)
        all1 = np.array(_all)
        p = np.percentile(all1, 90)
        for val in _today:
            if val > p:
                return val, tm
    return 0, 0
def regularize(raw_series_db, proc_series_db, rs, re):
    """
    Regularize raw_series_db object from database and write a processed
    proc_series_db in database.

    Raw series is a continuously increasing values time series,
    aggregating the water consumption. The resulting processed timeseries
    contains the water consumption for each of its intervals, i.e. if the
    timeseries is of 15 minutes time step, then each record contains the
    water consumption for that record's period.

    :param raw_series_db: database object of the cumulative raw series
    :param proc_series_db: database object of the processed series
    :param rs: raw series start timestamp
    :param re: raw series end timestamp
    :return: the full processed TSeries, or None if the raw series has no
        readable last value (empty series)
    """
    raw_series = TSeries(id=raw_series_db.id)
    raw_series.read_from_db(db.connection)
    # We keep the last value for cross-checking reasons; see the
    # commented-out log near the end.
    try:
        test_value = raw_series[raw_series.bounding_dates()[1]]
    except Exception as e:
        # Empty/unreadable raw series: nothing to regularize.
        #log.debug("Trying to get test value for raw series %s failed with %s. "
        #          "Skipping!" % (raw_series_db.id, repr(e)))
        return None
    time_step = ReadTimeStep(proc_series_db.id, proc_series_db)
    proc_series = TSeries(id=proc_series_db.id, time_step=time_step)
    # The following code can be used in real conditions to append only
    # new records to db, in a next version
    #if not pe:
    #    start = proc_series.time_step.down(rs)
    #else:
    #    start = proc_series.time_step.up(pe)
    # Instead of the above we use now:
    start = proc_series.time_step.down(rs)
    end = proc_series.time_step.up(re)
    pointer = start
    # Pass 1: Initialize proc_series with NaN on every step in [start, end]
    while pointer <= end:
        proc_series[pointer] = float('nan')
        pointer = proc_series.time_step.next(pointer)
    # Pass 2: Transfer cumulative raw series to differences series.
    # (Fixed: removed leftover debug scaffolding that built
    # datetime.today().replace(month=11).replace(day=5) on every
    # iteration — besides being dead code, it raised ValueError whenever
    # today was the 31st of a month, since November has only 30 days.)
    prev_s = 0
    for i in xrange(len(raw_series)):
        dat, value = raw_series.items(pos=i)
        if not isnan(value):
            # "if" Added by Chris Pantazis, because sometimes
            # we get a negative small value by the meter
            if prev_s > value:
                prev_s = value
            raw_series[dat] = value - prev_s
            prev_s = value
    # Pass 3: Regularize step: loop over raw series records and distribute
    # floating point values to the processed series
    for i in xrange(len(raw_series)):
        dat, value = raw_series.items(pos=i)
        if not isnan(value):
            # find previous, next timestamp of the proc time series
            d1 = proc_series.time_step.down(dat)
            d2 = proc_series.time_step.up(dat)
            if isnan(proc_series[d1]):
                proc_series[d1] = 0
            if isnan(proc_series[d2]):
                proc_series[d2] = 0
            if d1 == d2:
                # dat falls exactly on a proc step (d1 == d2): assign whole
                proc_series[d1] += value
                continue
            dif1 = _dif_in_secs(d1, dat)
            dif2 = _dif_in_secs(dat, d2)
            dif = dif1 + dif2
            # Distribute value to d1, d2 proportionally to proximity
            proc_series[d1] += (dif2 / dif) * value
            proc_series[d2] += (dif1 / dif) * value
    # Uncomment the following lines in order to show debug information.
    # Usually the three sums are consistent by equality; if equality is
    # not satisfied there is a likelihood of an algorithm error.
    # log.info("%s = %s = %s ?" % (raw_series.sum(),
    #                              proc_series.sum(), test_value))
    proc_series.write_to_db(db=db.connection, commit=True)
    # return the full timeseries
    return proc_series
def has_leakage(household):
    """
    Check a household for leakages.

    Opens the hourly timeseries, buckets consumption per calendar date into
    night (03:00-05:00) and total dictionaries, and compares the last day's
    night/total ratio against the 90th percentile of all previous days.

    NOTE(review): this is a byte-for-byte duplicate of a has_leakage
    defined earlier in this module; being later, this definition is the
    one in effect. The duplicate should be removed.

    :param household: Household model instance (hourly series is read
        from the database)
    :return: (ratio, "Y-M-D H:M" timestamp) when the last day's
        night-consumption ratio exceeds the 90th percentile of history,
        otherwise (0, 0)
    """
    name = household.user.username
    if name.startswith('GB'):
        # not UK because they send daily data
        return 0, 0
    timeseries = household \
        .timeseries.get(time_step__id=TSTEP_HOURLY,
                        variable__id=VAR_PERIOD)
    series = TSeries(id=timeseries.id)
    series.read_from_db(db.connection)
    timestamps = sorted(series.keys())
    night_dict = {}
    total_dict = {}
    # Defaults in case the series is empty (loop never runs)
    _t = datetime.now().time()
    _d = datetime.today().date()
    for ts in timestamps:
        _d = ts.date()
        _t = ts.time()
        val = series[ts]
        if 3 <= _t.hour <= 5:
            if val == 0:
                night_dict[_d] = 0  # make all night 0 if one 0
            else:
                try:
                    night_dict[_d] += val
                except KeyError:
                    night_dict[_d] = val
        try:
            total_dict[_d] += val
        except KeyError:
            total_dict[_d] = val
    # remove last day if not a whole day (_t < 24:00)
    if _t.hour < 23:
        try:
            del total_dict[_d]
            del night_dict[_d]
        except (KeyError, IndexError):
            pass
    _all = []    # night/total ratios for all full previous days
    _today = []  # last day's ratio(s)
    _dates = sorted(total_dict.keys())[:-1]  # all except last day
    for _d in _dates:
        total = total_dict[_d]
        # there can be a case when I don't get data for 01:00 -> 04:00
        # so night[_d] might not exist. in this case let it be zero
        try:
            night = night_dict[_d]
        except KeyError:
            night = 0
        if total > 0 and night > 0 and not isnan(total) and not isnan(night):
            _all.append(float(night) / float(total))
    # Now we need only the last day. However sometimes we have some
    # timestamps from the next day because the file has all data from the
    # previous day and one entry from today.
    _dates = sorted(total_dict.keys())[-1:]  # only last day's
    for _d in _dates:
        total = total_dict[_d]
        # Fixed: night samples may be missing for the last day too, which
        # previously raised an unhandled KeyError here
        night = night_dict.get(_d, 0)
        if total > 0 and night > 0 and not isnan(total) and not isnan(night):
            _today.append(float(night) / float(total))
    if _all and _today:
        ts = timestamps[-1]
        tm = "%s-%s-%s %s:%s" % (ts.year, ts.month, ts.day,
                                 ts.time().hour, ts.time().minute)
        all1 = np.array(_all)
        p = np.percentile(all1, 90)
        for val in _today:
            if val > p:
                return val, tm
    return 0, 0