def update_ts_temp_file(cache_dir, connection, id):
    """Bring the cached ``<id>.hts`` file in ``cache_dir`` up to date.

    When a cache file of at least 3 bytes exists, the date of the record
    returned by ``ropen(...).readline()`` is compared with the time series
    read from the database with ``bottom_only=True``: if the database holds
    later records they are appended to the file, and if the database part
    starts after the cached date the file is rebuilt from scratch.  A missing
    or shorter-than-3-bytes file is always rebuilt.  Rebuilding writes to a
    temporary file inside ``cache_dir`` and then moves it over the target.

    :param cache_dir: directory holding the cache files; created when a
        rebuild is needed and it does not exist.
    :param connection: database connection handed to
        ``Timeseries.read_from_db``.
    :param id: integer time series id; also the stem of the cache file name.
    """
    full_rewrite = False

    afilename = os.path.join(cache_dir, '%d.hts' % (id,))
    if os.path.exists(afilename):
        # Treat a file shorter than 3 bytes as unusable and rebuild it.
        if os.path.getsize(afilename) < 3:
            full_rewrite = True

    # Update the file in the case of logged data, if this is possible
    if os.path.exists(afilename) and not full_rewrite:
        # NOTE(review): ropen presumably reads the file backwards, so this is
        # the last cached record -- confirm against its definition.
        with ropen(afilename) as fileobject:
            line = fileobject.readline()
        lastdate = datetime_from_iso(line.split(',')[0])
        ts = Timeseries(id)
        ts.read_from_db(connection, bottom_only=True)
        if len(ts) > 0:
            db_start, db_end = ts.bounding_dates()
            if db_start > lastdate:
                # The part read from the database starts after the cached
                # date, so it cannot be appended: start over.
                full_rewrite = True
            elif db_end > lastdate:
                lastindex = ts.index(lastdate)
                with open(afilename, 'a') as fileobject:
                    ts.write(fileobject, start=ts.keys()[lastindex + 1])

    # Check for temp file or else create it
    if not os.path.exists(afilename) or full_rewrite:
        ts = Timeseries(id)
        ts.read_from_db(connection)
        if not os.path.exists(cache_dir):
            os.mkdir(cache_dir)
        tempfile_handle, tempfile_name = tempfile.mkstemp(dir=cache_dir)
        try:
            with os.fdopen(tempfile_handle, 'w') as afile:
                ts.write(afile)
            shutil.move(tempfile_name, afilename)
        except Exception:
            # mkstemp never deletes its file; do not leave a stray temporary
            # file in the cache directory when writing or moving fails.
            if os.path.exists(tempfile_name):
                os.remove(tempfile_name)
            raise
def update_ts_temp_file(cache_dir, connection, id):
    """Bring the cached ``<id>.hts`` file in ``cache_dir`` up to date.

    When a cache file of at least 3 bytes exists, the date of the first line
    produced by ``xreverse`` is compared with the time series read from the
    database with ``bottom_only=True``: if the database holds later records
    they are appended to the file, and if the database part starts after the
    cached date the file is rebuilt from scratch.  A missing or
    shorter-than-3-bytes file is always rebuilt.  Rebuilding writes to a
    temporary file inside ``cache_dir`` and then moves it over the target.

    :param cache_dir: directory holding the cache files; created when a
        rebuild is needed and it does not exist.
    :param connection: database connection handed to
        ``Timeseries.read_from_db``.
    :param id: integer time series id; also the stem of the cache file name.
    """
    full_rewrite = False

    afilename = os.path.join(cache_dir, '%d.hts' % (id,))
    if os.path.exists(afilename):
        # Treat a file shorter than 3 bytes as unusable and rebuild it.
        if os.path.getsize(afilename) < 3:
            full_rewrite = True

    # Update the file in the case of logged data, if this is possible
    if os.path.exists(afilename) and not full_rewrite:
        with open(afilename, 'r') as fileobject:
            # NOTE(review): xreverse presumably yields the lines last-to-first
            # (2048 looks like a read-buffer size) -- confirm.
            xr = xreverse(fileobject, 2048)
            line = next(xr)
        lastdate = datetime_from_iso(line.split(',')[0])
        ts = Timeseries(id)
        ts.read_from_db(connection, bottom_only=True)
        if len(ts) > 0:
            db_start, db_end = ts.bounding_dates()
            if db_start > lastdate:
                # The part read from the database starts after the cached
                # date, so it cannot be appended: start over.
                full_rewrite = True
            elif db_end > lastdate:
                lastindex = ts.index(lastdate)
                with open(afilename, 'a') as fileobject:
                    ts.write(fileobject, start=ts.keys()[lastindex + 1])

    # Check for temp file or else create it
    if not os.path.exists(afilename) or full_rewrite:
        ts = Timeseries(id)
        ts.read_from_db(connection)
        if not os.path.exists(cache_dir):
            os.mkdir(cache_dir)
        tempfile_handle, tempfile_name = tempfile.mkstemp(dir=cache_dir)
        try:
            with os.fdopen(tempfile_handle, 'w') as afile:
                ts.write(afile)
            shutil.move(tempfile_name, afilename)
        except Exception:
            # mkstemp never deletes its file; do not leave a stray temporary
            # file in the cache directory when writing or moving fails.
            if os.path.exists(tempfile_name):
                os.remove(tempfile_name)
            raise
def handle(self, *args, **options):
    """Dump one user's readings and their running 90th percentile to CSV.

    The first positional argument is the username.  The household series
    selected by ``TSTEP_FIFTEEN_MINUTES`` / ``VAR_PERIOD`` is read from the
    database; every reading that is neither NaN nor zero produces a row
    ``[timestamp, value, percentile]`` where the percentile is computed over
    all readings kept so far.  Rows go to ``data/timeseries_<username>.csv``.

    :return: ``-1`` when no username was given, otherwise ``None``.  Any
        exception raised while exporting is printed, not propagated.
    """
    if not args:
        print("I need a username!")
        return -1
    username = args[0]

    try:
        if not username:
            return None

        user = User.objects.get(username=username)
        print("output for {x}".format(x=username))
        household = Household.objects.get(user=user)
        timeseries = household \
            .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                            variable__id=VAR_PERIOD)
        series = TSeries(id=timeseries.id)
        series.read_from_db(db.connection)

        kept = np.array([])
        rows = []
        for stamp in sorted(series.keys()):
            reading = series[stamp]
            if isnan(reading) or reading == 0:
                continue
            kept = np.append(kept, reading)
            # Percentile of everything kept up to and including this reading.
            rows.append([stamp, reading, np.percentile(kept, 90)])

        target = path.join("data/", "timeseries_%s.csv" % username)
        with open(target, 'w') as csv_file:
            writer = csv.writer(csv_file, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_ALL)
            writer.writerows(rows)
    except Exception as e:
        print("failed with %s" % repr(e))
def get_consumption_totals(household, dt, variable):
    """Return the last non-NaN cumulative reading at or before ``dt``.

    Original author's note: not needed, ``read_timeseries_tail_from_db`` does
    the same thing, faster.

    :param household: object whose ``timeseries`` manager is queried for the
        cumulative series of the requested variable.
    :param dt: inclusive upper bound; readings with a later timestamp are
        ignored.
    :param variable: ``"WaterCold"`` (uses ``VAR_CUMULATIVE``) or
        ``"Electricity"`` (uses ``VAR_ENERGY_CUMULATIVE``).
    :return: the selected reading, ``0`` when there is no non-NaN reading at
        or before ``dt``, or ``None`` for any other ``variable``.
    """
    if variable == "WaterCold":
        variable_id = VAR_CUMULATIVE
    elif variable == "Electricity":
        variable_id = VAR_ENERGY_CUMULATIVE
    else:
        return None

    timeseries = household \
        .timeseries.get(variable__id=variable_id)
    raw_series = TSeries(id=timeseries.id)
    raw_series.read_from_db(db.connection)

    total = 0
    for ts in sorted(raw_series.keys()):
        if ts > dt:
            # Timestamps are sorted, so nothing later can qualify.
            break
        val = raw_series[ts]
        if not isnan(val):
            total = val
    return total
def has_burst_old(household):
    """Look for a burst using the 95th percentile of earlier readings.

    We won't be using this algorithm any more.

    Only households whose username starts with ``'GR'`` are examined.  The
    series selected by ``TSTEP_FIFTEEN_MINUTES`` / ``VAR_PERIOD`` is read and
    walked in timestamp order, starting from the second reading.  NaN and
    zero readings are ignored.  Readings at the last 100 timestamps are the
    candidates ("today"); all earlier ones form the baseline.

    :param household: object exposing ``user.username`` and a ``timeseries``
        manager.
    :return: ``(value, "H:M")`` for the first candidate above the baseline's
        95th percentile, otherwise ``(0, 0)``.
    """
    name = household.user.username
    if not name.startswith('GR'):
        return 0, 0
    timeseries = household \
        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                        variable__id=VAR_PERIOD)
    series = TSeries(id=timeseries.id)
    series.read_from_db(db.connection)
    timestamps = sorted(series.keys())

    today = []  # candidate readings as (value, "H:M")
    _all = []   # baseline readings
    recent_start = len(timestamps) - 100
    for i, (prev_ts, ts) in enumerate(zip(timestamps, timestamps[1:]), 1):
        val = series[ts]
        # if previous value is NaN we don't take this value into consideration
        # Because it might have all consumption of all the previous NaN times
        if isnan(series[prev_ts]):
            continue
        if isnan(val) or val == 0:
            continue
        if i < recent_start:
            _all.append(val)
        else:
            today.append((val, "%s:%s" % (ts.time().hour, ts.time().minute)))

    if _all and today:
        p = np.percentile(np.array(_all), 95)
        for cons, tm in today:
            if cons > p:
                return cons, tm
    return 0, 0
def has_burst(household):
    """Look for a burst using the 90th percentile of earlier daily maxima.

    NOTE(review): the previous docstring read "We won't be using this
    algorithm any more", the same text as ``has_burst_old`` -- confirm
    whether that really applies to this function.

    Only households whose username starts with ``'GR'`` are examined.  The
    series selected by ``TSTEP_FIFTEEN_MINUTES`` / ``VAR_PERIOD`` is read and
    walked in timestamp order, starting from the second reading.  NaN and
    zero readings are ignored.  Readings at the last 100 timestamps are the
    candidates ("today"); for all earlier ones the largest reading per
    calendar date is kept as the baseline.

    :param household: object exposing ``user.username`` and a ``timeseries``
        manager.
    :return: ``(value, "Y-M-D H:M")`` for the first candidate above the 90th
        percentile of the daily maxima, otherwise ``(0, 0)``.
    """
    name = household.user.username
    if not name.startswith('GR'):
        return 0, 0
    timeseries = household \
        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                        variable__id=VAR_PERIOD)
    series = TSeries(id=timeseries.id)
    series.read_from_db(db.connection)
    timestamps = sorted(series.keys())

    today = []  # candidate readings as (value, "Y-M-D H:M")
    daily_maxes = {}
    recent_start = len(timestamps) - 100
    for i, (prev_ts, ts) in enumerate(zip(timestamps, timestamps[1:]), 1):
        val = series[ts]
        # if previous value is NaN we don't take this value into consideration
        # Because it might have all consumption of all the previous NaN times
        if isnan(series[prev_ts]):
            continue
        if isnan(val) or val == 0:
            continue
        if i < recent_start:
            date = ts.date()
            if val > daily_maxes.get(date, 0):
                daily_maxes[date] = val
        else:
            tm = "%s-%s-%s %s:%s" % (ts.year, ts.month, ts.day,
                                     ts.time().hour, ts.time().minute)
            today.append((val, tm))

    if daily_maxes and today:
        # list() keeps this a 1-d numeric array when values() is a view
        # (Python 3) rather than a list (Python 2).
        maxes = np.array(list(daily_maxes.values()))
        p = np.percentile(maxes, 90)
        for cons, tm in today:
            if cons > p:
                return cons, tm
    return 0, 0
def has_burst_old(household):
    """Look for a burst using the 95th percentile of earlier readings.

    We won't be using this algorithm any more.

    Only households whose username starts with ``'GR'`` are examined.  The
    series selected by ``TSTEP_FIFTEEN_MINUTES`` / ``VAR_PERIOD`` is read and
    walked in timestamp order, starting from the second reading.  NaN and
    zero readings are ignored.  Readings at the last 100 timestamps are the
    candidates ("today"); all earlier ones form the baseline.

    :param household: object exposing ``user.username`` and a ``timeseries``
        manager.
    :return: ``(value, "H:M")`` for the first candidate above the baseline's
        95th percentile, otherwise ``(0, 0)``.
    """
    name = household.user.username
    if not name.startswith('GR'):
        return 0, 0
    timeseries = household \
        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                        variable__id=VAR_PERIOD)
    series = TSeries(id=timeseries.id)
    series.read_from_db(db.connection)
    timestamps = sorted(series.keys())

    today = []  # candidate readings as (value, "H:M")
    _all = []   # baseline readings
    recent_start = len(timestamps) - 100
    for i, (prev_ts, ts) in enumerate(zip(timestamps, timestamps[1:]), 1):
        val = series[ts]
        # if previous value is NaN we don't take this value into consideration
        # Because it might have all consumption of all the previous NaN times
        if isnan(series[prev_ts]):
            continue
        if isnan(val) or val == 0:
            continue
        if i < recent_start:
            _all.append(val)
        else:
            today.append((val, "%s:%s" % (ts.time().hour, ts.time().minute)))

    if _all and today:
        p = np.percentile(np.array(_all), 95)
        for cons, tm in today:
            if cons > p:
                return cons, tm
    return 0, 0
def has_burst(household):
    """Look for a burst using the 90th percentile of earlier daily maxima.

    NOTE(review): the previous docstring read "We won't be using this
    algorithm any more", the same text as ``has_burst_old`` -- confirm
    whether that really applies to this function.

    Only households whose username starts with ``'GR'`` are examined.  The
    series selected by ``TSTEP_FIFTEEN_MINUTES`` / ``VAR_PERIOD`` is read and
    walked in timestamp order, starting from the second reading.  NaN and
    zero readings are ignored.  Readings at the last 100 timestamps are the
    candidates ("today"); for all earlier ones the largest reading per
    calendar date is kept as the baseline.

    :param household: object exposing ``user.username`` and a ``timeseries``
        manager.
    :return: ``(value, "Y-M-D H:M")`` for the first candidate above the 90th
        percentile of the daily maxima, otherwise ``(0, 0)``.
    """
    name = household.user.username
    if not name.startswith('GR'):
        return 0, 0
    timeseries = household \
        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                        variable__id=VAR_PERIOD)
    series = TSeries(id=timeseries.id)
    series.read_from_db(db.connection)
    timestamps = sorted(series.keys())

    today = []  # candidate readings as (value, "Y-M-D H:M")
    daily_maxes = {}
    recent_start = len(timestamps) - 100
    for i, (prev_ts, ts) in enumerate(zip(timestamps, timestamps[1:]), 1):
        val = series[ts]
        # if previous value is NaN we don't take this value into consideration
        # Because it might have all consumption of all the previous NaN times
        if isnan(series[prev_ts]):
            continue
        if isnan(val) or val == 0:
            continue
        if i < recent_start:
            date = ts.date()
            if val > daily_maxes.get(date, 0):
                daily_maxes[date] = val
        else:
            tm = "%s-%s-%s %s:%s" % (ts.year, ts.month, ts.day,
                                     ts.time().hour, ts.time().minute)
            today.append((val, tm))

    if daily_maxes and today:
        # list() keeps this a 1-d numeric array when values() is a view
        # (Python 3) rather than a list (Python 2).
        maxes = np.array(list(daily_maxes.values()))
        p = np.percentile(maxes, 90)
        for cons, tm in today:
            if cons > p:
                return cons, tm
    return 0, 0
def get_values_after(household, dt, variable):
    """Return the readings of a household series that come after ``dt``.

    :param household: object whose ``timeseries`` manager is queried with
        ``TSTEP_FIFTEEN_MINUTES`` and the variable id chosen below.
    :param dt: exclusive lower bound; readings at or before it are dropped.
    :param variable: ``"WaterCold"`` (uses ``VAR_PERIOD``) or
        ``"Electricity"`` (uses ``VAR_ENERGY_PERIOD``).
    :return: list of ``(timestamp, value)`` tuples in timestamp order; empty
        for any other ``variable``.
    """
    if variable == "WaterCold":
        variable_id = VAR_PERIOD
    elif variable == "Electricity":
        variable_id = VAR_ENERGY_PERIOD
    else:
        return []

    timeseries = household \
        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                        variable__id=variable_id)
    if not timeseries:
        return []

    series = TSeries(id=timeseries.id)
    series.read_from_db(db.connection)
    return [(stamp, series[stamp])
            for stamp in sorted(series.keys())
            if stamp > dt]
def handle(self, *args, **options):
    """Dump the cumulative readings of one or many users to CSV files.

    The first positional argument is either a full username or one of the
    prefixes ``"GR"``, ``"GB"``, ``"PT"``, ``"GBA"``; a prefix selects every
    user whose username starts with it.  For each user the household series
    selected by ``VAR_CUMULATIVE`` is read from the database and every
    reading that is neither NaN nor zero is written as ``[timestamp, value]``
    to ``data/timeseries_cumulative_<username>.csv``.

    :return: ``-1`` when no argument was given, otherwise ``None``.  Any
        exception raised while exporting is printed, not propagated, and
        stops the export of the remaining users.
    """
    if not args:
        print("I need a username!")
        return -1
    username = args[0]

    try:
        if username not in ["GR", "GB", "PT", "GBA"]:
            users = User.objects.filter(username=username)
        else:
            users = User.objects.filter(username__startswith=username)

        for user in users:
            # Report the user being exported rather than the search argument,
            # which is the same prefix for every user of a prefix search.
            print("output for {x}".format(x=user.username))
            household = Household.objects.get(user=user)
            timeseries = household \
                .timeseries.get(variable__id=VAR_CUMULATIVE)
            series = TSeries(id=timeseries.id)
            series.read_from_db(db.connection)

            out = []
            for ts in sorted(series.keys()):
                val = series[ts]
                if isnan(val) or val == 0:
                    continue
                out.append([ts, val])

            _outfile = "timeseries_cumulative_%s.csv" % user.username
            _path = "data/"
            with open(path.join(_path, _outfile), 'w') as of:
                a = csv.writer(of, delimiter=',', quotechar='"',
                               quoting=csv.QUOTE_ALL)
                a.writerows(out)
    except Exception as e:
        print("failed with %s" % repr(e))
def has_leakage(household):
    """Check the household's hourly consumption for a possible leakage.

    The series selected by ``TSTEP_HOURLY`` / ``VAR_PERIOD`` is read and
    summed per calendar date twice: once over all readings and once over the
    readings whose hour is 3, 4 or 5 ("night").  For every date with a
    positive, non-NaN night sum and total the ratio night/total is computed.
    The ratio of the last remaining date is compared with the 90th percentile
    of the ratios of all earlier dates.

    Households whose username starts with ``'GB'`` are not checked.

    :param household: object exposing ``user.username`` and a ``timeseries``
        manager.
    :return: ``(ratio, "Y-M-D H:M")`` when the last date's ratio exceeds the
        percentile, where the time string is built from the last timestamp
        of the series; otherwise ``(0, 0)``.
    """
    name = household.user.username
    if name.startswith('GB'):  # not UK because they send daily data
        return 0, 0
    timeseries = household \
        .timeseries.get(time_step__id=TSTEP_HOURLY,
                        variable__id=VAR_PERIOD)
    series = TSeries(id=timeseries.id)
    series.read_from_db(db.connection)
    timestamps = sorted(series.keys())

    night_dict = {}
    total_dict = {}
    # Fallbacks for the partial-day check below when the series is empty.
    _t = datetime.now().time()
    _d = datetime.today().date()
    for ts in timestamps:
        _d = ts.date()
        _t = ts.time()
        val = series[ts]
        if 3 <= _t.hour <= 5:
            if val == 0:
                # NOTE(review): the original comment said "make all night 0
                # if one 0", but this only resets the running sum; non-zero
                # readings later in the window are added again -- confirm
                # the intent.
                night_dict[_d] = 0
            else:
                night_dict[_d] = night_dict.get(_d, 0) + val
        total_dict[_d] = total_dict.get(_d, 0) + val

    # remove last day if not a whole day (last reading before 23:00)
    if _t.hour < 23:
        total_dict.pop(_d, None)
        night_dict.pop(_d, None)

    dates = sorted(total_dict.keys())

    _all = []  # night/total ratios of every date except the last
    for day in dates[:-1]:
        total = total_dict[day]
        # there can be a case when there is no data for the night hours, so
        # night_dict[day] might not exist. in this case let it be zero
        night = night_dict.get(day, 0)
        if total > 0 and night > 0 and not isnan(total) and not isnan(night):
            _all.append(float(night) / float(total))

    _today = []  # night/total ratio of the last date
    for day in dates[-1:]:
        total = total_dict[day]
        # Same fallback as above: a last date without night readings used to
        # raise KeyError here.
        night = night_dict.get(day, 0)
        if total > 0 and night > 0 and not isnan(total) and not isnan(night):
            _today.append(float(night) / float(total))

    if _all and _today:
        ts = timestamps[-1]
        tm = "%s-%s-%s %s:%s" % (ts.year, ts.month, ts.day,
                                 ts.time().hour, ts.time().minute)
        p = np.percentile(np.array(_all), 90)
        for val in _today:
            if val > p:
                return val, tm
    return 0, 0
def has_leakage(household):
    """Check the household's hourly consumption for a possible leakage.

    The series selected by ``TSTEP_HOURLY`` / ``VAR_PERIOD`` is read and
    summed per calendar date twice: once over all readings and once over the
    readings whose hour is 3, 4 or 5 ("night").  For every date with a
    positive, non-NaN night sum and total the ratio night/total is computed.
    The ratio of the last remaining date is compared with the 90th percentile
    of the ratios of all earlier dates.

    Households whose username starts with ``'GB'`` are not checked.

    :param household: object exposing ``user.username`` and a ``timeseries``
        manager.
    :return: ``(ratio, "Y-M-D H:M")`` when the last date's ratio exceeds the
        percentile, where the time string is built from the last timestamp
        of the series; otherwise ``(0, 0)``.
    """
    name = household.user.username
    if name.startswith('GB'):  # not UK because they send daily data
        return 0, 0
    timeseries = household \
        .timeseries.get(time_step__id=TSTEP_HOURLY,
                        variable__id=VAR_PERIOD)
    series = TSeries(id=timeseries.id)
    series.read_from_db(db.connection)
    timestamps = sorted(series.keys())

    night_dict = {}
    total_dict = {}
    # Fallbacks for the partial-day check below when the series is empty.
    _t = datetime.now().time()
    _d = datetime.today().date()
    for ts in timestamps:
        _d = ts.date()
        _t = ts.time()
        val = series[ts]
        if 3 <= _t.hour <= 5:
            if val == 0:
                # NOTE(review): the original comment said "make all night 0
                # if one 0", but this only resets the running sum; non-zero
                # readings later in the window are added again -- confirm
                # the intent.
                night_dict[_d] = 0
            else:
                night_dict[_d] = night_dict.get(_d, 0) + val
        total_dict[_d] = total_dict.get(_d, 0) + val

    # remove last day if not a whole day (last reading before 23:00)
    if _t.hour < 23:
        total_dict.pop(_d, None)
        night_dict.pop(_d, None)

    dates = sorted(total_dict.keys())

    _all = []  # night/total ratios of every date except the last
    for day in dates[:-1]:
        total = total_dict[day]
        # there can be a case when there is no data for the night hours, so
        # night_dict[day] might not exist. in this case let it be zero
        night = night_dict.get(day, 0)
        if total > 0 and night > 0 and not isnan(total) and not isnan(night):
            _all.append(float(night) / float(total))

    _today = []  # night/total ratio of the last date
    for day in dates[-1:]:
        total = total_dict[day]
        # Same fallback as above: a last date without night readings used to
        # raise KeyError here.
        night = night_dict.get(day, 0)
        if total > 0 and night > 0 and not isnan(total) and not isnan(night):
            _today.append(float(night) / float(total))

    if _all and _today:
        ts = timestamps[-1]
        tm = "%s-%s-%s %s:%s" % (ts.year, ts.month, ts.day,
                                 ts.time().hour, ts.time().minute)
        p = np.percentile(np.array(_all), 90)
        for val in _today:
            if val > p:
                return val, tm
    return 0, 0
def create_objects(data, usernames, force, z_names, z_dict):
    """
    Create users, households and series for parsed meter data and store the
    readings as cumulative values.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-collapsed source; confirm it against version control,
    in particular the placement of the final ``calc_occupancy`` check.

    :param data: meter_id -> consumption_type -> [timestamp, volume]
    :param usernames: meter_id -> username
    :param force: True to also process series whose first parsed timestamp
        is not later than the latest timestamp already in the database;
        otherwise such series are skipped
    :param z_names: zone names; the first one is the fallback zone
    :param z_dict: username -> zone name
    :return: list of the households returned by ``create_household``
    """
    households = []
    # Create user (household owner), household, database series placeholders
    hh_ids = sorted(data.keys())
    found = False
    for hh_id in hh_ids:
        username = usernames[hh_id]
        # No-op branch; looks like a leftover debugger breakpoint hook.
        if username == "PT94993":
            pass
        try:
            zone_name = z_dict[username]
        except KeyError:
            # Users without an explicit zone go to the first zone.
            zone_name = z_names[0]
        zone = DMA.objects.get(name=zone_name)
        user, created = create_user(username, hh_id)
        household, found = create_household(hh_id, user, zone.id)
        households.append(household)
        db_series = create_raw_timeseries(household)
        create_processed_timeseries(household)
        timeseries_data = {}
        # Now we will create timeseries.Timeseries() and we will add
        # parsed values
        for variable in db_series:
            if variable not in ('WaterCold', 'Electricity'):
                continue
            exists = False
            s, e = timeseries_bounding_dates_from_db(db.connection,
                                                     db_series[variable].id)
            latest_ts = e
            ts_id = db_series[variable].id
            # checking to see if timeseries records already exist in order
            # to append
            # d = read_timeseries_tail_from_db(db.connection, ts_id)
            total = 0.0
            # if s or e:
            #     exists = True
            #     timeseries = TSeries(ts_id)
            #     timeseries.read_from_db(db.connection)
            # else:
            #     timeseries = TSeries()
            #     timeseries.id = ts_id
            _dict = data[hh_id]
            arr = _dict[variable]
            series = arr
            if not series:
                continue
            # Parsed readings that are older than the latest stored timestamp.
            earlier = []
            if (not latest_ts) or (latest_ts < series[0][0]):  # append
                timeseries = TSeries()
                timeseries.id = ts_id
                try:
                    tail = read_timeseries_tail_from_db(db.connection, ts_id)
                    total = float(tail[1])  # keep up from last value
                except Exception as e:
                    # Any failure to read the tail restarts the running
                    # total from zero.
                    log.debug(repr(e))
                    total = 0
                for timestamp, value in series:
                    if (not latest_ts) or (timestamp > latest_ts):
                        if not isnan(value):
                            # Store the running (cumulative) total, not the
                            # parsed value itself.
                            total += value
                            timeseries[timestamp] = total
                        else:
                            timeseries[timestamp] = float('NaN')
                    elif timestamp < latest_ts:
                        earlier.append((timestamp, value))
                timeseries.append_to_db(db=db.connection,
                                        transaction=transaction,
                                        commit=True)
            elif latest_ts >= series[0][0]:
                if not force:  # ignore
                    continue
                else:  # insert
                    for timestamp, value in series:
                        if timestamp < latest_ts:
                            earlier.append((timestamp, value))
            if earlier and ("GR" in username or "GBA" in username):
                # insert (only for athens)
                # print "appending %s items for %s" % (len(earlier), username)
                if variable == "WaterCold":
                    ts15 = household \
                        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                                        variable__id=VAR_PERIOD)
                    series15 = TSeries(id=ts15.id)
                elif variable == "Electricity":
                    ts15 = household \
                        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                                        variable__id=VAR_ENERGY_PERIOD)
                    series15 = TSeries(id=ts15.id)
                # Write the late readings as-is into the 15-minute series.
                series15.read_from_db(db.connection)
                for ts, value in earlier:
                    series15[ts] = value
                series15.write_to_db(db=db.connection,
                                     transaction=transaction,
                                     commit=True)
                raw_ts = TSeries(ts_id)  # read existing ts raw data
                raw_ts.read_from_db(db.connection)
                # Cumulative value to continue from, looked up at the first
                # late timestamp.
                total = get_consumption_totals(household, earlier[0][0],
                                               variable)
                init = total
                for timestamp, value in earlier:
                    if not isnan(value):
                        total += value
                        raw_ts[timestamp] = total
                    else:
                        raw_ts[timestamp] = float('NaN')
                # correct later values, too
                diff = total - init
                all_ts = sorted(raw_ts.keys())
                for ts in all_ts:
                    # `timestamp` still holds the last item of `earlier`
                    # from the loop above; only entries after it are shifted.
                    if ts <= timestamp:
                        continue
                    curr = raw_ts[ts]
                    raw_ts[ts] = curr + diff
                raw_ts.write_to_db(db=db.connection,
                                   transaction=transaction,
                                   commit=True)
        # NOTE(review): nothing visible in this function ever adds a key to
        # timeseries_data, so this condition looks unreachable -- confirm.
        if 'WaterCold' in timeseries_data and not found:  # only for new HH
            calc_occupancy(timeseries_data['WaterCold'], household)
    return households