def get_data_until(self, timestamp, start_timestamp=None):
    date = datetime.utcfromtimestamp(timestamp).replace(tzinfo=utc)

    # lazily load the demo data and cache it on the class, so that all
    # instances share one copy
    if self.__class__.dataset == [] or self.__class__.dates == []:
        path = DATA_PATH + sep + "demo_electricity_2012.csv"
        raw_dataset = DataLoader.load_from_file(
            path, "Strom - Verbrauchertotal (Aktuell)", "\t")
        dates = DataLoader.load_from_file(path, "Datum", "\t")

        path = DATA_PATH + sep + "demo_electricity_2013.csv"
        raw_dataset += DataLoader.load_from_file(
            path, "Strom - Verbrauchertotal (Aktuell)", "\t")
        dates += DataLoader.load_from_file(path, "Datum", "\t")

        # only load the 2014 data if the simulated time requires it
        if date.year == 2014:
            path = DATA_PATH + sep + "demo_electricity_2014.csv"
            raw_dataset += DataLoader.load_from_file(
                path, "Strom - Verbrauchertotal (Aktuell)", "\t")
            dates += DataLoader.load_from_file(path, "Datum", "\t")

        self.__class__.dates = [int(d) for d in dates]
        self.__class__.dataset = raw_dataset

    dates = self.__class__.dates
    dataset = self.__class__.dataset

    # take data until the simulated "now" time
    now_index = approximate_index(dates, timestamp)
    if start_timestamp is None:
        return dataset[:now_index]
    start_index = approximate_index(dates, start_timestamp)
    return dataset[start_index:now_index]
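# For illustration, a hypothetical call that slices one simulated week of
# demand values out of the cached dataset (the instance name and the
# timestamps below are made up, not taken from this codebase):
#
#     now = calendar.timegm(datetime(2013, 6, 8).timetuple())
#     week_ago = now - 7 * 24 * 3600
#     last_week = simulation.get_data_until(now, week_ago)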
def test_approximate_index(self):
    data = [1, 2, 3, 5, 6, 7, 8]
    # a value between two entries may resolve to either neighbour
    self.assertTrue(approximate_index(data, 4) in [2, 3],
                    "index approximation was wrong")
    # exact matches resolve to the exact index
    self.assertEqual(approximate_index(data, 8), data.index(8))
    # values outside the data range resolve to -1
    self.assertEqual(approximate_index(data, 9), -1)
    self.assertEqual(approximate_index(data, 1.2436), 0)
    self.assertEqual(approximate_index(data, 0), -1)
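# A minimal sketch of the behaviour the test above pins down, assuming a
# bisect-based closest-match lookup on a sorted list; the real
# approximate_index may be implemented differently.
from bisect import bisect_left

def approximate_index_sketch(data, target):
    # targets outside the data range resolve to -1
    if not data or target < data[0] or target > data[-1]:
        return -1
    pos = bisect_left(data, target)
    if data[pos] == target:
        return pos
    # otherwise return the index of the closer neighbour
    if pos > 0 and target - data[pos - 1] <= data[pos] - target:
        return pos - 1
    return pos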
def get_consumption_power(self):
    """Use the forecast to determine the current power demand"""
    if self.env.forecast:
        return self.electrical_forecast.get_forecast_at(self.env.now)
    else:
        # without a forecast, look up the measured value closest to "now"
        date_index = approximate_index(all_data["dates"], self.env.now)
        return float(all_data["dataset"][date_index])
def handle_single_data(self):
    sep = os.path.sep
    path = os.path.join(BASE_DIR, "server" + sep + "forecasting" + sep +
                        "devices" + sep + "data" + sep +
                        "Electricity_1.1-12.6.2014.csv")
    raw_dataset = DataLoader.load_from_file(
        path, "Strom - Verbrauchertotal (Aktuell)", "\t")
    dates = [int(d) for d in DataLoader.load_from_file(path, "Datum", "\t")]
    # convert W to kW and aggregate the 6 samples per hour to hourly values
    demand = StatisticalForecast.make_hourly(
        [float(val) / 1000.0 for val in raw_dataset], 6)

    # train on eight weeks of data, then forecast the following two weeks
    start = calendar.timegm(datetime(year=2014, month=1, day=2).timetuple())
    start_index = approximate_index(dates, start)
    train_len = 24 * 7 * 8
    trainingdata = demand[start_index:start_index + train_len]
    test_start = start_index + train_len
    testdata = demand[test_start:test_start + 7 * 24 * 2]
    start_forecast = test_start * 3600
    end_forecast = start_forecast + len(testdata) * 3600

    electrical_forecast = DSHWForecast(
        BaseEnvironment(start_forecast, False, False), trainingdata,
        samples_per_hour=1)
    forecast = [electrical_forecast.get_forecast_at(timestamp)
                for timestamp in range(start_forecast, end_forecast, 3600)]

    # alternative models kept for experimentation:
    #(forecast, alpha, beta, smoothing) = linear(trainingdata, 24*6, alpha=0.4, beta=0.1)
    #forecast_nodaysplit, (alpha, beta, gamma), insample = multiplicative(trainingdata, 24*7, len(testdata), optimization_type="RMSE")
    #forecast_nodaysplit, (alpha, beta, gamma, delta, autocorr), insample = double_seasonal(trainingdata, 24, 24*7, len(testdata), optimization_type="RMSE")
    #split_testdata = DayTypeForecast.split_weekdata(testdata, samples_per_hour=1, start_date=datetime.fromtimestamp(start_forecast))
    #plot_dataset({"measured": split_testdata[5], "forecasted": electrical_forecast.forecasted_demands[5]}, 0, True)

    plot_dataset({"measured": testdata, "forecasted": forecast})
    #self.export_rows({"measured": testdata, "forecasted daysplit": forecast, "nodaysplit": forecast_nodaysplit})
    #self.export_csv(testdata)
def error_arrays(self):
    sep = os.path.sep
    path = os.path.join(BASE_DIR, "server" + sep + "forecasting" + sep +
                        "devices" + sep + "data" + sep +
                        "Electricity_2013-6.2014Reger.csv")
    raw_dataset1 = DataLoader.load_from_file(path, "Energie DG Leistung", "\t")
    raw_dataset2 = DataLoader.load_from_file(path, "Energie EG Leistung", "\t")
    dates = DataLoader.load_from_file(path, "Datum", "\t")

    # convert W to kW, clip outliers above 500 kW and sum both floors
    transf = lambda v: min(float(v) / 1000.0, 500)
    demand = [transf(v1) + transf(v2)
              for v1, v2 in zip(raw_dataset1, raw_dataset2)]
    dates = StatisticalForecast.make_hourly([int(d) for d in dates], 6)
    demand = StatisticalForecast.make_hourly(demand, 6)

    # roll a two-week forecast over the data, one run per day
    start = calendar.timegm(datetime(year=2013, month=2, day=15).timetuple())
    end = calendar.timegm(datetime(year=2013, month=8, day=15).timetuple())
    fc_length = 7 * 24 * 2

    # one (rmse, mase) pair per day of the two-week forecast horizon
    period_errors = [[0, 0] for i in range(14)]
    runs = 0
    for timestamp in range(start, end, 24 * 3600):
        print "day:", datetime.fromtimestamp(timestamp)
        start_index = approximate_index(dates, timestamp)
        trainingdata = demand[:start_index]
        testdata = demand[start_index:start_index + fc_length]
        try:
            self.one_forecast(trainingdata, testdata, timestamp,
                              timestamp + fc_length * 3600,
                              period_errors=period_errors)
            runs += 1
        except Exception:
            print "forecast failed:", sys.exc_info()[0]
            break

    # average the accumulated errors over the completed forecast runs
    period_errors = [[r / runs, m / runs] for r, m in period_errors]

    self.export_csv(datasets=[("period_errors_rmse", zip(*period_errors)[0]),
                              ("period_errors_mase", zip(*period_errors)[1])],
                    name="eval_dshw.csv")
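# The two error measures accumulated in period_errors are RMSE and MASE
# (see the exported column names above). A minimal sketch of both, assuming
# one_forecast evaluates them per horizon slice; these helper names are
# illustrative, not the real API:
from math import sqrt

def rmse_sketch(forecast, observed):
    # root mean square error of a forecast against measured values
    return sqrt(sum((f - o) ** 2 for f, o in zip(forecast, observed)) /
                float(len(observed)))

def mase_sketch(forecast, observed, training, m=24):
    # mean absolute scaled error: mean absolute forecast error, scaled by
    # the in-sample error of a seasonal naive forecast with period m
    naive = sum(abs(training[i] - training[i - m])
                for i in range(m, len(training))) / float(len(training) - m)
    errors = sum(abs(f - o) for f, o in zip(forecast, observed))
    return errors / (float(len(observed)) * naive)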
def get_temperature(self, date):
    """
    Retrieve a temperature at a certain time. The class will cache the
    values after the first query to speed up subsequent requests.

    :param datetime date: The time

    Raises an ``Exception`` if there are fewer than two values in the
    database.
    """
    if self.cache_real_values == [[], []]:
        # build two parallel lists: unix timestamps and temperatures
        real_temps = RealWeatherValue.objects.all()
        for entry in real_temps:
            self.cache_real_values[0].append(
                calendar.timegm(entry.timestamp.utctimetuple()))
            self.cache_real_values[1].append(float(entry.temperature))
    if len(self.cache_real_values[1]) < 2:
        raise Exception("not enough weather values in database")
    idx = approximate_index(self.cache_real_values[0],
                            calendar.timegm(date.utctimetuple()))
    return self.cache_real_values[1][idx]
def value_changer():
    try:
        from matplotlib.widgets import Slider
        from pylab import axes
    except ImportError:
        print "matplotlib is required for the interactive value changer"
        return

    sep = os.path.sep
    path = os.path.join(BASE_DIR, "server" + sep + "forecasting" + sep +
                        "simulation" + sep + "demodata" + sep +
                        "demo_electricity_2014.csv")
    raw_data = DataLoader.load_from_file(
        path, "Strom - Verbrauchertotal (Aktuell)", delim="\t")
    # cast to float and convert to kW
    kW_data = StatisticalForecast.make_hourly(
        [float(val) / 1000.0 for val in raw_data], 6)
    dates = [int(d) for d in DataLoader.load_from_file(path, "Datum", "\t")]

    # train on eight weeks of data, test against the following two weeks
    start = calendar.timegm(datetime(year=2014, month=1, day=2).timetuple())
    start_index = approximate_index(dates, start)
    train_len = 24 * 7 * 8
    trainingdata = kW_data[start_index:start_index + train_len]
    test_start = start_index + train_len
    testdata = kW_data[test_start:test_start + 7 * 24 * 2]

    # initial smoothing parameters
    alpha = 0.0000001
    beta = 0.0
    gamma = 0.05
    delta = 0.01
    autocorr = 0.01
    m = 24         # daily season
    m2 = 24 * 7    # weekly season
    fc = int(len(testdata))  # forecast length

    forecast_values, params, insample = double_seasonal(
        trainingdata, m, m2, fc, alpha, beta, gamma, delta, autocorr)
    values = {'forecasting': forecast_values, 'measured': testdata}
    (fig, sim_plot, forecast_plot) = plot_dataset(values, 0, block=False)

    # sliders for interactively tuning the smoothing parameters
    axcolor = 'lightgoldenrodyellow'
    axalpa = axes([0.25, 0.02, 0.65, 0.02], axisbg=axcolor)
    axautocorr = axes([0.25, 0.05, 0.65, 0.02], axisbg=axcolor)
    axgamma = axes([0.25, 0.08, 0.65, 0.02], axisbg=axcolor)
    axdelta = axes([0.25, 0.11, 0.65, 0.02], axisbg=axcolor)

    alpha_slider = Slider(axalpa, 'Alpha', 0.0, 1.0, valinit=alpha)
    gamma_slider = Slider(axgamma, 'Gamma', 0.0, 1.0, valinit=gamma)
    delta_slider = Slider(axdelta, 'Delta', 0.0, 1.0, valinit=delta)
    autocorr_slider = Slider(axautocorr, 'Autocorr', 0.0, 1.0,
                             valinit=autocorr)

    def update_hw(val):
        # recompute the forecast with the current slider values
        alpha = alpha_slider.val
        autocorr = autocorr_slider.val
        beta = 0.0
        gamma = gamma_slider.val
        delta = delta_slider.val
        forecast_values, params, insample = double_seasonal(
            trainingdata, m, m2, fc, alpha, beta, gamma, delta, autocorr)
        forecast_plot.set_ydata(forecast_values)
        sim_plot.set_ydata(testdata)
        fig.canvas.draw_idle()
        print alpha, autocorr, gamma, MSE(testdata, forecast_values)

    alpha_slider.on_changed(update_hw)
    autocorr_slider.on_changed(update_hw)
    gamma_slider.on_changed(update_hw)
    delta_slider.on_changed(update_hw)
    plt.show()
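# For reference, double_seasonal appears to implement a Taylor-style double
# seasonal Holt-Winters model with an autocorrelation correction. A sketch of
# the standard multiplicative recurrences, assuming m = 24 (daily) and
# m2 = 168 (weekly); the mapping of gamma/delta to the daily and weekly
# seasonal indices is an assumption, not confirmed by this code:
#
#   level:    S_t = alpha * y_t / (D_{t-m} * W_{t-m2}) + (1 - alpha) * (S_{t-1} + T_{t-1})
#   trend:    T_t = beta * (S_t - S_{t-1}) + (1 - beta) * T_{t-1}
#   daily:    D_t = gamma * y_t / (S_t * W_{t-m2}) + (1 - gamma) * D_{t-m}
#   weekly:   W_t = delta * y_t / (S_t * D_{t-m}) + (1 - delta) * W_{t-m2}
#   forecast: y^_{t+k} = (S_t + k * T_t) * D_{t-m+k} * W_{t-m2+k}
#             + autocorr**k * (y_t - (S_{t-1} + T_{t-1}) * D_{t-m} * W_{t-m2})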