Beispiel #1
    def strom_real(self):
        sep = os.path.sep
        t0 = time()
        path = os.path.join(BASE_DIR, "server" + sep + "forecasting" + sep + "devices" + sep + "data" + sep + "Electricity_2012Neuendorf.csv")
        raw_dataset1 = DataLoader.load_from_file(
            path, "Strom - Verbrauchertotal (Aktuell)", "\t")
        dates1 = DataLoader.load_from_file(path, "Datum", "\t")

        path2 = os.path.join(BASE_DIR, "server" + sep + "forecasting" + sep + "devices" + sep + "data" + sep + "Electricity_2013.csv")
        raw_dataset2 = DataLoader.load_from_file(
            path2, "Strom - Verbrauchertotal (Aktuell)", "\t")
        dates2 = DataLoader.load_from_file(path2, "Datum", "\t")
        t1 = time()
        dates1 = StatisticalForecast.make_hourly([int(d) for d in dates1],6)
        dates2 = StatisticalForecast.make_hourly([int(d) for d in dates2],6)
        demand1 = StatisticalForecast.make_hourly([float(val) / 1000.0 for val in raw_dataset1], 6)
        demand2 = StatisticalForecast.make_hourly([float(val) / 1000.0 for val in raw_dataset2], 6)   
        t2 = time()
        rm = StatisticalForecast.MASE(demand1,demand1[:len(demand2)],demand2)

        #split_testdata1 = DayTypeForecast.split_weekdata(demand1,samples_per_hour=1,start_date=datetime.fromtimestamp(dates1[0]))
        #split_testdata2 = DayTypeForecast.split_weekdata(demand2,samples_per_hour=1,start_date=datetime.fromtimestamp(dates2[0]))
        #for index, dataset in enumerate(split_testdata1):
        #    print self.rmse(split_testdata2[index], dataset)#StatisticalForecast.MASE(dataset, dataset[:len(split_testdata2[index])],split_testdata2[index][:len(dataset)])
        t3 = time()
        print "t0 ", t1-t0, "t1 ", t2 - t1, "t3 ",t3-t2
        print rm
Beispiel #2
    def error_arrays(self):
        # Runs one forecast per day between 2013-02-15 and 2013-08-15 on the
        # combined "Energie DG Leistung" + "Energie EG Leistung" demand,
        # lets one_forecast accumulate into period_errors, averages the
        # accumulated values over the number of days and exports them.
        # NOTE(review): this excerpt is incomplete -- the `try:` / `except`
        # lines around the one_forecast call and the end of the final
        # export_csv call are not visible here.
        sep = os.path.sep
        path = os.path.join(BASE_DIR, "server" + sep + "forecasting" + sep + "devices" + sep + "data" + sep + "Electricity_2013-6.2014Reger.csv")
        raw_dataset1 = DataLoader.load_from_file(
            path, "Energie DG Leistung", "\t")
        raw_dataset2 = DataLoader.load_from_file(
            path, "Energie EG Leistung", "\t")
        dates = DataLoader.load_from_file(path, "Datum", "\t")

#         path = os.path.join(BASE_DIR, "server" + sep + "forecasting" + sep + "devices" + sep + "data" + sep + "Electricity_1.1-12.6.2014.csv")
#         raw_dataset += DataLoader.load_from_file(
#             path, "Strom - Verbrauchertotal (Aktuell)", "\t")
#         dates += DataLoader.load_from_file(path, "Datum", "\t")
        # Scale each reading by 1/1000 and cap it at 500 before summing the
        # two columns element-wise.
        transf = lambda v: min(float(v) / 1000.0,500)
        demand = [transf(v1) + transf(v2) for v1,v2 in zip(raw_dataset1,raw_dataset2)]
        dates = StatisticalForecast.make_hourly([int(d) for d in dates],6)
        demand = StatisticalForecast.make_hourly(demand,6)#[float(val) / 1000.0 for val in raw_dataset], 6)
        start = calendar.timegm(datetime(year=2013,month=2,day=15).timetuple())
        end = calendar.timegm(datetime(year=2013,month=8,day=15).timetuple())
        # Forecast horizon: 7*24*2 samples per run.
        fc_length = 7*24*2
        #day_errors = [[0,0] for i in range(7)] #rmse, mase
        #hour_errors = [[0,0] for i in range(24)]
        # 14 accumulator pairs; the commented-out lines above suggest the
        # pair order is (rmse, mase) -- TODO confirm against one_forecast.
        period_errors = [[0,0] for i in range(14)]
        # One iteration per day (step of 24*3600 seconds).
        for timestamp in range(start, end, 24*3600):
            print "day:", datetime.fromtimestamp(timestamp)
            start_index = approximate_index(dates, timestamp)
            # Everything before the start index is training data, the next
            # fc_length samples are the test data.
            trainingdata = demand[:start_index]
            testdata = demand[start_index:start_index+fc_length]
                self.one_forecast(trainingdata, testdata, timestamp, timestamp+fc_length*3600,period_errors=period_errors)
                print "error, really now", sys.exc_info()[0]
        # Average the accumulated errors over the number of forecast runs.
        l = len(range(start, end, 24*3600))    
        period_errors = [[r/l,m/l] for r,m in period_errors]

        #(forecast_values_auto, alpha, beta, gamma) = multiplicative(trainingdata, 7*24, 7*24*2, optimization_type="MASE")
        #print alpha, beta, gamma, rmse_auto, sqrt(sum([(m - n) ** 2 for m, n in zip(forecast_values_auto, testdata)]) / len(testdata))
        #print "normal", sqrt(sum([(m - n) ** 2 for m, n in zip(forecast_values_auto, testdata)]) / len(testdata))
        #print "split", sqrt(sum([(m - n) ** 2 for m, n in zip(forecast, testdata)]) / len(testdata))
        #split_testdata = DayTypeForecast.split_weekdata(testdata,samples_per_hour=1,start_date=datetime.fromtimestamp(start_forecast))
        #plot_dataset({"measured": split_testdata[5], "forecasted": electrical_forecast.forecasted_demands[5]}, 0, True)
        #self.export_rows({"measured": testdata, "forecasted": forecast_values_auto,  "forecasted_split": forecast})
#         self.export_csv({"day_errors_rmse": zip(*day_errors)[0], "day_errors_mase": zip(*day_errors)[1],
#                           "hour_errors_rmse": zip(*hour_errors)[0], "hour_errors_mase": zip(*hour_errors)[1], 
#                           "period_errors_rmse": zip(*period_errors)[0], "hour_errors_mase": zip(*period_errors)[1]})
        # zip(*period_errors) transposes the pairs into one column per metric.
        self.export_csv(datasets=[("period_errors_rmse", zip(*period_errors)[0]), ("period_errors_mase", zip(*period_errors)[1])],
Beispiel #3
 def handle_single_data(self):
     """Fit a DSHW forecast on one training window and plot it against measurements.

     Loads the consumption and date columns of
     ``Electricity_1.1-12.6.2014.csv``, takes ``24 * 7 * 8`` samples
     starting at the index found for 2014-01-02 as training data and up to
     ``7 * 24 * 2`` following samples as test data, then plots the measured
     values next to the values returned by ``DSHWForecast.get_forecast_at``.
     """
     seconds_per_step = 3600
     train_len = 24 * 7 * 8
     test_len = 7 * 24 * 2

     csv_path = os.path.join(
         BASE_DIR, "server", "forecasting", "devices", "data",
         "Electricity_1.1-12.6.2014.csv")
     consumption = DataLoader.load_from_file(
         csv_path, "Strom - Verbrauchertotal (Aktuell)", "\t")
     # NOTE(review): the dates stay at their raw resolution while the demand
     # is reduced with make_hourly, so the index looked up in `dates` is
     # applied to a differently sampled series -- confirm this is intended.
     dates = [int(entry) for entry in
              DataLoader.load_from_file(csv_path, "Datum", "\t")]
     demand = StatisticalForecast.make_hourly(
         [float(entry) / 1000.0 for entry in consumption], 6)

     window_start = calendar.timegm(datetime(2014, 1, 2).timetuple())
     train_begin = approximate_index(dates, window_start)
     test_begin = train_begin + train_len
     trainingdata = demand[train_begin:test_begin]
     testdata = demand[test_begin:test_begin + test_len]

     # NOTE(review): the forecast start is derived from the list index
     # (index * 3600), not from a value in `dates` -- verify against what
     # BaseEnvironment and get_forecast_at expect.
     start_forecast = test_begin * seconds_per_step
     model = DSHWForecast(
         BaseEnvironment(start_forecast, False, False),
         trainingdata, samples_per_hour=1)

     # One forecast value per test sample, spaced 3600 seconds apart.
     forecast = []
     for step in range(len(testdata)):
         forecast.append(
             model.get_forecast_at(start_forecast + step * seconds_per_step))

     plot_dataset({"measured": testdata, "forecasted": forecast})
Beispiel #4
    def test_dshw_forecast(self):
        # Builds a DSHWForecast (cache disabled) from the hourly-reduced
        # dataset and checks that the first demand series holds at least
        # fc.input_hours entries.
        hourly_data = StatisticalForecast.make_hourly(self.dataset, 6)
        env = BaseEnvironment()
        fc = DSHWForecast(env, hourly_data, try_cache=False)

        # NOTE(review): the opening line of the assertion call is missing
        # from this excerpt; only its arguments are visible below.
            len(fc.demands[0]) >= fc.input_hours,
            "the day series only contains " + str(len(fc.demands[0]) / 24) +
            " days, not " + str(fc.input_weeks * 7))
Beispiel #5
 def setUp(self):
     """Reset the DataLoader CSV cache and load the 2013 demo dataset.

     The source file holds one year of data sampled in 10 minute intervals;
     the values are cast to float, converted to kW and passed through
     ``StatisticalForecast.make_hourly(..., 6)`` before being stored in
     ``self.dataset``.
     """
     # Resetting matters: other devices could have added data to the cache
     # which is unwanted here.
     DataLoader.cached_csv = {}

     csv_file = sep.join((DATA_PATH, "demo_electricity_2013.csv"))
     raw_values = DataLoader.load_from_file(
         csv_file, "Strom - Verbrauchertotal (Aktuell)", "\t")

     kilowatts = []
     for reading in raw_values:
         kilowatts.append(float(reading) / 1000.0)

     self.dataset = StatisticalForecast.make_hourly(kilowatts, 6)
    def strom_real(self):
        sep = os.path.sep
        t0 = time()
        path = os.path.join(
            BASE_DIR, "server" + sep + "forecasting" + sep + "devices" + sep +
            "data" + sep + "Electricity_2012Neuendorf.csv")
        raw_dataset1 = DataLoader.load_from_file(
            path, "Strom - Verbrauchertotal (Aktuell)", "\t")
        dates1 = DataLoader.load_from_file(path, "Datum", "\t")

        path2 = os.path.join(
            BASE_DIR, "server" + sep + "forecasting" + sep + "devices" + sep +
            "data" + sep + "Electricity_2013.csv")
        raw_dataset2 = DataLoader.load_from_file(
            path2, "Strom - Verbrauchertotal (Aktuell)", "\t")
        dates2 = DataLoader.load_from_file(path2, "Datum", "\t")

        t1 = time()

        dates1 = StatisticalForecast.make_hourly([int(d) for d in dates1], 6)
        dates2 = StatisticalForecast.make_hourly([int(d) for d in dates2], 6)
        demand1 = StatisticalForecast.make_hourly(
            [float(val) / 1000.0 for val in raw_dataset1], 6)
        demand2 = StatisticalForecast.make_hourly(
            [float(val) / 1000.0 for val in raw_dataset2], 6)
        t2 = time()

        rm = StatisticalForecast.MASE(demand1, demand1[:len(demand2)], demand2)

        #split_testdata1 = DayTypeForecast.split_weekdata(demand1,samples_per_hour=1,start_date=datetime.fromtimestamp(dates1[0]))
        #split_testdata2 = DayTypeForecast.split_weekdata(demand2,samples_per_hour=1,start_date=datetime.fromtimestamp(dates2[0]))
        #for index, dataset in enumerate(split_testdata1):
        #    print self.rmse(split_testdata2[index], dataset)#StatisticalForecast.MASE(dataset, dataset[:len(split_testdata2[index])],split_testdata2[index][:len(dataset)])
        t3 = time()
        print "t0 ", t1 - t0, "t1 ", t2 - t1, "t3 ", t3 - t2
        print rm
Beispiel #7
    def test_make_hourly(self):
        # Compares the first value returned by make_hourly with a manually
        # computed mean of the first six dataset entries.
        hourly_data = StatisticalForecast.make_hourly(self.dataset, 6)

        # Manual mean of the first six samples.
        average = 0
        for i in range(6):
            average += self.dataset[i]
        average /= 6

        # NOTE(review): the opening lines of both assertion calls are
        # missing from this excerpt; only their arguments are visible.
            hourly_data[0], average,
            "calculated average not the same as function average")
                               24 * 365,
                               msg="data for " + str(len(hourly_data) / 24) +
                               " days")
    def handle_single_data(self):
        # Fits a DSHWForecast on 24*7*8 training samples starting at the
        # index found for 2014-01-02 and plots it against the following
        # (up to) 7*24*2 measured samples.
        # NOTE(review): this excerpt is incomplete -- the DSHWForecast call
        # and the forecast list comprehension below are truncated.
        sep = os.path.sep
        path = os.path.join(
            BASE_DIR, "server" + sep + "forecasting" + sep + "devices" + sep +
            "data" + sep + "Electricity_1.1-12.6.2014.csv")
        raw_dataset = DataLoader.load_from_file(
            path, "Strom - Verbrauchertotal (Aktuell)", "\t")
        # NOTE(review): dates keep their raw resolution while demand is
        # reduced with make_hourly -- confirm the index lookup below is
        # meant to mix the two.
        dates = [
            int(d) for d in DataLoader.load_from_file(path, "Datum", "\t")
        ]  #StatisticalForecast.make_hourly([int(d) for d in DataLoader.load_from_file(path, "Datum", "\t")],6)
        demand = StatisticalForecast.make_hourly(
            [float(val) / 1000.0 for val in raw_dataset], 6)

        start = calendar.timegm(
            datetime(year=2014, month=1, day=2).timetuple())
        start_index = approximate_index(dates, start)
        train_len = 24 * 7 * 8
        trainingdata = demand[start_index:start_index + train_len]
        test_start = start_index + train_len
        testdata = demand[test_start:test_start + 7 * 24 * 2]
        # The forecast window is derived from the list index (index * 3600).
        start_forecast = test_start * 3600
        end_forecast = start_forecast + len(testdata) * 3600

        electrical_forecast = DSHWForecast(BaseEnvironment(
            start_forecast, False, False),
        forecast = [
            for timestamp in range(start_forecast, end_forecast, 3600)

        #(forecast, alpha, beta, smoothing) = linear(trainingdata, 24*6,alpha=0.4,beta=0.1)
        #forecast_nodaysplit, (alpha, beta, gamma), insample = multiplicative(trainingdata,24*7,len(testdata) ,optimization_type="RMSE")
        #forecast_nodaysplit, (alpha, beta, gamma, delta, autocorr), insample = double_seasonal(trainingdata,24,24*7,len(testdata) ,optimization_type="RMSE")
        #print alpha, beta, gamma, delta
        #print alpha, beta, gamma, rmse_auto, sqrt(sum([(m - n) ** 2 for m, n in zip(forecast_values_auto, testdata)]) / len(testdata))
        #print "normal", sqrt(sum([(m - n) ** 2 for m, n in zip(forecast_values_auto, testdata)]) / len(testdata))
        #print "split", sqrt(sum([(m - n) ** 2 for m, n in zip(forecast, testdata)]) / len(testdata))
        #split_testdata = DayTypeForecast.split_weekdata(testdata,samples_per_hour=1,start_date=datetime.fromtimestamp(start_forecast))
        #plot_dataset({"measured": split_testdata[5], "forecasted": electrical_forecast.forecasted_demands[5]}, 0, True)
        plot_dataset({"measured": testdata, "forecasted": forecast})
Beispiel #9
    def test_split_week_data(self):
        # Builds a DayTypeForecast (cache disabled) and checks the number and
        # length of its demand series as well as a per-day RMSE bound of 30.0.
        # NOTE(review): the opening lines of the assertion calls and the end
        # of the self.rmse(...) call are missing from this excerpt.
        hourly_data = StatisticalForecast.make_hourly(self.dataset, 6)
        env = BaseEnvironment()
        fc = DayTypeForecast(env, hourly_data, try_cache=False)
            len(fc.demands) == 7, "week_split does not contain 7 series")

            len(fc.demands[0]) / 24 >= fc.input_weeks,
            "the day series only contains " + str(len(fc.demands[0]) / 24) +
            " days, not " + str(fc.input_weeks) +
            " (or at least more than 50)")

        # from import plotting
        # One RMSE check per demand series (seven in total).
        for i in range(7):
            # plotting.Plotting.plot_dataset({"measured":fc.demands[i], "forecasted": fc.forecasted_demands[i]}, len(fc.demands[i]), block=True)
            rmse = self.rmse(self.dataset_2014[:len(fc.forecasted_demands[i])],
                rmse < 30.0, "MSE of " + str(rmse) + "for day" + str(i) +
                " is way too high")
Beispiel #10
    def __init__(self, device_id, env):
        # Sets up the consumer and, unless this is a demo simulation, builds
        # the module-level DSHW forecast once and shares it between instances.
        # NOTE(review): this excerpt is incomplete -- the get_data_until(...)
        # call and the right-hand side of last_forecast_update are missing.
        super(SimulatedElectricalConsumer, self).__init__(device_id, env)

        # TODO: this will have to be replaced by a database
        global electrical_forecast
        # NOTE(review): `is None` would be the idiomatic comparison here.
        if electrical_forecast == None and not env.is_demo_simulation():
            # TODO: this will have to be replaced by a database
            raw_dataset = self.get_data_until(
            # cast to float and convert to kW
            dataset = [float(val) / 1000.0 for val in raw_dataset]
            hourly_data = StatisticalForecast.make_hourly(dataset, 6)
            electrical_forecast = DSHWForecast(
                self.env, hourly_data, samples_per_hour=1)
        self.electrical_forecast = electrical_forecast

        self.new_data_interval = 24 * 60 * 60  # append data each day
        self.last_forecast_update =

        # cache the forecast for better performance
        self.start_timestamp = self.env.initial_date
        # all_data is a module-level cache filled on first use.
        global all_data
        if all_data == None:
            all_data = self.get_all_data2014()
Beispiel #11
    def __init__(self, device_id, env):
        # Sets up the consumer and, unless this is a demo simulation, builds
        # the module-level DSHW forecast once and shares it between instances.
        # NOTE(review): this excerpt is incomplete -- the get_data_until(...)
        # call, the DSHWForecast(...) call and the right-hand side of
        # last_forecast_update are truncated.
        super(SimulatedElectricalConsumer, self).__init__(device_id, env)

        # TODO: this will have to be replaced by a database
        global electrical_forecast
        # NOTE(review): `is None` would be the idiomatic comparison here.
        if electrical_forecast == None and not env.is_demo_simulation():
            # TODO: this will have to be replaced by a database
            raw_dataset = self.get_data_until(
            # cast to float and convert to kW
            dataset = [float(val) / 1000.0 for val in raw_dataset]
            hourly_data = StatisticalForecast.make_hourly(dataset, 6)
            electrical_forecast = DSHWForecast(self.env,
        self.electrical_forecast = electrical_forecast

        self.new_data_interval = 24 * 60 * 60  # append data each day
        self.last_forecast_update =

        # cache the forecast for better performance
        self.start_timestamp = self.env.initial_date
        # all_data is a module-level cache filled on first use.
        global all_data
        if all_data == None:
            all_data = self.get_all_data2014()
Beispiel #12
    def test_append_data(self):
        """Append the 2014 demo dataset to ``self.forecast``.

        NOTE(review): the excerpt ends after the final comment; the
        assertions that would use ``split_demands14`` and ``four_weeks`` are
        not visible here.
        """
        path = DATA_PATH + sep + "demo_electricity_2014.csv"
        raw_dataset_2014 = DataLoader.load_from_file(
            path, "Strom - Verbrauchertotal (Aktuell)", "\t")

        # cast to float and convert to kW
        dataset_2014 = StatisticalForecast.make_hourly(
            [float(val) / 1000.0 for val in raw_dataset_2014], 6)

        # Reference split of the same data, starting on 2014-01-01.
        start = datetime(year=2014, month=1, day=1)
        split_demands14 = DayTypeForecast.split_weekdata(
            dataset_2014, 1, start)

        self.forecast.append_values(dataset_2014, start)

        # 24 * 4 = 96 samples -- presumably four days' worth of hourly values
        # within one weekday series; TODO confirm.
        four_weeks = 24 * 4
        # check that arrays on same weekdays are equal

Beispiel #13
# Interactive experiment: fits double_seasonal on a training window of the
# 2014 demo data, plots forecast vs. measured values and creates sliders for
# alpha, gamma, delta and autocorr.
# NOTE(review): this excerpt is incomplete -- the first three body lines are
# indented deeper than the rest (a surrounding `try:` / `except` appears to
# be missing) and update_hw ends without any visible redraw or hookup.
def value_changer():
        from matplotlib.widgets import Slider, Button, RadioButtons
        from pylab import axes
        print "ljdlj"
    sep = os.path.sep
    path = os.path.join(BASE_DIR, "server" + sep + "forecasting" + sep + "simulation" + sep + "demodata" + sep + "demo_electricity_2014.csv")
    raw_data = DataLoader.load_from_file(path, "Strom - Verbrauchertotal (Aktuell)",delim="\t")
    # NOTE(review): `ind` is not used anywhere in the visible code.
    ind = len(raw_data) / 2
    kW_data = StatisticalForecast.make_hourly([float(val) / 1000.0 for val in raw_data],6) #cast to float and convert to kW
    dates = [int(d) for d in DataLoader.load_from_file(path, "Datum", "\t")]
    # NOTE(review): `input` shadows the builtin, is unused in the visible
    # code, and applies make_hourly to data that was already reduced above.
    input = make_hourly(kW_data,6)[-24*7:]
    start = calendar.timegm(datetime(year=2014,month=1,day=2).timetuple())
    start_index = approximate_index(dates, start)
    train_len= 24*7*8
    trainingdata = kW_data[start_index:start_index+train_len]
    test_start = start_index+train_len 
    testdata = kW_data[test_start:test_start+7*24*2]
    # start_forecast / end_forecast are not used in the visible code.
    start_forecast = test_start*3600
    end_forecast = start_forecast + len(testdata) * 3600

    # Initial parameter values passed to double_seasonal.
    alpha = 0.0000001
    beta = 0.0
    gamma = 0.05
    delta = 0.01
    autocorr = 0.01
    # Period lengths passed to double_seasonal: 24 and 24 * 7 samples.
    m = 24
    m2 = 24 * 7
    #forecast length
    fc = int(len(testdata))
    forecast_values, params, insample = double_seasonal(trainingdata, m,m2,fc, alpha, beta, gamma,delta,autocorr)
    values ={ 'forecasting':forecast_values, 'measured':testdata}
    (fig, sim_plot,forecast_plot) = plot_dataset(values, 0,block=False)
    axcolor = 'lightgoldenrodyellow'
    # One thin axes strip per slider, stacked at the bottom of the figure.
    axalpa = axes([0.25, 0.02, 0.65, 0.02], axisbg=axcolor)
    axautocorr  = axes([0.25, 0.05, 0.65, 0.02], axisbg=axcolor)
    axgamma  = axes([0.25, 0.08, 0.65, 0.02], axisbg=axcolor)
    axdelta  = axes([0.25, 0.11, 0.65, 0.02], axisbg=axcolor)
    alpha_slider = Slider(axalpa, 'Alpha', 0.0, 1.0, valinit=alpha)
    gamma_slider = Slider(axgamma, 'Gamma', 0.0, 1.0, valinit=gamma)
    delta_slider = Slider(axdelta, 'Delta', 0.0, 1.0, valinit=delta)
    autocorr_slider = Slider(axautocorr, 'autocorr_slider', 0.0, 1.0, valinit=autocorr)
    # Recomputes the forecast from the current slider values and prints the
    # parameters together with the MSE against the test data.
    def update_hw(val):
        alpha = alpha_slider.val
        autocorr = autocorr_slider.val
        beta = 0.0
        gamma = gamma_slider.val
        delta = delta_slider.val
        forecast_values, params, insample = double_seasonal(trainingdata, m,m2,fc, alpha, beta, gamma,delta,autocorr)
        values ={ 'forecasting':forecast_values, 'measured':testdata}
        print alpha, autocorr, gamma, MSE(testdata, forecast_values)
Beispiel #14
 def setup_forecast(self):
     """Create ``self.env`` and ``self.forecast`` from the reduced dataset.

     ``self.dataset`` is passed through
     ``StatisticalForecast.make_hourly(..., 6)`` and handed to a
     ``DayTypeForecast`` together with a fixed parameter tuple.
     """
     reduced_dataset = StatisticalForecast.make_hourly(self.dataset, 6)
     self.env = BaseEnvironment()

     # Fixed values handed over as the fifth positional argument.
     fixed_parameters = (0.0, 0.0, 1.0)
     self.forecast = DayTypeForecast(
         self.env, reduced_dataset, 1, None, fixed_parameters)
    def error_arrays(self):
        # Runs one forecast per day between 2013-02-15 and 2013-08-15 on the
        # combined "Energie DG Leistung" + "Energie EG Leistung" demand and
        # averages the accumulated period errors over the number of days.
        # NOTE(review): this excerpt is incomplete -- both load_from_file
        # calls, the demand list, the forecast call inside the loop (with its
        # try/except) and the final export call are truncated.
        sep = os.path.sep
        path = os.path.join(
            BASE_DIR, "server" + sep + "forecasting" + sep + "devices" + sep +
            "data" + sep + "Electricity_2013-6.2014Reger.csv")
        raw_dataset1 = DataLoader.load_from_file(path, "Energie DG Leistung",
        raw_dataset2 = DataLoader.load_from_file(path, "Energie EG Leistung",

        dates = DataLoader.load_from_file(path, "Datum", "\t")

        #         path = os.path.join(BASE_DIR, "server" + sep + "forecasting" + sep + "devices" + sep + "data" + sep + "Electricity_1.1-12.6.2014.csv")
        #         raw_dataset += DataLoader.load_from_file(
        #             path, "Strom - Verbrauchertotal (Aktuell)", "\t")
        #         dates += DataLoader.load_from_file(path, "Datum", "\t")
        # Scale each reading by 1/1000 and cap it at 500 before summing the
        # two columns element-wise.
        transf = lambda v: min(float(v) / 1000.0, 500)
        demand = [
            transf(v1) + transf(v2)
            for v1, v2 in zip(raw_dataset1, raw_dataset2)

        dates = StatisticalForecast.make_hourly([int(d) for d in dates], 6)
        demand = StatisticalForecast.make_hourly(
            demand, 6)  #[float(val) / 1000.0 for val in raw_dataset], 6)

        start = calendar.timegm(
            datetime(year=2013, month=2, day=15).timetuple())
        end = calendar.timegm(datetime(year=2013, month=8, day=15).timetuple())
        # Forecast horizon: 7 * 24 * 2 samples per run.
        fc_length = 7 * 24 * 2

        #day_errors = [[0,0] for i in range(7)] #rmse, mase
        #hour_errors = [[0,0] for i in range(24)]
        # 14 accumulator pairs; the commented-out lines above suggest the
        # pair order is (rmse, mase) -- TODO confirm.
        period_errors = [[0, 0] for i in range(14)]
        # One iteration per day (step of 24 * 3600 seconds).
        for timestamp in range(start, end, 24 * 3600):
            print "day:", datetime.fromtimestamp(timestamp)

            start_index = approximate_index(dates, timestamp)
            # Everything before the start index is training data, the next
            # fc_length samples are the test data.
            trainingdata = demand[:start_index]
            testdata = demand[start_index:start_index + fc_length]
                                  timestamp + fc_length * 3600,
                print "error, really now", sys.exc_info()[0]

        # Average the accumulated errors over the number of forecast runs.
        l = len(range(start, end, 24 * 3600))
        period_errors = [[r / l, m / l] for r, m in period_errors]

        #(forecast_values_auto, alpha, beta, gamma) = multiplicative(trainingdata, 7*24, 7*24*2, optimization_type="MASE")
        #print alpha, beta, gamma, rmse_auto, sqrt(sum([(m - n) ** 2 for m, n in zip(forecast_values_auto, testdata)]) / len(testdata))
        #print "normal", sqrt(sum([(m - n) ** 2 for m, n in zip(forecast_values_auto, testdata)]) / len(testdata))
        #print "split", sqrt(sum([(m - n) ** 2 for m, n in zip(forecast, testdata)]) / len(testdata))
        #split_testdata = DayTypeForecast.split_weekdata(testdata,samples_per_hour=1,start_date=datetime.fromtimestamp(start_forecast))
        #plot_dataset({"measured": split_testdata[5], "forecasted": electrical_forecast.forecasted_demands[5]}, 0, True)
        #self.export_rows({"measured": testdata, "forecasted": forecast_values_auto,  "forecasted_split": forecast})

        #         self.export_csv({"day_errors_rmse": zip(*day_errors)[0], "day_errors_mase": zip(*day_errors)[1],
        #                           "hour_errors_rmse": zip(*hour_errors)[0], "hour_errors_mase": zip(*hour_errors)[1],
        #                           "period_errors_rmse": zip(*period_errors)[0], "hour_errors_mase": zip(*period_errors)[1]})

        # zip(*period_errors) transposes the pairs into one column per metric.
            ("period_errors_rmse", zip(*period_errors)[0]),
            ("period_errors_mase", zip(*period_errors)[1])