def load_cons_model_ens_dfs(df):
    """Build one consumption-model data frame per weather-ensemble member.

    For each of three hard-coded periods (fit, validation, test) and each of
    the 25 ensemble members, a frame is built with the columns 'cons',
    'cons24hbefore' and '<var>24hdiff' for every variable in
    ('Tout', 'vWind', 'sunRad', 'hum').  '<var>24hdiff' is the member's
    ensemble series for the period minus the series loaded for the same
    period shifted 24 hours back.  The three period frames of a member are
    indexed by their hourly timesteps and concatenated.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a 'cons' column and an index that can be label-sliced with
        datetimes, covering every period and the 24 hours before it.

    Returns
    -------
    list of pandas.DataFrame
        25 frames; element ``i`` belongs to ensemble member ``i``.
    """
    n_members = 25
    one_day = dt.timedelta(days=1)
    weathervars = ['Tout', 'vWind', 'sunRad', 'hum']
    # fit, validation and test period, in that order
    periods = [
        ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                 dt.datetime(2016, 1, 15, 0)),
        ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                 dt.datetime(2016, 2, 5, 0)),
        ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1),
                                 dt.datetime(2016, 3, 1, 0)),
    ]

    # frames_per_period[p][i] is the frame of member i for period p
    frames_per_period = []
    for ts in periods:
        # The consumption columns are identical for all members, so slice
        # df once per period.  np.array() drops the datetime index of df so
        # that the values are assigned positionally.
        cons = np.array(df.loc[ts[0]:ts[-1]]['cons'])
        cons_before = np.array(
            df.loc[ts[0] - one_day:ts[-1] - one_day]['cons'])
        frames = []
        for _ in range(n_members):
            frame = pd.DataFrame()
            frame['cons'] = cons.copy()
            frame['cons24hbefore'] = cons_before.copy()
            frames.append(frame)
        frames_per_period.append(frames)

    for v in weathervars:
        for ts, frames in zip(periods, frames_per_period):
            diff = ens.load_ens_timeseries_as_df(
                ts_start=ts[0], ts_end=ts[-1], weathervars=[v]) \
                - ens.load_ens_timeseries_as_df(
                    ts_start=ts[0] - one_day, ts_end=ts[-1] - one_day,
                    weathervars=[v])
            for i, frame in enumerate(frames):
                frame['%s24hdiff' % v] = diff[v + str(i)]

    all_data = []
    for i in range(n_members):
        member_frames = []
        for ts, frames in zip(periods, frames_per_period):
            frames[i].set_index(pd.DatetimeIndex(ts), inplace=True)
            member_frames.append(frames[i])
        all_data.append(pd.concat(member_frames))
    return all_data
def load_cons_model_dfs(df):
    """Build the ensemble-mean consumption-model frames.

    Takes the data frame with the already calculated consumptions and builds
    one frame for each of three hard-coded periods (fit, validation, test).
    Each frame has the columns 'cons', 'cons24hbefore' and '<var>24hdiff' for
    every variable in ('Tout', 'vWind', 'sunRad', 'hum'), where
    '<var>24hdiff' is the row-wise mean of the ensemble for the period minus
    the row-wise mean for the same period shifted 24 hours back.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a 'cons' column and an index that can be label-sliced with
        datetimes, covering every period and the 24 hours before it.

    Returns
    -------
    tuple
        ``(fit_data, vali_data, test_data, all_data)``; each period frame is
        indexed by its hourly timesteps and ``all_data`` is their
        concatenation.
    """
    one_day = dt.timedelta(days=1)
    weathervars = ['Tout', 'vWind', 'sunRad', 'hum']

    def ens_mean(ts_start, ts_end, var):
        # Row-wise mean over the ensemble columns of one variable.
        return ens.load_ens_timeseries_as_df(
            ts_start=ts_start, ts_end=ts_end,
            weathervars=[var]).mean(axis=1)

    def build_frame(ts):
        frame = pd.DataFrame()
        # np.array() drops the datetime index of df so that the values are
        # assigned positionally instead of being aligned on the index.
        frame['cons'] = np.array(df.loc[ts[0]:ts[-1]]['cons'])
        frame['cons24hbefore'] = np.array(
            df.loc[ts[0] - one_day:ts[-1] - one_day]['cons'])
        for v in weathervars:
            frame['%s24hdiff' % v] = ens_mean(ts[0], ts[-1], v) \
                - ens_mean(ts[0] - one_day, ts[-1] - one_day, v)
        frame.set_index(pd.DatetimeIndex(ts), inplace=True)
        return frame

    fit_ts = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                      dt.datetime(2016, 1, 15, 0))
    vali_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                       dt.datetime(2016, 2, 5, 0))
    test_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1),
                                       dt.datetime(2016, 3, 1, 0))

    fit_data = build_frame(fit_ts)
    vali_data = build_frame(vali_ts)
    test_data = build_frame(test_ts)
    all_data = pd.concat([fit_data, vali_data, test_data])
    return fit_data, vali_data, test_data, all_data
def plot_best_model():
    """Fit and plot the weather-only regression for the fit period.

    Regresses the module-level ``y`` on the 'Tout', 'Toutavg24', 'vWind' and
    'vWindavg24' columns of the module-level ``all_data``.  The upper panel
    shows ``y``, the fitted values, the band returned by
    ``wls_prediction_std`` and the fitted values plus the mean residual per
    position in the 24-step cycle.  The lower panel shows the residual and
    that mean-residual profile.  Three error measures and the regression
    summary are printed.

    Returns
    -------
    The result object returned by ``mlin_regression``.
    """
    plt.close('all')
    # The 'hours', 'hours2', ..., 'hours8' columns are intentionally left out.
    columns = ['Tout', 'Toutavg24', 'vWind', 'vWindavg24']
    X = all_data[columns]
    res = mlin_regression(y, X)
    timesteps = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                         dt.datetime(2016, 1, 15, 0))

    plt.subplot(2, 1, 1)
    plt.plot_date(timesteps, y, 'b', label='Actual prodution')
    plt.plot_date(timesteps, res.fittedvalues, 'r', label='Weather model')
    prstd, iv_l, iv_u = wls_prediction_std(res)
    plt.plot_date(timesteps, iv_u, 'r--', label='95% conf. int.')
    plt.plot_date(timesteps, iv_l, 'r--')

    # Mean residual for each of the 24 positions in the daily cycle,
    # repeated for every whole day of the period (29 for the dates above).
    mean_day_resid = [res.resid[i::24].mean() for i in range(24)]
    n_days = len(timesteps) // 24
    mean_resid_series = np.tile(mean_day_resid, n_days)
    plt.plot_date(timesteps, res.fittedvalues + mean_resid_series, 'g',
                  label='Weather model + avg daily profile')
    plt.ylabel('MW')
    plt.legend(loc=2)

    plt.subplot(2, 1, 2)
    plt.plot_date(timesteps, res.resid, '-', label='Residual')
    plt.plot_date(timesteps, mean_resid_series)
    plt.ylabel('MW')
    plt.legend()

    # Local names chosen so they do not shadow the module-level mae()/mape().
    mape_with_profile = np.mean(
        np.abs((res.fittedvalues + mean_resid_series - y) / y))
    mape_weather_only = np.mean(np.abs((res.resid) / y))
    mae_with_profile = np.mean(
        np.abs((res.fittedvalues + mean_resid_series - y)))
    print("%s %s %s" % (mape_with_profile, mape_weather_only,
                        mae_with_profile))
    # The summary used to be computed and thrown away; print it like
    # try_prod24h_before does.
    print(res.summary())
    return res
def fetch_BrabrandSydWeather(weathervar, from_time, to_time):
    """Fetch an hourly time series from the Brabrand Syd weather station.

    Note that this data has not been validated!

    Parameters
    ----------
    weathervar : str
        Key into ``BBSyd_pi_dict`` identifying the weather variable.
    from_time, to_time : datetime.datetime
        First and last timestep of the series.

    Returns
    -------
    numpy.ndarray
        The values as floats, ordered by timestamp.

    Raises
    ------
    AssertionError
        If the returned timestamps differ from
        ``ens.gen_hourly_timesteps(from_time, to_time)``.
    """
    conn = connect()
    PInr = BBSyd_pi_dict[weathervar]
    sql_query = """USE [DM_VLP]
                SELECT [TimeStamp], [Value], [Beskrivelse]
                FROM [dbo].[Meteorologi]
                WHERE PInr=%s AND TimeStamp BETWEEN '%s' AND '%s'
                ORDER BY TimeStamp""" % (PInr, str(from_time), str(to_time))
    data = extractdata(conn, sql_query)
    timestamps, values, _description = zip(*data)

    # Explicit raise instead of ``assert`` so that the check is not removed
    # when Python runs with -O.
    expected = ens.gen_hourly_timesteps(from_time, to_time)
    if not (list(timestamps) == expected):
        raise AssertionError("Timesteps are not hour by hour")

    return np.array(values, dtype=float)
def weather_forecast_ensemble():
    """Plot the 25 ensemble members of three weather variables (figure 2).

    Loads the ensemble series for 'Tout', 'vWind' and 'sunRad' for the hours
    from 2016-01-20 01:00 to 2016-02-05 00:00, draws one panel per variable
    and saves the figure to
    'figures/first_articlefigs/weather_forecast_ensemble.pdf'.

    Returns
    -------
    tuple
        ``(ens_data, axes)``: the loaded ensemble frame and the axes array.
    """
    plt.close('all')
    first_hour = dt.datetime(2016, 1, 20, 1)
    last_hour = dt.datetime(2016, 2, 5, 0)
    ts = ens.gen_hourly_timesteps(first_hour, last_hour)
    ens_data = ens.load_ens_timeseries_as_df(
        ts_start=ts[0], ts_end=ts[-1],
        weathervars=['Tout', 'vWind', 'sunRad'])

    fig, axes = plt.subplots(3, 1, sharex=True,
                             figsize=(colwidth, 1.65 * colwidth))
    plt.xticks(size=5)

    # (variable, shift of the colour sequence, y-axis label) for each panel
    panels = [
        ('Tout', 15, u'Outside temperature [%sC]' % uni_degree),
        ('vWind', 23, 'Wind speed [m/s]'),
        ('sunRad', 6, u'Solar irradiance [W/m%s]' % uni_squared),
    ]
    member_ids = [str(i) for i in range(25)]
    for ax, (name, shift, ylabel) in zip(axes, panels):
        shades = np.roll(np.linspace(0, 1, 25), shift)
        ax.set_prop_cycle(cycler('color', plt.cm.Dark2(shades)))
        member_columns = [name + member for member in member_ids]
        ax.plot_date(ts, ens_data[member_columns], '-', lw=0.5)
        ax.set_ylabel(ylabel, size=8)
        ax.tick_params(axis='y', which='major', labelsize=8)
        plt.box(True)

    plt.tight_layout()
    bottom = axes[-1]
    bottom.xaxis.set_major_formatter(DateFormatter('%b %d'))
    bottom.set_xlim(dt.datetime(2016, 1, 20, 0), dt.datetime(2016, 2, 5, 0))
    fig.savefig('figures/first_articlefigs/weather_forecast_ensemble.pdf')
    return ens_data, axes
def try_prod24h_before(
        columns=['Tout', 'vWind', 'vWindavg24', 'prod24h_before'],
        add_const=False,
        y=y):
    """Fit and plot a regression of ``y`` on selected ``all_data`` columns.

    Parameters
    ----------
    columns : list of str
        Columns of the module-level ``all_data`` used as regressors.
    add_const : bool
        Forwarded to ``mlin_regression``.
    y :
        Regression target; defaults to the module-level ``y`` as it was when
        this function was defined.

    Returns
    -------
    The result object returned by ``mlin_regression``.  MAE, MAPE, RMSE and
    the regression summary are printed as a side effect.
    """
    plt.close('all')
    regressors = all_data[columns]
    fit = mlin_regression(y, regressors, add_const=add_const)
    hours = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                     dt.datetime(2016, 1, 15, 0))

    # upper panel: target, fitted values and the wls_prediction_std band
    plt.subplot(2, 1, 1)
    plt.plot_date(hours, y, 'b', label='Actual prodution')
    plt.plot_date(hours, fit.fittedvalues, 'r', label='Weather model')
    _, band_low, band_high = wls_prediction_std(fit)
    plt.plot_date(hours, band_high, 'r--', label='95% conf. int.')
    plt.plot_date(hours, band_low, 'r--')
    plt.ylabel('MW')
    plt.legend(loc=2)

    # lower panel: residual
    plt.subplot(2, 1, 2)
    plt.plot_date(hours, fit.resid, '-', label='Residual')
    plt.ylabel('MW')
    plt.legend()

    print("MAE = " + str(mae(fit.resid)))
    print("MAPE = " + str(mape(fit.resid, y)))
    print("RMSE = " + str(rmse(fit.resid)))
    print(fit.summary())
    return fit
def validate_ToutToutavg24vWindvWindavg24_model():
    """Plot the stored weather model against production for a validation week.

    Reads the daily profile from 'daily_profile.npy' and the regression
    parameters from 'lin_reg_fit_params.pkl', applies them to the
    ensemble-mean data for 2016-01-19 01:00 to 2016-01-26 00:00 and plots
    production (blue), the weather model (red) and the weather model plus
    the daily profile (green).  The hour of every timestep is printed.

    Returns
    -------
    The validation data frame from ``ens.repack_ens_mean_as_df``.
    """
    plt.close('all')
    period_start = dt.datetime(2016, 1, 19, 1)
    period_end = dt.datetime(2016, 1, 26, 0)
    daily_profile = np.load('daily_profile.npy')
    params = pd.read_pickle('lin_reg_fit_params.pkl')
    validation_data = ens.repack_ens_mean_as_df(period_start, period_end)
    model_columns = ['Tout', 'Toutavg24', 'vWind', 'vWindavg24']
    weather_model = linear_map(validation_data, params, model_columns)
    timesteps = ens.gen_hourly_timesteps(period_start, period_end)

    plt.plot_date(timesteps, validation_data['prod'], 'b-')
    plt.plot_date(timesteps, weather_model, 'r-')

    with_profile = []
    for step, model_value in zip(timesteps, weather_model):
        print(step.hour)
        # hour h uses profile entry h - 1, with hour 0 wrapping to entry 23
        profile_pos = (step.hour - 1) % 24
        with_profile.append(model_value + daily_profile[profile_pos])
    plt.plot_date(timesteps, with_profile, 'g-')
    return validation_data
def try_prod24h_before(
        columns=['Tout', 'vWind', 'vWindavg24', 'prod24h_before'],
        add_const=False,
        y=y):
    """Fit and plot a regression of ``y`` on selected ``all_data`` columns.

    Parameters
    ----------
    columns : list of str
        Columns of the module-level ``all_data`` used as regressors.
    add_const : bool
        Forwarded to ``mlin_regression``.
    y :
        Regression target; defaults to the module-level ``y`` as it was when
        this function was defined.

    Returns
    -------
    The result object returned by ``mlin_regression``.  MAE, MAPE, RMSE and
    the regression summary are printed as a side effect.
    """
    plt.close('all')
    regressors = all_data[columns]
    fit = mlin_regression(y, regressors, add_const=add_const)
    hours = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                     dt.datetime(2016, 1, 15, 0))

    # upper panel: target, fitted values and the wls_prediction_std band
    plt.subplot(2, 1, 1)
    plt.plot_date(hours, y, 'b', label='Actual prodution')
    plt.plot_date(hours, fit.fittedvalues, 'r', label='Weather model')
    _, band_low, band_high = wls_prediction_std(fit)
    plt.plot_date(hours, band_high, 'r--', label='95% conf. int.')
    plt.plot_date(hours, band_low, 'r--')
    plt.ylabel('MW')
    plt.legend(loc=2)

    # lower panel: residual
    plt.subplot(2, 1, 2)
    plt.plot_date(hours, fit.resid, '-', label='Residual')
    plt.ylabel('MW')
    plt.legend()

    print("MAE = " + str(mae(fit.resid)))
    print("MAPE = " + str(mape(fit.resid, y)))
    print("RMSE = " + str(rmse(fit.resid)))
    print(fit.summary())
    return fit
def most_recent_ens_timeseries(start_stop=(dt.datetime(2015,12,16,0), dt.datetime(2016,1,19,0)), pointcode=71699, shift_steno_one=False):
    """Plot stored ensemble time series against two measured series.

    For every variable in the module-level ``weathervars`` one figure is
    made and saved as 'figures/<var>_most_recent_ens_timeseries.pdf'.  The
    upper panel shows the ensemble loaded from 'time_series/', the Brabrand
    Syd measurements and the Steno Museum measurements; the lower panel
    shows ``ens.ensemble_std`` and ``ens.ensemble_abs_spread`` of the
    ensemble.

    Parameters
    ----------
    start_stop : tuple of two datetime.datetime
        The first is the first timestep in the time series, the second is
        the last.
    pointcode : int
        Used to build the name of the stored ensemble files.
    shift_steno_one : bool
        If true, the Steno series is rolled by -1 position before plotting.
    """
    plt.close('all')
    # Raw string: '\d' is not a valid escape sequence in a normal literal.
    ylabels = [r'[$\degree $C]', '[m/s]', '[%]', '[W/m$^2$]']

    suffix = ''.join(['_geo', str(pointcode), '_',
                      ens.timestamp_str(start_stop[0]),
                      '_to_', ens.timestamp_str(start_stop[1]), '.npy'])
    timesteps = ens.gen_hourly_timesteps(start_stop[0], start_stop[1])

    Steno_data = np.load('Q:/Weatherdata/Steno_weatherstation/Steno_hourly_2015120111_to_2016011800.npz')
    Steno_Tvhs = Steno_data['Tout_vWind_hum_sunRad']
    Steno_timesteps = Steno_data['timesteps']

    for v, ylab in zip(weathervars, ylabels):
        plt.figure(figsize=(15, 20))
        plt.grid(True)
        plt.subplot(2, 1, 1)

        ens_data = np.load('time_series/' + v + suffix)
        BBSYD_measured = sq.fetch_BrabrandSydWeather(v, start_stop[0],
                                                     start_stop[1])
        # The Steno array has one column per variable, in weathervars order.
        Steno_measured = Steno_Tvhs[:, weathervars.index(v)]
        if shift_steno_one:
            Steno_measured = np.roll(Steno_measured, -1)

        if v == 'Tout':
            ens_data = ens.Kelvin_to_Celcius(ens_data)
        elif v == 'hum':
            ens_data = ens.frac_to_percent(ens_data)  # convert to percentage

        plt.plot_date(timesteps, ens_data, '-')
        plt.plot_date(timesteps, BBSYD_measured, 'k-', lw=2,
                      label='Measured: Brabrand Syd')
        plt.plot_date(Steno_timesteps, Steno_measured, 'r-', lw=2,
                      label='Measured: Steno Museum')
        plt.ylabel(ylab)
        plt.grid(True)
        plt.xlim(start_stop)
        plt.title(v)
        plt.legend()

        plt.subplot(2, 1, 2)
        plt.plot_date(timesteps, ens.ensemble_std(ens_data), '-',
                      label='Ensemble std')
        plt.plot_date(timesteps, ens.ensemble_abs_spread(ens_data), '-',
                      label='Max ensemble spread')
        plt.ylabel(ylab)
        plt.legend()
        plt.grid(True)
        plt.tight_layout()

        figfilename = v + '_most_recent_ens_timeseries.pdf'
        plt.savefig('figures/' + figfilename)
def check_ens_mean_data():
    """Plot the stored hourly and 24h-average ensemble-mean series.

    For every variable in the module-level ``weathervars`` the two series
    stored under 'time_series/ens_means/' for point 71699 and the period
    2015-12-17 01:00 to 2016-01-15 00:00 are drawn in a new figure.
    """
    plt.close('all')
    period = (dt.datetime(2015, 12, 17, 1), dt.datetime(2016, 1, 15, 0))
    hours = ens.gen_hourly_timesteps(period[0], period[1])

    folder = 'time_series/ens_means/'
    tail = '_geo71699_2015121701_to_2016011500.npy'
    for v in weathervars:
        curves = [
            (np.load(folder + v + tail), 'Hourly'),
            (np.load(folder + v + 'avg24' + tail), 'Average over last 24h'),
        ]
        plt.figure()
        plt.title(v)
        for values, label in curves:
            plt.plot_date(hours, values, '-', label=label)
        plt.legend()
def fetch_hourly_vals_from_PIno(PIno, from_time, to_time):
    """Fetch the hourly values stored for one PI number.

    Parameters
    ----------
    PIno :
        Value matched against the [Pinr] column.
    from_time, to_time : datetime.datetime
        First and last timestep of the series.

    Returns
    -------
    numpy.ndarray
        The [dValue] column as floats, ordered by timestamp.

    Raises
    ------
    AssertionError
        If the returned timestamps differ from
        ``ens.gen_hourly_timesteps(from_time, to_time)``.
    """
    conn = connect()
    # ORDER BY is required: without it the row order is unspecified, while
    # the check below compares against an ordered hourly sequence.
    sql_query = """USE [EDW_Stage]
                SELECT [Pinr]
                      ,[TimeStamp]
                      ,[dValue]
                FROM [sro].[vHourSerier_Udtræk]
                WHERE [Pinr]='%s' AND TimeStamp BETWEEN '%s' AND '%s'
                ORDER BY [TimeStamp]""" % (PIno, str(from_time), str(to_time))
    data = extractdata(conn, sql_query)
    _PI, timestamps, vals = zip(*data)

    # Explicit raise instead of ``assert`` so that the check is not removed
    # when Python runs with -O.
    expected = ens.gen_hourly_timesteps(from_time, to_time)
    if not (list(timestamps) == expected):
        raise AssertionError("Timesteps are not hour by hour")

    return np.array(vals, dtype=float)
def fetch_consumption(Forbrugssted_Key, from_time, to_time):
    """Fetch the hourly consumption for one consumption site.

    Parameters
    ----------
    Forbrugssted_Key : int
        Value matched against the Forbrugssted_Key column.
    from_time, to_time : datetime.datetime
        First and last timestep; converted with ``ens.timestamp_str`` for
        the query.

    Returns
    -------
    numpy.ndarray
        The [ForbrugMWh] column as floats, ordered by Tid_Key.

    Raises
    ------
    AssertionError
        If the returned Tid_Key values differ from the integer timestamp
        strings of ``ens.gen_hourly_timesteps(from_time, to_time)``.
    """
    conn = connect()
    sql_query = """
        USE [DM_VT]
        SELECT [Tid_Key]
              ,[ForbrugMWh]
        FROM [DM_VT].[dbo].[vForbrug_Doegn]
        WHERE Forbrugssted_Key = %i AND Tid_Key BETWEEN '%s' AND '%s'
        ORDER BY Tid_Key""" % (Forbrugssted_Key,
                               ens.timestamp_str(from_time),
                               ens.timestamp_str(to_time))
    data = extractdata(conn, sql_query)
    timestamps, consumption = zip(*data)

    # Explicit raise instead of ``assert`` so that the check is not removed
    # when Python runs with -O.
    expected = [int(ens.timestamp_str(ts))
                for ts in ens.gen_hourly_timesteps(from_time, to_time)]
    if not (list(timestamps) == expected):
        raise AssertionError("Timesteps are not hour by hour")

    cons_array = np.array(consumption, dtype=float)
    return cons_array
def check_ens_mean_data():
    """Plot the stored hourly and 24h-average ensemble-mean series.

    For every variable in the module-level ``weathervars`` the two series
    stored under 'time_series/ens_means/' for point 71699 and the period
    2015-12-17 01:00 to 2016-01-15 00:00 are drawn in a new figure.
    """
    plt.close('all')
    period = (dt.datetime(2015, 12, 17, 1), dt.datetime(2016, 1, 15, 0))
    hours = ens.gen_hourly_timesteps(period[0], period[1])

    folder = 'time_series/ens_means/'
    tail = '_geo71699_2015121701_to_2016011500.npy'
    for v in weathervars:
        curves = [
            (np.load(folder + v + tail), 'Hourly'),
            (np.load(folder + v + 'avg24' + tail), 'Average over last 24h'),
        ]
        plt.figure()
        plt.title(v)
        for values, label in curves:
            plt.plot_date(hours, values, '-', label=label)
        plt.legend()
def fetch_price(from_time, to_time, price_name='Timenspris'):
    """Fetch an hourly price series.

    Parameters
    ----------
    from_time, to_time : datetime.datetime
        First and last timestep; converted with ``ens.timestamp_str`` for
        the query.
    price_name : str
        Column to select.  Should be either "Timenspris",
        "VariabelTimenspris" or "TimensprisMovingAVG".  The name is inserted
        into the query text as given, so do not pass untrusted input.

    Returns
    -------
    numpy.ndarray
        The selected column as floats, ordered by Tid_Key.

    Raises
    ------
    AssertionError
        If the returned Tid_Key values differ from the integer timestamp
        strings of ``ens.gen_hourly_timesteps(from_time, to_time)``.
    """
    conn = connect()
    sql_query = """
        USE [DM_VT]
        SELECT [Tid_Key]
              ,[%s]
        FROM [dbo].[vFact_Timepris_Doegn]
        WHERE Tid_Key BETWEEN '%s' AND '%s'
        ORDER BY Tid_Key""" % (price_name,
                               ens.timestamp_str(from_time),
                               ens.timestamp_str(to_time))
    data = extractdata(conn, sql_query)
    timestamps, price = zip(*data)

    # Explicit raise instead of ``assert`` so that the check is not removed
    # when Python runs with -O.
    expected = [int(ens.timestamp_str(ts))
                for ts in ens.gen_hourly_timesteps(from_time, to_time)]
    if not (list(timestamps) == expected):
        raise AssertionError("Timesteps are not hour by hour")

    return np.array(price, dtype=float)
def validate_prod24h_before_and_diffsmodel():
    """Fit the 24h-lag/diff model on ``all_data`` and apply it to validation data.

    The validation frame covers 2016-01-20 01:00 to 2016-01-31 00:00.  It is
    extended with 'prod24h_before' and, for 'Tout', 'vWind' and 'sunRad', a
    '<var>24hdiff' column (the frame's own column minus the ensemble mean of
    the same period shifted 24 hours back).  The model is fitted on the
    module-level ``all_data`` and plotted against validation production.

    Returns
    -------
    tuple
        ``(validation_data, res, residual)`` where ``residual`` is the model
        output minus validation production.
    """
    plt.close('all')
    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
    ts_start = dt.datetime(2016, 1, 20, 1)
    ts_end = dt.datetime(2016, 1, 31, 0)
    lag_start = ts_start + dt.timedelta(days=-1)
    lag_end = ts_end + dt.timedelta(days=-1)
    validation_data = ens.repack_ens_mean_as_df(ts_start, ts_end)

    # correct error in production:
    # NOTE(review): this averages entry 116 with itself, so new_val equals
    # entry 116; possibly entries 116 and 117 were meant - confirm.
    prod_116 = validation_data['prod'][116]
    new_val = (prod_116 + prod_116) / 2
    for pos in (116, 117):
        validation_data['prod'][pos] = new_val
    validation_data['prod24h_before'] = sq.fetch_production(lag_start, lag_end)
    # the same two hours sit 24 positions later in the lagged column
    for pos in (116 + 24, 117 + 24):
        validation_data['prod24h_before'][pos] = new_val

    lagged_means = {}
    for var in ('Tout', 'vWind', 'sunRad'):
        lagged_means[var] = ens.load_ens_timeseries_as_df(
            lag_start, lag_end, weathervars=[var]).mean(axis=1)
    for var in ('Tout', 'vWind', 'sunRad'):
        validation_data[var + '24hdiff'] = \
            validation_data[var] - lagged_means[var]

    # fit on fit area
    res = mlin_regression(all_data['prod'], all_data[cols], add_const=False)

    # apply to validation area
    weather_model = linear_map(validation_data, res.params, cols)
    timesteps = ens.gen_hourly_timesteps(ts_start, ts_end)
    plt.plot_date(timesteps, validation_data['prod'], 'b-')
    plt.plot_date(timesteps, weather_model, 'r-')
    residual = weather_model - validation_data['prod']
    return validation_data, res, residual
def fetch_production(from_time, to_time):
    """Fetch the hourly total production.

    Parameters
    ----------
    from_time, to_time : datetime.datetime
        First and last timestep; converted with ``ens.timestamp_str`` for
        the query.

    Returns
    -------
    numpy.ndarray
        The [SamletProduktionMWh] column as floats, ordered by Tid_Key.  The
        values at Tid_Key 2016032702 and 2016032703 are doubled when present.

    Raises
    ------
    AssertionError
        If the returned Tid_Key values differ from the integer timestamp
        strings of ``ens.gen_hourly_timesteps(from_time, to_time)``.
    """
    conn = connect()
    sql_query = """
        USE [DM_VT]
        SELECT [Tid_Key]
              ,[SamletProduktionMWh]
        FROM [dbo].[vFact_Timepris_Doegn]
        WHERE Tid_Key BETWEEN '%s' AND '%s'
        ORDER BY Tid_Key""" % (ens.timestamp_str(from_time),
                               ens.timestamp_str(to_time))
    data = extractdata(conn, sql_query)
    timestamps, production = zip(*data)

    # Explicit raise instead of ``assert`` so that the check is not removed
    # when Python runs with -O.
    expected = [int(ens.timestamp_str(ts))
                for ts in ens.gen_hourly_timesteps(from_time, to_time)]
    if not (list(timestamps) == expected):
        raise AssertionError("Timesteps are not hour by hour")

    prod_array = np.array(production, dtype=float)
    # Hours at the transition to daylight saving time are stored wrongly and
    # are corrected by doubling.
    for ts in (2016032702, 2016032703):
        if ts in timestamps:
            print("Correcting error in production by transition to daylight savings on timestamp %s" % ts)
            index = timestamps.index(ts)
            prod_array[index] = 2 * prod_array[index]
    return prod_array
def validate_prod24h_before_and_diffsmodel():
    """Fit the 24h-lag/diff model on ``all_data`` and apply it to validation data.

    The validation frame covers 2016-01-20 01:00 to 2016-01-31 00:00.  It is
    extended with 'prod24h_before' and, for 'Tout', 'vWind' and 'sunRad', a
    '<var>24hdiff' column (the frame's own column minus the ensemble mean of
    the same period shifted 24 hours back).  The model is fitted on the
    module-level ``all_data`` and plotted against validation production.

    Returns
    -------
    tuple
        ``(validation_data, res, residual)`` where ``residual`` is the model
        output minus validation production.
    """
    plt.close('all')
    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
    ts_start = dt.datetime(2016, 1, 20, 1)
    ts_end = dt.datetime(2016, 1, 31, 0)
    lag_start = ts_start + dt.timedelta(days=-1)
    lag_end = ts_end + dt.timedelta(days=-1)
    validation_data = ens.repack_ens_mean_as_df(ts_start, ts_end)

    # correct error in production:
    # NOTE(review): this averages entry 116 with itself, so new_val equals
    # entry 116; possibly entries 116 and 117 were meant - confirm.
    prod_116 = validation_data['prod'][116]
    new_val = (prod_116 + prod_116) / 2
    for pos in (116, 117):
        validation_data['prod'][pos] = new_val
    validation_data['prod24h_before'] = sq.fetch_production(lag_start, lag_end)
    # the same two hours sit 24 positions later in the lagged column
    for pos in (116 + 24, 117 + 24):
        validation_data['prod24h_before'][pos] = new_val

    lagged_means = {}
    for var in ('Tout', 'vWind', 'sunRad'):
        lagged_means[var] = ens.load_ens_timeseries_as_df(
            lag_start, lag_end, weathervars=[var]).mean(axis=1)
    for var in ('Tout', 'vWind', 'sunRad'):
        validation_data[var + '24hdiff'] = \
            validation_data[var] - lagged_means[var]

    # fit on fit area
    res = mlin_regression(all_data['prod'], all_data[cols], add_const=False)

    # apply to validation area
    weather_model = linear_map(validation_data, res.params, cols)
    timesteps = ens.gen_hourly_timesteps(ts_start, ts_end)
    plt.plot_date(timesteps, validation_data['prod'], 'b-')
    plt.plot_date(timesteps, weather_model, 'r-')
    residual = weather_model - validation_data['prod']
    return validation_data, res, residual
def plot_best_model():
    """Fit and plot the weather-only regression for the fit period.

    Regresses the module-level ``y`` on the 'Tout', 'Toutavg24', 'vWind' and
    'vWindavg24' columns of the module-level ``all_data``.  The upper panel
    shows ``y``, the fitted values, the band returned by
    ``wls_prediction_std`` and the fitted values plus the mean residual per
    position in the 24-step cycle.  The lower panel shows the residual and
    that mean-residual profile.  Three error measures and the regression
    summary are printed.

    Returns
    -------
    The result object returned by ``mlin_regression``.
    """
    plt.close('all')
    # The 'hours', 'hours2', ..., 'hours8' columns are intentionally left out.
    columns = ['Tout', 'Toutavg24', 'vWind', 'vWindavg24']
    X = all_data[columns]
    res = mlin_regression(y, X)
    timesteps = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                         dt.datetime(2016, 1, 15, 0))

    plt.subplot(2, 1, 1)
    plt.plot_date(timesteps, y, 'b', label='Actual prodution')
    plt.plot_date(timesteps, res.fittedvalues, 'r', label='Weather model')
    prstd, iv_l, iv_u = wls_prediction_std(res)
    plt.plot_date(timesteps, iv_u, 'r--', label='95% conf. int.')
    plt.plot_date(timesteps, iv_l, 'r--')

    # Mean residual for each of the 24 positions in the daily cycle,
    # repeated for every whole day of the period (29 for the dates above).
    mean_day_resid = [res.resid[i::24].mean() for i in range(24)]
    n_days = len(timesteps) // 24
    mean_resid_series = np.tile(mean_day_resid, n_days)
    plt.plot_date(timesteps, res.fittedvalues + mean_resid_series, 'g',
                  label='Weather model + avg daily profile')
    plt.ylabel('MW')
    plt.legend(loc=2)

    plt.subplot(2, 1, 2)
    plt.plot_date(timesteps, res.resid, '-', label='Residual')
    plt.plot_date(timesteps, mean_resid_series)
    plt.ylabel('MW')
    plt.legend()

    # Local names chosen so they do not shadow the module-level mae()/mape().
    mape_with_profile = np.mean(
        np.abs((res.fittedvalues + mean_resid_series - y) / y))
    mape_weather_only = np.mean(np.abs((res.resid) / y))
    mae_with_profile = np.mean(
        np.abs((res.fittedvalues + mean_resid_series - y)))
    print("%s %s %s" % (mape_with_profile, mape_weather_only,
                        mae_with_profile))
    # The summary used to be computed and thrown away; print it like
    # try_prod24h_before does.
    print(res.summary())
    return res
def check_for_timeshift():
    """Check for a time shift between the two weather stations.

    Compares the Brabrand Syd measurements with the Steno Museum ones for
    2015-12-16 00:00 to 2016-01-16 00:00.  For every variable in the
    module-level ``weathervars`` a figure with five panels is drawn, one per
    roll offset from -2 to 2 applied to the Steno series; each panel title
    gives the mean absolute difference to the Brabrand Syd series.

    It appears that Steno data is one hour fast.

    Raises
    ------
    AssertionError
        If the Steno timesteps in the window differ from the generated
        hourly timesteps.
    IndexError
        If the first or last timestep is not found in the Steno timesteps.
    """
    plt.close('all')
    start_stop = (dt.datetime(2015, 12, 16, 0), dt.datetime(2016, 1, 16, 0))
    timesteps = ens.gen_hourly_timesteps(start_stop[0], start_stop[1])

    Steno_data = np.load('Q:/Weatherdata/Steno_weatherstation/Steno_hourly_2015120111_to_2016011800.npz')
    Steno_Tvhs = Steno_data['Tout_vWind_hum_sunRad']
    Steno_timesteps = Steno_data['timesteps']

    # np.where returns index arrays; slice bounds must be plain integers,
    # so take the first match explicitly.
    start_index = int(np.where(Steno_timesteps == start_stop[0])[0][0])
    end_index = int(np.where(Steno_timesteps == start_stop[1])[0][0]) + 1
    Steno_Tvhs_short = Steno_Tvhs[start_index:end_index, :]
    Steno_timesteps_new = Steno_timesteps[start_index:end_index]
    if not all(Steno_timesteps_new == timesteps):
        raise AssertionError(
            "Steno timesteps do not match the generated hourly timesteps")

    for v in weathervars:
        plt.figure()
        # Neither series depends on the offset, so fetch them once per
        # variable instead of once per panel.
        BBSYD_measured = sq.fetch_BrabrandSydWeather(v, start_stop[0],
                                                     start_stop[1])
        Steno_measured = Steno_Tvhs_short[:, weathervars.index(v)]
        for offset in range(-2, 3, 1):
            plt.subplot(5, 1, offset + 3)
            Steno_with_offset = np.roll(Steno_measured, offset)
            mean_abs_diff = np.mean(
                np.abs((Steno_with_offset - BBSYD_measured)))
            plt.title('offset %i, MAE = %2.4f ' % (offset, mean_abs_diff))
            plt.plot_date(timesteps, BBSYD_measured, 'k')
            plt.plot_date(timesteps, Steno_with_offset, 'r')
        plt.tight_layout()
        plt.suptitle(v)
def weather_forecast_ensemble():
    """Plot the 25 ensemble members of three weather variables (figure 2).

    Loads the ensemble series for 'Tout', 'vWind' and 'sunRad' for the hours
    from 2016-01-20 01:00 to 2016-02-05 00:00, draws one panel per variable
    and saves the figure to
    'figures/first_articlefigs/weather_forecast_ensemble.pdf'.

    Returns
    -------
    tuple
        ``(ens_data, axes)``: the loaded ensemble frame and the axes array.
    """
    plt.close('all')
    first_hour = dt.datetime(2016, 1, 20, 1)
    last_hour = dt.datetime(2016, 2, 5, 0)
    ts = ens.gen_hourly_timesteps(first_hour, last_hour)
    ens_data = ens.load_ens_timeseries_as_df(
        ts_start=ts[0], ts_end=ts[-1],
        weathervars=['Tout', 'vWind', 'sunRad'])

    fig, axes = plt.subplots(3, 1, sharex=True,
                             figsize=(colwidth, 1.65 * colwidth))
    plt.xticks(size=5)

    # (variable, shift of the colour sequence, y-axis label) for each panel
    panels = [
        ('Tout', 15, u'Outside temperature [%sC]' % uni_degree),
        ('vWind', 23, 'Wind speed [m/s]'),
        ('sunRad', 6, u'Solar irradiance [W/m%s]' % uni_squared),
    ]
    member_ids = [str(i) for i in range(25)]
    for ax, (name, shift, ylabel) in zip(axes, panels):
        shades = np.roll(np.linspace(0, 1, 25), shift)
        ax.set_prop_cycle(cycler('color', plt.cm.Dark2(shades)))
        member_columns = [name + member for member in member_ids]
        ax.plot_date(ts, ens_data[member_columns], '-', lw=0.5)
        ax.set_ylabel(ylabel, size=8)
        ax.tick_params(axis='y', which='major', labelsize=8)
        plt.box(True)

    plt.tight_layout()
    bottom = axes[-1]
    bottom.xaxis.set_major_formatter(DateFormatter('%b %d'))
    bottom.set_xlim(dt.datetime(2016, 1, 20, 0), dt.datetime(2016, 2, 5, 0))
    fig.savefig('figures/first_articlefigs/weather_forecast_ensemble.pdf')
    return ens_data, axes
def check_for_timeshift():
    """Check for a time shift between the two weather stations.

    Compares the Brabrand Syd measurements with the Steno Museum ones for
    2015-12-16 00:00 to 2016-01-16 00:00.  For every variable in the
    module-level ``weathervars`` a figure with five panels is drawn, one per
    roll offset from -2 to 2 applied to the Steno series; each panel title
    gives the mean absolute difference to the Brabrand Syd series.

    It appears that Steno data is one hour fast.

    Raises
    ------
    AssertionError
        If the Steno timesteps in the window differ from the generated
        hourly timesteps.
    IndexError
        If the first or last timestep is not found in the Steno timesteps.
    """
    plt.close('all')
    start_stop = (dt.datetime(2015, 12, 16, 0), dt.datetime(2016, 1, 16, 0))
    timesteps = ens.gen_hourly_timesteps(start_stop[0], start_stop[1])

    Steno_data = np.load('Q:/Weatherdata/Steno_weatherstation/Steno_hourly_2015120111_to_2016011800.npz')
    Steno_Tvhs = Steno_data['Tout_vWind_hum_sunRad']
    Steno_timesteps = Steno_data['timesteps']

    # np.where returns index arrays; slice bounds must be plain integers,
    # so take the first match explicitly.
    start_index = int(np.where(Steno_timesteps == start_stop[0])[0][0])
    end_index = int(np.where(Steno_timesteps == start_stop[1])[0][0]) + 1
    Steno_Tvhs_short = Steno_Tvhs[start_index:end_index, :]
    Steno_timesteps_new = Steno_timesteps[start_index:end_index]
    if not all(Steno_timesteps_new == timesteps):
        raise AssertionError(
            "Steno timesteps do not match the generated hourly timesteps")

    for v in weathervars:
        plt.figure()
        # Neither series depends on the offset, so fetch them once per
        # variable instead of once per panel.
        BBSYD_measured = sq.fetch_BrabrandSydWeather(v, start_stop[0],
                                                     start_stop[1])
        Steno_measured = Steno_Tvhs_short[:, weathervars.index(v)]
        for offset in range(-2, 3, 1):
            plt.subplot(5, 1, offset + 3)
            Steno_with_offset = np.roll(Steno_measured, offset)
            mean_abs_diff = np.mean(
                np.abs((Steno_with_offset - BBSYD_measured)))
            plt.title('offset %i, MAE = %2.4f ' % (offset, mean_abs_diff))
            plt.plot_date(timesteps, BBSYD_measured, 'k')
            plt.plot_date(timesteps, Steno_with_offset, 'r')
        plt.tight_layout()
        plt.suptitle(v)
def validate_ToutToutavg24vWindvWindavg24_model():
    """Plot the stored weather model against production for a validation week.

    Reads the daily profile from 'daily_profile.npy' and the regression
    parameters from 'lin_reg_fit_params.pkl', applies them to the
    ensemble-mean data for 2016-01-19 01:00 to 2016-01-26 00:00 and plots
    production (blue), the weather model (red) and the weather model plus
    the daily profile (green).  The hour of every timestep is printed.

    Returns
    -------
    The validation data frame from ``ens.repack_ens_mean_as_df``.
    """
    plt.close('all')
    period_start = dt.datetime(2016, 1, 19, 1)
    period_end = dt.datetime(2016, 1, 26, 0)
    daily_profile = np.load('daily_profile.npy')
    params = pd.read_pickle('lin_reg_fit_params.pkl')
    validation_data = ens.repack_ens_mean_as_df(period_start, period_end)
    model_columns = ['Tout', 'Toutavg24', 'vWind', 'vWindavg24']
    weather_model = linear_map(validation_data, params, model_columns)
    timesteps = ens.gen_hourly_timesteps(period_start, period_end)

    plt.plot_date(timesteps, validation_data['prod'], 'b-')
    plt.plot_date(timesteps, weather_model, 'r-')

    with_profile = []
    for step, model_value in zip(timesteps, weather_model):
        print(step.hour)
        # hour h uses profile entry h - 1, with hour 0 wrapping to entry 23
        profile_pos = (step.hour - 1) % 24
        with_profile.append(model_value + daily_profile[profile_pos])
    plt.plot_date(timesteps, with_profile, 'g-')
    return validation_data
h_hoursbefore(ts_start, timeshift),\ h_hoursbefore(ts_end, timeshift),\ pointcode=71699) df['%s%ihdiff'%(v,timeshift)] = ens_mean - ens_mean_before return df reload_data = True if reload_data: timelags = [48, 60, 168] all_data = gen_fit_df(dt.datetime(2016,1,26,1), dt.datetime(2016,4,1,0), ['Tout', 'vWind', 'hum', 'sunRad'], timelags) y = all_data['prod'] #%% ts = ens.gen_hourly_timesteps(dt.datetime(2016,1,26,1), dt.datetime(2016,4,1,0)) X = all_data.ix[:, all_data.columns !='prod'] X.to_pickle('48h60h168h_lagged_X.pkl') y.to_pickle('prod_to_gowith.pkl') #%% lr = linear_model.LinearRegression(fit_intercept=False) predicted = cross_val_predict(lr, X, y, cv=25) plt.figure() plt.plot(y) plt.plot(predicted, 'r') sns.jointplot(pd.Series(predicted), y)
def production_model():  # figure 3
    """Fit the production model, plot it with prediction bands and report errors.

    The model regresses production on 'prod24h_before' and the 24h
    differences of 'Tout', 'vWind' and 'sunRad'.  It is fitted on the period
    2015-12-17 01:00 to 2016-01-15 00:00 and applied to the validation
    period 2016-01-20 01:00 to 2016-02-05 00:00.  The fitted parameters are
    also applied to each of the 25 ensemble members; the standard deviation
    across members is combined with quantiles of the fit residual into the
    plotted bands.

    Side effects: saves the figure to
    'Q:/Projekter/Ens Article 1/figures/production_model.pdf', writes
    'combined_conf_int' with ``np.savez`` and prints error statistics for
    the model and for the EO3 forecast.

    Returns
    -------
    tuple
        ``(res, fit_data)``: the regression result and the fit-period frame.
    """
    plt.close('all')
    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
    weather_cols = ['Tout', 'vWind', 'sunRad']
    n_members = 25
    z95 = 1.9599  # multiplier applied to the ensemble std for the 95% bands
    one_day = dt.timedelta(days=1)

    def report(label, value):
        # Same output as the Python 2 statement ``print label, value``.
        print(label + " " + str(value))

    def add_lagged_columns(frame, ts):
        # Add production 24h earlier and the 24h change of each variable.
        frame['prod24h_before'] = sq.fetch_production(ts[0] - one_day,
                                                      ts[-1] - one_day)
        for v in weather_cols:
            frame[v + '24hdiff'] = frame[v] \
                - ens.load_ens_timeseries_as_df(
                    ts_start=ts[0] - one_day,
                    ts_end=ts[-1] - one_day,
                    weathervars=[v]).mean(axis=1)

    def load_member_frame(ts, prod24h_before):
        # Per-member series plus their 24h differences for one period.
        frame = ens.load_ens_timeseries_as_df(
            ts_start=ts[0], ts_end=ts[-1], weathervars=list(weather_cols))
        frame['prod24h_before'] = prod24h_before
        frame_24h_before = ens.load_ens_timeseries_as_df(
            ts_start=ts[0] - one_day, ts_end=ts[-1] - one_day,
            weathervars=list(weather_cols))
        for i in range(n_members):
            for v in weather_cols:
                key_raw = v + str(i)
                key_diff = v + '24hdiff' + str(i)
                frame[key_diff] = frame[key_raw] - frame_24h_before[key_raw]
        return frame

    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                   dt.datetime(2016, 1, 15, 0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                   dt.datetime(2016, 2, 5, 0))
    n_fit = len(ts1)

    # load the data
    # NOTE(review): called without arguments; presumably the defaults of
    # repack_ens_mean_as_df cover the ts1 period - confirm.
    fit_data = ens.repack_ens_mean_as_df()
    add_lagged_columns(fit_data, ts1)
    vali_data = ens.repack_ens_mean_as_df(ts2[0], ts2[-1])
    add_lagged_columns(vali_data, ts2)

    # correct error in production:
    # NOTE(review): this averages entry 116 with itself, so new_val equals
    # entry 116; possibly entries 116 and 117 were meant - confirm.
    new_val = (vali_data['prod'][116] + vali_data['prod'][116]) / 2
    for pos in (116, 117):
        vali_data['prod'][pos] = new_val
    # the same two hours sit 24 positions later in the lagged column
    for pos in (116 + 24, 117 + 24):
        vali_data['prod24h_before'][pos] = new_val

    # do the fit
    X = fit_data[cols]
    y = fit_data['prod']
    res = mlin_regression(y, X, add_const=False)

    fig, [ax1, ax2] = plt.subplots(2, 1, sharex=True,
                                   figsize=(dcolwidth, 0.57 * dcolwidth),
                                   gridspec_kw={'height_ratios': [4, 1]})

    # load ensemble data
    ens_data1 = load_member_frame(ts1, fit_data['prod24h_before'])
    ens_data2 = load_member_frame(ts2, vali_data['prod24h_before'])
    all_ens_data = pd.concat([ens_data1, ens_data2])
    all_ts = ts1 + ts2

    # calculate production for each ensemble member
    ens_prods = np.zeros((len(all_ts), n_members))
    for i in range(n_members):
        ens_cols = ['Tout24hdiff' + str(i), 'vWind24hdiff' + str(i),
                    'sunRad24hdiff' + str(i), 'prod24h_before']
        ens_params = pd.Series({
            'Tout24hdiff' + str(i): res.params['Tout24hdiff'],
            'vWind24hdiff' + str(i): res.params['vWind24hdiff'],
            'sunRad24hdiff' + str(i): res.params['sunRad24hdiff'],
            'prod24h_before': res.params['prod24h_before']})
        ens_prods[:, i] = linear_map(all_ens_data, ens_params, ens_cols)

    # calculate combined confint
    ens_std = ens_prods.std(axis=1)
    vali_band = z95 * ens_std[n_fit:]
    # Model output for the validation period, computed once and reused.
    # Assumes linear_map has no side effects - TODO confirm.
    vali_model = linear_map(vali_data, res.params, cols)
    vali_resid = vali_model - vali_data['prod']

    fit_resid = res.resid
    fit_resid_corrig = fit_resid - np.sign(fit_resid) * z95 * ens_std[0:n_fit]
    conf_int_spread_lower = - fit_resid_corrig.quantile(0.025)
    conf_int_spread_higher = fit_resid_corrig.quantile(0.975)

    combined_conf_ints = conf_int_spread_lower + conf_int_spread_higher \
        + 2 * z95 * ens_std
    all_prod_model = np.concatenate([res.fittedvalues, vali_model])
    combined_ub95 = all_prod_model + conf_int_spread_higher + z95 * ens_std
    combined_lb95 = all_prod_model - (conf_int_spread_lower + z95 * ens_std)

    # plot confint
    ax1.fill_between(all_ts[n_fit:], combined_lb95[n_fit:],
                     combined_ub95[n_fit:], label='95% prediction intervals')
    ax1.fill_between(all_ts[n_fit:],
                     all_prod_model[n_fit:] - vali_band,
                     all_prod_model[n_fit:] + vali_band,
                     facecolor='grey',
                     label='Weather ensemble 95% conf. int.')

    # plot ensemble models
    ax1.plot_date(all_ts[n_fit:], ens_prods[n_fit:], '-', lw=0.5)

    ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2,
                  label='Historical production')
    ax1.plot_date(ts2, vali_model, '-', c=red, lw=2,
                  label='Production model')
    ax1.set_ylabel('Production [MW]', size=8)
    ax1.tick_params(axis='both', which='major', labelsize=8)
    ax1.xaxis.set_major_formatter(DateFormatter('%b %d'))
    ax1.legend(loc=1, prop={'size': 8})
    ax1.set_ylim([300, 1100])

    # lower panel: band widths normalised by N
    N = conf_int_spread_higher + z95 * ens_std[n_fit:].max()
    ax2.fill_between(ts2, -(vali_band + conf_int_spread_lower) / N,
                     -vali_band / N, alpha=0.5)
    ax2.fill_between(ts2, -vali_band / N, np.zeros(len(ts2)),
                     facecolor='grey', alpha=0.5)
    ax2.fill_between(ts2, vali_band / N, facecolor='grey')
    ax2.fill_between(ts2, vali_band / N,
                     (conf_int_spread_higher + vali_band) / N)
    ax2.set_ylabel('Prediction intervals \n[normalized]', size=8)
    ax2.tick_params(axis='y', which='major', labelsize=8)
    ax2.set_xlim(dt.datetime(2016, 1, 20, 0), dt.datetime(2016, 2, 5, 0))
    fig.tight_layout()

    report("Min_normalized pos conf bound. ",
           np.min(vali_band / N + conf_int_spread_higher / N))

    print("MAE = " + str(mae(vali_resid)))
    print("MAPE = " + str(mape(vali_resid, vali_data['prod'])))
    print("RMSE = " + str(rmse(vali_resid)))
    print("ME = " + str(np.mean(vali_resid)))

    print("MAE (fit) = " + str(mae(res.resid)))
    print("MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])))
    print("RMSE (fit)= " + str(rmse(res.resid)))
    print("ME (fit)= " + str(np.mean(res.resid)))

    report("Width of const blue bands (MW)",
           str(conf_int_spread_lower) + " " + str(conf_int_spread_higher))

    plt.savefig('Q:/Projekter/Ens Article 1/figures/production_model.pdf',
                dpi=400)

    # same statistics for the EO3 forecast
    EO3_fc1 = sq.fetch_EO3_midnight_forecast(ts1[0], ts1[-1])
    EO3_fc2 = sq.fetch_EO3_midnight_forecast(ts2[0], ts2[-1])
    EO3_err = EO3_fc2 - vali_data['prod']
    EO3_err_fit = EO3_fc1 - fit_data['prod']
    print("MAE (EO3) = " + str(mae(EO3_err)))
    print("MAPE (EO3) = " + str(mape(EO3_err, vali_data['prod'])))
    print("RMSE (EO3)= " + str(rmse(EO3_err)))
    print("ME (EO3)= " + str(np.mean(EO3_err)))

    print("MAE (EO3_fit) = " + str(mae(EO3_err_fit)))
    print("MAPE (EO3_fit) = " + str(mape(EO3_err_fit, fit_data['prod'])))
    print("RMSE (EO3_fit)= " + str(rmse(EO3_err_fit)))
    print("ME (EO3_fit)= " + str(np.mean(EO3_err_fit)))

    print(np.min(combined_conf_ints[n_fit:] / combined_conf_ints.max()))
    np.savez('combined_conf_int',
             combined_conf_int=(conf_int_spread_higher + z95 * ens_std),
             timesteps=all_ts)

    report("Corr coeff: vali ",
           np.corrcoef(vali_data['prod'], vali_model)[0, 1])
    report("Corr coeff: vali EO3 ",
           np.corrcoef(vali_data['prod'], EO3_fc2)[0, 1])
    report("Corr coeff: fit ",
           np.corrcoef(fit_data['prod'], res.fittedvalues)[0, 1])
    report("Corr coeff: fit EO3 ",
           np.corrcoef(fit_data['prod'], EO3_fc1)[0, 1])

    # share of validation hours outside the combined band
    n_above = len(np.where(
        vali_data['prod']
        > (conf_int_spread_higher + vali_band + vali_model))[0])
    report("% of actual production in vali period above upper",
           float(n_above) / len(ts2))
    report("plus minus: ", 0.5 / len(ts2))

    n_below = len(np.where(
        vali_data['prod']
        < (vali_model - (conf_int_spread_lower + vali_band)))[0])
    report("% of actual production in vali period below lower",
           float(n_below) / len(ts2))
    report("plus minus: ", 0.5 / len(ts2))

    return res, fit_data
def load_cons_model_ens_dfs(df):
    """Build the consumption-model input frames, one per ensemble member.

    For each of the three hard-coded periods (fit, validation, test) and for
    each of the 25 ensemble members a frame is assembled with the columns

    * ``cons`` -- ``df['cons']`` over the period,
    * ``cons24hbefore`` -- ``df['cons']`` over the same window moved one day
      back,
    * ``<var>24hdiff`` -- for every variable in ``weathervars``, the member's
      column (``<var><member number>``) of the ensemble frame for the period
      minus the ensemble frame for the window one day earlier.

    The three period frames of each member are given the period's timesteps
    as index and concatenated.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``'cons'`` column and an index that can be label-sliced with
        the datetimes of the periods (and of the day before each period).

    Returns
    -------
    list of pandas.DataFrame
        One concatenated fit + validation + test frame per ensemble member.
    """
    n_members = 25
    one_day = dt.timedelta(days=1)
    weathervars = ['Tout', 'vWind', 'sunRad', 'hum']
    periods = [
        # fit
        ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                 dt.datetime(2016, 1, 15, 0)),
        # validation
        ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                 dt.datetime(2016, 2, 5, 0)),
        # test
        ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1),
                                 dt.datetime(2016, 3, 1, 0)),
    ]

    frames_per_period = []
    for ts in periods:
        start, end = ts[0], ts[-1]

        # The consumption columns are the same for every member, so slice
        # once per period.  np.array() drops the index of the slice so the
        # values are assigned by position instead of being aligned on index.
        # .loc replaces the removed .ix indexer (label-based slicing,
        # both end points included).
        cons = np.array(df.loc[start:end]['cons'])
        cons_before = np.array(df.loc[start - one_day:end - one_day]['cons'])

        member_frames = []
        for _ in range(n_members):
            frame = pd.DataFrame()
            frame['cons'] = cons
            frame['cons24hbefore'] = cons_before
            member_frames.append(frame)

        for v in weathervars:
            ens_diff = (
                ens.load_ens_timeseries_as_df(ts_start=start,
                                              ts_end=end,
                                              weathervars=[v])
                - ens.load_ens_timeseries_as_df(ts_start=start - one_day,
                                                ts_end=end - one_day,
                                                weathervars=[v]))
            for i, frame in enumerate(member_frames):
                frame['%s24hdiff' % v] = ens_diff[v + str(i)]

        # Only now switch to the datetime index, after all columns have been
        # filled in positionally.
        for frame in member_frames:
            frame.set_index(pd.DatetimeIndex(ts), inplace=True)
        frames_per_period.append(member_frames)

    # Regroup from "per period -> per member" to "per member -> per period".
    return [pd.concat(list(parts)) for parts in zip(*frames_per_period)]
def hoerning_pump_model():
    """Compute supply temperature and flow under two capacity limits; plot figure 4.

    Hourly supply temperature and flow are fetched for two periods and turned
    into a delivered-heat series.  For every hour the piecewise-linear
    temperature/flow relation defined below is solved for that heat, once
    with a constant upper flow limit and once with a limit that depends on
    the stored confidence interval.  Both results are drawn as scatter plots
    and the figure is saved.  A third run repeats the calculation with the
    confidence interval reduced by half of ``statistical_conf_int``.

    Returns
    -------
    tuple
        ``(T_sup_const_cap, T_sup_dyn_cap, Q_const_cap, Q_dyn_cap,
        model_conf_int, T_sup_dyn_cap_half_model_unc)``
    """
    # figure 4
    # simple model
    specific_heat_water = 1.17e-6  # MWh/kgKelvin
    density_water = 980  # kg/m3 at 65 deg C
    T_ret = 36.5
    T1 = 68.5
    a2, a3 = 15.5, 2.1
    b2 = 295 - a2 * T1
    b3 = 340 - a3 * 71.4

    def Q_from_cons_lin_piece(cons, a, b):
        """Return the '+' root of the quadratic in Q for the line Q = a*T + b."""
        quad_b = -(b + a * T_ret) / a
        quad_c = -cons / (specific_heat_water * density_water)
        quad_a = 1 / a
        return (-quad_b + np.sqrt(quad_b**2 - 4 * quad_a * quad_c)) / (2 * quad_a)

    def get_Tsup_and_Q(cons, Q_ub):
        """Return (T_sup, Q) delivering `cons` given the upper flow limit `Q_ub`."""
        # try lowest possible T
        Q = cons / (specific_heat_water * density_water * (T1 - T_ret))
        if Q <= 295:
            return T1, Q
        if Q > 295:
            Q = Q_from_cons_lin_piece(cons, a2, b2)
            knee = Q_ub * (340. / 360)
            if Q <= knee:
                return (Q - b2) / a2, Q
            if Q >= knee:
                b3_adjusted = b3 + (knee - 340)
                Q = Q_from_cons_lin_piece(cons, a3, b3_adjusted)
                if Q <= Q_ub:
                    return (Q - b3_adjusted) / a3, Q
                if Q > Q_ub:
                    # Flow is capped; the temperature makes up the rest.
                    return (cons / (specific_heat_water * density_water * Q_ub)
                            + T_ret), Q_ub

    plt.close('all')
    fig, [ax1, ax2] = plt.subplots(2, 1, sharex=True, sharey=True)

    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                   dt.datetime(2016, 1, 15, 0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                   dt.datetime(2016, 2, 5, 0))
    all_ts = ts1 + ts2

    PI_T_sup = '4.146.120.29.HA.101'
    PI_Q = 'K.146A.181.02.HA.101'

    def fetch_both_periods(pi_no):
        """Hourly values of one PI tag over ts1 followed by ts2."""
        first = sq.fetch_hourly_vals_from_PIno(pi_no, ts1[0], ts1[-1])
        second = sq.fetch_hourly_vals_from_PIno(pi_no, ts2[0], ts2[-1])
        return np.concatenate([first, second])

    df = pd.DataFrame()
    df['T_sup'] = fetch_both_periods(PI_T_sup)
    df['Q'] = fetch_both_periods(PI_Q)
    df['ts'] = all_ts
    df['cons'] = (specific_heat_water * density_water * df['Q']
                  * (df['T_sup'] - T_ret))

    stored = np.load('combined_conf_int.npz')
    model_conf_int = stored['combined_conf_int']
    assert (list(stored['timesteps']) == all_ts
            ), "confidence intervals don't have matching time steps"

    const_Q_ub = 360
    max_conf_int = np.max(model_conf_int)

    Q_const_cap, T_sup_const_cap = [], []
    Q_dyn_cap, T_sup_dyn_cap = [], []
    dyn_Q_ub = []
    for c, model_uncertainty in zip(df['cons'], model_conf_int):
        T_const, Q_const = get_Tsup_and_Q(c, const_Q_ub)
        Q_const_cap.append(Q_const)
        T_sup_const_cap.append(T_const)

        # Upper flow limit scaled between 410 and const_Q_ub by the relative
        # size of this hour's confidence interval.
        Q_ub = 410 - (410 - const_Q_ub) * (model_uncertainty / max_conf_int)
        dyn_Q_ub.append(Q_ub)
        T_dyn, Q_dyn = get_Tsup_and_Q(c, Q_ub)
        Q_dyn_cap.append(Q_dyn)
        T_sup_dyn_cap.append(T_dyn)

    dT = 0.1
    band_x = [65 + dT, 95 - dT]
    for ax in (ax1, ax2):
        ax.fill_between(band_x, [410, 410], [360, 360],
                        facecolor=red, alpha=0.25)
        ax.fill_between(band_x, [360, 360], [340, 340],
                        facecolor=yellow, alpha=0.25)
        ax.fill_between([T1, 71.4, 80.9, 100], [295, 340, 360, 360],
                        color='k', edgecolor='k', alpha=0.2, linewidth=1)

    ax1.plot(band_x, [410, 410], '--', c=red, lw=2)
    ax1.text(79, 415, 'Maximum pump capacity', size=8)
    im = ax1.scatter(T_sup_const_cap, Q_const_cap, c=df['cons'],
                     cmap=plt.cm.BuPu)
    ax2.scatter(T_sup_dyn_cap, Q_dyn_cap, c=df['cons'], cmap=plt.cm.BuPu)
    ax2.plot(band_x, [410, 410], '--', c=red, lw=2)
    ax2.text(79, 415, 'Maximum pump capacity', size=8)

    cax, kw = mpl.colorbar.make_axes([ax1, ax2])
    fig.colorbar(im, cax=cax)
    cax.set_ylabel('Delivered heat [MW]', size=8)
    ax2.set_xlabel(u'Supply temperature [%sC]' % uni_degree, size=8)
    flow_label = u'Water flow rate [m%s/h]' % uni_tothethird
    ax1.set_ylabel(flow_label, size=8)
    ax2.set_ylabel(flow_label, size=8)
    ax1.tick_params(axis='both', which='major', labelsize=8)
    ax2.tick_params(axis='both', which='major', labelsize=8)
    cax.tick_params(axis='y', which='major', labelsize=8)
    ax1.set_title('Scenario 1', size=10)
    ax2.set_title('Scenario 2', size=10)
    ax1.set_xlim((65, 95))
    ax1.set_ylim((150, 450))
    fig.set_size_inches(1.15 * colwidth, 1.6 * colwidth)
    fig.savefig('figures/first_articlefigs/hoerning_pump_model.pdf')

    # This is a theoretical calculation in case the model uncertainty was 50%
    # of what it is.
    # This number is printed when production_model() is run
    # (Width of const blue band (MW) ...)
    statistical_conf_int = 50.90285
    reduced_model_conf_int = model_conf_int - 0.5 * statistical_conf_int

    Q_dyn_cap_half_model_unc = []
    T_sup_dyn_cap_half_model_unc = []
    dyn_Q_ub_half_model_unc = []
    for c, model_uncertainty in zip(df['cons'], reduced_model_conf_int):
        Q_ub = 410 - (410 - const_Q_ub) * (model_uncertainty / max_conf_int)
        dyn_Q_ub_half_model_unc.append(Q_ub)
        T_dyn, Q_dyn = get_Tsup_and_Q(c, Q_ub)
        Q_dyn_cap_half_model_unc.append(Q_dyn)
        T_sup_dyn_cap_half_model_unc.append(T_dyn)

    return (T_sup_const_cap, T_sup_dyn_cap, Q_const_cap, Q_dyn_cap,
            model_conf_int, T_sup_dyn_cap_half_model_unc)
def main(argv):
    """Run the supply-temperature scenario analysis for one station.

    Steps, in order:

    1. Fetch hourly flow, supply and return temperature for the station and
       derive the delivered heat ``cons``; add a smoothed outdoor temperature.
    2. Fit the linear consumption model on the fit period and report the
       errors on the fit, validation and test periods.
    3. Apply the fitted model to every ensemble member's input frame.
    4. Build two error margins (a constant one, "sc2", and an ensemble-based
       one, "sc3") and simulate operation with each.
    5. Repeat step 4 for synthetic consumption series generated with three
       different model standard deviations.
    6. Print summary statistics and draw/save the figures.

    Parameters
    ----------
    argv : sequence
        ``argv[0]`` is the station name (a key of ``PI_T_sup_dict``) and
        ``argv[1]`` the number of standard deviations used for the error
        margins (number or numeric string).  If either is missing the
        defaults ``'holme'`` and ``2`` are used.

    Returns
    -------
    None
    """
    plt.close('all')

    usage = ("Use rundhoej, holme or hoerning and a float for the "
             "uncertainty bound")
    try:
        station = argv[0]
        no_sigma = argv[1]
    except (IndexError, TypeError):
        print("No station provided. Defaults to holme, no_sigma=2")
        station = 'holme'
        no_sigma = 2
    else:
        if station not in PI_T_sup_dict:
            print(usage)
            return
        if not isinstance(no_sigma, (int, float)):
            # Command-line arguments arrive as strings; without this
            # conversion the arithmetic and '%2.2f' formatting below fail.
            try:
                no_sigma = float(no_sigma)
            except (TypeError, ValueError):
                print(usage)
                return

    def show(label, value):
        """Print `label` and `value` separated by one space."""
        print("%s %s" % (label, value))

    def show_errors(label, err, actual):
        """Print RMSE, MAE and MAPE of `err` behind `label`."""
        print("%s %s %s %s" % (label, rmse(err), mae(err), mape(err, actual)))

    show(station, no_sigma)

    # old tsstart dt.datetime(2014,12,17,1)
    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 3, 1, 1),
                                   dt.datetime(2016, 1, 15, 0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 19, 1),
                                   dt.datetime(2016, 3, 1, 0))
    all_ts = ts1 + ts2

    def fetch_hourly(pi_no):
        """Hourly values of one PI tag over ts1 followed by ts2."""
        return np.concatenate([
            sq.fetch_hourly_vals_from_PIno(pi_no, ts1[0], ts1[-1]),
            sq.fetch_hourly_vals_from_PIno(pi_no, ts2[0], ts2[-1]),
        ])

    df = pd.DataFrame(index=all_ts)
    if station == 'holme':
        # holme has two flow tags; the total flow is their sum.
        df['Q1'] = fetch_hourly(PI_Q_dict[station])
        df['Q2'] = fetch_hourly(PI_Q_dict2[station])
        df['Q'] = df['Q1'] + df['Q2']
    else:
        df['Q'] = fetch_hourly(PI_Q_dict[station])
    df['T_sup'] = fetch_hourly(PI_T_sup_dict[station])
    df['T_ret'] = fetch_hourly(PI_T_ret_dict[station])
    df['ts'] = all_ts
    df['cons'] = (specific_heat_water * density_water * df['Q']
                  * (df['T_sup'] - df['T_ret']))

    Tout = np.concatenate([
        sq.fetch_BrabrandSydWeather('Tout', ts1[0], ts1[-1]),
        sq.fetch_BrabrandSydWeather('Tout', ts2[0], ts2[-1]),
    ])
    # Mean over the current and the 23 preceding entries.
    # NOTE(review): for i < 23 the negative indices wrap around to the end
    # of the series -- confirm this is acceptable for the first day.
    Tout_low_pass = [
        Tout[range(i - 23, i + 1)].mean() for i in range(len(Tout))
    ]
    df['Toutsmooth'] = Tout_low_pass

    Tsup_vs_Tout(df, station)

    #%% fitting and testing consumption prediction
    fit_data, vali_data, test_data, all_data = load_cons_model_dfs(df)
    fit_y = fit_data['cons']
    columns = ['cons24hbefore', 'Tout24hdiff', 'vWind24hdiff',
               'sunRad24hdiff']
    X = fit_data[columns]
    res = mlin_regression(fit_y, X, add_const=False)

    fiterr = res.fittedvalues - fit_y
    show_errors("Errors fit period: ", fiterr, fit_y)

    vali_pred = linear_map(vali_data, res.params, columns)
    valierr = vali_pred - vali_data['cons']
    show_errors("Errors validation period: ", valierr, vali_data['cons'])

    test_pred = linear_map(test_data, res.params, columns)
    testerr = test_pred - test_data['cons']
    show_errors("Errors test period: ", testerr, test_data['cons'])

    plt.figure()
    ens_dfs = load_cons_model_ens_dfs(df)
    ens_preds = np.empty((len(ens_dfs[0]), len(ens_dfs)))
    for i, edf in enumerate(ens_dfs):
        ens_pred = linear_map(edf, res.params, columns)
        ens_preds[:, i] = ens_pred
        plt.plot_date(all_data.index, ens_pred, 'grey', lw=0.5)
    ens_preds = pd.DataFrame(ens_preds, index=all_data.index)

    plt.plot_date(all_data.index, all_data['cons'], 'k-', lw=2)
    plt.plot_date(all_data.index,
                  np.concatenate([res.fittedvalues, vali_pred, test_pred]),
                  'r-', lw=2)
    plt.title(station + ' forecasts of consumption')

    nonfit_errors = pd.concat([valierr, testerr])
    all_pred = np.concatenate([res.fittedvalues, vali_pred, test_pred])
    all_pred = pd.Series(all_pred, index=all_data.index)
    print(res.summary())

    #%%
    TminofTout_fun = get_TminofTout_func(df, station, frac_below=0.005)

    # .loc replaces the removed .ix indexer; copy so the column assignments
    # below work on an independent frame.
    sim_input = df.loc[all_data.index].copy()
    sim_input['T_ret1hbefore'] = np.roll(sim_input['T_ret'], 1)
    sim_input['cons_pred'] = all_pred

    sc2_errormargin = pd.Series(
        no_sigma * np.ones(len(sim_input)) * nonfit_errors.std(),
        index=sim_input.index)

    # Window covering the validation and test periods (everything that was
    # not used for fitting).
    nonfit_ts_start = vali_data.index[0]
    nonfit_ts_end = test_data.index[-1]
    ens_nonfit = ens_preds.loc[nonfit_ts_start:nonfit_ts_end]
    pred_nonfit = sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred']
    cons_nonfit = sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons']

    quantile_sc2 = 1. - percent_above_forecasterrormargin(
        sc2_errormargin.loc[nonfit_ts_start:nonfit_ts_end],
        pred_nonfit, cons_nonfit)
    sc3_model_uncert = model_based_uncertainty_alaGorm(
        ens_nonfit, pred_nonfit, cons_nonfit, no_sigma, quantile_sc2)
    sc3_errormargin = pd.Series(
        no_sigma * ens_preds.std(axis=1) + sc3_model_uncert,
        index=sim_input.index)

    sig_m = model_based_sigma_alaChi2(ens_nonfit, pred_nonfit, cons_nonfit)
    sig_t = np.sqrt(ens_preds.std(axis=1)**2 + sig_m**2)
    sc35scale = total_uncertainty_scale_alaChi2(
        ens_nonfit, pred_nonfit, cons_nonfit, quantile_sc2)
    print(sig_m)
    #sc35_errormargin = pd.Series(no_sigma*np.sqrt(ens_preds.std(axis=1)**2+sig_m**2), index=sim_input.index)
    sc35_errormargin = pd.Series(sc35scale * sig_t, index=sim_input.index)

    use_sc35 = False
    if use_sc35:
        sc3_errormargin = sc35_errormargin

    sim_results_sc2 = simulate_operation(sim_input, sc2_errormargin,
                                         TminofTout_fun, station)
    sim_results_sc3 = simulate_operation(sim_input, sc3_errormargin,
                                         TminofTout_fun, station)

    #%% synthetic consumption, controlled variable model uncertainty
    cons_std = sim_input['cons'].std()
    model_stds = [0.5 * cons_std, 0.1 * cons_std, 0.05 * cons_std]
    # sim_input['cons'].std()*np.linspace(0,1,10)
    sc2_synth_results = []
    sc3_synth_results = []
    model_uncerts = []
    for model_std in model_stds:
        synth_cons = gen_synthetic_cons(ens_preds, sim_input['cons_pred'],
                                        model_std)
        sim_input_synth = sim_input.copy(deep=True)
        sim_input_synth['cons'] = synth_cons
        pred_synth = sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end,
                                         'cons_pred']
        cons_synth = sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end,
                                         'cons']
        synth_resid = pred_synth - cons_synth

        sc2_errormargin_synth = pd.Series(
            no_sigma * np.ones(len(sim_input_synth)) * synth_resid.std(),
            index=sim_input_synth.index)
        quantile_sc2_synth = 1. - percent_above_forecasterrormargin(
            sc2_errormargin_synth.loc[nonfit_ts_start:nonfit_ts_end],
            pred_synth, cons_synth)
        show("Sc2 q: ", quantile_sc2_synth)

        sc3_model_uncert_synth = model_based_uncertainty_alaGorm(
            ens_nonfit, pred_synth, cons_synth, no_sigma, quantile_sc2_synth)
        model_uncerts.append(sc3_model_uncert_synth)
        sc3_errormargin_synth = pd.Series(
            no_sigma * ens_preds.std(axis=1) + sc3_model_uncert_synth,
            index=sim_input_synth.index)

        sim_results_sc2_synth = simulate_operation(
            sim_input_synth, sc2_errormargin_synth, TminofTout_fun, station)
        sim_results_sc3_synth = simulate_operation(
            sim_input_synth, sc3_errormargin_synth, TminofTout_fun, station)
        sc2_synth_results.append(sim_results_sc2_synth)
        sc3_synth_results.append(sim_results_sc3_synth)

    mean_Tsupdiff = []
    mean_heatlossreduced = []
    for sc2_res, sc3_res in zip(sc2_synth_results, sc3_synth_results):
        mean_Tsupdiff.append(np.mean(sc2_res['T_sup'] - sc3_res['T_sup']))
        mean_heatlossreduced.append(
            np.mean(100 * (1 - (sc3_res['T_sup'] - T_grnd)
                           / (sc2_res['T_sup'] - T_grnd))))

    plt.figure()
    plt.plot(model_uncerts, mean_Tsupdiff, 'k.')
    plt.title('Mean temp reduction vs model uncert.')

    show("Perc above errormargin, sc2: ",
         percent_above_forecasterrormargin(
             sc2_errormargin.loc[nonfit_ts_start:nonfit_ts_end],
             pred_nonfit, cons_nonfit))
    show("Perc above errormargin, sc3: ",
         percent_above_forecasterrormargin(
             sc3_errormargin.loc[nonfit_ts_start:nonfit_ts_end],
             pred_nonfit, cons_nonfit))
    show("mean errormargin, sc2: ", sc2_errormargin.mean())
    show("mean errormargin, sc3: ", sc3_errormargin.mean())
    show("rms errormargin, sc2: ", rmse(sc2_errormargin))
    show("rms errormargin, sc3: ", rmse(sc3_errormargin))

    # The "Synth" figures below refer to the last entry of model_stds only
    # (the variables still hold the values of the final loop iteration).
    show("Synth Perc above errormargin, sc2: ",
         percent_above_forecasterrormargin(
             sc2_errormargin_synth.loc[nonfit_ts_start:nonfit_ts_end],
             pred_synth, cons_synth))
    show("Synth Perc above errormargin, sc3: ",
         percent_above_forecasterrormargin(
             sc3_errormargin_synth.loc[nonfit_ts_start:nonfit_ts_end],
             pred_synth, cons_synth))
    show("Synth mean errormargin, sc2: ", sc2_errormargin_synth.mean())
    show("Synth mean errormargin, sc3: ", sc3_errormargin_synth.mean())
    show("Synth rms errormargin, sc2: ", rmse(sc2_errormargin_synth))
    show("Synth rms errormargin, sc3: ", rmse(sc3_errormargin_synth))

    #% error margins:
    fig_error_margins(sc2_errormargin, sc3_errormargin, sim_input,
                      sc3_model_uncert, station, no_sigma)
    fig_error_margins(sc2_errormargin_synth, sc3_errormargin_synth,
                      sim_input_synth, sc3_model_uncert_synth, station,
                      no_sigma)

    sns.jointplot(np.abs(nonfit_errors), ens_nonfit.std(axis=1))
    sns.jointplot(np.abs(synth_resid), ens_nonfit.std(axis=1))

    #% T Q scatter plots
    fig, axes = plt.subplots(3, 1, figsize=(10, 16), sharex=True,
                             sharey=True)
    axes[0].scatter(sim_input['T_sup'], sim_input['Q'], c=sim_input['cons'])
    axes[0].set_title(station + ': ' + 'Scenario 1')
    axes[1].scatter(sim_results_sc2['T_sup'], sim_results_sc2['Q'],
                    c=sim_results_sc2['cons'])
    axes[1].set_title(station + ': Scenario 2: ' + str(no_sigma)
                      + r'$\sigma$')
    axes[2].scatter(sim_results_sc3['T_sup'], sim_results_sc3['Q'],
                    c=sim_results_sc3['cons'])
    axes[2].set_title(station + ': Scenario 3: ' + str(no_sigma)
                      + r'$\sigma$')
    axes[1].set_ylabel(u'Water flow rate [m%s/h]' % uni_tothethird, size=8)
    axes[2].set_xlabel(u'Supply temperature [%sC]' % uni_degree, size=8)
    fig.tight_layout()
    fig.savefig(figpath + 'TQscatter_%2.2f' % (no_sigma) + 'sigma_'
                + station + '.pdf')

    # T_sup time series fig
    fig, axes = plt.subplots(3, 1, figsize=(15, 15), sharex=True)
    axes[0].plot_date(sim_input.index, sim_input['T_sup'], 'k-',
                      label='Scenario 1')
    axes[0].plot_date(sim_input.index, sim_results_sc2['T_sup'], 'r-', lw=3,
                      label='Scenario 2')
    # Fixed: the Scenario 3 curve used to plot the Scenario 2 results.
    axes[0].plot_date(sim_input.index, sim_results_sc3['T_sup'], 'g-',
                      label='Scenario 3')
    axes[0].set_title(station + ', ' + str(no_sigma) + r'$\sigma$'
                      + ': Supply temperature')
    axes[0].set_ylabel(u'Supply temperature [%sC]' % uni_degree, size=8)
    axes[0].legend()

    axes[1].plot_date(sim_input.index, sim_input['Q'], 'k-',
                      label='Scenario 1')
    axes[1].plot_date(sim_input.index, sim_results_sc2['Q'], 'r-',
                      label='Scenario 2')
    axes[1].plot_date(sim_input.index, sim_results_sc2['Q_ref'], 'b-', lw=1,
                      label=r'$Q_{ref}$' + 'Scenario 2')
    axes[1].set_ylabel(u'Water flow rate [m%s/h]' % uni_tothethird, size=8)
    axes[1].legend()

    axes[2].plot_date(sim_input.index, sim_input['Q'], 'k-',
                      label='Scenario 1')
    axes[2].plot_date(sim_input.index, sim_results_sc3['Q'], 'g-',
                      label='Scenario 3')
    axes[2].plot_date(sim_input.index, sim_results_sc3['Q_ref'], 'b-', lw=1,
                      label=r'$Q_{ref}$' + 'Scenario 3')
    axes[2].set_ylabel(u'Water flow rate [m%s/h]' % uni_tothethird, size=8)
    axes[2].legend()
    fig.savefig(figpath + 'TQtimeseries_%2.2f' % (no_sigma) + 'sigma_'
                + station + '.pdf')

    # Difference in supply temperature between the scenarios
    fig_heat_loss(sim_input, sim_results_sc2, sim_results_sc3, station,
                  no_sigma)
    fig_heat_loss(sim_input_synth, sim_results_sc2_synth,
                  sim_results_sc3_synth, station, no_sigma, save=False)

    return

    #%% The below section only runs if we view Tmin as a function of Q (the old way)
    # note: SOME OF THIS USES CONSTANT TRET!!
    # NOTE(review): everything below is unreachable (it follows the return
    # above and is additionally guarded by TminofQ = False).  It is kept
    # unchanged as a record of the older approach.
    TminofQ = False
    if TminofQ:
        # outlierdetection
        X = df[['T_sup', 'Q']]
        outlier_detection = False
        if outlier_detection:
            detect_outliers(X, station)
        else:
            inlierpred = np.ones(len(df), dtype=bool)

        fig, ax1 = plt.subplots()
        ax2 = ax1.twinx()
        cond_df = df
        ax1.plot_date(np.array(cond_df['ts']), np.array(cond_df['Q']), 'b')
        ax2.plot_date(np.array(cond_df['ts']), np.array(cond_df['T_sup']),
                      'r-')

        plt.figure()
        plt.plot_date(df['ts'], df['cons'], 'g-')
        plt.title(station)

        plt.figure()
        plt.scatter(df['T_sup'], df['Q'], c=df['cons'], alpha=0.25)
        plt.colorbar()
        plt.title(station)

        outliers = df[np.logical_not(inlierpred)]
        plt.plot(np.array(outliers['T_sup']), np.array(outliers['Q']), 'ko')

        #%%
        #plot_Tmin_Q_quantiles(df, inlierpred)
        Q = np.linspace(df[inlierpred]['Q'].min(), df[inlierpred]['Q'].max(),
                        500)
        qs = [0.001, 0.005, 0.01, 0.02275, 0.05, 0.1]
        for q in qs:
            T_min_func, Q_quantiles = get_Tmin_func(df[inlierpred],
                                                    T_min_q=q, N_Q_q=21)
            plt.plot(T_min_func(Q), Q, label=str(q), lw=2)
        plt.legend()
        for Q_qua in Q_quantiles:
            plt.axhline(y=Q_qua)

        #%% P vs Q (T=Tmin(Q))
        T_min_func, Q_quantiles = get_Tmin_func(df, T_min_q=0.02275,
                                                N_Q_q=21)
        plt.figure()
        plt.plot(Q, T_min_func(Q), 'r', label='Tmin')
        P = specific_heat_water * density_water * Q * (T_min_func(Q) - T_ret)
        plt.plot(Q, P, 'b', label='Cons')
        plt.xlabel('Q')
        plt.legend()

        plt.figure()
        simP = df['cons']
        res = [
            op_model(cons, T_min_func, Q_max=Q_max_dict[station],
                     T_ret=T_ret) for cons in simP
        ]
        simT, simQ = zip(*res)
        plt.scatter(df['T_sup'], df['Q'], c='k', alpha=0.1)
        plt.scatter(simT, simQ, c=simP)
        plt.colorbar()
Created on Thu Feb 11 12:30:28 2016

@author: Magnus Dahl
"""

import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pandas as pd
import datetime as dt
import gurobipy as gb

import ensemble_tools as ens
import sql_tools as sq

# Close any figures left open from an earlier run.
plt.close('all')

# Two hourly periods; all_ts is the first period followed by the second.
ts1 = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0))
ts2 = ens.gen_hourly_timesteps(dt.datetime(2016,1,20,1), dt.datetime(2016,2,5,0))
all_ts = ts1 + ts2

# Physical constants.
specific_heat_water = 1.17e-6 # MWh/kgKelvin
density_water = 980 # kg/m3 at 65 deg C
T_ret = 36.5

# PI tag numbers passed to sq.fetch_hourly_vals_from_PIno below.
PI_T_sup = '4.146.120.29.HA.101'
PI_Q = 'K.146A.181.02.HA.101'

# One row per entry of all_ts: the hourly values of each tag over ts1
# followed by those over ts2.
df = pd.DataFrame()
df['T_sup']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_T_sup, ts1[0], \
            ts1[-1]),sq.fetch_hourly_vals_from_PIno(PI_T_sup, ts2[0], ts2[-1])])
df['Q']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_Q, ts1[0], ts1[-1]),\
            sq.fetch_hourly_vals_from_PIno(PI_Q, ts2[0], ts2[-1])])
def Q_T_heatloss_timeseries():
    """Draw figure 5 and return the heat loss reductions.

    Takes the flow and supply-temperature series returned by
    ``hoerning_pump_model()`` and draws three stacked time-series panels:
    flow with the capacity bands and security margins, supply temperature,
    and the percentage heat loss reduction of the dynamic-capacity case
    relative to the constant-capacity case.  The figure is saved as PDF.

    Returns
    -------
    tuple
        ``(heat_loss_reduction, heat_loss_reduction_half_model_unc)``
    """
    # figure 5
    (T_sup_const_cap, T_sup_dyn_cap, Q_const_cap, Q_dyn_cap, model_conf_int,
     T_sup_dyn_cap_half_model_unc) = hoerning_pump_model()

    plt.close('all')
    fig, [ax1, ax2, ax3] = plt.subplots(
        3, 1, sharex=True, figsize=(dcolwidth, 0.65 * dcolwidth),
        gridspec_kw={'height_ratios': [3, 1, 1]})

    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                   dt.datetime(2016, 1, 15, 0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                   dt.datetime(2016, 2, 5, 0))
    n_first = len(ts1)

    def plot_split(ax, values, label=None, **style):
        """Plot `values` as one line per period; only the first gets the label."""
        first_style = dict(style)
        if label is not None:
            first_style['label'] = label
        ax.plot_date(ts1, values[:n_first], '-', **first_style)
        ax.plot_date(ts2, values[n_first:], '-', **style)

    # Lower edges of the security margins, scaled by the relative size of
    # the confidence interval.
    red_area_lb = 410 - (410 - 360) * (model_conf_int
                                       / np.max(model_conf_int))
    yellow_area_lb = (340. / 360) * red_area_lb

    limlw = 0.75
    plot_split(ax1, red_area_lb, label='Scenario 2 security margins',
               c=darkgrey, lw=limlw)
    plot_split(ax1, yellow_area_lb, c=darkgrey, lw=limlw)

    for lower, upper, colour in ((360, 410, red), (340, 360, yellow)):
        for ts in (ts1, ts2):
            ones = np.ones(len(ts))
            ax1.fill_between(ts, lower * ones, upper * ones,
                             facecolor=colour, alpha=0.25)

    plot_split(ax1, Q_const_cap, label='Scenario 1', c=red)
    plot_split(ax1, Q_dyn_cap, label='Scenario 2', c=green, lw=1)
    ax1.plot_date(ts1 + ts2, 410 * np.ones(len(ts1 + ts2)), '--', c=red,
                  lw=1)

    # Legend entries sorted alphabetically by label.
    handles, labels = ax1.get_legend_handles_labels()
    by_label = sorted(zip(handles, labels), key=lambda pair: pair[1])
    handles2, labels2 = zip(*by_label)
    ax1.legend(handles2, labels2, loc=0, prop={'size': 8})

    plot_split(ax2, T_sup_const_cap, label='Scenario 1', c=red)
    plot_split(ax2, T_sup_dyn_cap, label='Scenario 2', c=green, lw=1)
    ax2.legend(loc=6, prop={'size': 8})

    T_grnd = 6.4
    const_excess = np.array(T_sup_const_cap) - T_grnd
    heat_loss_reduction = 100 * (
        1 - (np.array(T_sup_dyn_cap) - T_grnd) / const_excess)
    heat_loss_reduction_half_model_unc = 100 * (
        1 - (np.array(T_sup_dyn_cap_half_model_unc) - T_grnd) / const_excess)

    plot_split(ax3, heat_loss_reduction, c=blue, lw=1)

    ax3.xaxis.set_major_formatter(DateFormatter('%b %d \n %Y'))
    ax1.tick_params(axis='y', which='major', labelsize=8)
    ax1.set_ylim(150, 450)
    ax2.tick_params(axis='y', which='major', labelsize=8)
    ax3.tick_params(axis='y', which='major', labelsize=8)
    ax1.set_ylabel(u'Flow rate [m%s/h]' % uni_tothethird, size=8)
    ax2.set_ylabel(u'Supply\ntemperature [%sC]' % uni_degree, size=8)
    ax3.set_ylabel('Heat loss\nreduction [%]', size=8)
    mjloc = mpl.ticker.MultipleLocator(1)
    ax3.yaxis.set_major_locator(mjloc)
    ax3.set_xlim(dt.datetime(2015, 12, 17, 0), dt.datetime(2016, 2, 5, 0))

    fig.tight_layout()
    fig.savefig(
        'Q:/Projekter/Ens Article 1/figures/Q_T_heatloss_timeseries.pdf')

    return heat_loss_reduction, heat_loss_reduction_half_model_unc
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 28 11:04:19 2016

@author: azfv1n8
"""
import datetime as dt

import numpy as np
import pandas as pd

import ensemble_tools as ens
import sql_tools as sq
from model_selection import (linear_map, mlin_regression,
                             gen_all_combinations, summary_to_file, mae,
                             mape, rmse)

#%%
# Hourly timesteps of the fit, validation and test periods.
fit_ts = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                  dt.datetime(2016, 1, 15, 0))
vali_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                   dt.datetime(2016, 2, 5, 0))
test_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1),
                                   dt.datetime(2016, 3, 1, 0))
all_ts = fit_ts + vali_ts + test_ts

weathervars = ['Tout', 'vWind', 'sunRad', 'hum']

fit_data, vali_data, test_data = (pd.DataFrame(), pd.DataFrame(),
                                  pd.DataFrame())

# Production over each period's window moved one day back.
for _frame, _ts in ((fit_data, fit_ts), (vali_data, vali_ts),
                    (test_data, test_ts)):
    _frame['prod24h_before'] = sq.fetch_production(
        _ts[0] + dt.timedelta(days=-1), _ts[-1] + dt.timedelta(days=-1))
def main(argv):
    """Fit and evaluate the linear consumption model for one station.

    Consumption and ensemble-mean weather data are collected for a fit, a
    validation and a test period.  A linear model without constant term is
    fitted on the fit period, its errors on all three periods are printed,
    and measured versus modelled consumption is plotted.

    Parameters
    ----------
    argv : sequence
        ``argv[0]`` is the station name; it must be a key of
        ``PI_T_sup_dict``.  If it is missing, ``'holme'`` is used.

    Returns
    -------
    None
    """
    plt.close('all')
    try:
        station = argv[0]
    except (IndexError, TypeError):
        # Only a missing argument falls back to the default; other errors
        # are no longer swallowed.
        print("No station provided. Defaults to holme.")
        station = 'holme'
    else:
        if station not in PI_T_sup_dict:
            print("Wrong station, use rundhoej, holme or hoerning")
            return

    print(station)
    plt.close('all')

    #%%
    fit_ts = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                      dt.datetime(2016, 1, 15, 0))
    vali_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                       dt.datetime(2016, 2, 5, 0))
    test_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1),
                                       dt.datetime(2016, 4, 1, 0))
    all_ts = fit_ts + vali_ts + test_ts

    weathervars = ['Tout', 'vWind', 'sunRad', 'hum']
    cons_key = sq.consumption_place_key_dict[station]
    one_day = dt.timedelta(days=1)

    def ens_mean_24h_diff(ts, var):
        """Ensemble mean of `var` over `ts` minus the same one day earlier."""
        current = ens.load_ens_timeseries_as_df(
            ts_start=ts[0], ts_end=ts[-1], weathervars=[var]).mean(axis=1)
        day_before = ens.load_ens_timeseries_as_df(
            ts_start=ts[0] - one_day, ts_end=ts[-1] - one_day,
            weathervars=[var]).mean(axis=1)
        return current - day_before

    def build_period_frame(ts):
        """Assemble the model input frame for the timesteps `ts`."""
        frame = pd.DataFrame()
        frame['cons24h_before'] = sq.fetch_consumption(
            cons_key, ts[0] - one_day, ts[-1] - one_day)
        frame['cons'] = sq.fetch_consumption(cons_key, ts[0], ts[-1])
        for v in weathervars:
            frame['%s24hdiff' % v] = ens_mean_24h_diff(ts, v)
        return frame

    def show_errors(label, err, actual):
        """Print RMSE, MAE and MAPE of `err` behind `label`."""
        print("%s %s %s %s" % (label, rmse(err), mae(err), mape(err, actual)))

    fit_data = build_period_frame(fit_ts)
    vali_data = build_period_frame(vali_ts)
    test_data = build_period_frame(test_ts)

    #%%
    all_data = pd.concat([fit_data, vali_data, test_data])
    no_blind_data = pd.concat([fit_data, vali_data])
    corr = no_blind_data.corr()

    fit_y = fit_data['cons']
    columns = ['cons24h_before', 'Tout24hdiff', 'vWind24hdiff',
               'sunRad24hdiff']
    X = fit_data[columns]
    res = mlin_regression(fit_y, X, add_const=False)

    fiterr = res.fittedvalues - fit_y
    show_errors("Errors fit period: ", fiterr, fit_y)

    vali_pred = linear_map(vali_data, res.params, columns)
    valierr = vali_pred - vali_data['cons']
    show_errors("Errors validation period: ", valierr, vali_data['cons'])

    test_pred = linear_map(test_data, res.params, columns)
    testerr = test_pred - test_data['cons']
    show_errors("Errors test period: ", testerr, test_data['cons'])

    # Measured consumption (black) against fitted/predicted values (red).
    plt.figure()
    plt.plot_date(all_ts, all_data['cons'], 'k-')
    plt.plot_date(all_ts,
                  np.concatenate([res.fittedvalues, vali_pred, test_pred]),
                  'r-')
def autocorr(x):
    """Return the second half of the full auto-correlation of `x`.

    ``np.correlate(x, x, mode='full')`` is computed and the part starting at
    its middle element is returned.
    """
    result = np.correlate(x, x, mode='full')
    # Floor division keeps the slice index an integer on Python 3 as well
    # (plain `/` yields a float there, which cannot be used in a slice).
    return result[result.size // 2:]


def autocorr2(x, lag=1):
    """Return the correlation coefficient between `x` and `x` rolled by `lag`.

    ``np.roll`` shifts circularly, so the elements pushed off one end
    re-enter at the other and take part in the correlation.
    """
    rho = np.corrcoef(x, np.roll(x, lag))[0, 1]
    return rho


def my_diff(x, lag=24):
    """Return `x` minus `x` rolled by `lag`.

    Because ``np.roll`` is circular, the first `lag` entries are differences
    against the last `lag` entries of `x`.
    """
    return x - np.roll(x, lag)


ts = ens.gen_hourly_timesteps(dt.datetime(2013, 1, 1, 1),
                              dt.datetime(2016, 1, 1, 0))
prod = sq.fetch_production(ts[0], ts[-1])
norm_prod = (prod - prod.mean()) / prod.std()
plt.plot_date(ts, prod, '-')

auto_c = autocorr(norm_prod)
# Correlation coefficients for lags 0 .. 2*168 - 1.
rho_i = [autocorr2(prod, i) for i in range(2 * 168)]

prod_24h_diff = my_diff(prod)
rho2 = [autocorr2(prod_24h_diff, i) for i in range(2 * 168)]

prod_48h_diff = my_diff(prod, 48)
def main(argv): plt.close('all') try: station = argv[0] no_sigma = argv[1] if not station in PI_T_sup_dict.keys(): print "Use rundhoej, holme or hoerning and a float for the uncertainty bound" return except: print "No station provided. Defaults to holme, no_sigma=2" station = 'holme' no_sigma=2 print station, no_sigma # old tsstart dt.datetime(2014,12,17,1) ts1 = ens.gen_hourly_timesteps(dt.datetime(2015,3,1,1), dt.datetime(2016,1,15,0)) ts2 = ens.gen_hourly_timesteps(dt.datetime(2016,1,19,1), dt.datetime(2016,3,1,0)) all_ts = ts1 + ts2 df = pd.DataFrame(index=all_ts) if station == 'holme': PI_Q1 = PI_Q_dict[station] PI_Q2 = PI_Q_dict2[station] df['Q1']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_Q1, ts1[0], ts1[-1]),\ sq.fetch_hourly_vals_from_PIno(PI_Q1, ts2[0], ts2[-1])]) df['Q2']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_Q2, ts1[0], ts1[-1]),\ sq.fetch_hourly_vals_from_PIno(PI_Q2, ts2[0], ts2[-1])]) df['Q'] = df['Q1']+df['Q2'] else: PI_Q = PI_Q_dict[station] df['Q']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_Q, ts1[0], ts1[-1]),\ sq.fetch_hourly_vals_from_PIno(PI_Q, ts2[0], ts2[-1])]) PI_T_sup = PI_T_sup_dict[station] df['T_sup']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_T_sup, ts1[0], \ ts1[-1]),sq.fetch_hourly_vals_from_PIno(PI_T_sup, ts2[0], ts2[-1])]) PI_T_ret = PI_T_ret_dict[station] df['T_ret']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_T_ret, ts1[0], \ ts1[-1]),sq.fetch_hourly_vals_from_PIno(PI_T_ret, ts2[0], ts2[-1])]) df['ts'] = all_ts df['cons'] = specific_heat_water*density_water*df['Q']*(df['T_sup']-df['T_ret']) Tout1 = sq.fetch_BrabrandSydWeather('Tout', ts1[0], ts1[-1]) Tout2 = sq.fetch_BrabrandSydWeather('Tout', ts2[0], ts2[-1]) Tout = np.concatenate([Tout1, Tout2]) Tout_low_pass = [Tout[range(i-23,i+1)].mean() for i in range(len(Tout))] df['Toutsmooth'] = Tout_low_pass Tsup_vs_Tout(df, station) #%% fitting and testing consumption prediction fit_data, vali_data, test_data, all_data = load_cons_model_dfs(df) 
fit_y = fit_data['cons'] columns = ['cons24hbefore', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff'] X = fit_data[columns] res = mlin_regression(fit_y,X, add_const=False) fiterr = res.fittedvalues - fit_y print "Errors fit period: ", rmse(fiterr), mae(fiterr), mape(fiterr, fit_y) vali_pred = linear_map(vali_data, res.params, columns) valierr = vali_pred - vali_data['cons'] print "Errors validation period: ", rmse(valierr), mae(valierr), mape(valierr, vali_data['cons']) test_pred = linear_map(test_data, res.params, columns) testerr = test_pred - test_data['cons'] print "Errors test period: ", rmse(testerr), mae(testerr), mape(testerr, test_data['cons']) plt.figure() ens_dfs = load_cons_model_ens_dfs(df) ens_preds = np.empty((len(ens_dfs[0]), len(ens_dfs))) for edf, i in zip(ens_dfs, range(len(ens_dfs))): ens_pred = linear_map(edf, res.params, columns) ens_preds[:,i] = ens_pred plt.plot_date(all_data.index, ens_pred, 'grey', lw=0.5) ens_preds = pd.DataFrame(ens_preds, index=all_data.index) plt.plot_date(all_data.index, all_data['cons'], 'k-', lw=2) plt.plot_date(all_data.index, np.concatenate([res.fittedvalues, vali_pred, test_pred]), 'r-', lw=2) plt.title(station + ' forecasts of consumption') nonfit_errors = pd.concat([valierr, testerr]) all_pred = np.concatenate([res.fittedvalues, vali_pred, test_pred]) all_pred = pd.Series(all_pred, index=all_data.index) print res.summary() #%% TminofTout_fun = get_TminofTout_func(df, station, frac_below = 0.005) sim_input = df.ix[all_data.index] sim_input['T_ret1hbefore'] = np.roll(sim_input['T_ret'], 1) sim_input['cons_pred'] = all_pred sc2_errormargin = pd.Series(no_sigma*np.ones(len(sim_input))*nonfit_errors.std(), index=sim_input.index) nonfit_ts_start = vali_data.index[0] nonfit_ts_end = test_data.index[-1] quantile_sc2 = 1. 
- percent_above_forecasterrormargin(\ sc2_errormargin.loc[nonfit_ts_start:nonfit_ts_end], \ sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \ sim_input.loc[nonfit_ts_start:nonfit_ts_end,'cons']) sc3_model_uncert = model_based_uncertainty_alaGorm(\ ens_preds.loc[nonfit_ts_start:nonfit_ts_end], \ sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \ sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons'], no_sigma, quantile_sc2) sc3_errormargin = pd.Series(no_sigma*ens_preds.std(axis=1) + sc3_model_uncert, index=sim_input.index) sig_m = model_based_sigma_alaChi2(ens_preds.loc[nonfit_ts_start:nonfit_ts_end], sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons']) sig_t = np.sqrt(ens_preds.std(axis=1)**2+sig_m**2) sc35scale = total_uncertainty_scale_alaChi2(\ ens_preds.loc[nonfit_ts_start:nonfit_ts_end],\ sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'],\ sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons'],\ quantile_sc2) print sig_m #sc35_errormargin = pd.Series(no_sigma*np.sqrt(ens_preds.std(axis=1)**2+sig_m**2), index=sim_input.index) sc35_errormargin = pd.Series(sc35scale*sig_t, index=sim_input.index) use_sc35 = False if use_sc35: sc3_errormargin = sc35_errormargin sim_results_sc2 = simulate_operation(sim_input, sc2_errormargin, TminofTout_fun, station) sim_results_sc3 = simulate_operation(sim_input, sc3_errormargin, TminofTout_fun, station) #%% synthetic consumption, controlled variable model uncertainty model_stds = [0.5*sim_input['cons'].std(), 0.1*sim_input['cons'].std(), 0.05*sim_input['cons'].std()]# sim_input['cons'].std()*np.linspace(0,1,10) sc2_synth_results = [] sc3_synth_results = [] model_uncerts = [] for model_std in model_stds: synth_cons = gen_synthetic_cons(ens_preds, sim_input['cons_pred'], model_std) sim_input_synth = sim_input.copy(deep=True) sim_input_synth['cons'] = synth_cons synth_resid = sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'] - 
sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons'] sc2_errormargin_synth = pd.Series(no_sigma*np.ones(len(sim_input_synth))*synth_resid.std(), index=sim_input_synth.index) quantile_sc2_synth = 1. - percent_above_forecasterrormargin(\ sc2_errormargin_synth.loc[nonfit_ts_start:nonfit_ts_end], \ sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \ sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end,'cons']) print "Sc2 q: ", quantile_sc2_synth sc3_model_uncert_synth = model_based_uncertainty_alaGorm(\ ens_preds.loc[nonfit_ts_start:nonfit_ts_end], \ sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \ sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons'], no_sigma, quantile_sc2_synth) model_uncerts.append(sc3_model_uncert_synth) sc3_errormargin_synth = pd.Series(no_sigma*ens_preds.std(axis=1) + sc3_model_uncert_synth, index=sim_input_synth.index) sim_results_sc2_synth = simulate_operation(sim_input_synth, sc2_errormargin_synth, TminofTout_fun, station) sim_results_sc3_synth = simulate_operation(sim_input_synth, sc3_errormargin_synth, TminofTout_fun, station) sc2_synth_results.append(sim_results_sc2_synth) sc3_synth_results.append(sim_results_sc3_synth) mean_Tsupdiff = [] mean_heatlossreduced = [] for sc2_res, sc3_res in zip(sc2_synth_results, sc3_synth_results): mean_Tsupdiff.append(np.mean(sc2_res['T_sup'] - sc3_res['T_sup'])) mean_heatlossreduced.append(np.mean(100*(1-(sc3_res['T_sup']-T_grnd)/(sc2_res['T_sup'] - T_grnd)))) plt.figure() plt.plot(model_uncerts, mean_Tsupdiff, 'k.') plt.title('Mean temp reduction vs model uncert.') print "Perc above errormargin, sc2: ", percent_above_forecasterrormargin(\ sc2_errormargin.loc[nonfit_ts_start:nonfit_ts_end], \ sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \ sim_input.loc[nonfit_ts_start:nonfit_ts_end,'cons']) print "Perc above errormargin, sc3: ", percent_above_forecasterrormargin(sc3_errormargin.loc[nonfit_ts_start:nonfit_ts_end], \ 
sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \ sim_input.loc[nonfit_ts_start:nonfit_ts_end,'cons']) print "mean errormargin, sc2: ", sc2_errormargin.mean() print "mean errormargin, sc3: ", sc3_errormargin.mean() print "rms errormargin, sc2: ", rmse(sc2_errormargin) print "rms errormargin, sc3: ", rmse(sc3_errormargin) print "Synth Perc above errormargin, sc2: ", percent_above_forecasterrormargin(\ sc2_errormargin_synth.loc[nonfit_ts_start:nonfit_ts_end], \ sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \ sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end,'cons']) print "Synth Perc above errormargin, sc3: ", percent_above_forecasterrormargin(sc3_errormargin_synth.loc[nonfit_ts_start:nonfit_ts_end], \ sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \ sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end,'cons']) print "Synth mean errormargin, sc2: ", sc2_errormargin_synth.mean() print "Synth mean errormargin, sc3: ", sc3_errormargin_synth.mean() print "Synth rms errormargin, sc2: ", rmse(sc2_errormargin_synth) print "Synth rms errormargin, sc3: ", rmse(sc3_errormargin_synth) #% error margins: fig_error_margins(sc2_errormargin, sc3_errormargin, sim_input, sc3_model_uncert, station, no_sigma) fig_error_margins(sc2_errormargin_synth, sc3_errormargin_synth, sim_input_synth, sc3_model_uncert_synth, station, no_sigma) sns.jointplot(np.abs(nonfit_errors), ens_preds.loc[nonfit_ts_start:nonfit_ts_end].std(axis=1)) sns.jointplot(np.abs(synth_resid), ens_preds.loc[nonfit_ts_start:nonfit_ts_end].std(axis=1)) #% T Q scatter plots fig, axes = plt.subplots(3,1, figsize=(10,16), sharex=True, sharey=True) axes[0].scatter(sim_input['T_sup'], sim_input['Q'], c=sim_input['cons']) axes[0].set_title(station + ': ' + 'Scenario 1') axes[1].scatter(sim_results_sc2['T_sup'], sim_results_sc2['Q'], c=sim_results_sc2['cons']) axes[1].set_title(station + ': Scenario 2: ' + str(no_sigma) + r'$\sigma$' ) axes[2].scatter(sim_results_sc3['T_sup'], 
sim_results_sc3['Q'], c=sim_results_sc3['cons']) axes[2].set_title(station + ': Scenario 3: ' + str(no_sigma) + r'$\sigma$') axes[1].set_ylabel(u'Water flow rate [m%s/h]'%uni_tothethird, size=8) axes[2].set_xlabel(u'Supply temperature [%sC]'%uni_degree, size=8) fig.tight_layout() fig.savefig(figpath + 'TQscatter_%2.2f'%(no_sigma) + 'sigma_' + station + '.pdf') # T_sup time series fig fig, axes = plt.subplots(3,1, figsize=(15,15), sharex=True) axes[0].plot_date(sim_input.index, sim_input['T_sup'], 'k-', label='Scenario 1') axes[0].plot_date(sim_input.index, sim_results_sc2['T_sup'], 'r-', lw=3, label='Scenario 2') axes[0].plot_date(sim_input.index, sim_results_sc2['T_sup'], 'g-', label='Scenario 3') axes[0].set_title(station + ', ' + str(no_sigma) + r'$\sigma$' + ': Supply temperature') axes[0].set_ylabel(u'Supply temperature [%sC]'%uni_degree, size=8) axes[0].legend() axes[1].plot_date(sim_input.index, sim_input['Q'], 'k-', label='Scenario 1' ) axes[1].plot_date(sim_input.index, sim_results_sc2['Q'], 'r-', label='Scenario 2') axes[1].plot_date(sim_input.index, sim_results_sc2['Q_ref'], 'b-', lw=1, label=r'$Q_{ref}$' + 'Scenario 2') axes[1].set_ylabel(u'Water flow rate [m%s/h]'%uni_tothethird, size=8) axes[1].legend() axes[2].plot_date(sim_input.index, sim_input['Q'], 'k-', label='Scenario 1' ) axes[2].plot_date(sim_input.index, sim_results_sc3['Q'], 'g-', label='Scenario 3') axes[2].plot_date(sim_input.index, sim_results_sc3['Q_ref'], 'b-', lw=1, label=r'$Q_{ref}$' + 'Scenario 3') axes[2].set_ylabel(u'Water flow rate [m%s/h]'%uni_tothethird, size=8) axes[2].legend() fig.savefig(figpath + 'TQtimeseries_%2.2f'%(no_sigma) + 'sigma_' + station + '.pdf') # Differencen in supply temperature between the scenarios fig_heat_loss(sim_input, sim_results_sc2, sim_results_sc3, station, no_sigma) fig_heat_loss(sim_input_synth, sim_results_sc2_synth, sim_results_sc3_synth, station, no_sigma, save=False) return #%% The below section only runs if we view Tmin as a function of Q 
(the old way) # note: SOME OF THIS USES CONSTANT TRET!! TminofQ = False if TminofQ: # outlierdetection X = df[['T_sup','Q']] outlier_detection = False if outlier_detection: detect_outliers(X, station) else: inlierpred = np.ones(len(df), dtype=bool) fig, ax1 = plt.subplots() ax2 = ax1.twinx() cond_df = df ax1.plot_date(np.array(cond_df['ts']), np.array(cond_df['Q']), 'b') ax2.plot_date(np.array(cond_df['ts']), np.array(cond_df['T_sup']), 'r-') plt.figure() plt.plot_date(df['ts'], df['cons'], 'g-') plt.title(station) plt.figure() plt.scatter(df['T_sup'], df['Q'], c=df['cons'], alpha=0.25) plt.colorbar() plt.title(station) outliers = df[np.logical_not(inlierpred)] plt.plot(np.array(outliers['T_sup']), np.array(outliers['Q']), 'ko') #%% #plot_Tmin_Q_quantiles(df, inlierpred) Q = np.linspace(df[inlierpred]['Q'].min(), df[inlierpred]['Q'].max(), 500) qs = [0.001, 0.005, 0.01, 0.02275, 0.05, 0.1] for q in qs: T_min_func, Q_quantiles = get_Tmin_func(df[inlierpred],T_min_q=q, N_Q_q=21) plt.plot(T_min_func(Q), Q, label=str(q), lw=2) plt.legend() for Q_qua in Q_quantiles: plt.axhline(y=Q_qua) #%% P vs Q (T=Tmin(Q)) T_min_func, Q_quantiles = get_Tmin_func(df, T_min_q=0.02275, N_Q_q=21) plt.figure() plt.plot(Q, T_min_func(Q), 'r', label='Tmin') P = specific_heat_water*density_water*Q*(T_min_func(Q)-T_ret) plt.plot(Q, P, 'b', label='Cons') plt.xlabel('Q') plt.legend() plt.figure() simP = df['cons'] res = [op_model(cons, T_min_func, Q_max=Q_max_dict[station], T_ret=T_ret) for cons in simP] simT, simQ = zip(*res) plt.scatter(df['T_sup'], df['Q'], c='k', alpha=0.1) plt.scatter(simT,simQ,c=simP) plt.colorbar()
pointcode=71699) df['%s%ihdiff' % (v, timeshift)] = ens_mean - ens_mean_before return df reload_data = True if reload_data: timelags = [48, 60, 168] all_data = gen_fit_df(dt.datetime(2016, 1, 26, 1), dt.datetime(2016, 4, 1, 0), ['Tout', 'vWind', 'hum', 'sunRad'], timelags) y = all_data['prod'] #%% ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 26, 1), dt.datetime(2016, 4, 1, 0)) X = all_data.ix[:, all_data.columns != 'prod'] X.to_pickle('48h60h168h_lagged_X.pkl') y.to_pickle('prod_to_gowith.pkl') #%% lr = linear_model.LinearRegression(fit_intercept=False) predicted = cross_val_predict(lr, X, y, cv=25) plt.figure() plt.plot(y) plt.plot(predicted, 'r') sns.jointplot(pd.Series(predicted), y) score = cross_val_score(lr, X, y, cv=25, scoring='mean_absolute_error')
def hoerning_pump_model():
    """Compute and plot the pump operating points for constant and dynamic caps.

    (Figure 4.)  For every hourly consumption value the supply temperature
    and flow rate are computed twice with ``get_Tsup_and_Q``: once with a
    constant flow upper bound of 360 and once with a bound that moves
    between 360 and 410 depending on the model confidence interval read
    from 'combined_conf_int.npz'.  Both sets are drawn as scatter plots and
    the figure is saved to 'figures/first_articlefigs/hoerning_pump_model.pdf'.
    A third set is computed for a confidence interval reduced by half of
    ``statistical_conf_int``.

    Returns
    -------
    tuple
        ``(T_sup_const_cap, T_sup_dyn_cap, Q_const_cap, Q_dyn_cap,
        model_conf_int, T_sup_dyn_cap_half_model_unc)``

    Raises
    ------
    AssertionError
        If the time steps stored in the .npz file differ from the time
        steps used here.
    """
    # simple model: piecewise linear Q(T) = a*T + b with a flat start at T1
    T1 = 68.5
    a2 = 15.5
    a3 = 2.1
    b2 = 295-a2*T1
    b3 = 340-a3*71.4

    def Q_from_cons_lin_piece(cons, a, b):
        """Return the flow on the line Q = a*T + b that delivers *cons*.

        Substituting T = (Q - b)/a into
        cons = specific_heat_water*density_water*Q*(T - T_ret) gives a
        quadratic in Q; its larger root is returned.
        """
        B = -(b+a*T_ret)/a
        C = -cons/(specific_heat_water*density_water)
        A = 1/a
        Qplus = (-B+np.sqrt(B**2 - 4*A*C))/(2*A)
        return Qplus

    def get_Tsup_and_Q(cons, Q_ub):
        """Return (T_sup, Q) delivering *cons* with flow at most *Q_ub*."""
        # try lowest possible T
        Q = cons/(specific_heat_water*density_water*(T1 - T_ret))
        if Q <= 295:
            return T1, Q
        elif Q > 295:
            Q = Q_from_cons_lin_piece(cons, a2, b2)
            if Q <= Q_ub*(340./360):
                T = (Q - b2)/a2
                return T, Q
            elif Q >= Q_ub*(340./360):
                # Shift the upper line segment with the flow bound.
                b3_adjusted = b3 + (Q_ub*(340./360) - 340)
                Q = Q_from_cons_lin_piece(cons, a3, b3_adjusted)
                if Q <= Q_ub:
                    T = (Q - b3_adjusted)/a3
                    return T, Q
                elif Q > Q_ub:
                    # Flow is capped; raise the temperature instead.
                    Q = Q_ub
                    T = cons/(specific_heat_water*density_water*Q) + T_ret
                    return T, Q

    plt.close('all')
    fig, [ax1, ax2] = plt.subplots(2,1,sharex=True, sharey=True)

    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016,1,20,1), dt.datetime(2016,2,5,0))
    all_ts = ts1 + ts2

    PI_T_sup = '4.146.120.29.HA.101'
    PI_Q = 'K.146A.181.02.HA.101'
    specific_heat_water = 1.17e-6 # MWh/kgKelvin
    density_water = 980 # kg/m3 at 65 deg C
    T_ret = 36.5

    df = pd.DataFrame()
    df['T_sup']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_T_sup, ts1[0], \
                    ts1[-1]),sq.fetch_hourly_vals_from_PIno(PI_T_sup, ts2[0], ts2[-1])])
    df['Q']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_Q, ts1[0], ts1[-1]),\
                    sq.fetch_hourly_vals_from_PIno(PI_Q, ts2[0], ts2[-1])])
    df['ts'] = all_ts
    df['cons'] = specific_heat_water*density_water*df['Q']*(df['T_sup']-T_ret)

    # Read both arrays from one handle and close the archive afterwards.
    with np.load('combined_conf_int.npz') as conf_file:
        model_conf_int = conf_file['combined_conf_int']
        conf_timesteps = list(conf_file['timesteps'])
    assert(conf_timesteps==all_ts), "confidence intervals don't have matching time steps"
    # Normaliser for the dynamic bound; constant over both loops below.
    max_conf_int = np.max(model_conf_int)

    const_Q_ub = 360
    Q_const_cap = []
    T_sup_const_cap = []
    Q_dyn_cap = []
    T_sup_dyn_cap = []
    dyn_Q_ub = []
    for c, model_uncertainty in zip(df['cons'], model_conf_int):
        T_const, Q_const = get_Tsup_and_Q(c, const_Q_ub)
        Q_const_cap.append(Q_const)
        T_sup_const_cap.append(T_const)

        # Largest uncertainty -> bound of const_Q_ub, zero uncertainty -> 410.
        Q_ub = 410 - (410-const_Q_ub)*(model_uncertainty/max_conf_int)
        dyn_Q_ub.append(Q_ub)
        T_dyn, Q_dyn = get_Tsup_and_Q(c, Q_ub)
        Q_dyn_cap.append(Q_dyn)
        T_sup_dyn_cap.append(T_dyn)

    dT=0.1
    for ax in (ax1, ax2):
        ax.fill_between([65+dT,95-dT], [410, 410], [360, 360], facecolor=red, alpha=0.25)
        ax.fill_between([65+dT,95-dT], [360, 360],[340, 340], facecolor=yellow, alpha=0.25)
        ax.fill_between([T1, 71.4, 80.9, 100], [295, 340, 360, 360], color='k', edgecolor='k', alpha=0.2, linewidth=1)

    ax1.plot([65+dT,95-dT], [410, 410], '--', c=red, lw=2)
    ax1.text(79,415, 'Maximum pump capacity', size=8)
    #im = ax1.scatter(T_sup_const_cap, Q_const_cap, facecolors='none', cmap=plt.cm.BuPu)
    im = ax1.scatter(T_sup_const_cap, Q_const_cap, c=df['cons'], cmap=plt.cm.BuPu)

    ax2.scatter(T_sup_dyn_cap, Q_dyn_cap, c=df['cons'], cmap=plt.cm.BuPu)
    ax2.plot([65+dT,95-dT], [410, 410], '--', c=red, lw=2)
    ax2.text(79,415, 'Maximum pump capacity', size=8)

    cax, kw = mpl.colorbar.make_axes([ax1, ax2])
    fig.colorbar(im, cax=cax)
    cax.set_ylabel('Delivered heat [MW]',size=8)
    ax2.set_xlabel(u'Supply temperature [%sC]'%uni_degree, size=8)
    ax1.set_ylabel(u'Water flow rate [m%s/h]'%uni_tothethird, size=8)
    ax2.set_ylabel(u'Water flow rate [m%s/h]'%uni_tothethird, size=8)
    ax1.tick_params(axis='both', which='major', labelsize=8)
    ax2.tick_params(axis='both', which='major', labelsize=8)
    cax.tick_params(axis='y', which='major', labelsize=8)
    ax1.set_title('Scenario 1', size=10)
    ax2.set_title('Scenario 2', size=10)
    ax1.set_xlim((65,95))
    ax1.set_ylim((150,450))

    fig.set_size_inches(1.15*colwidth,1.6*colwidth)
    fig.savefig('figures/first_articlefigs/hoerning_pump_model.pdf')

    # This is a theoretical calculation in case the model uncertainty was 50% of what it is
    statistical_conf_int = 50.90285 # this number is printed when production_model() is run (Width of const blue band (MW) ...)
    Q_dyn_cap_half_model_unc = []
    T_sup_dyn_cap_half_model_unc = []
    dyn_Q_ub_half_model_unc = []
    reduced_model_conf_int = model_conf_int-0.5*statistical_conf_int
    for c, model_uncertainty in zip(df['cons'], reduced_model_conf_int):
        # Still normalised by the maximum of the unreduced interval.
        Q_ub = 410 - (410-const_Q_ub)*(model_uncertainty/max_conf_int)
        dyn_Q_ub_half_model_unc.append(Q_ub)
        T_dyn, Q_dyn = get_Tsup_and_Q(c, Q_ub)
        Q_dyn_cap_half_model_unc.append(Q_dyn)
        T_sup_dyn_cap_half_model_unc.append(T_dyn)

    return T_sup_const_cap, T_sup_dyn_cap, Q_const_cap, Q_dyn_cap, model_conf_int, T_sup_dyn_cap_half_model_unc
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 28 11:04:19 2016

@author: azfv1n8
"""

import datetime as dt
import numpy as np
import pandas as pd
import ensemble_tools as ens
import sql_tools as sq
from model_selection import linear_map, mlin_regression, gen_all_combinations, summary_to_file, mae, mape, rmse

#%%
# Hourly time steps of the fit, validation and test periods.
fit_ts = ens.gen_hourly_timesteps(
    dt.datetime(2015, 12, 17, 1), dt.datetime(2016, 1, 15, 0))
vali_ts = ens.gen_hourly_timesteps(
    dt.datetime(2016, 1, 20, 1), dt.datetime(2016, 2, 5, 0))
test_ts = ens.gen_hourly_timesteps(
    dt.datetime(2016, 2, 5, 1), dt.datetime(2016, 3, 1, 0))
all_ts = fit_ts + vali_ts + test_ts

weathervars = ['Tout', 'vWind', 'sunRad', 'hum']

# One (initially empty) frame per period.
fit_data, vali_data, test_data = (pd.DataFrame() for _ in range(3))

# Production for the fit window shifted 24 hours back.
fit_data['prod24h_before'] = sq.fetch_production(
    fit_ts[0] - dt.timedelta(days=1),
    fit_ts[-1] - dt.timedelta(days=1))
def Q_T_heatloss_timeseries():
    """Draw flow, supply temperature and heat-loss reduction as time series.

    (Figure 5.)  The operating points come from ``hoerning_pump_model()``.
    The two periods are plotted as separate line segments so that no line
    is drawn across the gap between them.  The figure is written to
    'Q:/Projekter/Ens Article 1/figures/Q_T_heatloss_timeseries.pdf'.

    Returns
    -------
    tuple
        ``(heat_loss_reduction, heat_loss_reduction_half_model_unc)``, both
        in percent relative to the constant-cap scenario.
    """
    (T_sup_const_cap, T_sup_dyn_cap, Q_const_cap, Q_dyn_cap,
     model_conf_int, T_sup_dyn_cap_half_model_unc) = hoerning_pump_model()

    plt.close('all')
    fig, [ax1, ax2, ax3] = plt.subplots(
        3, 1, sharex=True,
        figsize=(dcolwidth, 0.65*dcolwidth),
        gridspec_kw={'height_ratios':[3,1,1]})
    #fig, [ax1, ax2, ax3] = plt.subplots(3, 1, sharex=True, figsize=(dcolwidth, 0.55*dcolwidth))

    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016,1,20,1), dt.datetime(2016,2,5,0))
    n1 = len(ts1)
    n2 = len(ts2)
    # Positions of the first and second period in the concatenated series.
    first = slice(0, n1)
    second = slice(n1, None)

    # Lower edges of the (dynamic) red and yellow security bands.
    red_area_lb1 = 410 - (410-360)*(model_conf_int[first]/np.max(model_conf_int))
    red_area_lb2 = 410 - (410-360)*(model_conf_int[second]/np.max(model_conf_int))
    yellow_area_lb1 = (340./360)*red_area_lb1
    yellow_area_lb2 = (340./360)*red_area_lb2

    limlw = 0.75
    ax1.plot_date(ts1, red_area_lb1, '-', c=darkgrey, lw=limlw, label='Scenario 2 security margins')
    ax1.plot_date(ts2, red_area_lb2, '-', c=darkgrey, lw=limlw)
    ax1.plot_date(ts1, yellow_area_lb1, '-', c=darkgrey, lw=limlw)
    ax1.plot_date(ts2, yellow_area_lb2, '-', c=darkgrey, lw=limlw)

    # Static bands: red 360-410, yellow 340-360.
    ax1.fill_between(ts1, 360*np.ones(n1), 410*np.ones(n1), facecolor=red, alpha=0.25)
    ax1.fill_between(ts2, 360*np.ones(n2), 410*np.ones(n2), facecolor=red, alpha=0.25)
    ax1.fill_between(ts1, 340*np.ones(n1), 360*np.ones(n1), facecolor=yellow, alpha=0.25)
    ax1.fill_between(ts2, 340*np.ones(n2), 360*np.ones(n2), facecolor=yellow, alpha=0.25)

    ax1.plot_date(ts1, Q_const_cap[first], '-', c=red, label='Scenario 1')
    ax1.plot_date(ts2, Q_const_cap[second], '-', c=red)
    ax1.plot_date(ts1, Q_dyn_cap[first], '-', c=green, lw=1, label='Scenario 2')
    ax1.plot_date(ts2, Q_dyn_cap[second], '-', c=green, lw=1)
    both = ts1+ts2
    ax1.plot_date(both, 410*np.ones(len(both)), '--', c=red, lw=1)

    # Legend entries ordered alphabetically by label.
    handles, labels = ax1.get_legend_handles_labels()
    by_label = sorted(zip(handles, labels), key=operator.itemgetter(1))
    handles2, labels2 = zip(*by_label)
    ax1.legend(handles2, labels2, loc=0, prop={'size':8})

    ax2.plot_date(ts1, T_sup_const_cap[first], '-', c=red, label='Scenario 1')
    ax2.plot_date(ts2, T_sup_const_cap[second], '-', c=red)
    ax2.plot_date(ts1, T_sup_dyn_cap[first], '-', c=green, lw=1, label='Scenario 2')
    ax2.plot_date(ts2, T_sup_dyn_cap[second], '-', c=green, lw=1)
    ax2.legend(loc=6, prop={'size':8})

    T_grnd = 6.4
    # Percentage reduction of (T_sup - T_grnd) relative to the constant cap.
    const_excess = np.array(T_sup_const_cap) - T_grnd
    heat_loss_reduction = 100*(1 - (np.array(T_sup_dyn_cap) - T_grnd)/const_excess)
    heat_loss_reduction_half_model_unc = 100*(
        1 - (np.array(T_sup_dyn_cap_half_model_unc) - T_grnd)/(np.array(T_sup_const_cap) - T_grnd))

    ax3.plot_date(ts1, heat_loss_reduction[first], '-', c=blue, lw=1)
    ax3.plot_date(ts2, heat_loss_reduction[second], '-', c=blue, lw=1)

    ax3.xaxis.set_major_formatter(DateFormatter('%b %d \n %Y') )
    ax1.tick_params(axis='y', which='major', labelsize=8)
    ax1.set_ylim(150,450)
    ax2.tick_params(axis='y', which='major', labelsize=8)
    ax3.tick_params(axis='y', which='major', labelsize=8)
    ax1.set_ylabel(u'Flow rate [m%s/h]'%uni_tothethird, size=8)
    ax2.set_ylabel(u'Supply\ntemperature [%sC]'%uni_degree, size=8)
    ax3.set_ylabel('Heat loss\nreduction [%]', size=8)
    ax3.yaxis.set_major_locator(mpl.ticker.MultipleLocator(1))
    ax3.set_xlim(dt.datetime(2015,12,17,0), dt.datetime(2016,2,5,0))

    fig.tight_layout()
    fig.savefig('Q:/Projekter/Ens Article 1/figures/Q_T_heatloss_timeseries.pdf')

    return heat_loss_reduction, heat_loss_reduction_half_model_unc
def second_ens_prod_fig(): """ This plot is based on a production model taking into account: the production 24 hours before as well as the change in temparature, windspeed and solar radiotion from 24 hours ago to now. """ plt.close('all') cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff'] ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1), dt.datetime(2016, 1, 15, 0)) ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1), dt.datetime(2016, 2, 5, 0)) #load the data fit_data = ens.repack_ens_mean_as_df() fit_data['prod24h_before'] = sq.fetch_production( ts1[0] + dt.timedelta(days=-1), ts1[-1] + dt.timedelta(days=-1)) fit_data['Tout24hdiff'] = fit_data['Tout'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['Tout']).mean(axis=1) fit_data['vWind24hdiff'] = fit_data['vWind'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['vWind']).mean(axis=1) fit_data['sunRad24hdiff'] = fit_data['sunRad'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['sunRad']).mean(axis=1) vali_data = ens.repack_ens_mean_as_df(ts2[0], ts2[-1]) vali_data['prod24h_before'] = sq.fetch_production( ts2[0] + dt.timedelta(days=-1), ts2[-1] + dt.timedelta(days=-1)) vali_data['Tout24hdiff'] = vali_data['Tout'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['Tout']).mean(axis=1) vali_data['vWind24hdiff'] = vali_data['vWind'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['vWind']).mean(axis=1) vali_data['sunRad24hdiff'] = vali_data['sunRad'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ 
weathervars=['sunRad']).mean(axis=1) # correct error in production: new_val = (vali_data['prod'][116] + vali_data['prod'][116]) / 2 vali_data['prod'][116] = new_val vali_data['prod'][117] = new_val vali_data['prod24h_before'][116 + 24] = new_val vali_data['prod24h_before'][117 + 24] = new_val # do the fit X = fit_data[cols] y = fit_data['prod'] res = mlin_regression(y, X, add_const=False) fig, [ax1, ax2] = plt.subplots(2, 1, figsize=(40, 20)) # load ensemble data ens_data1 = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1],\ weathervars=['Tout', 'vWind', 'sunRad']) ens_data1['prod24h_before'] = fit_data['prod24h_before'] ens_data1_24h_before = ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['Tout', 'vWind', 'sunRad']) ens_data2 = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1],\ weathervars=['Tout', 'vWind', 'sunRad']) ens_data2['prod24h_before'] = vali_data['prod24h_before'] ens_data2_24h_before = ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['Tout', 'vWind', 'sunRad']) for i in range(25): for v in ['Tout', 'vWind', 'sunRad']: key_raw = v + str(i) key_diff = v + '24hdiff' + str(i) ens_data1[ key_diff] = ens_data1[key_raw] - ens_data1_24h_before[key_raw] ens_data2[ key_diff] = ens_data2[key_raw] - ens_data2_24h_before[key_raw] all_ens_data = pd.concat([ens_data1, ens_data2]) all_ts = ts1 + ts2 # # # calculate production for each ensemble member ens_prods = np.zeros((len(all_ts), 25)) for i in range(25): ens_cols = ['Tout24hdiff' + str(i), 'vWind24hdiff' + str(i),\ 'sunRad24hdiff' + str(i), 'prod24h_before'] ens_params = pd.Series({ 'Tout24hdiff' + str(i): res.params['Tout24hdiff'], 'vWind24hdiff' + str(i): res.params['vWind24hdiff'], 'sunRad24hdiff' + str(i): res.params['sunRad24hdiff'], 'prod24h_before': res.params['prod24h_before'] }) ens_prods[:, i] = linear_map(all_ens_data, 
ens_params, ens_cols) # calculate combined confint ens_std = ens_prods.std(axis=1) vali_resid = linear_map(vali_data, res.params, cols) - vali_data['prod'] vali_resid_corrig = vali_resid - np.sign( vali_resid) * 1.9599 * ens_std[len(ts1):] mean_conf_int_spread = (vali_resid_corrig.quantile(0.95) - vali_resid_corrig.quantile(0.05)) / 2 combined_conf_int = mean_conf_int_spread + 1.9599 * ens_std all_prod_model = np.concatenate( [res.fittedvalues, linear_map(vali_data, res.params, cols)]) combined_ub95 = all_prod_model + combined_conf_int combined_lb95 = all_prod_model - combined_conf_int # plot confint ax1.fill_between(all_ts, combined_lb95, combined_ub95, label='Combined 95% conf. int.') ax1.fill_between(all_ts, all_prod_model - 1.9599 * ens_std, all_prod_model + 1.9599 * ens_std, facecolor='grey', label='Ensemble 95% conf. int.') # plot ensempble models ax1.plot_date(all_ts, ens_prods, '-', lw=0.5) ax1.plot_date(ts1, y, 'k-', lw=2, label='Actual production') ax1.plot_date(ts1, res.fittedvalues, 'r-', lw=2, label='Model on ensemble mean') ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='') ax1.plot_date(ts2, linear_map(vali_data, res.params, cols), 'r-', lw=2) ax1.set_ylabel('[MW]') ax1.legend(loc=2) ax1.set_ylim([0, 1100]) ax2.plot_date(ts1, res.resid, '-', label='Residual, fitted data') ax2.plot_date(ts2, vali_resid, '-', label='Residual, validation data') ax2.set_ylabel('[MW]') ax2.legend(loc=2) ax2.set_ylim([-550, 550]) print "MAE = " + str(mae(vali_resid)) print "MAPE = " + str(mape(vali_resid, vali_data['prod'])) print "RMSE = " + str(rmse(vali_resid)) print "ME = " + str(np.mean(vali_resid)) print "MAE (fit) = " + str(mae(res.resid)) print "MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])) print "RMSE (fit)= " + str(rmse(res.resid)) print "ME (fit)= " + str(np.mean(res.resid)) plt.savefig('figures/ens_prod_models_v2.pdf', dpi=600) plt.figure() plt.plot_date(all_ts, ens_std) plt.ylabel('Std. 
of ensemble production models [MW]') plt.savefig('figures/std_ens_prod_models.pdf', dpi=600) # vali_ens_std = ens_std[len(ts1):] sns.jointplot(x=pd.Series(vali_ens_std), y=np.abs(vali_resid)) sns.jointplot(x=vali_data['prod'], y=pd.Series(linear_map(vali_data, res.params, cols))) EO3_fc1 = sq.fetch_EO3_midnight_forecast(ts1[0], ts1[-1]) EO3_fc2 = sq.fetch_EO3_midnight_forecast(ts2[0], ts2[-1]) plt.figure() plt.plot_date(ts1, fit_data['prod'], 'k-', label='Actual production') plt.plot_date(ts2, vali_data['prod'], 'k-') plt.plot_date(ts1, EO3_fc1, 'r-', label='EO3 forecast') plt.plot_date(ts2, EO3_fc2, 'r-') EO3_err = EO3_fc2 - vali_data['prod'] EO3_err_fit = EO3_fc1 - fit_data['prod'] print "MAE (EO3) = " + str(mae(EO3_err)) print "MAPE (EO3) = " + str(mape(EO3_err, vali_data['prod'])) print "RMSE (EO3)= " + str(rmse(EO3_err)) print "ME (EO3)= " + str(np.mean(EO3_err)) print "MAE (EO3_fit) = " + str(mae(EO3_err_fit)) print "MAPE (EO3_fit) = " + str(mape(EO3_err_fit, fit_data['prod'])) print "RMSE (EO3_fit)= " + str(rmse(EO3_err_fit)) print "ME (EO3_fit)= " + str(np.mean(EO3_err_fit)) sns.jointplot(x=pd.Series(vali_ens_std), y=np.abs(EO3_err)) plt.figure(figsize=(20, 10)) plt.subplot(2, 1, 1) plt.plot_date(all_ts, combined_conf_int / combined_conf_int.max(), '-') plt.ylabel('Model + ensemble uncertainty \n [normalized]') plt.ylim(0, 1) plt.subplot(2, 1, 2) plt.plot_date(all_ts, (1 - 0.2 * combined_conf_int / combined_conf_int.max()), '-', label='Dynamic setpoint') plt.plot_date(all_ts, 0.8 * np.ones(len(all_ts)), '--', label='Static setpoint') plt.ylabel( 'Setpoint for pump massflow \n temperature [fraction of max pump cap]') plt.legend() plt.ylim(.7, 1) plt.savefig('figures/setpoint.pdf') return vali_data, fit_data, res, ens_std, vali_resid
""" import pandas as pd import datetime as dt import numpy as np from sklearn import linear_model from sklearn.svm import SVR from sklearn.preprocessing import StandardScaler from sklearn.cross_validation import cross_val_predict from model_selection import gen_all_combinations, rmse, mae, mape import sql_tools as sq import ensemble_tools as ens #%% SVR experinment ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 26, 1), dt.datetime(2016, 4, 1, 0)) X = pd.read_pickle('48h60h168h_lagged_X.pkl' ) # run model_selection_ext_horizon to generate these files y = pd.read_pickle('prod_to_gowith.pkl') # add more predictor data: for v in ['Tout', 'vWind', 'hum', 'sunRad']: X[v] = ens.load_ens_mean_avail_at10_series(v, ts[0], ts[-1], pointcode=71699) #X['weekdays'] = [t.weekday() for t in ts] def h_hoursbefore(timestamp, h): return timestamp + dt.timedelta(hours=-h)
def most_recent_ens_timeseries(start_stop=(dt.datetime(2015, 12, 16, 0),
                                           dt.datetime(2016, 1, 19, 0)),
                               pointcode=71699,
                               shift_steno_one=False):
    """Plot ensemble weather series against two sets of measurements.

    For every variable in the module-level ``weathervars`` one figure is
    produced and saved as 'figures/<var>_most_recent_ens_timeseries.pdf'.
    The upper panel shows the ensemble members together with the Brabrand
    Syd and Steno Museum measurements, the lower panel the ensemble
    standard deviation and absolute spread.

    Parameters
    ----------
    start_stop : tuple
        Two datetime objects: the first and the last time step of the
        time series.
    pointcode : int
        Geographical point code; it is part of the name of the ensemble
        files read from 'time_series/'.
    shift_steno_one : bool
        If true, the Steno measurements are rolled one step earlier
        (``np.roll(..., -1)``) before plotting.
    """
    plt.close('all')
    # Raw string: the backslash in \degree is meant literally.
    ylabels = [r'[$\degree $C]', '[m/s]', '[%]', '[W/m$^2$]']

    suffix = ''.join(['_geo', str(pointcode), '_', ens.timestamp_str(start_stop[0]), \
                      '_to_', ens.timestamp_str(start_stop[1]), '.npy'])
    timesteps = ens.gen_hourly_timesteps(start_stop[0], start_stop[1])

    # Pull both arrays out of the archive and close it again.
    with np.load(
            'Q:/Weatherdata/Steno_weatherstation/Steno_hourly_2015120111_to_2016011800.npz'
    ) as Steno_data:
        Steno_Tvhs = Steno_data['Tout_vWind_hum_sunRad']
        Steno_timesteps = Steno_data['timesteps']

    # The position of a variable in weathervars selects both its y-label
    # and its column in the Steno array.
    # NOTE(review): this presumably requires weathervars to be ordered
    # Tout, vWind, hum, sunRad (as in the Steno key name) -- confirm.
    for col, (v, ylab) in enumerate(zip(weathervars, ylabels)):
        plt.figure(figsize=(15, 20))
        plt.grid(True)
        plt.subplot(2, 1, 1)

        ens_data = np.load('time_series/' + v + suffix)
        BBSYD_measured = sq.fetch_BrabrandSydWeather(v, start_stop[0],
                                                     start_stop[1])
        Steno_measured = Steno_Tvhs[:, col]
        if shift_steno_one:
            Steno_measured = np.roll(Steno_measured, -1)

        if v == 'Tout':
            ens_data = ens.Kelvin_to_Celcius(ens_data)
        elif v == 'hum':
            ens_data = ens.frac_to_percent(ens_data)  # convert to percentage

        plt.plot_date(timesteps, ens_data, '-')
        plt.plot_date(timesteps, BBSYD_measured, 'k-', lw=2,
                      label='Measured: Brabrand Syd')
        plt.plot_date(Steno_timesteps, Steno_measured, 'r-', lw=2,
                      label='Measured: Steno Museum')
        plt.ylabel(ylab)
        plt.grid(True)
        plt.xlim(start_stop)
        plt.title(v)
        plt.legend()

        plt.subplot(2, 1, 2)
        plt.plot_date(timesteps, ens.ensemble_std(ens_data), '-',
                      label='Ensemble std')
        plt.plot_date(timesteps, ens.ensemble_abs_spread(ens_data), '-',
                      label='Max ensemble spread')
        plt.ylabel(ylab)
        plt.legend()
        plt.grid(True)
        plt.tight_layout()

        figfilename = v + '_most_recent_ens_timeseries.pdf'
        plt.savefig('figures/' + figfilename)
def first_ens_prod_fig(): """ This plot is based on a production model taking into account: Tout, vWind and the production 24 hours before """ plt.close('all') cols = ['Tout', 'vWind', 'prod24h_before'] ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1), dt.datetime(2016, 1, 15, 0)) ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1), dt.datetime(2016, 1, 28, 0)) #load the data fit_data = ens.repack_ens_mean_as_df() fit_data['prod24h_before'] = sq.fetch_production( dt.datetime(2015, 12, 16, 1), dt.datetime(2016, 1, 14, 0)) vali_data = ens.repack_ens_mean_as_df(dt.datetime(2016, 1, 20, 1), dt.datetime(2016, 1, 28, 0)) vali_data['prod24h_before'] = sq.fetch_production( dt.datetime(2016, 1, 19, 1), dt.datetime(2016, 1, 27, 0)) # do the fit X = fit_data[cols] y = fit_data['prod'] res = mlin_regression(y, X, add_const=True) fig, [ax1, ax2] = plt.subplots(2, 1, figsize=(40, 20)) # load ensemble data ens_data1 = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1]) ens_data1['prod24h_before'] = fit_data['prod24h_before'] ens_data2 = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1]) ens_data2['prod24h_before'] = vali_data['prod24h_before'] all_ens_data = pd.concat([ens_data1, ens_data2]) all_ts = ts1 + ts2 # calculate production for each ensemble member ens_prods = np.zeros((len(all_ts), 25)) for i in range(25): ens_cols = ['Tout' + str(i), 'vWind' + str(i), 'prod24h_before'] ens_params = pd.Series({ 'Tout' + str(i): res.params['Tout'], 'vWind' + str(i): res.params['vWind'], 'const': res.params['const'], 'prod24h_before': res.params['prod24h_before'] }) ens_prods[:, i] = linear_map(all_ens_data, ens_params, ens_cols) # calculate combined confint prstd, iv_l, iv_u = wls_prediction_std(res) mean_conf_int_spread = np.mean(res.fittedvalues - iv_l) model_std = np.concatenate( [prstd, (1. 
/ 1.9599) * mean_conf_int_spread * np.ones(len(ts2))]) ens_std = ens_prods.std(axis=1) combined_std = np.sqrt(model_std**2 + ens_std**2) all_prod_model = np.concatenate( [res.fittedvalues, linear_map(vali_data, res.params, cols)]) combined_ub95 = all_prod_model + 1.9599 * combined_std combined_lb95 = all_prod_model - 1.9599 * combined_std # plot confint ax1.fill_between(all_ts, combined_lb95, combined_ub95, label='Combined 95% conf. int.') ax1.fill_between(all_ts, all_prod_model - 1.9599 * ens_std, all_prod_model + 1.9599 * ens_std, facecolor='grey', label='Ensemble 95% conf. int.') # plot ensempble models ax1.plot_date(all_ts, ens_prods, '-', lw=0.5) ax1.plot_date(ts1, y, 'k-', lw=2, label='Actual production') ax1.plot_date(ts1, res.fittedvalues, 'r-', lw=2, label='Model on ensemble mean') ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='') ax1.plot_date(ts2, linear_map(vali_data, res.params, cols), 'r-', lw=2) ax1.set_ylabel('[MW]') ax1.legend(loc=2) vali_resid = linear_map(vali_data, res.params, cols) - vali_data['prod'] ax2.plot_date(ts1, res.resid, '-', label='Residual, fitted data') ax2.plot_date(ts2, vali_resid, '-', label='Residual, validation data') ax2.set_ylabel('[MW]') ax2.legend(loc=2) print "MAE = " + str(mae(vali_resid)) print "MAPE = " + str(mape(vali_resid, vali_data['prod'])) print "RMSE = " + str(rmse(vali_resid)) print "ME = " + str(np.mean(vali_resid)) print "MAE (fit) = " + str(mae(res.resid)) print "MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])) print "RMSE (fit)= " + str(rmse(res.resid)) print "ME (fit)= " + str(np.mean(res.resid)) plt.savefig('figures/ens_prod_models.pdf', dpi=600) plt.figure() plt.plot_date(all_ts, ens_std) plt.ylabel('Std. of ensemble production models [MW]') plt.savefig('figures/std_ens_prod_models.pdf', dpi=600) sns.jointplot(x=ens_std, y=np.concatenate([res.resid, vali_resid])) return res, all_ens_data, all_ts, fit_data['prod'], vali_data['prod']
import pandas as pd import datetime as dt import numpy as np from sklearn import linear_model from sklearn.svm import SVR from sklearn.preprocessing import StandardScaler from sklearn.cross_validation import cross_val_predict from model_selection import gen_all_combinations, rmse, mae, mape import sql_tools as sq import ensemble_tools as ens #%% SVR experinment ts = ens.gen_hourly_timesteps(dt.datetime(2016,1,26,1), dt.datetime(2016,4,1,0)) X = pd.read_pickle('48h60h168h_lagged_X.pkl') # run model_selection_ext_horizon to generate these files y = pd.read_pickle('prod_to_gowith.pkl') # add more predictor data: for v in ['Tout', 'vWind', 'hum', 'sunRad']: X[v] = ens.load_ens_mean_avail_at10_series(v, ts[0], ts[-1], pointcode=71699) #X['weekdays'] = [t.weekday() for t in ts] def h_hoursbefore(timestamp, h): return timestamp + dt.timedelta(hours=-h) most_recent_avail_prod = sq.fetch_production(h_hoursbefore(ts[0], 24),\ h_hoursbefore(ts[-1], 24)) for i, t, p48 in zip(range(len(most_recent_avail_prod)), ts, X['prod48hbefore']):
def main(argv): plt.close('all') try: station = argv[0] if not station in PI_T_sup_dict.keys(): print "Wrong station, use rundhoej, holme or hoerning" return except: print "No station provided. Defaults to holme." station = 'holme' print station plt.close('all') #%% fit_ts = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1), dt.datetime(2016, 1, 15, 0)) vali_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1), dt.datetime(2016, 2, 5, 0)) test_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1), dt.datetime(2016, 4, 1, 0)) all_ts = fit_ts + vali_ts + test_ts weathervars = ['Tout', 'vWind', 'sunRad', 'hum'] fit_data = pd.DataFrame() vali_data = pd.DataFrame() test_data = pd.DataFrame() cons_key = sq.consumption_place_key_dict[station] fit_data['cons24h_before'] = sq.fetch_consumption( cons_key, fit_ts[0] + dt.timedelta(days=-1), fit_ts[-1] + dt.timedelta(days=-1)) vali_data['cons24h_before'] = sq.fetch_consumption( cons_key, vali_ts[0] + dt.timedelta(days=-1), vali_ts[-1] + dt.timedelta(days=-1)) test_data['cons24h_before'] = sq.fetch_consumption( cons_key, test_ts[0] + dt.timedelta(days=-1), test_ts[-1] + dt.timedelta(days=-1)) fit_data['cons'] = sq.fetch_consumption(cons_key, fit_ts[0], fit_ts[-1]) vali_data['cons'] = sq.fetch_consumption(cons_key, vali_ts[0], vali_ts[-1]) test_data['cons'] = sq.fetch_consumption(cons_key, test_ts[0], test_ts[-1]) for v in weathervars: fit_data['%s24hdiff'%v] = ens.load_ens_timeseries_as_df(\ ts_start=fit_ts[0],\ ts_end=fit_ts[-1], \ weathervars=[v]).mean(axis=1) \ - ens.load_ens_timeseries_as_df(\ ts_start=fit_ts[0]+dt.timedelta(days=-1),\ ts_end=fit_ts[-1]+dt.timedelta(days=-1), \ weathervars=[v]).mean(axis=1) vali_data['%s24hdiff'%v] = ens.load_ens_timeseries_as_df(\ ts_start=vali_ts[0],\ ts_end=vali_ts[-1], \ weathervars=[v]).mean(axis=1) \ - ens.load_ens_timeseries_as_df(\ ts_start=vali_ts[0]+dt.timedelta(days=-1),\ ts_end=vali_ts[-1]+dt.timedelta(days=-1), \ weathervars=[v]).mean(axis=1) 
test_data['%s24hdiff'%v] = ens.load_ens_timeseries_as_df(\ ts_start=test_ts[0],\ ts_end=test_ts[-1], \ weathervars=[v]).mean(axis=1) \ - ens.load_ens_timeseries_as_df(\ ts_start=test_ts[0]+dt.timedelta(days=-1),\ ts_end=test_ts[-1]+dt.timedelta(days=-1), \ weathervars=[v]).mean(axis=1) #%% all_data = pd.concat([fit_data, vali_data, test_data]) no_blind_data = pd.concat([fit_data, vali_data]) corr = no_blind_data.corr() fit_y = fit_data['cons'] columns = [ 'cons24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff' ] X = fit_data[columns] res = mlin_regression(fit_y, X, add_const=False) fiterr = res.fittedvalues - fit_y print "Errors fit period: ", rmse(fiterr), mae(fiterr), mape(fiterr, fit_y) vali_pred = linear_map(vali_data, res.params, columns) valierr = vali_pred - vali_data['cons'] print "Errors validation period: ", rmse(valierr), mae(valierr), mape( valierr, vali_data['cons']) test_pred = linear_map(test_data, res.params, columns) testerr = test_pred - test_data['cons'] print "Errors test period: ", rmse(testerr), mae(testerr), mape( testerr, test_data['cons']) plt.figure() plt.plot_date(all_ts, all_data['cons'], 'k-') plt.plot_date(all_ts, np.concatenate([res.fittedvalues, vali_pred, test_pred]), 'r-')
def first_ens_prod_fig(): """ This plot is based on a production model taking into account: Tout, vWind and the production 24 hours before """ plt.close('all') cols = ['Tout', 'vWind', 'prod24h_before'] ts1 = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0)) ts2 = ens.gen_hourly_timesteps(dt.datetime(2016,1,20,1), dt.datetime(2016,1,28,0)) #load the data fit_data = ens.repack_ens_mean_as_df() fit_data['prod24h_before'] = sq.fetch_production(dt.datetime(2015,12,16,1), dt.datetime(2016,1,14,0)) vali_data = ens.repack_ens_mean_as_df(dt.datetime(2016,1,20,1), dt.datetime(2016,1,28,0)) vali_data['prod24h_before'] = sq.fetch_production(dt.datetime(2016,1,19,1), dt.datetime(2016,1,27,0)) # do the fit X = fit_data[cols] y = fit_data['prod'] res = mlin_regression(y, X, add_const=True) fig, [ax1, ax2] = plt.subplots(2,1, figsize=(40,20)) # load ensemble data ens_data1 = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1]) ens_data1['prod24h_before'] = fit_data['prod24h_before'] ens_data2 = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1]) ens_data2['prod24h_before'] = vali_data['prod24h_before'] all_ens_data = pd.concat([ens_data1, ens_data2]) all_ts = ts1 + ts2 # calculate production for each ensemble member ens_prods = np.zeros((len(all_ts), 25)) for i in range(25): ens_cols = ['Tout' + str(i), 'vWind' + str(i), 'prod24h_before'] ens_params = pd.Series({'Tout' + str(i):res.params['Tout'], 'vWind' + str(i):res.params['vWind'], 'const':res.params['const'], 'prod24h_before':res.params['prod24h_before']}) ens_prods[:,i] = linear_map(all_ens_data, ens_params, ens_cols) # calculate combined confint prstd, iv_l, iv_u = wls_prediction_std(res) mean_conf_int_spread = np.mean(res.fittedvalues - iv_l) model_std = np.concatenate([prstd, (1./1.9599)*mean_conf_int_spread*np.ones(len(ts2))]) ens_std = ens_prods.std(axis=1) combined_std = np.sqrt(model_std**2 + ens_std**2) all_prod_model = np.concatenate([res.fittedvalues, 
linear_map(vali_data, res.params, cols)]) combined_ub95 = all_prod_model + 1.9599*combined_std combined_lb95 = all_prod_model - 1.9599*combined_std # plot confint ax1.fill_between(all_ts, combined_lb95, combined_ub95, label='Combined 95% conf. int.') ax1.fill_between(all_ts, all_prod_model - 1.9599*ens_std, all_prod_model + 1.9599*ens_std, facecolor='grey', label='Ensemble 95% conf. int.') # plot ensempble models ax1.plot_date(all_ts, ens_prods, '-', lw=0.5) ax1.plot_date(ts1, y, 'k-', lw=2, label='Actual production') ax1.plot_date(ts1, res.fittedvalues,'r-', lw=2, label='Model on ensemble mean') ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='') ax1.plot_date(ts2, linear_map(vali_data, res.params, cols), 'r-', lw=2) ax1.set_ylabel('[MW]') ax1.legend(loc=2) vali_resid = linear_map(vali_data, res.params, cols) - vali_data['prod'] ax2.plot_date(ts1, res.resid, '-', label='Residual, fitted data') ax2.plot_date(ts2, vali_resid, '-', label='Residual, validation data') ax2.set_ylabel('[MW]') ax2.legend(loc=2) print "MAE = " + str(mae(vali_resid)) print "MAPE = " + str(mape(vali_resid, vali_data['prod'])) print "RMSE = " + str(rmse(vali_resid)) print "ME = " + str(np.mean(vali_resid)) print "MAE (fit) = " + str(mae(res.resid)) print "MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])) print "RMSE (fit)= " + str(rmse(res.resid)) print "ME (fit)= " + str(np.mean(res.resid)) plt.savefig('figures/ens_prod_models.pdf', dpi=600) plt.figure() plt.plot_date(all_ts, ens_std) plt.ylabel('Std. of ensemble production models [MW]') plt.savefig('figures/std_ens_prod_models.pdf', dpi=600) sns.jointplot(x=ens_std, y=np.concatenate([res.resid, vali_resid])) return res, all_ens_data, all_ts, fit_data['prod'], vali_data['prod']
def production_model(): # figure 3 plt.close('all') cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff'] ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1), dt.datetime(2016, 1, 15, 0)) ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1), dt.datetime(2016, 2, 5, 0)) #load the data fit_data = ens.repack_ens_mean_as_df() fit_data['prod24h_before'] = sq.fetch_production( ts1[0] + dt.timedelta(days=-1), ts1[-1] + dt.timedelta(days=-1)) fit_data['Tout24hdiff'] = fit_data['Tout'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['Tout']).mean(axis=1) fit_data['vWind24hdiff'] = fit_data['vWind'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['vWind']).mean(axis=1) fit_data['sunRad24hdiff'] = fit_data['sunRad'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['sunRad']).mean(axis=1) vali_data = ens.repack_ens_mean_as_df(ts2[0], ts2[-1]) vali_data['prod24h_before'] = sq.fetch_production( ts2[0] + dt.timedelta(days=-1), ts2[-1] + dt.timedelta(days=-1)) vali_data['Tout24hdiff'] = vali_data['Tout'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['Tout']).mean(axis=1) vali_data['vWind24hdiff'] = vali_data['vWind'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['vWind']).mean(axis=1) vali_data['sunRad24hdiff'] = vali_data['sunRad'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['sunRad']).mean(axis=1) # correct error in production: new_val = (vali_data['prod'][116] + vali_data['prod'][116]) / 2 vali_data['prod'][116] = new_val vali_data['prod'][117] = new_val 
vali_data['prod24h_before'][116 + 24] = new_val vali_data['prod24h_before'][117 + 24] = new_val # do the fit X = fit_data[cols] y = fit_data['prod'] res = mlin_regression(y, X, add_const=False) fig, [ax1, ax2] = plt.subplots(2, 1, sharex=True, figsize=(dcolwidth, 0.57 * dcolwidth), gridspec_kw={'height_ratios': [4, 1]}) # load ensemble data ens_data1 = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1],\ weathervars=['Tout', 'vWind', 'sunRad']) ens_data1['prod24h_before'] = fit_data['prod24h_before'] ens_data1_24h_before = ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['Tout', 'vWind', 'sunRad']) ens_data2 = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1],\ weathervars=['Tout', 'vWind', 'sunRad']) ens_data2['prod24h_before'] = vali_data['prod24h_before'] ens_data2_24h_before = ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['Tout', 'vWind', 'sunRad']) for i in range(25): for v in ['Tout', 'vWind', 'sunRad']: key_raw = v + str(i) key_diff = v + '24hdiff' + str(i) ens_data1[ key_diff] = ens_data1[key_raw] - ens_data1_24h_before[key_raw] ens_data2[ key_diff] = ens_data2[key_raw] - ens_data2_24h_before[key_raw] all_ens_data = pd.concat([ens_data1, ens_data2]) all_ts = ts1 + ts2 # # # calculate production for each ensemble member ens_prods = np.zeros((len(all_ts), 25)) for i in range(25): ens_cols = ['Tout24hdiff' + str(i), 'vWind24hdiff' + str(i),\ 'sunRad24hdiff' + str(i), 'prod24h_before'] ens_params = pd.Series({ 'Tout24hdiff' + str(i): res.params['Tout24hdiff'], 'vWind24hdiff' + str(i): res.params['vWind24hdiff'], 'sunRad24hdiff' + str(i): res.params['sunRad24hdiff'], 'prod24h_before': res.params['prod24h_before'] }) ens_prods[:, i] = linear_map(all_ens_data, ens_params, ens_cols) # calculate combined confint ens_std = ens_prods.std(axis=1) vali_resid = linear_map(vali_data, res.params, 
cols) - vali_data['prod'] vali_resid_corrig = vali_resid - np.sign( vali_resid) * 1.9599 * ens_std[len(ts1):] #mean_conf_int_spread = (vali_resid_corrig.quantile(0.95) - vali_resid_corrig.quantile(0.05))/2 # this conf_int is not used anymore fit_resid = res.resid fit_resid_corrig = fit_resid - np.sign( fit_resid) * 1.9599 * ens_std[0:len(ts1)] conf_int_spread_lower = -fit_resid_corrig.quantile(0.025) conf_int_spread_higher = fit_resid_corrig.quantile(0.975) combined_conf_ints = conf_int_spread_lower + conf_int_spread_higher + 2 * 1.9599 * ens_std all_prod_model = np.concatenate( [res.fittedvalues, linear_map(vali_data, res.params, cols)]) combined_ub95 = all_prod_model + conf_int_spread_higher + 1.9599 * ens_std combined_lb95 = all_prod_model - (conf_int_spread_lower + 1.9599 * ens_std) # plot confint ax1.fill_between(all_ts[len(ts1):], combined_lb95[len(ts1):], combined_ub95[len(ts1):], label='95% prediction intervals') ax1.fill_between(all_ts[len(ts1):], all_prod_model[len(ts1):] - 1.9599 * ens_std[len(ts1):], all_prod_model[len(ts1):] + 1.9599 * ens_std[len(ts1):], facecolor='grey', label='Weather ensemble 95% conf. 
int.') # plot ensempble models ax1.plot_date(all_ts[len(ts1):], ens_prods[len(ts1):], '-', lw=0.5) ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='Historical production') ax1.plot_date(ts2, linear_map(vali_data, res.params, cols), '-', c=red, lw=2, label='Production model') ax1.set_ylabel('Production [MW]', size=8) ax1.tick_params(axis='both', which='major', labelsize=8) ax1.xaxis.set_major_formatter(DateFormatter('%b %d')) ax1.legend(loc=1, prop={'size': 8}) ax1.set_ylim([300, 1100]) N = conf_int_spread_higher + 1.9599 * ens_std[len(ts1):].max() ax2.fill_between(ts2, -(1.9599 * ens_std[len(ts1):] + conf_int_spread_lower) / N, -1.9599 * ens_std[len(ts1):] / N, alpha=0.5) ax2.fill_between(ts2, -1.9599 * ens_std[len(ts1):] / N, np.zeros(len(ts2)), facecolor='grey', alpha=0.5) ax2.fill_between(ts2, 1.9599 * ens_std[len(ts1):] / N, facecolor='grey') ax2.fill_between(ts2, 1.9599 * ens_std[len(ts1):] / N, (conf_int_spread_higher + 1.9599 * ens_std[len(ts1):]) / N) ax2.set_ylabel('Prediction intervals \n[normalized]', size=8) ax2.tick_params(axis='y', which='major', labelsize=8) ax2.set_xlim(dt.datetime(2016, 1, 20, 0), dt.datetime(2016, 2, 5, 0)) fig.tight_layout() print "Min_normalized pos conf bound. 
", np.min(1.9599 * ens_std[len(ts1):] / N + conf_int_spread_higher / N) print "MAE = " + str(mae(vali_resid)) print "MAPE = " + str(mape(vali_resid, vali_data['prod'])) print "RMSE = " + str(rmse(vali_resid)) print "ME = " + str(np.mean(vali_resid)) print "MAE (fit) = " + str(mae(res.resid)) print "MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])) print "RMSE (fit)= " + str(rmse(res.resid)) print "ME (fit)= " + str(np.mean(res.resid)) print "Width of const blue bands (MW)", conf_int_spread_lower, conf_int_spread_higher plt.savefig('Q:/Projekter/Ens Article 1/figures/production_model.pdf', dpi=400) EO3_fc1 = sq.fetch_EO3_midnight_forecast(ts1[0], ts1[-1]) EO3_fc2 = sq.fetch_EO3_midnight_forecast(ts2[0], ts2[-1]) EO3_err = EO3_fc2 - vali_data['prod'] EO3_err_fit = EO3_fc1 - fit_data['prod'] print "MAE (EO3) = " + str(mae(EO3_err)) print "MAPE (EO3) = " + str(mape(EO3_err, vali_data['prod'])) print "RMSE (EO3)= " + str(rmse(EO3_err)) print "ME (EO3)= " + str(np.mean(EO3_err)) print "MAE (EO3_fit) = " + str(mae(EO3_err_fit)) print "MAPE (EO3_fit) = " + str(mape(EO3_err_fit, fit_data['prod'])) print "RMSE (EO3_fit)= " + str(rmse(EO3_err_fit)) print "ME (EO3_fit)= " + str(np.mean(EO3_err_fit)) print np.min(combined_conf_ints[len(ts1):] / combined_conf_ints.max()) np.savez('combined_conf_int', combined_conf_int=(conf_int_spread_higher + 1.9599 * ens_std), timesteps=all_ts) print "Corr coeff: vali ", np.corrcoef( vali_data['prod'], linear_map(vali_data, res.params, cols))[0, 1] print "Corr coeff: vali EO3 ", np.corrcoef(vali_data['prod'], EO3_fc2)[0, 1] print "Corr coeff: fit ", np.corrcoef(fit_data['prod'], res.fittedvalues)[0, 1] print "Corr coeff: fit EO3 ", np.corrcoef(fit_data['prod'], EO3_fc1)[0, 1] print "% of actual production in vali period above upper", float( len( np.where(vali_data['prod'] > (conf_int_spread_higher + 1.9599 * ens_std[len(ts1):] + linear_map(vali_data, res.params, cols)))[0])) / len(ts2) print "plus minus: ", 0.5 / len(ts2) print "% of 
actual production in vali period below lower", float( len( np.where(vali_data['prod'] < (linear_map(vali_data, res.params, cols) - (conf_int_spread_lower + 1.9599 * ens_std[len(ts1):]))) [0])) / len(ts2) print "plus minus: ", 0.5 / len(ts2) return res, fit_data
def second_ens_prod_fig(): """ This plot is based on a production model taking into account: the production 24 hours before as well as the change in temparature, windspeed and solar radiotion from 24 hours ago to now. """ plt.close('all') cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff'] ts1 = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0)) ts2 = ens.gen_hourly_timesteps(dt.datetime(2016,1,20,1), dt.datetime(2016,2,5,0)) #load the data fit_data = ens.repack_ens_mean_as_df() fit_data['prod24h_before'] = sq.fetch_production(ts1[0]+dt.timedelta(days=-1), ts1[-1]+dt.timedelta(days=-1)) fit_data['Tout24hdiff'] = fit_data['Tout'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['Tout']).mean(axis=1) fit_data['vWind24hdiff'] = fit_data['vWind'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['vWind']).mean(axis=1) fit_data['sunRad24hdiff'] = fit_data['sunRad'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['sunRad']).mean(axis=1) vali_data = ens.repack_ens_mean_as_df(ts2[0], ts2[-1]) vali_data['prod24h_before'] = sq.fetch_production(ts2[0]+dt.timedelta(days=-1), ts2[-1]+dt.timedelta(days=-1)) vali_data['Tout24hdiff'] = vali_data['Tout'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['Tout']).mean(axis=1) vali_data['vWind24hdiff'] = vali_data['vWind'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['vWind']).mean(axis=1) vali_data['sunRad24hdiff'] = vali_data['sunRad'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['sunRad']).mean(axis=1) # 
correct error in production: new_val = (vali_data['prod'][116] +vali_data['prod'][116])/2 vali_data['prod'][116] = new_val vali_data['prod'][117] = new_val vali_data['prod24h_before'][116+24] = new_val vali_data['prod24h_before'][117+24] = new_val # do the fit X = fit_data[cols] y = fit_data['prod'] res = mlin_regression(y, X, add_const=False) fig, [ax1, ax2] = plt.subplots(2,1, figsize=(40,20)) # load ensemble data ens_data1 = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1],\ weathervars=['Tout', 'vWind', 'sunRad']) ens_data1['prod24h_before'] = fit_data['prod24h_before'] ens_data1_24h_before = ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['Tout', 'vWind', 'sunRad']) ens_data2 = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1],\ weathervars=['Tout', 'vWind', 'sunRad']) ens_data2['prod24h_before'] = vali_data['prod24h_before'] ens_data2_24h_before = ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['Tout', 'vWind', 'sunRad']) for i in range(25): for v in ['Tout', 'vWind', 'sunRad']: key_raw = v + str(i) key_diff = v + '24hdiff' + str(i) ens_data1[key_diff] = ens_data1[key_raw] - ens_data1_24h_before[key_raw] ens_data2[key_diff] = ens_data2[key_raw] - ens_data2_24h_before[key_raw] all_ens_data = pd.concat([ens_data1, ens_data2]) all_ts = ts1 + ts2 # # # calculate production for each ensemble member ens_prods = np.zeros((len(all_ts), 25)) for i in range(25): ens_cols = ['Tout24hdiff' + str(i), 'vWind24hdiff' + str(i),\ 'sunRad24hdiff' + str(i), 'prod24h_before'] ens_params = pd.Series({'Tout24hdiff' + str(i):res.params['Tout24hdiff'], 'vWind24hdiff' + str(i):res.params['vWind24hdiff'], 'sunRad24hdiff' + str(i):res.params['sunRad24hdiff'], 'prod24h_before':res.params['prod24h_before']}) ens_prods[:,i] = linear_map(all_ens_data, ens_params, ens_cols) # calculate combined confint ens_std = 
ens_prods.std(axis=1) vali_resid = linear_map(vali_data, res.params, cols) - vali_data['prod'] vali_resid_corrig = vali_resid - np.sign(vali_resid)*1.9599*ens_std[len(ts1):] mean_conf_int_spread = (vali_resid_corrig.quantile(0.95) - vali_resid_corrig.quantile(0.05))/2 combined_conf_int = mean_conf_int_spread + 1.9599*ens_std all_prod_model = np.concatenate([res.fittedvalues, linear_map(vali_data, res.params, cols)]) combined_ub95 = all_prod_model + combined_conf_int combined_lb95 = all_prod_model - combined_conf_int # plot confint ax1.fill_between(all_ts, combined_lb95, combined_ub95, label='Combined 95% conf. int.') ax1.fill_between(all_ts, all_prod_model - 1.9599*ens_std, all_prod_model + 1.9599*ens_std, facecolor='grey', label='Ensemble 95% conf. int.') # plot ensempble models ax1.plot_date(all_ts, ens_prods, '-', lw=0.5) ax1.plot_date(ts1, y, 'k-', lw=2, label='Actual production') ax1.plot_date(ts1, res.fittedvalues,'r-', lw=2, label='Model on ensemble mean') ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='') ax1.plot_date(ts2, linear_map(vali_data, res.params, cols), 'r-', lw=2) ax1.set_ylabel('[MW]') ax1.legend(loc=2) ax1.set_ylim([0,1100]) ax2.plot_date(ts1, res.resid, '-', label='Residual, fitted data') ax2.plot_date(ts2, vali_resid, '-', label='Residual, validation data') ax2.set_ylabel('[MW]') ax2.legend(loc=2) ax2.set_ylim([-550, 550]) print "MAE = " + str(mae(vali_resid)) print "MAPE = " + str(mape(vali_resid, vali_data['prod'])) print "RMSE = " + str(rmse(vali_resid)) print "ME = " + str(np.mean(vali_resid)) print "MAE (fit) = " + str(mae(res.resid)) print "MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])) print "RMSE (fit)= " + str(rmse(res.resid)) print "ME (fit)= " + str(np.mean(res.resid)) plt.savefig('figures/ens_prod_models_v2.pdf', dpi=600) plt.figure() plt.plot_date(all_ts, ens_std) plt.ylabel('Std. 
of ensemble production models [MW]') plt.savefig('figures/std_ens_prod_models.pdf', dpi=600) # vali_ens_std = ens_std[len(ts1):] sns.jointplot(x=pd.Series(vali_ens_std), y=np.abs(vali_resid)) sns.jointplot(x=vali_data['prod'], y=pd.Series(linear_map(vali_data, res.params, cols))) EO3_fc1 = sq.fetch_EO3_midnight_forecast(ts1[0], ts1[-1]) EO3_fc2 = sq.fetch_EO3_midnight_forecast(ts2[0], ts2[-1]) plt.figure() plt.plot_date(ts1, fit_data['prod'], 'k-', label='Actual production') plt.plot_date(ts2, vali_data['prod'], 'k-') plt.plot_date(ts1, EO3_fc1, 'r-', label='EO3 forecast') plt.plot_date(ts2, EO3_fc2, 'r-') EO3_err = EO3_fc2-vali_data['prod'] EO3_err_fit = EO3_fc1-fit_data['prod'] print "MAE (EO3) = " + str(mae(EO3_err)) print "MAPE (EO3) = " + str(mape(EO3_err, vali_data['prod'])) print "RMSE (EO3)= " + str(rmse(EO3_err)) print "ME (EO3)= " + str(np.mean(EO3_err)) print "MAE (EO3_fit) = " + str(mae(EO3_err_fit)) print "MAPE (EO3_fit) = " + str(mape(EO3_err_fit, fit_data['prod'])) print "RMSE (EO3_fit)= " + str(rmse(EO3_err_fit)) print "ME (EO3_fit)= " + str(np.mean(EO3_err_fit)) sns.jointplot(x=pd.Series(vali_ens_std), y=np.abs(EO3_err)) plt.figure(figsize=(20,10)) plt.subplot(2,1,1) plt.plot_date(all_ts, combined_conf_int/combined_conf_int.max(), '-') plt.ylabel('Model + ensemble uncertainty \n [normalized]') plt.ylim(0,1) plt.subplot(2,1,2) plt.plot_date(all_ts, (1-0.2*combined_conf_int/combined_conf_int.max()), '-', label='Dynamic setpoint') plt.plot_date(all_ts, 0.8*np.ones(len(all_ts)), '--', label='Static setpoint') plt.ylabel('Setpoint for pump massflow \n temperature [fraction of max pump cap]') plt.legend() plt.ylim(.7,1) plt.savefig('figures/setpoint.pdf') return vali_data, fit_data, res, ens_std, vali_resid
if res.pvalues[var] > 0.03: print res.pvalues[var], var return False, var elif correct_signs[var]*res.params[var] < 0: return False, var if np.abs(res.params['prod24h_before']-1) > 0.05: print "WARNING: prod24h_before is weighted with: " + str(res.params['prod24h_before']) if res.resid.mean()>5: print "WARNING: Bias in model: " + res.resid.mean() return True, None ts_start = dt.datetime(2015, 10, 17, 1) ts_end = dt.datetime(2016,1,16,0) timesteps = gen_hourly_timesteps(ts_start, ts_end) df = pd.DataFrame() df['prod'] = sq.fetch_production(ts_start, ts_end) df['prod24h_before'] = sq.fetch_production(ts_start + dt.timedelta(days=-1), \ ts_end + dt.timedelta(days=-1)) for v in ['Tout', 'vWind', 'sunRad', 'hum']: df[v] = sq.fetch_BrabrandSydWeather(v, ts_start, ts_end) df[v + '24h_before'] = sq.fetch_BrabrandSydWeather(v, ts_start + dt.timedelta(days=-1), \ ts_end + dt.timedelta(days=-1)) df[v + '24hdiff'] = df[v] - df[v + '24h_before'] cols = ['Tout24hdiff', 'vWind24hdiff', 'prod24h_before', 'sunRad24hdiff', 'hum24hdiff'] good_fit = False while not good_fit:
def load_cons_model_dfs(df):
    """Build the fit, validation and test data frames for the consumption model.

    Parameters
    ----------
    df : pandas.DataFrame
        Data frame with the already calculated consumptions in a 'cons'
        column.  It is sliced by timestamp, so it presumably has a datetime
        index that also covers the 24 hours before each period -- confirm
        against the caller.

    Returns
    -------
    tuple of pandas.DataFrame
        (fit_data, vali_data, test_data, all_data).  Each frame has the
        columns 'cons', 'cons24hbefore' and '<var>24hdiff' for Tout, vWind,
        sunRad and hum, indexed by the hourly time steps of its period;
        `all_data` is the three frames concatenated.
    """
    #%%
    fit_ts = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                      dt.datetime(2016, 1, 15, 0))
    vali_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                       dt.datetime(2016, 2, 5, 0))
    test_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1),
                                       dt.datetime(2016, 3, 1, 0))

    weathervars = ['Tout', 'vWind', 'sunRad', 'hum']
    one_day = dt.timedelta(days=1)

    fit_data = pd.DataFrame()
    vali_data = pd.DataFrame()
    test_data = pd.DataFrame()
    datasets = [(fit_data, fit_ts), (vali_data, vali_ts), (test_data, test_ts)]

    # Label-based slicing with .loc replaces the deprecated .ix indexer.
    # The np.array() cast drops the index of the slice, so the values are
    # stored by position instead of being aligned on the index.
    for data, ts in datasets:
        data['cons'] = np.array(df.loc[ts[0]:ts[-1], 'cons'])
        data['cons24hbefore'] = np.array(
            df.loc[ts[0] - one_day:ts[-1] - one_day, 'cons'])

    # 24-hour change of the ensemble mean of every weather variable.
    for v in weathervars:
        for data, ts in datasets:
            ens_now = ens.load_ens_timeseries_as_df(
                ts_start=ts[0],
                ts_end=ts[-1],
                weathervars=[v]).mean(axis=1)
            ens_before = ens.load_ens_timeseries_as_df(
                ts_start=ts[0] - one_day,
                ts_end=ts[-1] - one_day,
                weathervars=[v]).mean(axis=1)
            data['%s24hdiff' % v] = ens_now - ens_before

    # Index every frame by the time steps of its period.
    for data, ts in datasets:
        data.set_index(pd.DatetimeIndex(ts), inplace=True)

    all_data = pd.concat([fit_data, vali_data, test_data])

    return fit_data, vali_data, test_data, all_data