def plot_const_vs_dym_cap(mass_flow_full_cap_cw):
    """Plot the pump toy model under a constant and a dynamic mass-flow cap.

    Production for the periods ``ts1`` and ``ts2`` is run through two
    ``PumpToyModel`` instances.  One is capped at the constant fraction
    ``mass_flow_cap_pct_of_full`` of the full-capacity mass flow.  The other
    uses a cap that equals the constant cap where the stored combined
    confidence interval is at its maximum and rises towards full capacity
    as the interval gets narrower.

    The figure has three panels (mass flows and caps, supply temperatures,
    ratio of the supply-temperature excess over ``T_grnd``) and is saved to
    ``figures/toymodel/const_vs_dyn_cap<mass_flow_full_cap_cw>.pdf``.

    Parameters
    ----------
    mass_flow_full_cap_cw : number
        Divided by ``specific_heat_water`` to get the full-capacity mass
        flow; also formatted with ``%i`` into the output file name.

    Notes
    -----
    Relies on module-level names: ``ts1``, ``ts2``, ``all_ts``,
    ``specific_heat_water``, ``mass_flow_cap_pct_of_full``, ``T_grnd``,
    ``mean_price``, ``sq`` and ``PumpToyModel``.
    """
    plt.close('all')
    mass_flow_100pct_cap = mass_flow_full_cap_cw / specific_heat_water
    combined_conf_int = np.load('combined_conf_int.npz')['combined_conf_int']
    prod = np.concatenate([
        sq.fetch_production(ts1[0], ts1[-1]),
        sq.fetch_production(ts2[0], ts2[-1]),
    ])

    const_cap = (mass_flow_100pct_cap * mass_flow_cap_pct_of_full
                 * np.ones_like(prod))
    PTM_const = PumpToyModel(delivered_heat=prod, mass_flow_cap=const_cap)
    PTM_const.calc_mass_flow_and_T_sup()

    # Equals the constant cap where the confidence interval is widest and
    # approaches the full capacity as the interval shrinks.
    dyn_cap = (mass_flow_100pct_cap
               * (1 - (1 - mass_flow_cap_pct_of_full)
                  * combined_conf_int / combined_conf_int.max())
               * np.ones_like(prod))
    PTM_dyn = PumpToyModel(delivered_heat=prod, mass_flow_cap=dyn_cap)
    PTM_dyn.calc_mass_flow_and_T_sup()

    plt.figure(figsize=(20, 10))

    plt.subplot(3, 1, 1)
    plt.plot_date(all_ts, PTM_const.mass_flow, 'r-', label='"Massflow" const cap')
    plt.plot_date(all_ts, PTM_dyn.mass_flow, 'g-', label='"Massflow" dyn cap')
    plt.plot_date(all_ts, PTM_const.mass_flow_cap, 'k--', label='Constant cap')
    plt.plot_date(all_ts, PTM_dyn.mass_flow_cap, 'y--', label='Dynamic cap')
    plt.ylabel("Massflow [kg/hour]")
    plt.legend(loc=4)

    plt.subplot(3, 1, 2)
    plt.plot_date(all_ts, PTM_const.T_sup, 'r-', label='T_sup constant cap')
    plt.plot_date(all_ts, PTM_dyn.T_sup, 'g-', label='T_sup dynamic cap')
    plt.ylabel('T_sup [degree C]')
    plt.legend()

    plt.subplot(3, 1, 3)
    # Dynamic-cap value divided by constant-cap value.
    reduced_heat_loss_pct = ((np.array(PTM_dyn.T_sup) - T_grnd)
                             / (np.array(PTM_const.T_sup) - T_grnd))
    plt.plot_date(all_ts, reduced_heat_loss_pct, 'r')

    differs = reduced_heat_loss_pct != 1
    hours_with_reduced_heat_loss = int(np.count_nonzero(differs))
    if hours_with_reduced_heat_loss:
        average_heat_loss_reduction = reduced_heat_loss_pct[differs].mean()
        # the 571e3 MWh corresponds to 19% of the total annual production
        estimated_savings_MWh = (571e3 * (1 - average_heat_loss_reduction)
                                 * float(hours_with_reduced_heat_loss)
                                 / (365 * 24))
    else:
        # Without this guard the mean of an empty selection is NaN and the
        # figure text would read "nan MWh".
        estimated_savings_MWh = 0.0
    estimated_savings_DKK = estimated_savings_MWh * mean_price

    plt.text(dt.datetime(2015, 12, 17, 12), 0.98,
             "Hours with reduced heat loss: %i\nEstimated saved: %2.1f MWh\nEstimated saved: %2.2f DKK"
             % (hours_with_reduced_heat_loss, estimated_savings_MWh,
                estimated_savings_DKK))
    # The plotted quantity is dynamic / constant, so label it that way
    # (the previous label named the inverse ratio).
    plt.ylabel('Heatloss_dyn/heatloss_const')
    plt.savefig('figures/toymodel/const_vs_dyn_cap%i.pdf' % mass_flow_full_cap_cw)
def gen_ens_dfs(ts_start, ts_end, varnames, timeshifts, pointcode=71699):
    """Build one predictor DataFrame per ensemble member.

    timeshifts must be an integer number of hours, positive values only.
    Each dataframe contains columns with the variables minus their value
    'timeshift' hours before.

    For every timeshift each frame gets ``prod<timeshift>hbefore`` (the
    production fetched for the shifted period) and, per variable and
    ensemble column ``i``, ``<var><timeshift>hdiff<i>``.  Afterwards the
    un-shifted ensemble columns ``<var><i>`` and ``prod24or48hbefore``
    (taken from the module-level ``most_recent_avail_prod``) are added.

    Parameters
    ----------
    ts_start, ts_end : passed to the production and ensemble loaders.
    varnames : iterable of variable names understood by
        ``ens.load_ens_avail_at10_series``.
    timeshifts : iterable of int, hours to look back.
    pointcode : forwarded to the ensemble loader (previously accepted but
        ignored; the default keeps the old behaviour).

    Returns
    -------
    list of 25 pandas.DataFrame, one per ensemble member.
    """
    df_s = [pd.DataFrame() for _ in range(25)]
    unshifted = {}

    def current_series(v):
        # The un-shifted series does not depend on the timeshift, so it is
        # loaded at most once per variable.
        if v not in unshifted:
            unshifted[v] = ens.load_ens_avail_at10_series(
                ts_start, ts_end, v, pointcode=pointcode)
        return unshifted[v]

    for timeshift in timeshifts:
        shifted_start = h_hoursbefore(ts_start, timeshift)
        shifted_end = h_hoursbefore(ts_end, timeshift)
        prod_before = sq.fetch_production(shifted_start, shifted_end)
        for member_df in df_s:
            member_df['prod%ihbefore' % timeshift] = prod_before
        for v in varnames:
            ens_data = current_series(v)
            ens_data_before = ens.load_ens_avail_at10_series(
                shifted_start, shifted_end, v, pointcode=pointcode)
            diff = ens_data - ens_data_before
            for i in range(ens_data.shape[1]):
                df_s[i]['%s%ihdiff%i' % (v, timeshift, i)] = diff[:, i]

    for v in varnames:
        ens_data = current_series(v)
        for i in range(ens_data.shape[1]):
            df_s[i]['%s%i' % (v, i)] = ens_data[:, i]

    for member_df in df_s:
        member_df['prod24or48hbefore'] = most_recent_avail_prod
    return df_s
def plot_const_vs_dym_cap(mass_flow_full_cap_cw):
    """Plot the pump toy model under a constant and a dynamic mass-flow cap.

    Production for the periods ``ts1`` and ``ts2`` is run through two
    ``PumpToyModel`` instances.  One is capped at the constant fraction
    ``mass_flow_cap_pct_of_full`` of the full-capacity mass flow.  The other
    uses a cap that equals the constant cap where the stored combined
    confidence interval is at its maximum and rises towards full capacity
    as the interval gets narrower.

    The figure has three panels (mass flows and caps, supply temperatures,
    ratio of the supply-temperature excess over ``T_grnd``) and is saved to
    ``figures/toymodel/const_vs_dyn_cap<mass_flow_full_cap_cw>.pdf``.

    Parameters
    ----------
    mass_flow_full_cap_cw : number
        Divided by ``specific_heat_water`` to get the full-capacity mass
        flow; also formatted with ``%i`` into the output file name.

    Notes
    -----
    Relies on module-level names: ``ts1``, ``ts2``, ``all_ts``,
    ``specific_heat_water``, ``mass_flow_cap_pct_of_full``, ``T_grnd``,
    ``mean_price``, ``sq`` and ``PumpToyModel``.
    """
    plt.close('all')
    mass_flow_100pct_cap = mass_flow_full_cap_cw/specific_heat_water
    combined_conf_int = np.load('combined_conf_int.npz')['combined_conf_int']
    prod = np.concatenate([sq.fetch_production(ts1[0], ts1[-1]),
                           sq.fetch_production(ts2[0], ts2[-1])])

    PTM_const = PumpToyModel(
        delivered_heat=prod,
        mass_flow_cap=mass_flow_100pct_cap*mass_flow_cap_pct_of_full*np.ones_like(prod))
    PTM_const.calc_mass_flow_and_T_sup()

    # Equals the constant cap where the confidence interval is widest and
    # approaches the full capacity as the interval shrinks.
    PTM_dyn = PumpToyModel(
        delivered_heat=prod,
        mass_flow_cap=mass_flow_100pct_cap
        * (1-(1-mass_flow_cap_pct_of_full)*combined_conf_int/combined_conf_int.max())
        * np.ones_like(prod))
    PTM_dyn.calc_mass_flow_and_T_sup()

    plt.figure(figsize=(20, 10))

    plt.subplot(3, 1, 1)
    plt.plot_date(all_ts, PTM_const.mass_flow, 'r-', label='"Massflow" const cap')
    plt.plot_date(all_ts, PTM_dyn.mass_flow, 'g-', label='"Massflow" dyn cap')
    plt.plot_date(all_ts, PTM_const.mass_flow_cap, 'k--', label='Constant cap')
    plt.plot_date(all_ts, PTM_dyn.mass_flow_cap, 'y--', label='Dynamic cap')
    plt.ylabel("Massflow [kg/hour]")
    plt.legend(loc=4)

    plt.subplot(3, 1, 2)
    plt.plot_date(all_ts, PTM_const.T_sup, 'r-', label='T_sup constant cap')
    plt.plot_date(all_ts, PTM_dyn.T_sup, 'g-', label='T_sup dynamic cap')
    plt.ylabel('T_sup [degree C]')
    plt.legend()

    plt.subplot(3, 1, 3)
    # Dynamic-cap value divided by constant-cap value.
    reduced_heat_loss_pct = (np.array(PTM_dyn.T_sup) - T_grnd)/(np.array(PTM_const.T_sup) - T_grnd)
    plt.plot_date(all_ts, reduced_heat_loss_pct, 'r')

    changed_hours = reduced_heat_loss_pct != 1
    hours_with_reduced_heat_loss = int(np.count_nonzero(changed_hours))
    if hours_with_reduced_heat_loss > 0:
        average_heat_loss_reduction = reduced_heat_loss_pct[changed_hours].mean()
        # the 571e3 MWh corresponds to 19% of the total annual production
        estimated_savings_MWh = 571e3*(1-average_heat_loss_reduction)*float(hours_with_reduced_heat_loss)/(365*24)
    else:
        # Averaging an empty selection would give NaN and put "nan MWh"
        # into the figure; no changed hours means nothing was saved.
        estimated_savings_MWh = 0.0
    estimated_savings_DKK = estimated_savings_MWh*mean_price

    plt.text(dt.datetime(2015, 12, 17, 12), 0.98,
             "Hours with reduced heat loss: %i\nEstimated saved: %2.1f MWh\nEstimated saved: %2.2f DKK"
             % (hours_with_reduced_heat_loss, estimated_savings_MWh, estimated_savings_DKK))
    # The curve is dynamic / constant; the previous label named the inverse.
    plt.ylabel('Heatloss_dyn/heatloss_const')
    plt.savefig('figures/toymodel/const_vs_dyn_cap%i.pdf' % mass_flow_full_cap_cw)
def repack_ens_mean_as_df(ts_start=dt.datetime(2015,12,17,1), ts_end=dt.datetime(2016,1,15,0),\
                          load_path='time_series/ens_means/', pointcode=71699):
    """Collect stored weather series and production in one DataFrame.

    For each of 'Tout', 'hum', 'vWind', 'sunRad' and their '...avg24'
    counterparts the file
    ``<load_path><name>_geo<pointcode>_<start>_to_<end>.npy`` is loaded,
    where the two stamps come from ``timestamp_str``.  The production for
    the period and two derived product columns are added.

    Returns the resulting pandas.DataFrame.
    """
    file_tail = ('_geo' + str(pointcode) + '_' + timestamp_str(ts_start)
                 + '_to_' + timestamp_str(ts_end) + '.npy')
    base_vars = ['Tout', 'hum', 'vWind', 'sunRad']

    columns = {}
    for name in base_vars + [w + 'avg24' for w in base_vars]:
        columns[name] = np.load(load_path + name + file_tail)

    columns['prod'] = sq.fetch_production(ts_start, ts_end)
    columns['(Tout-17)*vWind'] = (columns['Tout'] - 17) * columns['vWind']
    columns['(Toutavg-17)*vWindavg24'] = \
        (columns['Toutavg24'] - 17) * columns['vWindavg24']
    return pd.DataFrame(columns)
def gen_fit_df(ts_start, ts_end, varnames, timeshifts, pointcode=71699):
    """Build the DataFrame used for fitting on ensemble-mean differences.

    timeshifts must be an integer number of hours, positive values only.
    The dataframe contains columns with the variables minus their value
    'timeshift' hours before.

    Columns: ``prod`` (production for the period), and per timeshift
    ``prod<timeshift>hbefore`` plus ``<var><timeshift>hdiff`` for every
    variable in ``varnames``.

    Parameters
    ----------
    ts_start, ts_end : passed to the production and ensemble-mean loaders.
    varnames : iterable of variable names understood by
        ``ens.load_ens_mean_avail_at10_series``.
    timeshifts : iterable of int, hours to look back.
    pointcode : forwarded to the ensemble-mean loader (previously accepted
        but ignored; the default keeps the old behaviour).

    Returns
    -------
    pandas.DataFrame
    """
    df = pd.DataFrame()
    df['prod'] = sq.fetch_production(ts_start, ts_end)
    unshifted = {}

    for timeshift in timeshifts:
        shifted_start = h_hoursbefore(ts_start, timeshift)
        shifted_end = h_hoursbefore(ts_end, timeshift)
        df['prod%ihbefore' % timeshift] = sq.fetch_production(shifted_start,
                                                              shifted_end)
        for v in varnames:
            # The un-shifted mean is the same for every timeshift, so it is
            # loaded at most once per variable.
            if v not in unshifted:
                unshifted[v] = ens.load_ens_mean_avail_at10_series(
                    v, ts_start, ts_end, pointcode=pointcode)
            ens_mean_before = ens.load_ens_mean_avail_at10_series(
                v, shifted_start, shifted_end, pointcode=pointcode)
            df['%s%ihdiff' % (v, timeshift)] = unshifted[v] - ens_mean_before
    return df
def gen_fit_df(ts_start, ts_end, varnames, timeshifts, pointcode=71699):
    """Build the DataFrame used for fitting on ensemble-mean differences.

    timeshifts must be an integer number of hours, positive values only.
    The dataframe contains columns with the variables minus their value
    'timeshift' hours before.

    Columns: ``prod`` (production for the period), and per timeshift
    ``prod<timeshift>hbefore`` plus ``<var><timeshift>hdiff`` for every
    variable in ``varnames``.

    Parameters
    ----------
    ts_start, ts_end : passed to the production and ensemble-mean loaders.
    varnames : iterable of variable names understood by
        ``ens.load_ens_mean_avail_at10_series``.
    timeshifts : iterable of int, hours to look back.
    pointcode : forwarded to the ensemble-mean loader (previously accepted
        but ignored; the default keeps the old behaviour).

    Returns
    -------
    pandas.DataFrame
    """
    df = pd.DataFrame()
    df['prod'] = sq.fetch_production(ts_start, ts_end)
    mean_now = {}

    def current_mean(v):
        # Independent of the timeshift: load at most once per variable.
        if v not in mean_now:
            mean_now[v] = ens.load_ens_mean_avail_at10_series(
                v, ts_start, ts_end, pointcode=pointcode)
        return mean_now[v]

    for timeshift in timeshifts:
        start_before = h_hoursbefore(ts_start, timeshift)
        end_before = h_hoursbefore(ts_end, timeshift)
        df['prod%ihbefore' % timeshift] = sq.fetch_production(start_before,
                                                              end_before)
        for v in varnames:
            ens_mean = current_mean(v)
            ens_mean_before = ens.load_ens_mean_avail_at10_series(
                v, start_before, end_before, pointcode=pointcode)
            df['%s%ihdiff' % (v, timeshift)] = ens_mean - ens_mean_before
    return df
def gen_ens_df(ts_start, ts_end, varnames, timeshifts, pointcode=71699):
    """Build a single DataFrame with per-member ensemble differences.

    timeshifts must be an integer number of hours, positive values only.
    The dataframe contains columns with the variables minus their value
    'timeshift' hours before.

    Columns: ``prod`` (production for the period), and per timeshift
    ``prod<timeshift>hbefore`` plus, for every variable and every column
    ``i`` of the loaded ensemble array, ``<var><timeshift>hdiff<i>``.

    Parameters
    ----------
    ts_start, ts_end : passed to the production and ensemble loaders.
    varnames : iterable of variable names understood by
        ``ens.load_ens_avail_at10_series``.
    timeshifts : iterable of int, hours to look back.
    pointcode : forwarded to the ensemble loader (previously accepted but
        ignored; the default keeps the old behaviour).

    Returns
    -------
    pandas.DataFrame
    """
    df = pd.DataFrame()
    df['prod'] = sq.fetch_production(ts_start, ts_end)
    unshifted = {}

    for timeshift in timeshifts:
        shifted_start = h_hoursbefore(ts_start, timeshift)
        shifted_end = h_hoursbefore(ts_end, timeshift)
        df['prod%ihbefore' % timeshift] = sq.fetch_production(shifted_start,
                                                              shifted_end)
        for v in varnames:
            # The un-shifted ensemble is the same for every timeshift, so it
            # is loaded at most once per variable.
            if v not in unshifted:
                unshifted[v] = ens.load_ens_avail_at10_series(
                    ts_start, ts_end, v, pointcode=pointcode)
            ens_data = unshifted[v]
            ens_data_before = ens.load_ens_avail_at10_series(
                shifted_start, shifted_end, v, pointcode=pointcode)
            diff = ens_data - ens_data_before
            for i in range(ens_data.shape[1]):
                df['%s%ihdiff%i' % (v, timeshift, i)] = diff[:, i]
    return df
def corr_coeff_plot():
    """Draw a correlation heatmap of production and weather series.

    Loads the stored series for every name in the module-level
    ``weathervars`` and its '...avg24' counterpart for the fixed period
    2015-12-17 01:00 to 2016-01-15 00:00, adds the production and two
    product columns, and passes the correlation matrix to ``sns.heatmap``.

    Returns the assembled pandas.DataFrame.
    """
    plt.close('all')
    period_start = dt.datetime(2015, 12, 17, 1)
    period_end = dt.datetime(2016, 1, 15, 0)
    file_tail = '_geo71699_2015121701_to_2016011500.npy'
    folder = 'time_series/ens_means/'

    series = {}
    for name in weathervars + [w + 'avg24' for w in weathervars]:
        series[name] = np.load(folder + name + file_tail)

    series['prod'] = sq.fetch_production(period_start, period_end)
    series['(Tout-17)*vWind'] = (series['Tout'] - 17) * series['vWind']
    series['(Toutavg-17)*vWindavg24'] = \
        (series['Toutavg24'] - 17) * series['vWindavg24']

    frame = pd.DataFrame(series)
    correlations = frame.corr()
    sns.heatmap(correlations)
    return frame
def corr_coeff_plot():
    """Show pairwise correlations between production and weather as a heatmap.

    The weather series (names from the module-level ``weathervars`` plus
    their '...avg24' variants) are read from ``time_series/ens_means/`` for
    the fixed period 2015-12-17 01:00 to 2016-01-15 00:00.  Production and
    two product columns are appended before the correlation matrix is drawn
    with ``sns.heatmap``.

    Returns the pandas.DataFrame the correlations were computed from.
    """
    plt.close('all')
    first_hour, last_hour = dt.datetime(2015, 12, 17, 1), dt.datetime(2016, 1, 15, 0)
    suffix = '_geo71699_2015121701_to_2016011500.npy'
    directory = 'time_series/ens_means/'

    names = weathervars + [base + 'avg24' for base in weathervars]
    table = dict((name, np.load(directory + name + suffix)) for name in names)

    table['prod'] = sq.fetch_production(first_hour, last_hour)
    tout_excess = table['Tout'] - 17
    table['(Tout-17)*vWind'] = tout_excess * table['vWind']
    tout_avg_excess = table['Toutavg24'] - 17
    table['(Toutavg-17)*vWindavg24'] = tout_avg_excess * table['vWindavg24']

    result = pd.DataFrame(table)
    sns.heatmap(result.corr())
    return result
def create_5_fold_scatter(avg24=False):
    """Save a seaborn pairplot of production against the weather series.

    Parameters
    ----------
    avg24 : if true, the '...avg24' files are loaded instead and the output
        file name gets an 'avg24_' prefix.  The DataFrame columns keep the
        plain variable names either way.

    The figure is written to ``figures/<name>``; nothing is returned.
    """
    plt.close('all')
    period = (dt.datetime(2015, 12, 17, 1), dt.datetime(2016, 1, 15, 0))
    file_tail = '_geo71699_2015121701_to_2016011500.npy'
    out_name = 'prod_weather_pairplot.pdf'
    if avg24:
        file_tail = 'avg24' + file_tail
        out_name = 'avg24_' + out_name
    folder = 'time_series/ens_means/'

    frame_data = {}
    for name in weathervars:
        frame_data[name] = np.load(folder + name + file_tail)
    frame_data['prod'] = sq.fetch_production(period[0], period[1])
    frame_data['(Tout-17)*vWind'] = \
        (frame_data['Tout'] - 17) * frame_data['vWind']

    sns.pairplot(pd.DataFrame(frame_data))
    plt.savefig('figures/' + out_name)
def validate_prod24h_before_and_diffsmodel():
    """Fit the 24h-difference model on ``all_data`` and apply it to a validation period.

    The model regresses production on the production 24 hours earlier and
    on the 24-hour change of the ensemble-mean 'Tout', 'vWind' and 'sunRad'
    (no constant term).  It is fitted on the module-level ``all_data`` and
    evaluated on 2016-01-20 01:00 to 2016-01-31 00:00; actual and modelled
    production are plotted.

    Returns
    -------
    (validation_data, res, residual) : the validation DataFrame, the
        regression result from ``mlin_regression`` and the model-minus-actual
        residual.
    """
    plt.close('all')
    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
    ts_start = dt.datetime(2016, 1, 20, 1)
    ts_end = dt.datetime(2016, 1, 31, 0)
    one_day = dt.timedelta(days=1)
    validation_data = ens.repack_ens_mean_as_df(ts_start, ts_end)

    # correct error in production:
    # NOTE(review): this averages sample 116 with itself, so new_val is just
    # prod[116]; a neighbouring hour was probably intended. Left unchanged
    # because the intended value cannot be determined from the code.
    new_val = (validation_data['prod'][116] + validation_data['prod'][116]) / 2
    # .loc writes into the frame itself; the former chained form
    # (frame[col][row] = value) may assign to a temporary copy.
    # Assumes the default integer row index - TODO confirm.
    validation_data.loc[116, 'prod'] = new_val
    validation_data.loc[117, 'prod'] = new_val

    validation_data['prod24h_before'] = sq.fetch_production(
        ts_start - one_day, ts_end - one_day)
    validation_data.loc[116 + 24, 'prod24h_before'] = new_val
    validation_data.loc[117 + 24, 'prod24h_before'] = new_val

    # 24-hour change of each ensemble-mean weather variable.
    for var in ('Tout', 'vWind', 'sunRad'):
        mean_24h_before = ens.load_ens_timeseries_as_df(
            ts_start - one_day, ts_end - one_day,
            weathervars=[var]).mean(axis=1)
        validation_data[var + '24hdiff'] = validation_data[var] - mean_24h_before

    # fit on fit area
    X = all_data[cols]
    res = mlin_regression(all_data['prod'], X, add_const=False)

    # apply to validation area
    weather_model = linear_map(validation_data, res.params, cols)
    timesteps = ens.gen_hourly_timesteps(ts_start, ts_end)
    plt.plot_date(timesteps, validation_data['prod'], 'b-')
    plt.plot_date(timesteps, weather_model, 'r-')
    residual = weather_model - validation_data['prod']
    return validation_data, res, residual
def create_5_fold_scatter(avg24=False):
    """Write a pairplot of production versus weather variables to ``figures/``.

    Parameters
    ----------
    avg24 : when true, files carrying the 'avg24' tag are read and the
        figure file name is prefixed with 'avg24_'.  Column names in the
        plotted DataFrame are the plain variable names in both cases.

    Nothing is returned; the figure is saved with ``plt.savefig``.
    """
    plt.close('all')
    first_hour = dt.datetime(2015, 12, 17, 1)
    last_hour = dt.datetime(2016, 1, 15, 0)

    tag = 'avg24' if avg24 else ''
    load_suffix = tag + '_geo71699_2015121701_to_2016011500.npy'
    figfilename = ('avg24_' if avg24 else '') + 'prod_weather_pairplot.pdf'
    load_path = 'time_series/ens_means/'

    table = dict((name, np.load(load_path + name + load_suffix))
                 for name in weathervars)
    table['prod'] = sq.fetch_production(first_hour, last_hour)
    tout_excess = table['Tout'] - 17
    table['(Tout-17)*vWind'] = tout_excess * table['vWind']

    frame = pd.DataFrame(table)
    sns.pairplot(frame)
    plt.savefig('figures/' + figfilename)
def validate_prod24h_before_and_diffsmodel():
    """Fit the 24h-difference model on ``all_data`` and apply it to a validation period.

    The model regresses production on the production 24 hours earlier and
    on the 24-hour change of the ensemble-mean 'Tout', 'vWind' and 'sunRad'
    (no constant term).  It is fitted on the module-level ``all_data`` and
    evaluated on 2016-01-20 01:00 to 2016-01-31 00:00; actual and modelled
    production are plotted.

    Returns
    -------
    (validation_data, res, residual) : the validation DataFrame, the
        regression result from ``mlin_regression`` and the model-minus-actual
        residual.
    """
    plt.close('all')
    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
    ts_start = dt.datetime(2016,1,20,1)
    ts_end = dt.datetime(2016,1,31,0)
    prev_start = ts_start - dt.timedelta(days=1)
    prev_end = ts_end - dt.timedelta(days=1)
    validation_data = ens.repack_ens_mean_as_df(ts_start, ts_end)

    # correct error in production:
    # NOTE(review): both operands are sample 116, so new_val equals
    # prod[116]; a neighbouring hour was probably meant. Kept as is because
    # the intended value cannot be determined from the code.
    new_val = (validation_data['prod'][116] + validation_data['prod'][116])/2
    # Use .loc so the assignment lands in validation_data itself; the former
    # chained indexing (frame[col][row] = value) can write to a temporary.
    # Assumes the default integer row index - TODO confirm.
    validation_data.loc[116, 'prod'] = new_val
    validation_data.loc[117, 'prod'] = new_val
    validation_data['prod24h_before'] = sq.fetch_production(prev_start, prev_end)
    validation_data.loc[116+24, 'prod24h_before'] = new_val
    validation_data.loc[117+24, 'prod24h_before'] = new_val

    Tout24h_before = ens.load_ens_timeseries_as_df(
        prev_start, prev_end, weathervars=['Tout']).mean(axis=1)
    vWind24h_before = ens.load_ens_timeseries_as_df(
        prev_start, prev_end, weathervars=['vWind']).mean(axis=1)
    sunRad24h_before = ens.load_ens_timeseries_as_df(
        prev_start, prev_end, weathervars=['sunRad']).mean(axis=1)
    validation_data['Tout24hdiff'] = validation_data['Tout'] - Tout24h_before
    validation_data['vWind24hdiff'] = validation_data['vWind'] - vWind24h_before
    validation_data['sunRad24hdiff'] = validation_data['sunRad'] - sunRad24h_before

    # fit on fit area
    X = all_data[cols]
    res = mlin_regression(all_data['prod'], X, add_const=False)

    # apply to validation area
    weather_model = linear_map(validation_data, res.params, cols)
    timesteps = ens.gen_hourly_timesteps(ts_start, ts_end)
    plt.plot_date(timesteps, validation_data['prod'], 'b-')
    plt.plot_date(timesteps, weather_model, 'r-')
    residual = weather_model - validation_data['prod']
    return validation_data, res, residual
import datetime as dt
from itertools import combinations

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.graphics.gofplots import qqplot
from statsmodels.sandbox.regression.predstd import wls_prediction_std

# Module-level fit data: ensemble-mean weather and production for the
# default period of ens.repack_ens_mean_as_df, extended below with the
# production 24 hours earlier, interaction terms and 24-hour differences.
all_data = ens.repack_ens_mean_as_df()
hours = [np.mod(h, 24) for h in range(1,697)]
all_data['prod24h_before'] = sq.fetch_production(dt.datetime(2015,12,16,1), dt.datetime(2016,1,14,0))
all_data['(Tout-17)*vWind*hum'] = all_data['(Tout-17)*vWind']*all_data['hum']
all_data['(Toutavg24-17)*vWindavg24*humavg24'] = all_data['(Toutavg-17)*vWindavg24']*all_data['humavg24']

# Ensemble means for the period shifted one day back.
# (A former np.roll-based 'Tout24hdiff' was removed here: it was overwritten
# below before any use, and np.roll wraps the last 24 samples around to the
# start of the series.)
Tout24h_before = ens.load_ens_timeseries_as_df(ts_start=dt.datetime(2015,12,16,1),
                                               ts_end=dt.datetime(2016,1,14,0),
                                               weathervars=['Tout']).mean(axis=1)
vWind24h_before = ens.load_ens_timeseries_as_df(ts_start=dt.datetime(2015,12,16,1),
                                                ts_end=dt.datetime(2016,1,14,0),
                                                weathervars=['vWind']).mean(axis=1)
sunRad24h_before = ens.load_ens_timeseries_as_df(ts_start=dt.datetime(2015,12,16,1),
                                                 ts_end=dt.datetime(2016,1,14,0),
                                                 weathervars=['sunRad']).mean(axis=1)
hum24h_before = ens.load_ens_timeseries_as_df(ts_start=dt.datetime(2015,12,16,1),
                                              ts_end=dt.datetime(2016,1,14,0),
                                              weathervars=['hum']).mean(axis=1)
all_data['Tout24hdiff'] = all_data['Tout'] - Tout24h_before
all_data['vWind24hdiff'] = all_data['vWind'] - vWind24h_before
all_data['sunRad24hdiff'] = all_data['sunRad'] - sunRad24h_before
def production_model():  # figure 3
    """Fit the 24h-difference production model and plot it with prediction intervals.

    The model regresses production on the production 24 hours earlier and
    on the 24-hour change of the ensemble-mean 'Tout', 'vWind' and 'sunRad'
    (no constant).  It is fitted on 2015-12-17 01:00 to 2016-01-15 00:00 and
    validated on 2016-01-20 01:00 to 2016-02-05 00:00.  The fitted
    coefficients are applied to each of the 25 ensemble members, and the
    spread between members is combined with quantiles of the fit residuals
    into prediction intervals.

    Side effects: prints error measures (model and the EO3 forecast fetched
    through ``sq``), saves the figure to
    'Q:/Projekter/Ens Article 1/figures/production_model.pdf' and stores the
    upper interval width in 'combined_conf_int.npz'.

    Relies on module-level names: ``ens``, ``sq``, ``mlin_regression``,
    ``linear_map``, ``mae``, ``mape``, ``rmse``, ``dcolwidth``, ``red`` and
    ``DateFormatter``.

    Returns
    -------
    (res, fit_data) : the regression result and the fit DataFrame.
    """
    def report(*parts):
        # Same output as the Python 2 statement ``print a, b, ...`` but also
        # valid syntax on Python 3.
        print(" ".join(str(part) for part in parts))

    plt.close('all')
    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
    weather = ['Tout', 'vWind', 'sunRad']
    one_day = dt.timedelta(days=1)
    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016,1,20,1), dt.datetime(2016,2,5,0))
    n_fit = len(ts1)

    # load the data
    fit_data = ens.repack_ens_mean_as_df()
    fit_data['prod24h_before'] = sq.fetch_production(ts1[0] - one_day, ts1[-1] - one_day)
    for v in weather:
        fit_data[v + '24hdiff'] = fit_data[v] \
            - ens.load_ens_timeseries_as_df(
                ts_start=ts1[0] - one_day,
                ts_end=ts1[-1] - one_day,
                weathervars=[v]).mean(axis=1)

    vali_data = ens.repack_ens_mean_as_df(ts2[0], ts2[-1])
    vali_data['prod24h_before'] = sq.fetch_production(ts2[0] - one_day, ts2[-1] - one_day)
    for v in weather:
        vali_data[v + '24hdiff'] = vali_data[v] \
            - ens.load_ens_timeseries_as_df(
                ts_start=ts2[0] - one_day,
                ts_end=ts2[-1] - one_day,
                weathervars=[v]).mean(axis=1)

    # correct error in production:
    # NOTE(review): both operands are sample 116, so new_val equals
    # prod[116]; a neighbouring hour was probably meant. Kept as is because
    # the intended value cannot be determined from the code.
    new_val = (vali_data['prod'][116] + vali_data['prod'][116])/2
    # .loc writes into vali_data itself; the former chained form
    # (frame[col][row] = value) can assign to a temporary copy.
    # Assumes the default integer row index - TODO confirm.
    vali_data.loc[116, 'prod'] = new_val
    vali_data.loc[117, 'prod'] = new_val
    vali_data.loc[116+24, 'prod24h_before'] = new_val
    vali_data.loc[117+24, 'prod24h_before'] = new_val

    # do the fit
    X = fit_data[cols]
    y = fit_data['prod']
    res = mlin_regression(y, X, add_const=False)

    fig, [ax1, ax2] = plt.subplots(2, 1, sharex=True,
                                   figsize=(dcolwidth, 0.57*dcolwidth),
                                   gridspec_kw={'height_ratios':[4,1]})

    # load ensemble data
    ens_data1 = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1],
                                              weathervars=['Tout', 'vWind', 'sunRad'])
    ens_data1['prod24h_before'] = fit_data['prod24h_before']
    ens_data1_24h_before = ens.load_ens_timeseries_as_df(
        ts_start=ts1[0] - one_day,
        ts_end=ts1[-1] - one_day,
        weathervars=['Tout', 'vWind', 'sunRad'])
    ens_data2 = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1],
                                              weathervars=['Tout', 'vWind', 'sunRad'])
    ens_data2['prod24h_before'] = vali_data['prod24h_before']
    ens_data2_24h_before = ens.load_ens_timeseries_as_df(
        ts_start=ts2[0] - one_day,
        ts_end=ts2[-1] - one_day,
        weathervars=['Tout', 'vWind', 'sunRad'])

    for i in range(25):
        for v in weather:
            key_raw = v + str(i)
            key_diff = v + '24hdiff' + str(i)
            ens_data1[key_diff] = ens_data1[key_raw] - ens_data1_24h_before[key_raw]
            ens_data2[key_diff] = ens_data2[key_raw] - ens_data2_24h_before[key_raw]

    all_ens_data = pd.concat([ens_data1, ens_data2])
    all_ts = ts1 + ts2

    # calculate production for each ensemble member
    ens_prods = np.zeros((len(all_ts), 25))
    for i in range(25):
        ens_cols = ['Tout24hdiff' + str(i), 'vWind24hdiff' + str(i),
                    'sunRad24hdiff' + str(i), 'prod24h_before']
        ens_params = pd.Series({'Tout24hdiff' + str(i): res.params['Tout24hdiff'],
                                'vWind24hdiff' + str(i): res.params['vWind24hdiff'],
                                'sunRad24hdiff' + str(i): res.params['sunRad24hdiff'],
                                'prod24h_before': res.params['prod24h_before']})
        ens_prods[:,i] = linear_map(all_ens_data, ens_params, ens_cols)

    # calculate combined confint
    ens_std = ens_prods.std(axis=1)
    # Model output on the validation period, evaluated once and reused below
    # (assumes linear_map has no side effects - TODO confirm).
    vali_model = linear_map(vali_data, res.params, cols)
    vali_resid = vali_model - vali_data['prod']
    fit_resid = res.resid
    fit_resid_corrig = fit_resid - np.sign(fit_resid)*1.9599*ens_std[0:n_fit]
    conf_int_spread_lower = - fit_resid_corrig.quantile(0.025)
    conf_int_spread_higher = fit_resid_corrig.quantile(0.975)
    combined_conf_ints = conf_int_spread_lower + conf_int_spread_higher + 2*1.9599*ens_std
    all_prod_model = np.concatenate([res.fittedvalues, vali_model])
    combined_ub95 = all_prod_model + conf_int_spread_higher + 1.9599*ens_std
    combined_lb95 = all_prod_model - (conf_int_spread_lower + 1.9599*ens_std)

    # plot confint
    ax1.fill_between(all_ts[n_fit:], combined_lb95[n_fit:], combined_ub95[n_fit:],
                     label='95% prediction intervals')
    ax1.fill_between(all_ts[n_fit:],
                     all_prod_model[n_fit:] - 1.9599*ens_std[n_fit:],
                     all_prod_model[n_fit:] + 1.9599*ens_std[n_fit:],
                     facecolor='grey', label='Weather ensemble 95% conf. int.')

    # plot ensemble models
    ax1.plot_date(all_ts[n_fit:], ens_prods[n_fit:], '-', lw=0.5)
    ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='Historical production')
    ax1.plot_date(ts2, vali_model, '-', c=red, lw=2, label='Production model')
    ax1.set_ylabel('Production [MW]', size=8)
    ax1.tick_params(axis='both', which='major', labelsize=8)
    ax1.xaxis.set_major_formatter(DateFormatter('%b %d'))
    ax1.legend(loc=1, prop={'size':8})
    ax1.set_ylim([300,1100])

    # Lower panel: interval widths divided by N.
    N = conf_int_spread_higher + 1.9599*ens_std[n_fit:].max()
    ax2.fill_between(ts2, -(1.9599*ens_std[n_fit:]+conf_int_spread_lower)/N,
                     -1.9599*ens_std[n_fit:]/N, alpha=0.5)
    ax2.fill_between(ts2, -1.9599*ens_std[n_fit:]/N, np.zeros(len(ts2)),
                     facecolor='grey', alpha=0.5)
    ax2.fill_between(ts2, 1.9599*ens_std[n_fit:]/N, facecolor='grey')
    ax2.fill_between(ts2, 1.9599*ens_std[n_fit:]/N,
                     (conf_int_spread_higher+1.9599*ens_std[n_fit:])/N)
    ax2.set_ylabel('Prediction intervals \n[normalized]', size=8)
    ax2.tick_params(axis='y', which='major', labelsize=8)
    ax2.set_xlim(dt.datetime(2016,1,20,0), dt.datetime(2016,2,5,0))
    fig.tight_layout()

    report("Min_normalized pos conf bound. ",
           np.min(1.9599*ens_std[n_fit:]/N+conf_int_spread_higher/N))
    report("MAE = " + str(mae(vali_resid)))
    report("MAPE = " + str(mape(vali_resid, vali_data['prod'])))
    report("RMSE = " + str(rmse(vali_resid)))
    report("ME = " + str(np.mean(vali_resid)))
    report("MAE (fit) = " + str(mae(res.resid)))
    report("MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])))
    report("RMSE (fit)= " + str(rmse(res.resid)))
    report("ME (fit)= " + str(np.mean(res.resid)))
    report("Width of const blue bands (MW)", conf_int_spread_lower, conf_int_spread_higher)
    plt.savefig('Q:/Projekter/Ens Article 1/figures/production_model.pdf', dpi=400)

    # Same error measures for the EO3 forecast, for comparison.
    EO3_fc1 = sq.fetch_EO3_midnight_forecast(ts1[0], ts1[-1])
    EO3_fc2 = sq.fetch_EO3_midnight_forecast(ts2[0], ts2[-1])
    EO3_err = EO3_fc2-vali_data['prod']
    EO3_err_fit = EO3_fc1-fit_data['prod']
    report("MAE (EO3) = " + str(mae(EO3_err)))
    report("MAPE (EO3) = " + str(mape(EO3_err, vali_data['prod'])))
    report("RMSE (EO3)= " + str(rmse(EO3_err)))
    report("ME (EO3)= " + str(np.mean(EO3_err)))
    report("MAE (EO3_fit) = " + str(mae(EO3_err_fit)))
    report("MAPE (EO3_fit) = " + str(mape(EO3_err_fit, fit_data['prod'])))
    report("RMSE (EO3_fit)= " + str(rmse(EO3_err_fit)))
    report("ME (EO3_fit)= " + str(np.mean(EO3_err_fit)))
    report(np.min(combined_conf_ints[n_fit:]/combined_conf_ints.max()))

    np.savez('combined_conf_int',
             combined_conf_int=(conf_int_spread_higher+1.9599*ens_std),
             timesteps=all_ts)

    report("Corr coeff: vali ", np.corrcoef(vali_data['prod'], vali_model)[0,1])
    report("Corr coeff: vali EO3 ", np.corrcoef(vali_data['prod'], EO3_fc2)[0,1])
    report("Corr coeff: fit ", np.corrcoef(fit_data['prod'], res.fittedvalues)[0,1])
    report("Corr coeff: fit EO3 ", np.corrcoef(fit_data['prod'], EO3_fc1)[0,1])

    # Share of validation hours where the actual production falls outside
    # the interval (printed as a fraction of len(ts2)).
    above_upper = vali_data['prod'] > (conf_int_spread_higher+1.9599*ens_std[n_fit:]+vali_model)
    report("% of actual production in vali period above upper",
           float(len(np.where(above_upper)[0]))/len(ts2))
    report("plus minus: ", 0.5/len(ts2))
    below_lower = vali_data['prod'] < (vali_model-(conf_int_spread_lower+1.9599*ens_std[n_fit:]))
    report("% of actual production in vali period below lower",
           float(len(np.where(below_lower)[0]))/len(ts2))
    report("plus minus: ", 0.5/len(ts2))
    return res, fit_data
# Lagged predictors and the matching production series.
X = pd.read_pickle('48h60h168h_lagged_X.pkl')  # run model_selection_ext_horizon to generate these files
y = pd.read_pickle('prod_to_gowith.pkl')

# add more predictor data:
for v in ('Tout', 'vWind', 'hum', 'sunRad'):
    X[v] = ens.load_ens_mean_avail_at10_series(v, ts[0], ts[-1], pointcode=71699)
#X['weekdays'] = [t.weekday() for t in ts]


def h_hoursbefore(timestamp, h):
    """Return `timestamp` moved `h` hours back."""
    return timestamp - dt.timedelta(hours=h)


# Start from the production 24 hours earlier; every hour outside 01-08 is
# replaced by the production 48 hours earlier.
most_recent_avail_prod = sq.fetch_production(h_hoursbefore(ts[0], 24),
                                             h_hoursbefore(ts[-1], 24))
for i, (t, p48) in zip(range(len(most_recent_avail_prod)),
                       zip(ts, X['prod48hbefore'])):
    if not 0 < t.hour <= 8:
        most_recent_avail_prod[i] = p48
X['prod24or48hbefore'] = most_recent_avail_prod

##
# Standardise predictors and target; the fitted scalers are kept.
X_scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
X_scaled = X_scaler.fit_transform(X)
y_scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
y_scaled = y_scaler.fit_transform(y)
#%%
def first_ens_prod_fig():
    """
    Plot a production model based on Tout, vWind and the production
    24 hours before.

    The model (with constant term) is fitted on 2015-12-17 01:00 to
    2016-01-15 00:00 and applied to 2016-01-20 01:00 to 2016-01-28 00:00.
    Its coefficients are also applied to each of the 25 ensemble members,
    and the spread between members is combined with the model's prediction
    standard error into a 95% band.

    Saves 'figures/ens_prod_models.pdf' and
    'figures/std_ens_prod_models.pdf', prints error measures and draws a
    seaborn jointplot of ensemble spread against residuals.

    Returns (res, all_ens_data, all_ts, fit production, validation
    production).
    """
    plt.close('all')
    z95 = 1.9599
    n_members = 25
    cols = ['Tout', 'vWind', 'prod24h_before']

    fit_start, fit_end = dt.datetime(2015, 12, 17, 1), dt.datetime(2016, 1, 15, 0)
    vali_start, vali_end = dt.datetime(2016, 1, 20, 1), dt.datetime(2016, 1, 28, 0)
    ts1 = ens.gen_hourly_timesteps(fit_start, fit_end)
    ts2 = ens.gen_hourly_timesteps(vali_start, vali_end)

    # load the data
    fit_data = ens.repack_ens_mean_as_df()
    fit_data['prod24h_before'] = sq.fetch_production(
        dt.datetime(2015, 12, 16, 1), dt.datetime(2016, 1, 14, 0))
    vali_data = ens.repack_ens_mean_as_df(vali_start, vali_end)
    vali_data['prod24h_before'] = sq.fetch_production(
        dt.datetime(2016, 1, 19, 1), dt.datetime(2016, 1, 27, 0))

    # do the fit
    predictors = fit_data[cols]
    target = fit_data['prod']
    res = mlin_regression(target, predictors, add_const=True)

    fig, axes = plt.subplots(2, 1, figsize=(40, 20))
    top_ax, bottom_ax = axes

    # load ensemble data
    ens_fit = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1])
    ens_fit['prod24h_before'] = fit_data['prod24h_before']
    ens_vali = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1])
    ens_vali['prod24h_before'] = vali_data['prod24h_before']
    all_ens_data = pd.concat([ens_fit, ens_vali])
    all_ts = ts1 + ts2

    # calculate production for each ensemble member
    ens_prods = np.zeros((len(all_ts), n_members))
    for member in range(n_members):
        tag = str(member)
        member_cols = ['Tout' + tag, 'vWind' + tag, 'prod24h_before']
        member_params = pd.Series({
            'Tout' + tag: res.params['Tout'],
            'vWind' + tag: res.params['vWind'],
            'const': res.params['const'],
            'prod24h_before': res.params['prod24h_before'],
        })
        ens_prods[:, member] = linear_map(all_ens_data, member_params, member_cols)

    # calculate combined confint
    prstd, iv_l, iv_u = wls_prediction_std(res)
    mean_conf_int_spread = np.mean(res.fittedvalues - iv_l)
    # The validation period reuses the mean fit-period band width,
    # converted to a standard deviation.
    vali_model_std = (1. / z95) * mean_conf_int_spread * np.ones(len(ts2))
    model_std = np.concatenate([prstd, vali_model_std])
    ens_std = ens_prods.std(axis=1)
    combined_std = np.sqrt(model_std**2 + ens_std**2)
    all_prod_model = np.concatenate(
        [res.fittedvalues, linear_map(vali_data, res.params, cols)])
    upper95 = all_prod_model + z95 * combined_std
    lower95 = all_prod_model - z95 * combined_std

    # plot confint
    top_ax.fill_between(all_ts, lower95, upper95, label='Combined 95% conf. int.')
    top_ax.fill_between(all_ts,
                        all_prod_model - z95 * ens_std,
                        all_prod_model + z95 * ens_std,
                        facecolor='grey', label='Ensemble 95% conf. int.')

    # plot ensemble models
    top_ax.plot_date(all_ts, ens_prods, '-', lw=0.5)
    top_ax.plot_date(ts1, target, 'k-', lw=2, label='Actual production')
    top_ax.plot_date(ts1, res.fittedvalues, 'r-', lw=2, label='Model on ensemble mean')
    top_ax.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='')
    top_ax.plot_date(ts2, linear_map(vali_data, res.params, cols), 'r-', lw=2)
    top_ax.set_ylabel('[MW]')
    top_ax.legend(loc=2)

    vali_resid = linear_map(vali_data, res.params, cols) - vali_data['prod']
    bottom_ax.plot_date(ts1, res.resid, '-', label='Residual, fitted data')
    bottom_ax.plot_date(ts2, vali_resid, '-', label='Residual, validation data')
    bottom_ax.set_ylabel('[MW]')
    bottom_ax.legend(loc=2)

    print("MAE = " + str(mae(vali_resid)))
    print("MAPE = " + str(mape(vali_resid, vali_data['prod'])))
    print("RMSE = " + str(rmse(vali_resid)))
    print("ME = " + str(np.mean(vali_resid)))
    print("MAE (fit) = " + str(mae(res.resid)))
    print("MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])))
    print("RMSE (fit)= " + str(rmse(res.resid)))
    print("ME (fit)= " + str(np.mean(res.resid)))
    plt.savefig('figures/ens_prod_models.pdf', dpi=600)

    plt.figure()
    plt.plot_date(all_ts, ens_std)
    plt.ylabel('Std. of ensemble production models [MW]')
    plt.savefig('figures/std_ens_prod_models.pdf', dpi=600)

    all_resid = np.concatenate([res.resid, vali_resid])
    sns.jointplot(x=ens_std, y=all_resid)
    return res, all_ens_data, all_ts, fit_data['prod'], vali_data['prod']
def second_ens_prod_fig(): """ This plot is based on a production model taking into account: the production 24 hours before as well as the change in temparature, windspeed and solar radiotion from 24 hours ago to now. """ plt.close('all') cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff'] ts1 = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0)) ts2 = ens.gen_hourly_timesteps(dt.datetime(2016,1,20,1), dt.datetime(2016,2,5,0)) #load the data fit_data = ens.repack_ens_mean_as_df() fit_data['prod24h_before'] = sq.fetch_production(ts1[0]+dt.timedelta(days=-1), ts1[-1]+dt.timedelta(days=-1)) fit_data['Tout24hdiff'] = fit_data['Tout'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['Tout']).mean(axis=1) fit_data['vWind24hdiff'] = fit_data['vWind'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['vWind']).mean(axis=1) fit_data['sunRad24hdiff'] = fit_data['sunRad'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['sunRad']).mean(axis=1) vali_data = ens.repack_ens_mean_as_df(ts2[0], ts2[-1]) vali_data['prod24h_before'] = sq.fetch_production(ts2[0]+dt.timedelta(days=-1), ts2[-1]+dt.timedelta(days=-1)) vali_data['Tout24hdiff'] = vali_data['Tout'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['Tout']).mean(axis=1) vali_data['vWind24hdiff'] = vali_data['vWind'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['vWind']).mean(axis=1) vali_data['sunRad24hdiff'] = vali_data['sunRad'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['sunRad']).mean(axis=1) # 
correct error in production: new_val = (vali_data['prod'][116] +vali_data['prod'][116])/2 vali_data['prod'][116] = new_val vali_data['prod'][117] = new_val vali_data['prod24h_before'][116+24] = new_val vali_data['prod24h_before'][117+24] = new_val # do the fit X = fit_data[cols] y = fit_data['prod'] res = mlin_regression(y, X, add_const=False) fig, [ax1, ax2] = plt.subplots(2,1, figsize=(40,20)) # load ensemble data ens_data1 = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1],\ weathervars=['Tout', 'vWind', 'sunRad']) ens_data1['prod24h_before'] = fit_data['prod24h_before'] ens_data1_24h_before = ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['Tout', 'vWind', 'sunRad']) ens_data2 = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1],\ weathervars=['Tout', 'vWind', 'sunRad']) ens_data2['prod24h_before'] = vali_data['prod24h_before'] ens_data2_24h_before = ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['Tout', 'vWind', 'sunRad']) for i in range(25): for v in ['Tout', 'vWind', 'sunRad']: key_raw = v + str(i) key_diff = v + '24hdiff' + str(i) ens_data1[key_diff] = ens_data1[key_raw] - ens_data1_24h_before[key_raw] ens_data2[key_diff] = ens_data2[key_raw] - ens_data2_24h_before[key_raw] all_ens_data = pd.concat([ens_data1, ens_data2]) all_ts = ts1 + ts2 # # # calculate production for each ensemble member ens_prods = np.zeros((len(all_ts), 25)) for i in range(25): ens_cols = ['Tout24hdiff' + str(i), 'vWind24hdiff' + str(i),\ 'sunRad24hdiff' + str(i), 'prod24h_before'] ens_params = pd.Series({'Tout24hdiff' + str(i):res.params['Tout24hdiff'], 'vWind24hdiff' + str(i):res.params['vWind24hdiff'], 'sunRad24hdiff' + str(i):res.params['sunRad24hdiff'], 'prod24h_before':res.params['prod24h_before']}) ens_prods[:,i] = linear_map(all_ens_data, ens_params, ens_cols) # calculate combined confint ens_std = 
ens_prods.std(axis=1) vali_resid = linear_map(vali_data, res.params, cols) - vali_data['prod'] vali_resid_corrig = vali_resid - np.sign(vali_resid)*1.9599*ens_std[len(ts1):] mean_conf_int_spread = (vali_resid_corrig.quantile(0.95) - vali_resid_corrig.quantile(0.05))/2 combined_conf_int = mean_conf_int_spread + 1.9599*ens_std all_prod_model = np.concatenate([res.fittedvalues, linear_map(vali_data, res.params, cols)]) combined_ub95 = all_prod_model + combined_conf_int combined_lb95 = all_prod_model - combined_conf_int # plot confint ax1.fill_between(all_ts, combined_lb95, combined_ub95, label='Combined 95% conf. int.') ax1.fill_between(all_ts, all_prod_model - 1.9599*ens_std, all_prod_model + 1.9599*ens_std, facecolor='grey', label='Ensemble 95% conf. int.') # plot ensempble models ax1.plot_date(all_ts, ens_prods, '-', lw=0.5) ax1.plot_date(ts1, y, 'k-', lw=2, label='Actual production') ax1.plot_date(ts1, res.fittedvalues,'r-', lw=2, label='Model on ensemble mean') ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='') ax1.plot_date(ts2, linear_map(vali_data, res.params, cols), 'r-', lw=2) ax1.set_ylabel('[MW]') ax1.legend(loc=2) ax1.set_ylim([0,1100]) ax2.plot_date(ts1, res.resid, '-', label='Residual, fitted data') ax2.plot_date(ts2, vali_resid, '-', label='Residual, validation data') ax2.set_ylabel('[MW]') ax2.legend(loc=2) ax2.set_ylim([-550, 550]) print "MAE = " + str(mae(vali_resid)) print "MAPE = " + str(mape(vali_resid, vali_data['prod'])) print "RMSE = " + str(rmse(vali_resid)) print "ME = " + str(np.mean(vali_resid)) print "MAE (fit) = " + str(mae(res.resid)) print "MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])) print "RMSE (fit)= " + str(rmse(res.resid)) print "ME (fit)= " + str(np.mean(res.resid)) plt.savefig('figures/ens_prod_models_v2.pdf', dpi=600) plt.figure() plt.plot_date(all_ts, ens_std) plt.ylabel('Std. 
of ensemble production models [MW]') plt.savefig('figures/std_ens_prod_models.pdf', dpi=600) # vali_ens_std = ens_std[len(ts1):] sns.jointplot(x=pd.Series(vali_ens_std), y=np.abs(vali_resid)) sns.jointplot(x=vali_data['prod'], y=pd.Series(linear_map(vali_data, res.params, cols))) EO3_fc1 = sq.fetch_EO3_midnight_forecast(ts1[0], ts1[-1]) EO3_fc2 = sq.fetch_EO3_midnight_forecast(ts2[0], ts2[-1]) plt.figure() plt.plot_date(ts1, fit_data['prod'], 'k-', label='Actual production') plt.plot_date(ts2, vali_data['prod'], 'k-') plt.plot_date(ts1, EO3_fc1, 'r-', label='EO3 forecast') plt.plot_date(ts2, EO3_fc2, 'r-') EO3_err = EO3_fc2-vali_data['prod'] EO3_err_fit = EO3_fc1-fit_data['prod'] print "MAE (EO3) = " + str(mae(EO3_err)) print "MAPE (EO3) = " + str(mape(EO3_err, vali_data['prod'])) print "RMSE (EO3)= " + str(rmse(EO3_err)) print "ME (EO3)= " + str(np.mean(EO3_err)) print "MAE (EO3_fit) = " + str(mae(EO3_err_fit)) print "MAPE (EO3_fit) = " + str(mape(EO3_err_fit, fit_data['prod'])) print "RMSE (EO3_fit)= " + str(rmse(EO3_err_fit)) print "ME (EO3_fit)= " + str(np.mean(EO3_err_fit)) sns.jointplot(x=pd.Series(vali_ens_std), y=np.abs(EO3_err)) plt.figure(figsize=(20,10)) plt.subplot(2,1,1) plt.plot_date(all_ts, combined_conf_int/combined_conf_int.max(), '-') plt.ylabel('Model + ensemble uncertainty \n [normalized]') plt.ylim(0,1) plt.subplot(2,1,2) plt.plot_date(all_ts, (1-0.2*combined_conf_int/combined_conf_int.max()), '-', label='Dynamic setpoint') plt.plot_date(all_ts, 0.8*np.ones(len(all_ts)), '--', label='Static setpoint') plt.ylabel('Setpoint for pump massflow \n temperature [fraction of max pump cap]') plt.legend() plt.ylim(.7,1) plt.savefig('figures/setpoint.pdf') return vali_data, fit_data, res, ens_std, vali_resid
fit_ts = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1), dt.datetime(2016, 1, 15, 0)) vali_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1), dt.datetime(2016, 2, 5, 0)) test_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1), dt.datetime(2016, 3, 1, 0)) all_ts = fit_ts + vali_ts + test_ts weathervars = ['Tout', 'vWind', 'sunRad', 'hum'] fit_data = pd.DataFrame() vali_data = pd.DataFrame() test_data = pd.DataFrame() fit_data['prod24h_before'] = sq.fetch_production( fit_ts[0] + dt.timedelta(days=-1), fit_ts[-1] + dt.timedelta(days=-1)) vali_data['prod24h_before'] = sq.fetch_production( vali_ts[0] + dt.timedelta(days=-1), vali_ts[-1] + dt.timedelta(days=-1)) test_data['prod24h_before'] = sq.fetch_production( test_ts[0] + dt.timedelta(days=-1), test_ts[-1] + dt.timedelta(days=-1)) fit_data['prod'] = sq.fetch_production(fit_ts[0], fit_ts[-1]) vali_data['prod'] = sq.fetch_production(vali_ts[0], vali_ts[-1]) test_data['prod'] = sq.fetch_production(test_ts[0], test_ts[-1]) for v in weathervars: fit_data['%s24hdiff'%v] = ens.load_ens_timeseries_as_df(\ ts_start=fit_ts[0],\ ts_end=fit_ts[-1], \ weathervars=[v]).mean(axis=1) \ - ens.load_ens_timeseries_as_df(\ ts_start=fit_ts[0]+dt.timedelta(days=-1),\
def autocorr(x):
    """Return the second half of the full cross-correlation of x with itself.

    np.correlate(..., mode='full') yields 2*len(x)-1 lags; the slice starts
    at the middle element, which is the zero-lag term, so the result holds
    lags 0, 1, 2, ... The values are raw sums (not divided by the variance
    or the number of samples).
    """
    result = np.correlate(x, x, mode='full')
    # Floor division keeps the slice index an integer under true division
    # as well (Python 3 or `from __future__ import division`).
    return result[result.size // 2:]


def autocorr2(x, lag=1):
    """Return the correlation coefficient between x and x rolled by lag.

    np.roll wraps around, so the last `lag` samples are paired with the
    first ones.
    """
    rho = np.corrcoef(x, np.roll(x, lag))[0, 1]
    return rho


def my_diff(x, lag=24):
    """Return x minus x rolled by lag samples (the first `lag` values wrap)."""
    return x - np.roll(x, lag)


ts = ens.gen_hourly_timesteps(dt.datetime(2013, 1, 1, 1),
                              dt.datetime(2016, 1, 1, 0))
prod = sq.fetch_production(ts[0], ts[-1])

# standardize before the raw autocorrelation
norm_prod = (prod - prod.mean()) / prod.std()
plt.plot_date(ts, prod, '-')

auto_c = autocorr(norm_prod)
# correlation coefficients for lags 0 .. 2*168-1 hours
rho_i = [autocorr2(prod, i) for i in range(2 * 168)]
prod_24h_diff = my_diff(prod)
rho2 = [autocorr2(prod_24h_diff, i) for i in range(2 * 168)]
prod_48h_diff = my_diff(prod, 48)
from model_selection import linear_map, mlin_regression, gen_all_combinations, summary_to_file, mae, mape, rmse

#%%
# Three consecutive hourly periods: fit, validation and test.
fit_ts = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                  dt.datetime(2016, 1, 15, 0))
vali_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                   dt.datetime(2016, 2, 5, 0))
test_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1),
                                   dt.datetime(2016, 3, 1, 0))

all_ts = fit_ts + vali_ts + test_ts

weathervars = ['Tout', 'vWind', 'sunRad', 'hum']

fit_data = pd.DataFrame()
vali_data = pd.DataFrame()
test_data = pd.DataFrame()

day_back = dt.timedelta(days=-1)
periods = ((fit_data, fit_ts), (vali_data, vali_ts), (test_data, test_ts))

# production shifted 24 hours back, for each period
for frame, steps in periods:
    frame['prod24h_before'] = sq.fetch_production(steps[0] + day_back,
                                                  steps[-1] + day_back)

# production in the period itself
for frame, steps in periods:
    frame['prod'] = sq.fetch_production(steps[0], steps[-1])

# ensemble-mean weather now minus ensemble-mean weather 24 hours earlier
for v in weathervars:
    fit_data['%s24hdiff' % v] = (
        ens.load_ens_timeseries_as_df(
            ts_start=fit_ts[0],
            ts_end=fit_ts[-1],
            weathervars=[v]).mean(axis=1)
        - ens.load_ens_timeseries_as_df(
            ts_start=fit_ts[0] + day_back,
            ts_end=fit_ts[-1] + day_back,
            weathervars=[v]).mean(axis=1))
def first_ens_prod_fig(): """ This plot is based on a production model taking into account: Tout, vWind and the production 24 hours before """ plt.close('all') cols = ['Tout', 'vWind', 'prod24h_before'] ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1), dt.datetime(2016, 1, 15, 0)) ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1), dt.datetime(2016, 1, 28, 0)) #load the data fit_data = ens.repack_ens_mean_as_df() fit_data['prod24h_before'] = sq.fetch_production( dt.datetime(2015, 12, 16, 1), dt.datetime(2016, 1, 14, 0)) vali_data = ens.repack_ens_mean_as_df(dt.datetime(2016, 1, 20, 1), dt.datetime(2016, 1, 28, 0)) vali_data['prod24h_before'] = sq.fetch_production( dt.datetime(2016, 1, 19, 1), dt.datetime(2016, 1, 27, 0)) # do the fit X = fit_data[cols] y = fit_data['prod'] res = mlin_regression(y, X, add_const=True) fig, [ax1, ax2] = plt.subplots(2, 1, figsize=(40, 20)) # load ensemble data ens_data1 = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1]) ens_data1['prod24h_before'] = fit_data['prod24h_before'] ens_data2 = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1]) ens_data2['prod24h_before'] = vali_data['prod24h_before'] all_ens_data = pd.concat([ens_data1, ens_data2]) all_ts = ts1 + ts2 # calculate production for each ensemble member ens_prods = np.zeros((len(all_ts), 25)) for i in range(25): ens_cols = ['Tout' + str(i), 'vWind' + str(i), 'prod24h_before'] ens_params = pd.Series({ 'Tout' + str(i): res.params['Tout'], 'vWind' + str(i): res.params['vWind'], 'const': res.params['const'], 'prod24h_before': res.params['prod24h_before'] }) ens_prods[:, i] = linear_map(all_ens_data, ens_params, ens_cols) # calculate combined confint prstd, iv_l, iv_u = wls_prediction_std(res) mean_conf_int_spread = np.mean(res.fittedvalues - iv_l) model_std = np.concatenate( [prstd, (1. 
/ 1.9599) * mean_conf_int_spread * np.ones(len(ts2))]) ens_std = ens_prods.std(axis=1) combined_std = np.sqrt(model_std**2 + ens_std**2) all_prod_model = np.concatenate( [res.fittedvalues, linear_map(vali_data, res.params, cols)]) combined_ub95 = all_prod_model + 1.9599 * combined_std combined_lb95 = all_prod_model - 1.9599 * combined_std # plot confint ax1.fill_between(all_ts, combined_lb95, combined_ub95, label='Combined 95% conf. int.') ax1.fill_between(all_ts, all_prod_model - 1.9599 * ens_std, all_prod_model + 1.9599 * ens_std, facecolor='grey', label='Ensemble 95% conf. int.') # plot ensempble models ax1.plot_date(all_ts, ens_prods, '-', lw=0.5) ax1.plot_date(ts1, y, 'k-', lw=2, label='Actual production') ax1.plot_date(ts1, res.fittedvalues, 'r-', lw=2, label='Model on ensemble mean') ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='') ax1.plot_date(ts2, linear_map(vali_data, res.params, cols), 'r-', lw=2) ax1.set_ylabel('[MW]') ax1.legend(loc=2) vali_resid = linear_map(vali_data, res.params, cols) - vali_data['prod'] ax2.plot_date(ts1, res.resid, '-', label='Residual, fitted data') ax2.plot_date(ts2, vali_resid, '-', label='Residual, validation data') ax2.set_ylabel('[MW]') ax2.legend(loc=2) print "MAE = " + str(mae(vali_resid)) print "MAPE = " + str(mape(vali_resid, vali_data['prod'])) print "RMSE = " + str(rmse(vali_resid)) print "ME = " + str(np.mean(vali_resid)) print "MAE (fit) = " + str(mae(res.resid)) print "MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])) print "RMSE (fit)= " + str(rmse(res.resid)) print "ME (fit)= " + str(np.mean(res.resid)) plt.savefig('figures/ens_prod_models.pdf', dpi=600) plt.figure() plt.plot_date(all_ts, ens_std) plt.ylabel('Std. of ensemble production models [MW]') plt.savefig('figures/std_ens_prod_models.pdf', dpi=600) sns.jointplot(x=ens_std, y=np.concatenate([res.resid, vali_resid])) return res, all_ens_data, all_ts, fit_data['prod'], vali_data['prod']
def production_model(): # figure 3 plt.close('all') cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff'] ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1), dt.datetime(2016, 1, 15, 0)) ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1), dt.datetime(2016, 2, 5, 0)) #load the data fit_data = ens.repack_ens_mean_as_df() fit_data['prod24h_before'] = sq.fetch_production( ts1[0] + dt.timedelta(days=-1), ts1[-1] + dt.timedelta(days=-1)) fit_data['Tout24hdiff'] = fit_data['Tout'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['Tout']).mean(axis=1) fit_data['vWind24hdiff'] = fit_data['vWind'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['vWind']).mean(axis=1) fit_data['sunRad24hdiff'] = fit_data['sunRad'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['sunRad']).mean(axis=1) vali_data = ens.repack_ens_mean_as_df(ts2[0], ts2[-1]) vali_data['prod24h_before'] = sq.fetch_production( ts2[0] + dt.timedelta(days=-1), ts2[-1] + dt.timedelta(days=-1)) vali_data['Tout24hdiff'] = vali_data['Tout'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['Tout']).mean(axis=1) vali_data['vWind24hdiff'] = vali_data['vWind'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['vWind']).mean(axis=1) vali_data['sunRad24hdiff'] = vali_data['sunRad'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['sunRad']).mean(axis=1) # correct error in production: new_val = (vali_data['prod'][116] + vali_data['prod'][116]) / 2 vali_data['prod'][116] = new_val vali_data['prod'][117] = new_val 
vali_data['prod24h_before'][116 + 24] = new_val vali_data['prod24h_before'][117 + 24] = new_val # do the fit X = fit_data[cols] y = fit_data['prod'] res = mlin_regression(y, X, add_const=False) fig, [ax1, ax2] = plt.subplots(2, 1, sharex=True, figsize=(dcolwidth, 0.57 * dcolwidth), gridspec_kw={'height_ratios': [4, 1]}) # load ensemble data ens_data1 = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1],\ weathervars=['Tout', 'vWind', 'sunRad']) ens_data1['prod24h_before'] = fit_data['prod24h_before'] ens_data1_24h_before = ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['Tout', 'vWind', 'sunRad']) ens_data2 = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1],\ weathervars=['Tout', 'vWind', 'sunRad']) ens_data2['prod24h_before'] = vali_data['prod24h_before'] ens_data2_24h_before = ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['Tout', 'vWind', 'sunRad']) for i in range(25): for v in ['Tout', 'vWind', 'sunRad']: key_raw = v + str(i) key_diff = v + '24hdiff' + str(i) ens_data1[ key_diff] = ens_data1[key_raw] - ens_data1_24h_before[key_raw] ens_data2[ key_diff] = ens_data2[key_raw] - ens_data2_24h_before[key_raw] all_ens_data = pd.concat([ens_data1, ens_data2]) all_ts = ts1 + ts2 # # # calculate production for each ensemble member ens_prods = np.zeros((len(all_ts), 25)) for i in range(25): ens_cols = ['Tout24hdiff' + str(i), 'vWind24hdiff' + str(i),\ 'sunRad24hdiff' + str(i), 'prod24h_before'] ens_params = pd.Series({ 'Tout24hdiff' + str(i): res.params['Tout24hdiff'], 'vWind24hdiff' + str(i): res.params['vWind24hdiff'], 'sunRad24hdiff' + str(i): res.params['sunRad24hdiff'], 'prod24h_before': res.params['prod24h_before'] }) ens_prods[:, i] = linear_map(all_ens_data, ens_params, ens_cols) # calculate combined confint ens_std = ens_prods.std(axis=1) vali_resid = linear_map(vali_data, res.params, 
cols) - vali_data['prod'] vali_resid_corrig = vali_resid - np.sign( vali_resid) * 1.9599 * ens_std[len(ts1):] #mean_conf_int_spread = (vali_resid_corrig.quantile(0.95) - vali_resid_corrig.quantile(0.05))/2 # this conf_int is not used anymore fit_resid = res.resid fit_resid_corrig = fit_resid - np.sign( fit_resid) * 1.9599 * ens_std[0:len(ts1)] conf_int_spread_lower = -fit_resid_corrig.quantile(0.025) conf_int_spread_higher = fit_resid_corrig.quantile(0.975) combined_conf_ints = conf_int_spread_lower + conf_int_spread_higher + 2 * 1.9599 * ens_std all_prod_model = np.concatenate( [res.fittedvalues, linear_map(vali_data, res.params, cols)]) combined_ub95 = all_prod_model + conf_int_spread_higher + 1.9599 * ens_std combined_lb95 = all_prod_model - (conf_int_spread_lower + 1.9599 * ens_std) # plot confint ax1.fill_between(all_ts[len(ts1):], combined_lb95[len(ts1):], combined_ub95[len(ts1):], label='95% prediction intervals') ax1.fill_between(all_ts[len(ts1):], all_prod_model[len(ts1):] - 1.9599 * ens_std[len(ts1):], all_prod_model[len(ts1):] + 1.9599 * ens_std[len(ts1):], facecolor='grey', label='Weather ensemble 95% conf. 
int.') # plot ensempble models ax1.plot_date(all_ts[len(ts1):], ens_prods[len(ts1):], '-', lw=0.5) ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='Historical production') ax1.plot_date(ts2, linear_map(vali_data, res.params, cols), '-', c=red, lw=2, label='Production model') ax1.set_ylabel('Production [MW]', size=8) ax1.tick_params(axis='both', which='major', labelsize=8) ax1.xaxis.set_major_formatter(DateFormatter('%b %d')) ax1.legend(loc=1, prop={'size': 8}) ax1.set_ylim([300, 1100]) N = conf_int_spread_higher + 1.9599 * ens_std[len(ts1):].max() ax2.fill_between(ts2, -(1.9599 * ens_std[len(ts1):] + conf_int_spread_lower) / N, -1.9599 * ens_std[len(ts1):] / N, alpha=0.5) ax2.fill_between(ts2, -1.9599 * ens_std[len(ts1):] / N, np.zeros(len(ts2)), facecolor='grey', alpha=0.5) ax2.fill_between(ts2, 1.9599 * ens_std[len(ts1):] / N, facecolor='grey') ax2.fill_between(ts2, 1.9599 * ens_std[len(ts1):] / N, (conf_int_spread_higher + 1.9599 * ens_std[len(ts1):]) / N) ax2.set_ylabel('Prediction intervals \n[normalized]', size=8) ax2.tick_params(axis='y', which='major', labelsize=8) ax2.set_xlim(dt.datetime(2016, 1, 20, 0), dt.datetime(2016, 2, 5, 0)) fig.tight_layout() print "Min_normalized pos conf bound. 
", np.min(1.9599 * ens_std[len(ts1):] / N + conf_int_spread_higher / N) print "MAE = " + str(mae(vali_resid)) print "MAPE = " + str(mape(vali_resid, vali_data['prod'])) print "RMSE = " + str(rmse(vali_resid)) print "ME = " + str(np.mean(vali_resid)) print "MAE (fit) = " + str(mae(res.resid)) print "MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])) print "RMSE (fit)= " + str(rmse(res.resid)) print "ME (fit)= " + str(np.mean(res.resid)) print "Width of const blue bands (MW)", conf_int_spread_lower, conf_int_spread_higher plt.savefig('Q:/Projekter/Ens Article 1/figures/production_model.pdf', dpi=400) EO3_fc1 = sq.fetch_EO3_midnight_forecast(ts1[0], ts1[-1]) EO3_fc2 = sq.fetch_EO3_midnight_forecast(ts2[0], ts2[-1]) EO3_err = EO3_fc2 - vali_data['prod'] EO3_err_fit = EO3_fc1 - fit_data['prod'] print "MAE (EO3) = " + str(mae(EO3_err)) print "MAPE (EO3) = " + str(mape(EO3_err, vali_data['prod'])) print "RMSE (EO3)= " + str(rmse(EO3_err)) print "ME (EO3)= " + str(np.mean(EO3_err)) print "MAE (EO3_fit) = " + str(mae(EO3_err_fit)) print "MAPE (EO3_fit) = " + str(mape(EO3_err_fit, fit_data['prod'])) print "RMSE (EO3_fit)= " + str(rmse(EO3_err_fit)) print "ME (EO3_fit)= " + str(np.mean(EO3_err_fit)) print np.min(combined_conf_ints[len(ts1):] / combined_conf_ints.max()) np.savez('combined_conf_int', combined_conf_int=(conf_int_spread_higher + 1.9599 * ens_std), timesteps=all_ts) print "Corr coeff: vali ", np.corrcoef( vali_data['prod'], linear_map(vali_data, res.params, cols))[0, 1] print "Corr coeff: vali EO3 ", np.corrcoef(vali_data['prod'], EO3_fc2)[0, 1] print "Corr coeff: fit ", np.corrcoef(fit_data['prod'], res.fittedvalues)[0, 1] print "Corr coeff: fit EO3 ", np.corrcoef(fit_data['prod'], EO3_fc1)[0, 1] print "% of actual production in vali period above upper", float( len( np.where(vali_data['prod'] > (conf_int_spread_higher + 1.9599 * ens_std[len(ts1):] + linear_map(vali_data, res.params, cols)))[0])) / len(ts2) print "plus minus: ", 0.5 / len(ts2) print "% of 
actual production in vali period below lower", float( len( np.where(vali_data['prod'] < (linear_map(vali_data, res.params, cols) - (conf_int_spread_lower + 1.9599 * ens_std[len(ts1):]))) [0])) / len(ts2) print "plus minus: ", 0.5 / len(ts2) return res, fit_data
def second_ens_prod_fig(): """ This plot is based on a production model taking into account: the production 24 hours before as well as the change in temparature, windspeed and solar radiotion from 24 hours ago to now. """ plt.close('all') cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff'] ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1), dt.datetime(2016, 1, 15, 0)) ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1), dt.datetime(2016, 2, 5, 0)) #load the data fit_data = ens.repack_ens_mean_as_df() fit_data['prod24h_before'] = sq.fetch_production( ts1[0] + dt.timedelta(days=-1), ts1[-1] + dt.timedelta(days=-1)) fit_data['Tout24hdiff'] = fit_data['Tout'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['Tout']).mean(axis=1) fit_data['vWind24hdiff'] = fit_data['vWind'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['vWind']).mean(axis=1) fit_data['sunRad24hdiff'] = fit_data['sunRad'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['sunRad']).mean(axis=1) vali_data = ens.repack_ens_mean_as_df(ts2[0], ts2[-1]) vali_data['prod24h_before'] = sq.fetch_production( ts2[0] + dt.timedelta(days=-1), ts2[-1] + dt.timedelta(days=-1)) vali_data['Tout24hdiff'] = vali_data['Tout'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['Tout']).mean(axis=1) vali_data['vWind24hdiff'] = vali_data['vWind'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['vWind']).mean(axis=1) vali_data['sunRad24hdiff'] = vali_data['sunRad'] \ - ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ 
weathervars=['sunRad']).mean(axis=1) # correct error in production: new_val = (vali_data['prod'][116] + vali_data['prod'][116]) / 2 vali_data['prod'][116] = new_val vali_data['prod'][117] = new_val vali_data['prod24h_before'][116 + 24] = new_val vali_data['prod24h_before'][117 + 24] = new_val # do the fit X = fit_data[cols] y = fit_data['prod'] res = mlin_regression(y, X, add_const=False) fig, [ax1, ax2] = plt.subplots(2, 1, figsize=(40, 20)) # load ensemble data ens_data1 = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1],\ weathervars=['Tout', 'vWind', 'sunRad']) ens_data1['prod24h_before'] = fit_data['prod24h_before'] ens_data1_24h_before = ens.load_ens_timeseries_as_df(\ ts_start=ts1[0]+dt.timedelta(days=-1),\ ts_end=ts1[-1]+dt.timedelta(days=-1), \ weathervars=['Tout', 'vWind', 'sunRad']) ens_data2 = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1],\ weathervars=['Tout', 'vWind', 'sunRad']) ens_data2['prod24h_before'] = vali_data['prod24h_before'] ens_data2_24h_before = ens.load_ens_timeseries_as_df(\ ts_start=ts2[0]+dt.timedelta(days=-1),\ ts_end=ts2[-1]+dt.timedelta(days=-1), \ weathervars=['Tout', 'vWind', 'sunRad']) for i in range(25): for v in ['Tout', 'vWind', 'sunRad']: key_raw = v + str(i) key_diff = v + '24hdiff' + str(i) ens_data1[ key_diff] = ens_data1[key_raw] - ens_data1_24h_before[key_raw] ens_data2[ key_diff] = ens_data2[key_raw] - ens_data2_24h_before[key_raw] all_ens_data = pd.concat([ens_data1, ens_data2]) all_ts = ts1 + ts2 # # # calculate production for each ensemble member ens_prods = np.zeros((len(all_ts), 25)) for i in range(25): ens_cols = ['Tout24hdiff' + str(i), 'vWind24hdiff' + str(i),\ 'sunRad24hdiff' + str(i), 'prod24h_before'] ens_params = pd.Series({ 'Tout24hdiff' + str(i): res.params['Tout24hdiff'], 'vWind24hdiff' + str(i): res.params['vWind24hdiff'], 'sunRad24hdiff' + str(i): res.params['sunRad24hdiff'], 'prod24h_before': res.params['prod24h_before'] }) ens_prods[:, i] = linear_map(all_ens_data, 
ens_params, ens_cols) # calculate combined confint ens_std = ens_prods.std(axis=1) vali_resid = linear_map(vali_data, res.params, cols) - vali_data['prod'] vali_resid_corrig = vali_resid - np.sign( vali_resid) * 1.9599 * ens_std[len(ts1):] mean_conf_int_spread = (vali_resid_corrig.quantile(0.95) - vali_resid_corrig.quantile(0.05)) / 2 combined_conf_int = mean_conf_int_spread + 1.9599 * ens_std all_prod_model = np.concatenate( [res.fittedvalues, linear_map(vali_data, res.params, cols)]) combined_ub95 = all_prod_model + combined_conf_int combined_lb95 = all_prod_model - combined_conf_int # plot confint ax1.fill_between(all_ts, combined_lb95, combined_ub95, label='Combined 95% conf. int.') ax1.fill_between(all_ts, all_prod_model - 1.9599 * ens_std, all_prod_model + 1.9599 * ens_std, facecolor='grey', label='Ensemble 95% conf. int.') # plot ensempble models ax1.plot_date(all_ts, ens_prods, '-', lw=0.5) ax1.plot_date(ts1, y, 'k-', lw=2, label='Actual production') ax1.plot_date(ts1, res.fittedvalues, 'r-', lw=2, label='Model on ensemble mean') ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='') ax1.plot_date(ts2, linear_map(vali_data, res.params, cols), 'r-', lw=2) ax1.set_ylabel('[MW]') ax1.legend(loc=2) ax1.set_ylim([0, 1100]) ax2.plot_date(ts1, res.resid, '-', label='Residual, fitted data') ax2.plot_date(ts2, vali_resid, '-', label='Residual, validation data') ax2.set_ylabel('[MW]') ax2.legend(loc=2) ax2.set_ylim([-550, 550]) print "MAE = " + str(mae(vali_resid)) print "MAPE = " + str(mape(vali_resid, vali_data['prod'])) print "RMSE = " + str(rmse(vali_resid)) print "ME = " + str(np.mean(vali_resid)) print "MAE (fit) = " + str(mae(res.resid)) print "MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])) print "RMSE (fit)= " + str(rmse(res.resid)) print "ME (fit)= " + str(np.mean(res.resid)) plt.savefig('figures/ens_prod_models_v2.pdf', dpi=600) plt.figure() plt.plot_date(all_ts, ens_std) plt.ylabel('Std. 
of ensemble production models [MW]') plt.savefig('figures/std_ens_prod_models.pdf', dpi=600) # vali_ens_std = ens_std[len(ts1):] sns.jointplot(x=pd.Series(vali_ens_std), y=np.abs(vali_resid)) sns.jointplot(x=vali_data['prod'], y=pd.Series(linear_map(vali_data, res.params, cols))) EO3_fc1 = sq.fetch_EO3_midnight_forecast(ts1[0], ts1[-1]) EO3_fc2 = sq.fetch_EO3_midnight_forecast(ts2[0], ts2[-1]) plt.figure() plt.plot_date(ts1, fit_data['prod'], 'k-', label='Actual production') plt.plot_date(ts2, vali_data['prod'], 'k-') plt.plot_date(ts1, EO3_fc1, 'r-', label='EO3 forecast') plt.plot_date(ts2, EO3_fc2, 'r-') EO3_err = EO3_fc2 - vali_data['prod'] EO3_err_fit = EO3_fc1 - fit_data['prod'] print "MAE (EO3) = " + str(mae(EO3_err)) print "MAPE (EO3) = " + str(mape(EO3_err, vali_data['prod'])) print "RMSE (EO3)= " + str(rmse(EO3_err)) print "ME (EO3)= " + str(np.mean(EO3_err)) print "MAE (EO3_fit) = " + str(mae(EO3_err_fit)) print "MAPE (EO3_fit) = " + str(mape(EO3_err_fit, fit_data['prod'])) print "RMSE (EO3_fit)= " + str(rmse(EO3_err_fit)) print "ME (EO3_fit)= " + str(np.mean(EO3_err_fit)) sns.jointplot(x=pd.Series(vali_ens_std), y=np.abs(EO3_err)) plt.figure(figsize=(20, 10)) plt.subplot(2, 1, 1) plt.plot_date(all_ts, combined_conf_int / combined_conf_int.max(), '-') plt.ylabel('Model + ensemble uncertainty \n [normalized]') plt.ylim(0, 1) plt.subplot(2, 1, 2) plt.plot_date(all_ts, (1 - 0.2 * combined_conf_int / combined_conf_int.max()), '-', label='Dynamic setpoint') plt.plot_date(all_ts, 0.8 * np.ones(len(all_ts)), '--', label='Static setpoint') plt.ylabel( 'Setpoint for pump massflow \n temperature [fraction of max pump cap]') plt.legend() plt.ylim(.7, 1) plt.savefig('figures/setpoint.pdf') return vali_data, fit_data, res, ens_std, vali_resid
#%% SVR experiment
ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 26, 1),
                              dt.datetime(2016, 4, 1, 0))

# run model_selection_ext_horizon to generate these files
X = pd.read_pickle('48h60h168h_lagged_X.pkl')
y = pd.read_pickle('prod_to_gowith.pkl')

# add more predictor data:
for v in ['Tout', 'vWind', 'hum', 'sunRad']:
    X[v] = ens.load_ens_mean_avail_at10_series(v, ts[0], ts[-1],
                                               pointcode=71699)

#X['weekdays'] = [t.weekday() for t in ts]


def h_hoursbefore(timestamp, h):
    """Return timestamp shifted h hours back in time."""
    return timestamp + dt.timedelta(hours=-h)


# Start from the 24h-lagged production and fall back to the 48h-lagged value
# for every hour outside 01..08.
# NOTE(review): presumably the 24h value is not yet available for those
# hours at forecast time -- confirm.
most_recent_avail_prod = sq.fetch_production(h_hoursbefore(ts[0], 24),
                                             h_hoursbefore(ts[-1], 24))
row_numbers = range(len(most_recent_avail_prod))
for i, t, p48 in zip(row_numbers, ts, X['prod48hbefore']):
    if t.hour == 0 or t.hour > 8:
        most_recent_avail_prod[i] = p48

X['prod24or48hbefore'] = most_recent_avail_prod

##
# standardize predictors and target to zero mean and unit variance
X_scaler = StandardScaler(copy=True, with_mean=True, with_std=True).fit(X)
X_scaled = X_scaler.transform(X)
y_scaler = StandardScaler(copy=True, with_mean=True, with_std=True).fit(y)
y_scaled = y_scaler.transform(y)

#%%
elif correct_signs[var]*res.params[var] < 0: return False, var if np.abs(res.params['prod24h_before']-1) > 0.05: print "WARNING: prod24h_before is weighted with: " + str(res.params['prod24h_before']) if res.resid.mean()>5: print "WARNING: Bias in model: " + res.resid.mean() return True, None ts_start = dt.datetime(2015, 10, 17, 1) ts_end = dt.datetime(2016,1,16,0) timesteps = gen_hourly_timesteps(ts_start, ts_end) df = pd.DataFrame() df['prod'] = sq.fetch_production(ts_start, ts_end) df['prod24h_before'] = sq.fetch_production(ts_start + dt.timedelta(days=-1), \ ts_end + dt.timedelta(days=-1)) for v in ['Tout', 'vWind', 'sunRad', 'hum']: df[v] = sq.fetch_BrabrandSydWeather(v, ts_start, ts_end) df[v + '24h_before'] = sq.fetch_BrabrandSydWeather(v, ts_start + dt.timedelta(days=-1), \ ts_end + dt.timedelta(days=-1)) df[v + '24hdiff'] = df[v] - df[v + '24h_before'] cols = ['Tout24hdiff', 'vWind24hdiff', 'prod24h_before', 'sunRad24hdiff', 'hum24hdiff'] good_fit = False while not good_fit: X = df[cols] res = mlin_regression(df['prod'], X, add_const=False) print res.summary()
import sql_tools as sq
from itertools import combinations
import statsmodels.api as sm
from statsmodels.graphics.gofplots import qqplot
import datetime as dt
from statsmodels.sandbox.regression.predstd import wls_prediction_std
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

all_data = ens.repack_ens_mean_as_df()

# hour of day for 696 consecutive hours, starting at hour 1
hours = [np.mod(h, 24) for h in range(1, 697)]

# production shifted 24 hours back
all_data['prod24h_before'] = sq.fetch_production(
    dt.datetime(2015, 12, 16, 1), dt.datetime(2016, 1, 14, 0))

# interaction terms
all_data['(Tout-17)*vWind*hum'] = (all_data['(Tout-17)*vWind']
                                   * all_data['hum'])
# NOTE(review): the source column below is spelled '(Toutavg-17)*vWindavg24'
# while the target uses 'Toutavg24' -- confirm the key exists in all_data.
all_data['(Toutavg24-17)*vWindavg24*humavg24'] = (
    all_data['(Toutavg-17)*vWindavg24'] * all_data['humavg24'])

# NOTE(review): this rolled difference is overwritten further down by the
# difference against the series loaded for the previous day.
all_data['Tout24hdiff'] = all_data['Tout'] - np.roll(all_data['Tout'], 24)

# ensemble means for the same hours one day earlier
Tout24h_before = ens.load_ens_timeseries_as_df(
    ts_start=dt.datetime(2015, 12, 16, 1),
    ts_end=dt.datetime(2016, 1, 14, 0),
    weathervars=['Tout']).mean(axis=1)
vWind24h_before = ens.load_ens_timeseries_as_df(
    ts_start=dt.datetime(2015, 12, 16, 1),
    ts_end=dt.datetime(2016, 1, 14, 0),
    weathervars=['vWind']).mean(axis=1)
sunRad24h_before = ens.load_ens_timeseries_as_df(
    ts_start=dt.datetime(2015, 12, 16, 1),
    ts_end=dt.datetime(2016, 1, 14, 0),
    weathervars=['sunRad']).mean(axis=1)
hum24h_before = ens.load_ens_timeseries_as_df(
    ts_start=dt.datetime(2015, 12, 16, 1),
    ts_end=dt.datetime(2016, 1, 14, 0),
    weathervars=['hum']).mean(axis=1)

all_data['Tout24hdiff'] = all_data['Tout'] - Tout24h_before
all_data['vWind24hdiff'] = all_data['vWind'] - vWind24h_before