def weather_forecast_ensemble():  # figure 2
    """Plot the weather forecast ensemble (figure 2) and save it as a PDF.

    Loads the hourly ensemble series for 'Tout', 'vWind' and 'sunRad' from
    2016-01-20 01:00 to 2016-02-05 00:00 and draws the 25 member columns
    ('<var>0' ... '<var>24') of each variable in its own panel.

    NOTE(review): a function with this same name is defined again further
    down in this file; being later, that definition replaces this one when
    the module is loaded.

    Returns:
        tuple: ``(ens_data, axes)`` -- the object returned by
        ``ens.load_ens_timeseries_as_df`` and the three matplotlib axes.
    """
    plt.close('all')

    n_members = 25
    period_start = dt.datetime(2016, 1, 20, 1)
    period_end = dt.datetime(2016, 2, 5, 0)

    ts = ens.gen_hourly_timesteps(period_start, period_end)
    ens_data = ens.load_ens_timeseries_as_df(
        ts_start=ts[0],
        ts_end=ts[-1],
        weathervars=['Tout', 'vWind', 'sunRad'])

    panel_size = (colwidth, 1.65 * colwidth)
    fig, axes = plt.subplots(3, 1, sharex=True, figsize=panel_size)
    plt.xticks(size=5)

    # One (variable, colour shift, y-label) entry per panel, top to bottom.
    panels = [
        ('Tout', 15, u'Outside temperature [%sC]' % uni_degree),
        ('vWind', 23, 'Wind speed [m/s]'),
        ('sunRad', 6, u'Solar irradiance [W/m%s]' % uni_squared),
    ]
    colour_grid = np.linspace(0, 1, n_members)

    for ax, (var, shift, label) in zip(axes, panels):
        # Rolling the colour grid gives every panel a differently ordered
        # palette drawn from the same colormap.
        palette = plt.cm.Dark2(np.roll(colour_grid, shift))
        ax.set_prop_cycle(cycler('color', palette))
        member_columns = [var + str(k) for k in range(n_members)]
        ax.plot_date(ts, ens_data[member_columns], '-', lw=0.5)
        ax.set_ylabel(label, size=8)
        ax.tick_params(axis='y', which='major', labelsize=8)
        # pyplot-level call: it addresses the current axes, not `ax`.
        plt.box(True)

    plt.tight_layout()
    bottom_axis = axes[-1]
    bottom_axis.xaxis.set_major_formatter(DateFormatter('%b %d'))
    bottom_axis.set_xlim(dt.datetime(2016, 1, 20, 0), period_end)
    fig.savefig('figures/first_articlefigs/weather_forecast_ensemble.pdf')
    return ens_data, axes
def validate_prod24h_before_and_diffsmodel():
    """Fit the 24h-lag production model and apply it to a validation period.

    The validation frame (2016-01-20 01:00 to 2016-01-31 00:00) is extended
    with the production 24 hours earlier and with the 24-hour change of the
    ensemble-mean 'Tout', 'vWind' and 'sunRad'.  The regression itself is
    fitted (without constant) on ``all_data``, which is not defined in this
    function and therefore has to exist at module level.  The validation
    production and the model output are drawn with pyplot.

    NOTE(review): a function with this same name is defined again further
    down in this file; being later, that definition replaces this one when
    the module is loaded.

    Returns:
        tuple: ``(validation_data, res, residual)`` -- the extended
        validation frame, the result of ``mlin_regression`` and the model
        output minus the (corrected) production.
    """
    plt.close('all')

    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
    one_day = dt.timedelta(days=1)
    ts_start = dt.datetime(2016, 1, 20, 1)
    ts_end = dt.datetime(2016, 1, 31, 0)

    validation_data = ens.repack_ens_mean_as_df(ts_start, ts_end)

    # correct error in production:
    # Rows are addressed by position; this matches the original integer keys
    # as long as the frame carries a default 0-based index.
    # NOTE(review): the original averages element 116 with itself, so
    # new_val is simply that element; a neighbouring row was presumably
    # intended -- confirm before changing.
    reference = validation_data['prod'].iloc[116]
    new_val = (reference + reference) / 2
    bad_rows = (116, 117)
    # Write through .loc on the frame itself: assigning via
    # frame['prod'][i] is chained indexing and may only change a copy.
    for pos in bad_rows:
        validation_data.loc[validation_data.index[pos], 'prod'] = new_val

    validation_data['prod24h_before'] = sq.fetch_production(
        ts_start - one_day, ts_end - one_day)
    # The same two hours show up 24 rows later in the lagged column.
    for pos in bad_rows:
        validation_data.loc[validation_data.index[pos + 24],
                            'prod24h_before'] = new_val

    # 24-hour change of each ensemble-mean weather variable.
    for var in ('Tout', 'vWind', 'sunRad'):
        mean_day_before = ens.load_ens_timeseries_as_df(
            ts_start - one_day, ts_end - one_day,
            weathervars=[var]).mean(axis=1)
        validation_data[var + '24hdiff'] = (validation_data[var]
                                            - mean_day_before)

    # fit on fit area
    X = all_data[cols]
    res = mlin_regression(all_data['prod'], X, add_const=False)

    # apply to validation area
    weather_model = linear_map(validation_data, res.params, cols)
    timesteps = ens.gen_hourly_timesteps(ts_start, ts_end)

    plt.plot_date(timesteps, validation_data['prod'], 'b-')
    plt.plot_date(timesteps, weather_model, 'r-')
    residual = weather_model - validation_data['prod']

    return validation_data, res, residual
def validate_prod24h_before_and_diffsmodel():
    """Fit the 24h-lag production model and apply it to a validation period.

    The validation frame (2016-01-20 01:00 to 2016-01-31 00:00) is extended
    with the production 24 hours earlier and with the 24-hour change of the
    ensemble-mean 'Tout', 'vWind' and 'sunRad'.  The regression itself is
    fitted (without constant) on ``all_data``, which is not defined in this
    function and therefore has to exist at module level.  The validation
    production and the model output are drawn with pyplot.

    NOTE(review): an earlier definition with this same name exists above in
    this file; this later one is what the name ends up bound to.

    Returns:
        tuple: ``(validation_data, res, residual)`` -- the extended
        validation frame, the result of ``mlin_regression`` and the model
        output minus the (corrected) production.
    """
    plt.close('all')

    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
    one_day = dt.timedelta(days=1)
    ts_start = dt.datetime(2016, 1, 20, 1)
    ts_end = dt.datetime(2016, 1, 31, 0)

    validation_data = ens.repack_ens_mean_as_df(ts_start, ts_end)

    # correct error in production:
    # Rows are addressed by position; this matches the original integer keys
    # as long as the frame carries a default 0-based index.
    # NOTE(review): the original averages element 116 with itself, so
    # new_val is simply that element; a neighbouring row was presumably
    # intended -- confirm before changing.
    reference = validation_data['prod'].iloc[116]
    new_val = (reference + reference)/2
    bad_rows = (116, 117)
    # Write through .loc on the frame itself: assigning via
    # frame['prod'][i] is chained indexing and may only change a copy.
    for pos in bad_rows:
        validation_data.loc[validation_data.index[pos], 'prod'] = new_val

    validation_data['prod24h_before'] = sq.fetch_production(
        ts_start - one_day, ts_end - one_day)
    # The same two hours show up 24 rows later in the lagged column.
    for pos in bad_rows:
        validation_data.loc[validation_data.index[pos + 24],
                            'prod24h_before'] = new_val

    # 24-hour change of each ensemble-mean weather variable.
    for var in ('Tout', 'vWind', 'sunRad'):
        mean_day_before = ens.load_ens_timeseries_as_df(
            ts_start - one_day, ts_end - one_day,
            weathervars=[var]).mean(axis=1)
        validation_data[var + '24hdiff'] = (validation_data[var]
                                            - mean_day_before)

    # fit on fit area
    X = all_data[cols]
    res = mlin_regression(all_data['prod'], X, add_const=False)

    # apply to validation area
    weather_model = linear_map(validation_data, res.params, cols)
    timesteps = ens.gen_hourly_timesteps(ts_start, ts_end)

    plt.plot_date(timesteps, validation_data['prod'], 'b-')
    plt.plot_date(timesteps, weather_model, 'r-')
    residual = weather_model - validation_data['prod']

    return validation_data, res, residual
def load_cons_model_ens_dfs(df):
    """Build one consumption-model data frame per weather ensemble member.

    For each of three hourly periods (named fit, validation and test in the
    code) and each of the 25 ensemble members a frame is assembled with the
    columns

    * ``cons`` and ``cons24hbefore`` -- ``df['cons']`` over the period and
      over the same period shifted one day back, and
    * ``<var>24hdiff`` for 'Tout', 'vWind', 'sunRad' and 'hum' -- the
      member's value minus its value 24 hours earlier.

    Args:
        df: frame with a ``cons`` column that can be label-sliced with
            ``datetime`` objects (presumably a DatetimeIndex -- confirm
            against the caller).

    Returns:
        list: 25 frames, one per member, each the concatenation of that
        member's three period frames indexed by the hourly time steps.
    """
    n_members = 25
    one_day = dt.timedelta(days=1)
    weathervars = ['Tout', 'vWind', 'sunRad', 'hum']

    periods = [
        ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                 dt.datetime(2016, 1, 15, 0)),   # fit
        ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                 dt.datetime(2016, 2, 5, 0)),    # validation
        ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1),
                                 dt.datetime(2016, 3, 1, 0)),    # test
    ]

    # frames_by_period[p][i] is the frame of member i for period p.
    frames_by_period = []
    for ts in periods:
        # The consumption is the same for every member, so slice it once.
        # .loc is the label-based replacement for the removed DataFrame.ix.
        cons_now = df.loc[ts[0]:ts[-1], 'cons']
        cons_before = df.loc[ts[0] - one_day:ts[-1] - one_day, 'cons']
        member_frames = []
        for _ in range(n_members):
            frame = pd.DataFrame()
            # Plain arrays are stored so the values go in by position and
            # the datetime index of the slice cannot interfere; np.array
            # also gives every member its own copy.
            frame['cons'] = np.array(cons_now)
            frame['cons24hbefore'] = np.array(cons_before)
            member_frames.append(frame)
        frames_by_period.append(member_frames)

    for v in weathervars:
        for ts, member_frames in zip(periods, frames_by_period):
            # 24-hour change of every ensemble member of this variable.
            diff = (ens.load_ens_timeseries_as_df(
                        ts_start=ts[0],
                        ts_end=ts[-1],
                        weathervars=[v])
                    - ens.load_ens_timeseries_as_df(
                        ts_start=ts[0] - one_day,
                        ts_end=ts[-1] - one_day,
                        weathervars=[v]))
            for i, frame in enumerate(member_frames):
                frame['%s24hdiff' % v] = diff[v + str(i)]

    all_data = []
    for i in range(n_members):
        parts = []
        for ts, member_frames in zip(periods, frames_by_period):
            frame = member_frames[i]
            frame.set_index(pd.DatetimeIndex(ts), inplace=True)
            parts.append(frame)
        all_data.append(pd.concat(parts))

    return all_data
def load_cons_model_dfs(df):
    """Build the consumption-model frames from the ensemble-mean weather.

    Takes the data frame with the already calculated consumptions and, for
    each of three hourly periods (fit, validation, test), assembles a frame
    with the columns

    * ``cons`` and ``cons24hbefore`` -- ``df['cons']`` over the period and
      over the same period shifted one day back, and
    * ``<var>24hdiff`` for 'Tout', 'vWind', 'sunRad' and 'hum' -- the mean
      over the ensemble columns minus that mean 24 hours earlier.

    Args:
        df: frame with a ``cons`` column that can be label-sliced with
            ``datetime`` objects (presumably a DatetimeIndex -- confirm
            against the caller).

    Returns:
        tuple: ``(fit_data, vali_data, test_data, all_data)`` where the
        first three are indexed by their hourly time steps and ``all_data``
        is their concatenation.
    """
    one_day = dt.timedelta(days=1)
    weathervars = ['Tout', 'vWind', 'sunRad', 'hum']

    fit_ts = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                      dt.datetime(2016, 1, 15, 0))
    vali_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                       dt.datetime(2016, 2, 5, 0))
    test_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1),
                                       dt.datetime(2016, 3, 1, 0))
    periods = [fit_ts, vali_ts, test_ts]

    frames = []
    for ts in periods:
        frame = pd.DataFrame()
        # .loc is the label-based replacement for the removed DataFrame.ix.
        # Plain arrays are stored so the values go in by position and the
        # datetime index of the slice cannot interfere.
        frame['cons'] = np.array(df.loc[ts[0]:ts[-1], 'cons'])
        frame['cons24hbefore'] = np.array(
            df.loc[ts[0] - one_day:ts[-1] - one_day, 'cons'])
        frames.append(frame)

    for v in weathervars:
        for ts, frame in zip(periods, frames):
            mean_now = ens.load_ens_timeseries_as_df(
                ts_start=ts[0],
                ts_end=ts[-1],
                weathervars=[v]).mean(axis=1)
            mean_before = ens.load_ens_timeseries_as_df(
                ts_start=ts[0] - one_day,
                ts_end=ts[-1] - one_day,
                weathervars=[v]).mean(axis=1)
            frame['%s24hdiff' % v] = mean_now - mean_before

    for ts, frame in zip(periods, frames):
        frame.set_index(pd.DatetimeIndex(ts), inplace=True)

    fit_data, vali_data, test_data = frames
    all_data = pd.concat([fit_data, vali_data, test_data])

    return fit_data, vali_data, test_data, all_data
def weather_forecast_ensemble(): # figure 2
    """Plot the weather forecast ensemble (figure 2) and save it as a PDF.

    Loads the hourly ensemble series for 'Tout', 'vWind' and 'sunRad' from
    2016-01-20 01:00 to 2016-02-05 00:00 and draws the 25 member columns
    ('<var>0' ... '<var>24') of each variable in its own panel.

    NOTE(review): an earlier definition with this same name exists above in
    this file; this later one is what the name ends up bound to.

    Returns:
        tuple: ``(ens_data, axes)`` -- the object returned by
        ``ens.load_ens_timeseries_as_df`` and the three matplotlib axes.
    """
    plt.close('all')

    n_members = 25
    period_start = dt.datetime(2016, 1, 20, 1)
    period_end = dt.datetime(2016, 2, 5, 0)

    ts = ens.gen_hourly_timesteps(period_start, period_end)
    ens_data = ens.load_ens_timeseries_as_df(
        ts_start=ts[0],
        ts_end=ts[-1],
        weathervars=['Tout', 'vWind', 'sunRad'])

    panel_size = (colwidth, 1.65*colwidth)
    fig, axes = plt.subplots(3, 1, sharex=True, figsize=panel_size)
    plt.xticks(size=5)

    # One (variable, colour shift, y-label) entry per panel, top to bottom.
    panels = [
        ('Tout', 15, u'Outside temperature [%sC]'%uni_degree),
        ('vWind', 23, 'Wind speed [m/s]'),
        ('sunRad', 6, u'Solar irradiance [W/m%s]'%uni_squared),
    ]
    colour_grid = np.linspace(0, 1, n_members)

    for ax, (var, shift, label) in zip(axes, panels):
        # Rolling the colour grid gives every panel a differently ordered
        # palette drawn from the same colormap.
        palette = plt.cm.Dark2(np.roll(colour_grid, shift))
        ax.set_prop_cycle(cycler('color', palette))
        member_columns = [var + str(k) for k in range(n_members)]
        ax.plot_date(ts, ens_data[member_columns], '-', lw=0.5)
        ax.set_ylabel(label, size=8)
        ax.tick_params(axis='y', which='major', labelsize=8)
        # pyplot-level call: it addresses the current axes, not `ax`.
        plt.box(True)

    plt.tight_layout()
    bottom_axis = axes[-1]
    bottom_axis.xaxis.set_major_formatter(DateFormatter('%b %d'))
    bottom_axis.set_xlim(dt.datetime(2016, 1, 20, 0), period_end)
    fig.savefig('figures/first_articlefigs/weather_forecast_ensemble.pdf')
    return ens_data, axes
def production_model(): # figure 3
    """Fit the production model and draw it with its prediction intervals.

    A linear model without constant is fitted on the fit period
    (2015-12-17 01:00 to 2016-01-15 00:00) with the regressors
    'prod24h_before', 'Tout24hdiff', 'vWind24hdiff' and 'sunRad24hdiff'.
    The fitted coefficients are then applied to each of the 25 weather
    ensemble members; the spread of those member models (1.9599 times
    their standard deviation) is combined with quantiles of the fit
    residuals into the plotted bands for the validation period
    (2016-01-20 01:00 to 2016-02-05 00:00).

    Side effects: closes all open figures, saves the figure to
    'Q:/Projekter/Ens Article 1/figures/production_model.pdf', writes
    'combined_conf_int.npz' via ``np.savez`` and prints error statistics
    for the model and for the EO3 forecast.

    The print calls take a single argument so the function produces the
    same output under Python 2 and Python 3.

    Returns:
        tuple: ``(res, fit_data)`` -- the result of ``mlin_regression`` and
        the fit-period frame including the derived columns.
    """
    plt.close('all')
    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
    weathervars = ['Tout', 'vWind', 'sunRad']
    n_members = 25
    z95 = 1.9599  # factor on the ensemble std used for the "95%" bands
    one_day = dt.timedelta(days=1)

    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016,1,20,1), dt.datetime(2016,2,5,0))
    n_fit = len(ts1)

    def add_lagged_features(frame, ts):
        # Add the production 24h earlier and the 24h change of every
        # ensemble-mean weather variable to `frame`; returns the same frame.
        frame['prod24h_before'] = sq.fetch_production(ts[0] - one_day,
                                                      ts[-1] - one_day)
        for var in weathervars:
            mean_before = ens.load_ens_timeseries_as_df(
                ts_start=ts[0] - one_day,
                ts_end=ts[-1] - one_day,
                weathervars=[var]).mean(axis=1)
            frame[var + '24hdiff'] = frame[var] - mean_before
        return frame

    def load_member_features(ts, prod24h_before):
        # Load every ensemble member for `ts` and add, per member, the 24h
        # change of each weather variable plus the shared lagged production.
        members = ens.load_ens_timeseries_as_df(
            ts_start=ts[0], ts_end=ts[-1], weathervars=list(weathervars))
        members['prod24h_before'] = prod24h_before
        members_before = ens.load_ens_timeseries_as_df(
            ts_start=ts[0] - one_day,
            ts_end=ts[-1] - one_day,
            weathervars=list(weathervars))
        for i in range(n_members):
            for var in weathervars:
                key_raw = var + str(i)
                key_diff = var + '24hdiff' + str(i)
                members[key_diff] = members[key_raw] - members_before[key_raw]
        return members

    #load the data
    # NOTE(review): repack_ens_mean_as_df() is called without arguments for
    # the fit period; presumably its defaults match ts1 -- confirm.
    fit_data = add_lagged_features(ens.repack_ens_mean_as_df(), ts1)
    vali_data = add_lagged_features(
        ens.repack_ens_mean_as_df(ts2[0], ts2[-1]), ts2)

    # correct error in production:
    # Rows are addressed by position; this matches the original integer keys
    # as long as the frame carries a default 0-based index.
    # NOTE(review): the original averages element 116 with itself, so
    # new_val is simply that element; a neighbouring row was presumably
    # intended -- confirm before changing.
    reference = vali_data['prod'].iloc[116]
    new_val = (reference + reference)/2
    # Write through .loc on the frame itself: assigning via
    # frame['prod'][i] is chained indexing and may only change a copy.
    for pos in (116, 117):
        vali_data.loc[vali_data.index[pos], 'prod'] = new_val
    for pos in (116 + 24, 117 + 24):
        vali_data.loc[vali_data.index[pos], 'prod24h_before'] = new_val

    # do the fit
    X = fit_data[cols]
    y = fit_data['prod']
    res = mlin_regression(y, X, add_const=False)

    fig, [ax1, ax2] = plt.subplots(2, 1, sharex=True, figsize=(dcolwidth, 0.57*dcolwidth), gridspec_kw={'height_ratios':[4,1]})

    # load ensemble data
    ens_data1 = load_member_features(ts1, fit_data['prod24h_before'])
    ens_data2 = load_member_features(ts2, vali_data['prod24h_before'])

    all_ens_data = pd.concat([ens_data1, ens_data2])
    all_ts = ts1 + ts2

    # calculate production for each ensemble member
    ens_prods = np.zeros((len(all_ts), n_members))
    for i in range(n_members):
        ens_cols = ['Tout24hdiff' + str(i), 'vWind24hdiff' + str(i),
                    'sunRad24hdiff' + str(i), 'prod24h_before']
        # Same coefficients for every member, re-keyed to its columns.
        ens_params = pd.Series({'Tout24hdiff' + str(i):res.params['Tout24hdiff'],
                                'vWind24hdiff' + str(i):res.params['vWind24hdiff'],
                                'sunRad24hdiff' + str(i):res.params['sunRad24hdiff'],
                                'prod24h_before':res.params['prod24h_before']})
        ens_prods[:,i] = linear_map(all_ens_data, ens_params, ens_cols)

    # calculate combined confint
    ens_std = ens_prods.std(axis=1)
    ens_band = z95*ens_std
    vali_band = ens_band[n_fit:]
    # Model output on the validation frame; computed once and reused below.
    vali_model = linear_map(vali_data, res.params, cols)
    vali_resid = vali_model - vali_data['prod']

    fit_resid = res.resid
    # Shrink every fit residual towards zero by the ensemble band before
    # taking the quantiles that give the constant part of the interval.
    fit_resid_corrig = fit_resid - np.sign(fit_resid)*z95*ens_std[0:n_fit]
    conf_int_spread_lower = - fit_resid_corrig.quantile(0.025)
    conf_int_spread_higher = fit_resid_corrig.quantile(0.975)

    combined_conf_ints = conf_int_spread_lower + conf_int_spread_higher + 2*z95*ens_std
    all_prod_model = np.concatenate([res.fittedvalues, vali_model])
    combined_ub95 = all_prod_model + conf_int_spread_higher + ens_band
    combined_lb95 = all_prod_model - (conf_int_spread_lower + ens_band)

    # plot confint
    ax1.fill_between(all_ts[n_fit:], combined_lb95[n_fit:], combined_ub95[n_fit:], label='95% prediction intervals')
    ax1.fill_between(all_ts[n_fit:], all_prod_model[n_fit:] - vali_band, all_prod_model[n_fit:] + vali_band, facecolor='grey', label='Weather ensemble 95% conf. int.')

    # plot ensemble models
    ax1.plot_date(all_ts[n_fit:], ens_prods[n_fit:], '-', lw=0.5)

    ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='Historical production')
    ax1.plot_date(ts2, vali_model, '-', c=red, lw=2, label='Production model')
    ax1.set_ylabel('Production [MW]', size=8)
    ax1.tick_params(axis='both', which='major', labelsize=8)
    ax1.xaxis.set_major_formatter(DateFormatter('%b %d') )
    ax1.legend(loc=1, prop={'size':8})
    ax1.set_ylim([300,1100])

    # Lower panel: the band widths normalised by the widest upper bound.
    N = conf_int_spread_higher + z95*ens_std[n_fit:].max()
    ax2.fill_between(ts2, -(vali_band+conf_int_spread_lower)/N, -vali_band/N, alpha=0.5)
    ax2.fill_between(ts2, -vali_band/N, np.zeros(len(ts2)), facecolor='grey',alpha=0.5)
    ax2.fill_between(ts2, vali_band/N, facecolor='grey')
    ax2.fill_between(ts2, vali_band/N, (conf_int_spread_higher+vali_band)/N)
    ax2.set_ylabel('Prediction intervals \n[normalized]', size=8)
    ax2.tick_params(axis='y', which='major', labelsize=8)
    ax2.set_xlim(dt.datetime(2016,1,20,0), dt.datetime(2016,2,5,0))
    fig.tight_layout()
    print("Min_normalized pos conf bound. " + " " + str(np.min(vali_band/N+conf_int_spread_higher/N)))

    print("MAE = " + str(mae(vali_resid)))
    print("MAPE = " + str(mape(vali_resid, vali_data['prod'])))
    print("RMSE = " + str(rmse(vali_resid)))
    print("ME = " + str(np.mean(vali_resid)))

    print("MAE (fit) = " + str(mae(res.resid)))
    print("MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])))
    print("RMSE (fit)= " + str(rmse(res.resid)))
    print("ME (fit)= " + str(np.mean(res.resid)))

    print("Width of const blue bands (MW)" + " " + str(conf_int_spread_lower) + " " + str(conf_int_spread_higher))

    plt.savefig('Q:/Projekter/Ens Article 1/figures/production_model.pdf', dpi=400)

    # Same statistics for the EO3 midnight forecast, for comparison.
    EO3_fc1 = sq.fetch_EO3_midnight_forecast(ts1[0], ts1[-1])
    EO3_fc2 = sq.fetch_EO3_midnight_forecast(ts2[0], ts2[-1])
    EO3_err = EO3_fc2-vali_data['prod']
    EO3_err_fit = EO3_fc1-fit_data['prod']
    print("MAE (EO3) = " + str(mae(EO3_err)))
    print("MAPE (EO3) = " + str(mape(EO3_err, vali_data['prod'])))
    print("RMSE (EO3)= " + str(rmse(EO3_err)))
    print("ME (EO3)= " + str(np.mean(EO3_err)))

    print("MAE (EO3_fit) = " + str(mae(EO3_err_fit)))
    print("MAPE (EO3_fit) = " + str(mape(EO3_err_fit, fit_data['prod'])))
    print("RMSE (EO3_fit)= " + str(rmse(EO3_err_fit)))
    print("ME (EO3_fit)= " + str(np.mean(EO3_err_fit)))

    print(np.min(combined_conf_ints[n_fit:]/combined_conf_ints.max()))
    np.savez('combined_conf_int', combined_conf_int=(conf_int_spread_higher+ens_band), timesteps=all_ts)

    print("Corr coeff: vali " + " " + str(np.corrcoef(vali_data['prod'], vali_model)[0,1]))
    print("Corr coeff: vali EO3 " + " " + str(np.corrcoef(vali_data['prod'], EO3_fc2)[0,1]))
    print("Corr coeff: fit " + " " + str(np.corrcoef(fit_data['prod'],res.fittedvalues)[0,1]))
    print("Corr coeff: fit EO3 " + " " + str(np.corrcoef(fit_data['prod'], EO3_fc1)[0,1]))

    # Share of validation hours where production falls outside the band
    # (printed as a fraction of len(ts2)).
    n_above = len(np.where(vali_data['prod']>(conf_int_spread_higher+vali_band+vali_model))[0])
    print("% of actual production in vali period above upper" + " " + str(float(n_above)/len(ts2)))
    print("plus minus: " + " " + str(0.5/len(ts2)))

    n_below = len(np.where(vali_data['prod']<(vali_model-(conf_int_spread_lower+vali_band)))[0])
    print("% of actual production in vali period below lower" + " " + str(float(n_below)/len(ts2)))
    print("plus minus: " + " " + str(0.5/len(ts2)))

    return res, fit_data
def second_ens_prod_fig():
    """Plot the 24h-difference production model with ensemble uncertainty.

    This plot is based on a production model taking into account the
    production 24 hours before as well as the change in temperature,
    wind speed and solar radiation from 24 hours ago to now.

    The model is fitted (without constant) on 2015-12-17 01:00 to
    2016-01-15 00:00 and applied to 2016-01-20 01:00 to 2016-02-05 00:00,
    both on the ensemble mean and on each of the 25 ensemble members.

    Side effects: closes all open figures, creates several figures and
    seaborn joint plots, saves 'figures/ens_prod_models_v2.pdf',
    'figures/std_ens_prod_models.pdf' and 'figures/setpoint.pdf', and
    prints error statistics for the model and for the EO3 forecast.

    The print calls take a single argument so the function produces the
    same output under Python 2 and Python 3.

    Returns:
        tuple: ``(vali_data, fit_data, res, ens_std, vali_resid)`` -- the
        validation and fit frames, the result of ``mlin_regression``, the
        per-time-step standard deviation of the member models and the
        validation residual (model minus production).
    """
    plt.close('all')
    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
    weathervars = ['Tout', 'vWind', 'sunRad']
    n_members = 25
    z95 = 1.9599  # factor on the ensemble std used for the "95%" bands
    one_day = dt.timedelta(days=1)

    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016,1,20,1), dt.datetime(2016,2,5,0))
    n_fit = len(ts1)

    def add_lagged_features(frame, ts):
        # Add the production 24h earlier and the 24h change of every
        # ensemble-mean weather variable to `frame`; returns the same frame.
        frame['prod24h_before'] = sq.fetch_production(ts[0] - one_day,
                                                      ts[-1] - one_day)
        for var in weathervars:
            mean_before = ens.load_ens_timeseries_as_df(
                ts_start=ts[0] - one_day,
                ts_end=ts[-1] - one_day,
                weathervars=[var]).mean(axis=1)
            frame[var + '24hdiff'] = frame[var] - mean_before
        return frame

    def load_member_features(ts, prod24h_before):
        # Load every ensemble member for `ts` and add, per member, the 24h
        # change of each weather variable plus the shared lagged production.
        members = ens.load_ens_timeseries_as_df(
            ts_start=ts[0], ts_end=ts[-1], weathervars=list(weathervars))
        members['prod24h_before'] = prod24h_before
        members_before = ens.load_ens_timeseries_as_df(
            ts_start=ts[0] - one_day,
            ts_end=ts[-1] - one_day,
            weathervars=list(weathervars))
        for i in range(n_members):
            for var in weathervars:
                key_raw = var + str(i)
                key_diff = var + '24hdiff' + str(i)
                members[key_diff] = members[key_raw] - members_before[key_raw]
        return members

    #load the data
    # NOTE(review): repack_ens_mean_as_df() is called without arguments for
    # the fit period; presumably its defaults match ts1 -- confirm.
    fit_data = add_lagged_features(ens.repack_ens_mean_as_df(), ts1)
    vali_data = add_lagged_features(
        ens.repack_ens_mean_as_df(ts2[0], ts2[-1]), ts2)

    # correct error in production:
    # Rows are addressed by position; this matches the original integer keys
    # as long as the frame carries a default 0-based index.
    # NOTE(review): the original averages element 116 with itself, so
    # new_val is simply that element; a neighbouring row was presumably
    # intended -- confirm before changing.
    reference = vali_data['prod'].iloc[116]
    new_val = (reference + reference)/2
    # Write through .loc on the frame itself: assigning via
    # frame['prod'][i] is chained indexing and may only change a copy.
    for pos in (116, 117):
        vali_data.loc[vali_data.index[pos], 'prod'] = new_val
    for pos in (116 + 24, 117 + 24):
        vali_data.loc[vali_data.index[pos], 'prod24h_before'] = new_val

    # do the fit
    X = fit_data[cols]
    y = fit_data['prod']
    res = mlin_regression(y, X, add_const=False)

    fig, [ax1, ax2] = plt.subplots(2,1, figsize=(40,20))

    # load ensemble data
    ens_data1 = load_member_features(ts1, fit_data['prod24h_before'])
    ens_data2 = load_member_features(ts2, vali_data['prod24h_before'])

    all_ens_data = pd.concat([ens_data1, ens_data2])
    all_ts = ts1 + ts2

    # calculate production for each ensemble member
    ens_prods = np.zeros((len(all_ts), n_members))
    for i in range(n_members):
        ens_cols = ['Tout24hdiff' + str(i), 'vWind24hdiff' + str(i),
                    'sunRad24hdiff' + str(i), 'prod24h_before']
        # Same coefficients for every member, re-keyed to its columns.
        ens_params = pd.Series({'Tout24hdiff' + str(i):res.params['Tout24hdiff'],
                                'vWind24hdiff' + str(i):res.params['vWind24hdiff'],
                                'sunRad24hdiff' + str(i):res.params['sunRad24hdiff'],
                                'prod24h_before':res.params['prod24h_before']})
        ens_prods[:,i] = linear_map(all_ens_data, ens_params, ens_cols)

    # calculate combined confint
    ens_std = ens_prods.std(axis=1)
    ens_band = z95*ens_std
    # Model output on the validation frame; computed once and reused below.
    vali_model = linear_map(vali_data, res.params, cols)
    vali_resid = vali_model - vali_data['prod']
    # Shrink every validation residual towards zero by the ensemble band;
    # half the 5%-95% quantile range is the constant part of the interval.
    vali_resid_corrig = vali_resid - np.sign(vali_resid)*z95*ens_std[n_fit:]
    mean_conf_int_spread = (vali_resid_corrig.quantile(0.95) - vali_resid_corrig.quantile(0.05))/2

    combined_conf_int = mean_conf_int_spread + ens_band
    all_prod_model = np.concatenate([res.fittedvalues, vali_model])
    combined_ub95 = all_prod_model + combined_conf_int
    combined_lb95 = all_prod_model - combined_conf_int

    # plot confint
    ax1.fill_between(all_ts, combined_lb95, combined_ub95, label='Combined 95% conf. int.')
    ax1.fill_between(all_ts, all_prod_model - ens_band, all_prod_model + ens_band, facecolor='grey', label='Ensemble 95% conf. int.')

    # plot ensemble models
    ax1.plot_date(all_ts, ens_prods, '-', lw=0.5)

    ax1.plot_date(ts1, y, 'k-', lw=2, label='Actual production')
    ax1.plot_date(ts1, res.fittedvalues,'r-', lw=2, label='Model on ensemble mean')

    ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='')
    ax1.plot_date(ts2, vali_model, 'r-', lw=2)
    ax1.set_ylabel('[MW]')
    ax1.legend(loc=2)
    ax1.set_ylim([0,1100])

    ax2.plot_date(ts1, res.resid, '-', label='Residual, fitted data')
    ax2.plot_date(ts2, vali_resid, '-', label='Residual, validation data')
    ax2.set_ylabel('[MW]')
    ax2.legend(loc=2)
    ax2.set_ylim([-550, 550])
    print("MAE = " + str(mae(vali_resid)))
    print("MAPE = " + str(mape(vali_resid, vali_data['prod'])))
    print("RMSE = " + str(rmse(vali_resid)))
    print("ME = " + str(np.mean(vali_resid)))

    print("MAE (fit) = " + str(mae(res.resid)))
    print("MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])))
    print("RMSE (fit)= " + str(rmse(res.resid)))
    print("ME (fit)= " + str(np.mean(res.resid)))

    plt.savefig('figures/ens_prod_models_v2.pdf', dpi=600)
    plt.figure()
    plt.plot_date(all_ts, ens_std)
    plt.ylabel('Std. of ensemble production models [MW]')
    plt.savefig('figures/std_ens_prod_models.pdf', dpi=600)

    vali_ens_std = ens_std[n_fit:]
    sns.jointplot(x=pd.Series(vali_ens_std), y=np.abs(vali_resid))
    sns.jointplot(x=vali_data['prod'], y=pd.Series(vali_model))

    # Same comparison for the EO3 midnight forecast.
    EO3_fc1 = sq.fetch_EO3_midnight_forecast(ts1[0], ts1[-1])
    EO3_fc2 = sq.fetch_EO3_midnight_forecast(ts2[0], ts2[-1])
    plt.figure()
    plt.plot_date(ts1, fit_data['prod'], 'k-', label='Actual production')
    plt.plot_date(ts2, vali_data['prod'], 'k-')
    plt.plot_date(ts1, EO3_fc1, 'r-', label='EO3 forecast')
    plt.plot_date(ts2, EO3_fc2, 'r-')
    EO3_err = EO3_fc2-vali_data['prod']
    EO3_err_fit = EO3_fc1-fit_data['prod']
    print("MAE (EO3) = " + str(mae(EO3_err)))
    print("MAPE (EO3) = " + str(mape(EO3_err, vali_data['prod'])))
    print("RMSE (EO3)= " + str(rmse(EO3_err)))
    print("ME (EO3)= " + str(np.mean(EO3_err)))

    print("MAE (EO3_fit) = " + str(mae(EO3_err_fit)))
    print("MAPE (EO3_fit) = " + str(mape(EO3_err_fit, fit_data['prod'])))
    print("RMSE (EO3_fit)= " + str(rmse(EO3_err_fit)))
    print("ME (EO3_fit)= " + str(np.mean(EO3_err_fit)))

    sns.jointplot(x=pd.Series(vali_ens_std), y=np.abs(EO3_err))

    # Normalised uncertainty and the setpoint derived from it.
    normalized_conf_int = combined_conf_int/combined_conf_int.max()
    plt.figure(figsize=(20,10))
    plt.subplot(2,1,1)
    plt.plot_date(all_ts, normalized_conf_int, '-')
    plt.ylabel('Model + ensemble uncertainty \n [normalized]')
    plt.ylim(0,1)
    plt.subplot(2,1,2)
    plt.plot_date(all_ts, (1-0.2*combined_conf_int/combined_conf_int.max()), '-', label='Dynamic setpoint')
    plt.plot_date(all_ts, 0.8*np.ones(len(all_ts)), '--', label='Static setpoint')
    plt.ylabel('Setpoint for pump massflow \n temperature [fraction of max pump cap]')
    plt.legend()
    plt.ylim(.7,1)
    plt.savefig('figures/setpoint.pdf')

    return vali_data, fit_data, res, ens_std, vali_resid
def first_ens_prod_fig():
    """Fit the first production model and plot it with ensemble uncertainty.

    The model is a linear regression (with constant) of production on
    Tout, vWind and the production 24 hours before.  It is fitted on the
    data returned by ``ens.repack_ens_mean_as_df()`` and evaluated on the
    validation data for 2016-01-20 01:00 to 2016-01-28 00:00.  The fitted
    coefficients are then applied to each of the 25 ensemble members, and
    the spread between members is combined with the model's own prediction
    std into a 95% band.

    Side effects: closes all open figures, prints error metrics, writes
    'figures/ens_prod_models.pdf' and 'figures/std_ens_prod_models.pdf',
    and opens a seaborn joint plot of ensemble std against residuals.

    Returns:
        tuple: ``(res, all_ens_data, all_ts, fit_data['prod'],
        vali_data['prod'])``.
    """
    z95 = 1.9599     # normal quantile used for every 95% band in this figure
    n_members = 25   # number of ensemble members (columns <var>0 .. <var>24)

    plt.close('all')
    cols = ['Tout', 'vWind', 'prod24h_before']

    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                   dt.datetime(2016, 1, 15, 0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                   dt.datetime(2016, 1, 28, 0))
    all_ts = ts1 + ts2

    # Load the data; the lagged production is fetched one day earlier.
    fit_data = ens.repack_ens_mean_as_df()
    fit_data['prod24h_before'] = sq.fetch_production(
        dt.datetime(2015, 12, 16, 1), dt.datetime(2016, 1, 14, 0))

    vali_data = ens.repack_ens_mean_as_df(dt.datetime(2016, 1, 20, 1),
                                          dt.datetime(2016, 1, 28, 0))
    vali_data['prod24h_before'] = sq.fetch_production(
        dt.datetime(2016, 1, 19, 1), dt.datetime(2016, 1, 27, 0))

    # Fit on the fit period and evaluate the model once on the validation
    # period; the prediction is reused for the bands, the plot and the
    # residuals instead of being recomputed each time.
    y = fit_data['prod']
    res = mlin_regression(y, fit_data[cols], add_const=True)
    vali_pred = linear_map(vali_data, res.params, cols)
    vali_resid = vali_pred - vali_data['prod']

    fig, [ax1, ax2] = plt.subplots(2, 1, figsize=(40, 20))

    # Load the ensemble data for both periods.
    ens_data1 = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1])
    ens_data1['prod24h_before'] = fit_data['prod24h_before']
    ens_data2 = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1])
    ens_data2['prod24h_before'] = vali_data['prod24h_before']
    all_ens_data = pd.concat([ens_data1, ens_data2])

    # Production for each ensemble member: same coefficients as the fitted
    # model, applied to that member's own weather columns.
    ens_prods = np.zeros((len(all_ts), n_members))
    for i in range(n_members):
        tout_col = 'Tout' + str(i)
        wind_col = 'vWind' + str(i)
        ens_params = pd.Series({tout_col: res.params['Tout'],
                                wind_col: res.params['vWind'],
                                'const': res.params['const'],
                                'prod24h_before': res.params['prod24h_before']})
        ens_prods[:, i] = linear_map(all_ens_data, ens_params,
                                     [tout_col, wind_col, 'prod24h_before'])

    # Combined uncertainty.  For the validation period the model std is a
    # constant: the mean distance from the fitted values to the lower
    # interval bound over the fit period, scaled back by z95.
    prstd, iv_l, _ = wls_prediction_std(res)
    mean_conf_int_spread = np.mean(res.fittedvalues - iv_l)
    model_std = np.concatenate(
        [prstd, (1. / z95) * mean_conf_int_spread * np.ones(len(ts2))])
    ens_std = ens_prods.std(axis=1)
    combined_std = np.sqrt(model_std**2 + ens_std**2)
    all_prod_model = np.concatenate([res.fittedvalues, vali_pred])
    combined_ub95 = all_prod_model + z95 * combined_std
    combined_lb95 = all_prod_model - z95 * combined_std

    # Upper panel: bands first, then member models, then actual and model.
    ax1.fill_between(all_ts, combined_lb95, combined_ub95,
                     label='Combined 95% conf. int.')
    ax1.fill_between(all_ts,
                     all_prod_model - z95 * ens_std,
                     all_prod_model + z95 * ens_std,
                     facecolor='grey',
                     label='Ensemble 95% conf. int.')
    ax1.plot_date(all_ts, ens_prods, '-', lw=0.5)
    ax1.plot_date(ts1, y, 'k-', lw=2, label='Actual production')
    ax1.plot_date(ts1, res.fittedvalues, 'r-', lw=2,
                  label='Model on ensemble mean')
    ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='')
    ax1.plot_date(ts2, vali_pred, 'r-', lw=2)
    ax1.set_ylabel('[MW]')
    ax1.legend(loc=2)

    # Lower panel: residuals for both periods.
    ax2.plot_date(ts1, res.resid, '-', label='Residual, fitted data')
    ax2.plot_date(ts2, vali_resid, '-', label='Residual, validation data')
    ax2.set_ylabel('[MW]')
    ax2.legend(loc=2)

    # Single-argument print(...) produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    print("MAE = " + str(mae(vali_resid)))
    print("MAPE = " + str(mape(vali_resid, vali_data['prod'])))
    print("RMSE = " + str(rmse(vali_resid)))
    print("ME = " + str(np.mean(vali_resid)))

    print("MAE (fit) = " + str(mae(res.resid)))
    print("MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])))
    print("RMSE (fit)= " + str(rmse(res.resid)))
    print("ME (fit)= " + str(np.mean(res.resid)))

    # Save through the figure handle so the output does not depend on
    # which figure pyplot currently considers active.
    fig.savefig('figures/ens_prod_models.pdf', dpi=600)

    plt.figure()
    plt.plot_date(all_ts, ens_std)
    plt.ylabel('Std. of ensemble production models [MW]')
    plt.savefig('figures/std_ens_prod_models.pdf', dpi=600)

    sns.jointplot(x=ens_std, y=np.concatenate([res.resid, vali_resid]))

    return res, all_ens_data, all_ts, fit_data['prod'], vali_data['prod']
def main(argv):
    """Fit and evaluate the 24h-difference consumption model for a station.

    Consumption is modelled from the consumption 24 hours earlier and the
    24-hour change in the ensemble-mean Tout, vWind and sunRad.  The model
    is fitted on the fit period only and then applied unchanged to the
    validation and test periods.  Three error values (rmse, mae, mape) are
    printed per period and actual versus modelled consumption is plotted.

    Args:
        argv: Sequence whose first element is the station name, which must
            be a key of ``PI_T_sup_dict``.  If no station is supplied,
            'holme' is used.

    Returns:
        None.  Returns early, after printing a message, when the station
        is unknown.
    """
    plt.close('all')

    # Only a missing argument falls back to the default station; any other
    # error (for instance a missing lookup table) is allowed to surface
    # instead of being reported as "No station provided".
    try:
        station = argv[0]
    except (IndexError, TypeError):
        print("No station provided. Defaults to holme.")
        station = 'holme'
    else:
        if station not in PI_T_sup_dict:
            print("Wrong station, use rundhoej, holme or hoerning")
            return

    # Single-argument print(...) behaves identically under the Python 2
    # print statement and the Python 3 print function.
    print(station)

    fit_ts = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                      dt.datetime(2016, 1, 15, 0))
    vali_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                       dt.datetime(2016, 2, 5, 0))
    test_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1),
                                       dt.datetime(2016, 4, 1, 0))
    all_ts = fit_ts + vali_ts + test_ts

    weathervars = ['Tout', 'vWind', 'sunRad', 'hum']
    cons_key = sq.consumption_place_key_dict[station]
    day_back = dt.timedelta(days=-1)

    def ens_mean(start, end, var):
        # Mean over the ensemble member columns of one weather variable.
        return ens.load_ens_timeseries_as_df(ts_start=start,
                                             ts_end=end,
                                             weathervars=[var]).mean(axis=1)

    def load_period(ts):
        # Build the model frame for one period.  Column order (lagged
        # consumption, consumption, then one 24h difference per weather
        # variable) matches what the three hand-written copies produced.
        first, last = ts[0], ts[-1]
        data = pd.DataFrame()
        data['cons24h_before'] = sq.fetch_consumption(
            cons_key, first + day_back, last + day_back)
        data['cons'] = sq.fetch_consumption(cons_key, first, last)
        for var in weathervars:
            data['%s24hdiff' % var] = (
                ens_mean(first, last, var)
                - ens_mean(first + day_back, last + day_back, var))
        return data

    fit_data = load_period(fit_ts)
    vali_data = load_period(vali_ts)
    test_data = load_period(test_ts)
    all_data = pd.concat([fit_data, vali_data, test_data])

    columns = ['cons24h_before', 'Tout24hdiff', 'vWind24hdiff',
               'sunRad24hdiff']
    fit_y = fit_data['cons']
    res = mlin_regression(fit_y, fit_data[columns], add_const=False)

    # The '%s' formatting reproduces the space-separated output of the
    # original multi-argument print statements byte for byte.
    fiterr = res.fittedvalues - fit_y
    print("Errors fit period:  %s %s %s"
          % (rmse(fiterr), mae(fiterr), mape(fiterr, fit_y)))

    vali_pred = linear_map(vali_data, res.params, columns)
    valierr = vali_pred - vali_data['cons']
    print("Errors validation period:  %s %s %s"
          % (rmse(valierr), mae(valierr), mape(valierr, vali_data['cons'])))

    test_pred = linear_map(test_data, res.params, columns)
    testerr = test_pred - test_data['cons']
    print("Errors test period:  %s %s %s"
          % (rmse(testerr), mae(testerr), mape(testerr, test_data['cons'])))

    # Actual consumption in black, fitted/predicted consumption in red.
    plt.figure()
    plt.plot_date(all_ts, all_data['cons'], 'k-')
    plt.plot_date(all_ts,
                  np.concatenate([res.fittedvalues, vali_pred, test_pred]),
                  'r-')
# Example #11
# 0
def load_cons_model_dfs(df):
    """Build the fit, validation and test frames for the consumption model.

    Args:
        df: Data frame holding the already calculated consumption in a
            'cons' column.  It is sliced by datetime label, so it is
            presumably indexed by hourly timestamps that also cover the
            day before each period (verify against the caller).

    Returns:
        tuple: ``(fit_data, vali_data, test_data, all_data)``.  Each frame
        has the columns 'cons', 'cons24hbefore' and '<var>24hdiff' for
        Tout, vWind, sunRad and hum, and is indexed by the timesteps of
        its period.  ``all_data`` is the three frames concatenated.
    """
    day_back = dt.timedelta(days=-1)
    weathervars = ['Tout', 'vWind', 'sunRad', 'hum']

    fit_ts = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                      dt.datetime(2016, 1, 15, 0))
    vali_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                       dt.datetime(2016, 2, 5, 0))
    test_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1),
                                       dt.datetime(2016, 3, 1, 0))
    periods = [fit_ts, vali_ts, test_ts]
    frames = [pd.DataFrame() for _ in periods]

    def cons_between(start, end):
        # .loc is the label-based replacement for the removed .ix indexer.
        # The values are cast to a plain array so the column is assigned
        # by position instead of being aligned on df's index.
        return np.array(df.loc[start:end]['cons'])

    def ens_mean(start, end, var):
        # Mean over the ensemble member columns of one weather variable.
        return ens.load_ens_timeseries_as_df(ts_start=start,
                                             ts_end=end,
                                             weathervars=[var]).mean(axis=1)

    for frame, ts in zip(frames, periods):
        frame['cons'] = cons_between(ts[0], ts[-1])
    for frame, ts in zip(frames, periods):
        frame['cons24hbefore'] = cons_between(ts[0] + day_back,
                                              ts[-1] + day_back)

    # 24-hour change of each ensemble-mean weather variable.
    for var in weathervars:
        for frame, ts in zip(frames, periods):
            frame['%s24hdiff' % var] = (
                ens_mean(ts[0], ts[-1], var)
                - ens_mean(ts[0] + day_back, ts[-1] + day_back, var))

    # The timestamp index is attached last, after all positional
    # assignments above are done.
    for frame, ts in zip(frames, periods):
        frame.set_index(pd.DatetimeIndex(ts), inplace=True)

    fit_data, vali_data, test_data = frames
    all_data = pd.concat([fit_data, vali_data, test_data])

    return fit_data, vali_data, test_data, all_data
from statsmodels.sandbox.regression.predstd import wls_prediction_std
import numpy as np
import matplotlib.pyplot as plt

import pandas as pd


# Script fragment: extend the ensemble-mean data frame with the lagged
# production, interaction terms, 24-hour differences and standardised
# ('Z'-prefixed) copies of every column.
all_data = ens.repack_ens_mean_as_df()

# Hour of day (0-23) for 696 consecutive hourly steps, starting at hour 1.
hours = [np.mod(h, 24) for h in range(1,697)]

# Production fetched for a window that starts on 2015-12-16 01:00.
all_data['prod24h_before'] = sq.fetch_production(dt.datetime(2015,12,16,1), dt.datetime(2016,1,14,0))
all_data['(Tout-17)*vWind*hum'] = all_data['(Tout-17)*vWind']*all_data['hum']
# NOTE(review): the source key '(Toutavg-17)*vWindavg24' lacks the '24'
# after 'Toutavg' that the target column name uses - confirm this column
# really exists under that name.
all_data['(Toutavg24-17)*vWindavg24*humavg24'] = all_data['(Toutavg-17)*vWindavg24']*all_data['humavg24']
# NOTE: this np.roll based difference wraps the last 24 rows around to the
# front; it is overwritten further down by the difference against the
# separately loaded ensemble mean.
all_data['Tout24hdiff'] = all_data['Tout'] - np.roll(all_data['Tout'], 24)
# Ensemble means of each weather variable for the window 2015-12-16 01:00
# to 2016-01-14 00:00, used below as the "24 hours before" values.
Tout24h_before = ens.load_ens_timeseries_as_df(ts_start=dt.datetime(2015,12,16,1),\
                         ts_end=dt.datetime(2016,1,14,0), weathervars=['Tout']).mean(axis=1)
vWind24h_before = ens.load_ens_timeseries_as_df(ts_start=dt.datetime(2015,12,16,1),\
                         ts_end=dt.datetime(2016,1,14,0), weathervars=['vWind']).mean(axis=1)
sunRad24h_before = ens.load_ens_timeseries_as_df(ts_start=dt.datetime(2015,12,16,1),\
                         ts_end=dt.datetime(2016,1,14,0), weathervars=['sunRad']).mean(axis=1)
hum24h_before = ens.load_ens_timeseries_as_df(ts_start=dt.datetime(2015,12,16,1),\
                         ts_end=dt.datetime(2016,1,14,0), weathervars=['hum']).mean(axis=1)
                         
# 24-hour differences; 'sunRadavg2424hdiff' is the only one still built
# with np.roll and therefore wraps around at the start of the series.
all_data['Tout24hdiff'] = all_data['Tout'] - Tout24h_before
all_data['vWind24hdiff'] = all_data['vWind'] - vWind24h_before
all_data['sunRad24hdiff'] = all_data['sunRad'] - sunRad24h_before
all_data['sunRadavg2424hdiff'] = all_data['sunRadavg24'] - np.roll(all_data['sunRadavg24'],24)
all_data['hum24hdiff'] = all_data['hum'] - hum24h_before

# Add a standardised copy of each column: (x - mean) / std.
for c in all_data.columns:
    all_data['Z' + c] = (all_data[c]-all_data[c].mean())/all_data[c].std()
# Script fragment: fill the per-period frames with the production and the
# production shifted one day back.
# NOTE(review): fit_data, fit_ts, vali_ts, test_ts and weathervars are not
# defined in the visible part of this script - confirm they are set up
# before this point.
vali_data = pd.DataFrame()
test_data = pd.DataFrame()

# Production for each period, fetched with both window ends moved one day
# earlier.
fit_data['prod24h_before'] = sq.fetch_production(
    fit_ts[0] + dt.timedelta(days=-1), fit_ts[-1] + dt.timedelta(days=-1))
vali_data['prod24h_before'] = sq.fetch_production(
    vali_ts[0] + dt.timedelta(days=-1), vali_ts[-1] + dt.timedelta(days=-1))
test_data['prod24h_before'] = sq.fetch_production(
    test_ts[0] + dt.timedelta(days=-1), test_ts[-1] + dt.timedelta(days=-1))

# Production for the period itself.
fit_data['prod'] = sq.fetch_production(fit_ts[0], fit_ts[-1])
vali_data['prod'] = sq.fetch_production(vali_ts[0], vali_ts[-1])
test_data['prod'] = sq.fetch_production(test_ts[0], test_ts[-1])
for v in weathervars:
    fit_data['%s24hdiff'%v] = ens.load_ens_timeseries_as_df(\
                                ts_start=fit_ts[0],\
                                ts_end=fit_ts[-1], \
                                weathervars=[v]).mean(axis=1) \
                              - ens.load_ens_timeseries_as_df(\
                                ts_start=fit_ts[0]+dt.timedelta(days=-1),\
                                ts_end=fit_ts[-1]+dt.timedelta(days=-1), \
                                weathervars=[v]).mean(axis=1)
    vali_data['%s24hdiff'%v] = ens.load_ens_timeseries_as_df(\
                                ts_start=vali_ts[0],\
                                ts_end=vali_ts[-1], \
                                weathervars=[v]).mean(axis=1) \
                              - ens.load_ens_timeseries_as_df(\
                                ts_start=vali_ts[0]+dt.timedelta(days=-1),\
                                ts_end=vali_ts[-1]+dt.timedelta(days=-1), \
                                weathervars=[v]).mean(axis=1)
    test_data['%s24hdiff'%v] = ens.load_ens_timeseries_as_df(\
                                ts_start=test_ts[0],\
def main(argv):
    """Fit and evaluate the 24h-difference consumption model for a station.

    Consumption is modelled from the consumption 24 hours earlier and the
    24-hour change in the ensemble-mean Tout, vWind and sunRad.  The model
    is fitted on the fit period only and then applied unchanged to the
    validation and test periods.  Three error values (rmse, mae, mape) are
    printed per period and actual versus modelled consumption is plotted.

    Args:
        argv: Sequence whose first element is the station name, which must
            be a key of ``PI_T_sup_dict``.  If no station is supplied,
            'holme' is used.

    Returns:
        None.  Returns early, after printing a message, when the station
        is unknown.
    """
    plt.close('all')

    # Only a missing argument falls back to the default station; any other
    # error (for instance a missing lookup table) is allowed to surface
    # instead of being reported as "No station provided".
    try:
        station = argv[0]
    except (IndexError, TypeError):
        print("No station provided. Defaults to holme.")
        station = 'holme'
    else:
        if station not in PI_T_sup_dict:
            print("Wrong station, use rundhoej, holme or hoerning")
            return

    # Single-argument print(...) behaves identically under the Python 2
    # print statement and the Python 3 print function.
    print(station)

    fit_ts = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                      dt.datetime(2016, 1, 15, 0))
    vali_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                       dt.datetime(2016, 2, 5, 0))
    test_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1),
                                       dt.datetime(2016, 4, 1, 0))
    all_ts = fit_ts + vali_ts + test_ts

    weathervars = ['Tout', 'vWind', 'sunRad', 'hum']
    cons_key = sq.consumption_place_key_dict[station]
    day_back = dt.timedelta(days=-1)

    def ens_mean(start, end, var):
        # Mean over the ensemble member columns of one weather variable.
        return ens.load_ens_timeseries_as_df(ts_start=start,
                                             ts_end=end,
                                             weathervars=[var]).mean(axis=1)

    def load_period(ts):
        # Build the model frame for one period.  Column order (lagged
        # consumption, consumption, then one 24h difference per weather
        # variable) matches what the three hand-written copies produced.
        first, last = ts[0], ts[-1]
        data = pd.DataFrame()
        data['cons24h_before'] = sq.fetch_consumption(
            cons_key, first + day_back, last + day_back)
        data['cons'] = sq.fetch_consumption(cons_key, first, last)
        for var in weathervars:
            data['%s24hdiff' % var] = (
                ens_mean(first, last, var)
                - ens_mean(first + day_back, last + day_back, var))
        return data

    fit_data = load_period(fit_ts)
    vali_data = load_period(vali_ts)
    test_data = load_period(test_ts)
    all_data = pd.concat([fit_data, vali_data, test_data])

    columns = ['cons24h_before', 'Tout24hdiff', 'vWind24hdiff',
               'sunRad24hdiff']
    fit_y = fit_data['cons']
    res = mlin_regression(fit_y, fit_data[columns], add_const=False)

    # The '%s' formatting reproduces the space-separated output of the
    # original multi-argument print statements byte for byte.
    fiterr = res.fittedvalues - fit_y
    print("Errors fit period:  %s %s %s"
          % (rmse(fiterr), mae(fiterr), mape(fiterr, fit_y)))

    vali_pred = linear_map(vali_data, res.params, columns)
    valierr = vali_pred - vali_data['cons']
    print("Errors validation period:  %s %s %s"
          % (rmse(valierr), mae(valierr), mape(valierr, vali_data['cons'])))

    test_pred = linear_map(test_data, res.params, columns)
    testerr = test_pred - test_data['cons']
    print("Errors test period:  %s %s %s"
          % (rmse(testerr), mae(testerr), mape(testerr, test_data['cons'])))

    # Actual consumption in black, fitted/predicted consumption in red.
    plt.figure()
    plt.plot_date(all_ts, all_data['cons'], 'k-')
    plt.plot_date(all_ts,
                  np.concatenate([res.fittedvalues, vali_pred, test_pred]),
                  'r-')
def first_ens_prod_fig():
    """Fit the first production model and plot it with ensemble uncertainty.

    The model is a linear regression (with constant) of production on
    Tout, vWind and the production 24 hours before.  It is fitted on the
    data returned by ``ens.repack_ens_mean_as_df()`` and evaluated on the
    validation data for 2016-01-20 01:00 to 2016-01-28 00:00.  The fitted
    coefficients are then applied to each of the 25 ensemble members, and
    the spread between members is combined with the model's own prediction
    std into a 95% band.

    Side effects: closes all open figures, prints error metrics, writes
    'figures/ens_prod_models.pdf' and 'figures/std_ens_prod_models.pdf',
    and opens a seaborn joint plot of ensemble std against residuals.

    Returns:
        tuple: ``(res, all_ens_data, all_ts, fit_data['prod'],
        vali_data['prod'])``.
    """
    z95 = 1.9599     # normal quantile used for every 95% band in this figure
    n_members = 25   # number of ensemble members (columns <var>0 .. <var>24)

    plt.close('all')
    cols = ['Tout', 'vWind', 'prod24h_before']

    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                   dt.datetime(2016, 1, 15, 0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                   dt.datetime(2016, 1, 28, 0))
    all_ts = ts1 + ts2

    # Load the data; the lagged production is fetched one day earlier.
    fit_data = ens.repack_ens_mean_as_df()
    fit_data['prod24h_before'] = sq.fetch_production(
        dt.datetime(2015, 12, 16, 1), dt.datetime(2016, 1, 14, 0))

    vali_data = ens.repack_ens_mean_as_df(dt.datetime(2016, 1, 20, 1),
                                          dt.datetime(2016, 1, 28, 0))
    vali_data['prod24h_before'] = sq.fetch_production(
        dt.datetime(2016, 1, 19, 1), dt.datetime(2016, 1, 27, 0))

    # Fit on the fit period and evaluate the model once on the validation
    # period; the prediction is reused for the bands, the plot and the
    # residuals instead of being recomputed each time.
    y = fit_data['prod']
    res = mlin_regression(y, fit_data[cols], add_const=True)
    vali_pred = linear_map(vali_data, res.params, cols)
    vali_resid = vali_pred - vali_data['prod']

    fig, [ax1, ax2] = plt.subplots(2, 1, figsize=(40, 20))

    # Load the ensemble data for both periods.
    ens_data1 = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1])
    ens_data1['prod24h_before'] = fit_data['prod24h_before']
    ens_data2 = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1])
    ens_data2['prod24h_before'] = vali_data['prod24h_before']
    all_ens_data = pd.concat([ens_data1, ens_data2])

    # Production for each ensemble member: same coefficients as the fitted
    # model, applied to that member's own weather columns.
    ens_prods = np.zeros((len(all_ts), n_members))
    for i in range(n_members):
        tout_col = 'Tout' + str(i)
        wind_col = 'vWind' + str(i)
        ens_params = pd.Series({tout_col: res.params['Tout'],
                                wind_col: res.params['vWind'],
                                'const': res.params['const'],
                                'prod24h_before': res.params['prod24h_before']})
        ens_prods[:, i] = linear_map(all_ens_data, ens_params,
                                     [tout_col, wind_col, 'prod24h_before'])

    # Combined uncertainty.  For the validation period the model std is a
    # constant: the mean distance from the fitted values to the lower
    # interval bound over the fit period, scaled back by z95.
    prstd, iv_l, _ = wls_prediction_std(res)
    mean_conf_int_spread = np.mean(res.fittedvalues - iv_l)
    model_std = np.concatenate(
        [prstd, (1. / z95) * mean_conf_int_spread * np.ones(len(ts2))])
    ens_std = ens_prods.std(axis=1)
    combined_std = np.sqrt(model_std**2 + ens_std**2)
    all_prod_model = np.concatenate([res.fittedvalues, vali_pred])
    combined_ub95 = all_prod_model + z95 * combined_std
    combined_lb95 = all_prod_model - z95 * combined_std

    # Upper panel: bands first, then member models, then actual and model.
    ax1.fill_between(all_ts, combined_lb95, combined_ub95,
                     label='Combined 95% conf. int.')
    ax1.fill_between(all_ts,
                     all_prod_model - z95 * ens_std,
                     all_prod_model + z95 * ens_std,
                     facecolor='grey',
                     label='Ensemble 95% conf. int.')
    ax1.plot_date(all_ts, ens_prods, '-', lw=0.5)
    ax1.plot_date(ts1, y, 'k-', lw=2, label='Actual production')
    ax1.plot_date(ts1, res.fittedvalues, 'r-', lw=2,
                  label='Model on ensemble mean')
    ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='')
    ax1.plot_date(ts2, vali_pred, 'r-', lw=2)
    ax1.set_ylabel('[MW]')
    ax1.legend(loc=2)

    # Lower panel: residuals for both periods.
    ax2.plot_date(ts1, res.resid, '-', label='Residual, fitted data')
    ax2.plot_date(ts2, vali_resid, '-', label='Residual, validation data')
    ax2.set_ylabel('[MW]')
    ax2.legend(loc=2)

    # Single-argument print(...) produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    print("MAE = " + str(mae(vali_resid)))
    print("MAPE = " + str(mape(vali_resid, vali_data['prod'])))
    print("RMSE = " + str(rmse(vali_resid)))
    print("ME = " + str(np.mean(vali_resid)))

    print("MAE (fit) = " + str(mae(res.resid)))
    print("MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])))
    print("RMSE (fit)= " + str(rmse(res.resid)))
    print("ME (fit)= " + str(np.mean(res.resid)))

    # Save through the figure handle so the output does not depend on
    # which figure pyplot currently considers active.
    fig.savefig('figures/ens_prod_models.pdf', dpi=600)

    plt.figure()
    plt.plot_date(all_ts, ens_std)
    plt.ylabel('Std. of ensemble production models [MW]')
    plt.savefig('figures/std_ens_prod_models.pdf', dpi=600)

    sns.jointplot(x=ens_std, y=np.concatenate([res.resid, vali_resid]))

    return res, all_ens_data, all_ts, fit_data['prod'], vali_data['prod']
# Example #16
# 0
def load_cons_model_ens_dfs(df):
    """Build one consumption-model frame per ensemble member.

    Args:
        df: Data frame holding the consumption in a 'cons' column.  It is
            sliced by datetime label, so it is presumably indexed by
            hourly timestamps that also cover the day before each period
            (verify against the caller).

    Returns:
        list: 25 data frames, one per ensemble member.  Each is the fit,
        validation and test period concatenated, indexed by timestep, with
        the columns 'cons', 'cons24hbefore' and '<var>24hdiff' for Tout,
        vWind, sunRad and hum, where the differences come from that
        member's own weather columns.
    """
    n_members = 25
    day_back = dt.timedelta(days=-1)
    weathervars = ['Tout', 'vWind', 'sunRad', 'hum']

    fit_ts = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                      dt.datetime(2016, 1, 15, 0))
    vali_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                       dt.datetime(2016, 2, 5, 0))
    test_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1),
                                       dt.datetime(2016, 3, 1, 0))
    periods = [fit_ts, vali_ts, test_ts]

    def cons_between(start, end):
        # .loc is the label-based replacement for the removed .ix indexer.
        # The values are cast to a plain array so the column is assigned
        # by position instead of being aligned on df's index.
        return np.array(df.loc[start:end]['cons'])

    def ens_diff(ts, var):
        # Per-member 24-hour change of one weather variable.
        now = ens.load_ens_timeseries_as_df(ts_start=ts[0],
                                            ts_end=ts[-1],
                                            weathervars=[var])
        before = ens.load_ens_timeseries_as_df(ts_start=ts[0] + day_back,
                                               ts_end=ts[-1] + day_back,
                                               weathervars=[var])
        return now - before

    # The consumption does not depend on the ensemble member, so each
    # period is sliced once and copied into every member frame.
    cons_now = [cons_between(ts[0], ts[-1]) for ts in periods]
    cons_before = [cons_between(ts[0] + day_back, ts[-1] + day_back)
                   for ts in periods]

    # member_frames[p][i] is the frame of member i for period p.
    member_frames = [[pd.DataFrame() for _ in range(n_members)]
                     for _ in periods]
    for p, frames in enumerate(member_frames):
        for frame in frames:
            frame['cons'] = cons_now[p].copy()
            frame['cons24hbefore'] = cons_before[p].copy()

    for var in weathervars:
        for ts, frames in zip(periods, member_frames):
            diff = ens_diff(ts, var)
            for i, frame in enumerate(frames):
                frame['%s24hdiff' % var] = diff[var + str(i)]

    # Attach the timestamp index last, then stitch the three periods
    # together for each member.
    all_data = []
    for i in range(n_members):
        parts = []
        for ts, frames in zip(periods, member_frames):
            frames[i].set_index(pd.DatetimeIndex(ts), inplace=True)
            parts.append(frames[i])
        all_data.append(pd.concat(parts))

    return all_data
def second_ens_prod_fig():
    """Fit and plot the 24-hour-difference production model with ensemble spread.

    The model is a linear regression (fitted without a constant term) of the
    production on the production 24 hours before and on the change in
    outside temperature, wind speed and solar radiation from 24 hours ago to
    now.  It is fitted on ensemble-mean data for the first period (ts1) and
    evaluated on the second period (ts2).  The fitted coefficients are then
    applied to each of the 25 weather ensemble members, and the standard
    deviation across the members is combined with the validation residuals
    into a confidence band.

    Side effects:
        Closes all open matplotlib figures, creates several new ones, saves
        'figures/ens_prod_models_v2.pdf', 'figures/std_ens_prod_models.pdf'
        and 'figures/setpoint.pdf', and prints error metrics for the model
        and for the EO3 midnight forecast.

    Returns:
        tuple: (vali_data, fit_data, res, ens_std, vali_resid), where res is
        the object returned by mlin_regression and ens_std is the standard
        deviation across ensemble-member productions for every time step of
        both periods.
    """

    plt.close('all')
    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
    weathervars = ['Tout', 'vWind', 'sunRad']
    n_members = 25
    # Multiplier used for every 95% band below (approximately the 97.5%
    # quantile of the standard normal distribution).
    z95 = 1.9599
    day_back = dt.timedelta(days=-1)

    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                   dt.datetime(2016, 1, 15, 0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                   dt.datetime(2016, 2, 5, 0))
    n_fit = len(ts1)

    def _add_24h_features(data, ts):
        """Add 'prod24h_before' and the '<var>24hdiff' columns to data."""
        start, end = ts[0] + day_back, ts[-1] + day_back
        data['prod24h_before'] = sq.fetch_production(start, end)
        for v in weathervars:
            mean_before = ens.load_ens_timeseries_as_df(
                ts_start=start, ts_end=end, weathervars=[v]).mean(axis=1)
            data[v + '24hdiff'] = data[v] - mean_before

    def _load_ens_with_diffs(ts, prod24h_before):
        """Load every ensemble member for ts and add its 24h differences."""
        data = ens.load_ens_timeseries_as_df(ts_start=ts[0],
                                             ts_end=ts[-1],
                                             weathervars=list(weathervars))
        data['prod24h_before'] = prod24h_before
        before = ens.load_ens_timeseries_as_df(ts_start=ts[0] + day_back,
                                               ts_end=ts[-1] + day_back,
                                               weathervars=list(weathervars))
        for i in range(n_members):
            for v in weathervars:
                key_raw = v + str(i)
                data[v + '24hdiff' + str(i)] = data[key_raw] - before[key_raw]
        return data

    # load the data
    # NOTE(review): repack_ens_mean_as_df() is called with its defaults here;
    # presumably they correspond to the ts1 period -- confirm.
    fit_data = ens.repack_ens_mean_as_df()
    _add_24h_features(fit_data, ts1)

    vali_data = ens.repack_ens_mean_as_df(ts2[0], ts2[-1])
    _add_24h_features(vali_data, ts2)

    # correct error in production:
    # Single .loc assignments are used instead of chained indexing
    # (vali_data['prod'][116] = ...), which may write to a temporary copy.
    # Rows are addressed by position; this coincides with the label for a
    # default integer index (assumed here -- TODO confirm).
    # NOTE(review): the value below averages sample 116 with itself, so the
    # "corrected" value is simply the original sample 116.  Averaging with a
    # neighbouring sample may have been intended; behaviour is kept as is.
    rows = vali_data.index
    new_val = (vali_data.loc[rows[116], 'prod'] +
               vali_data.loc[rows[116], 'prod']) / 2
    vali_data.loc[rows[116], 'prod'] = new_val
    vali_data.loc[rows[117], 'prod'] = new_val
    vali_data.loc[rows[116 + 24], 'prod24h_before'] = new_val
    vali_data.loc[rows[117 + 24], 'prod24h_before'] = new_val

    # do the fit
    X = fit_data[cols]
    y = fit_data['prod']
    res = mlin_regression(y, X, add_const=False)

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(40, 20))

    # load ensemble data
    ens_data1 = _load_ens_with_diffs(ts1, fit_data['prod24h_before'])
    ens_data2 = _load_ens_with_diffs(ts2, vali_data['prod24h_before'])

    all_ens_data = pd.concat([ens_data1, ens_data2])
    all_ts = ts1 + ts2

    # calculate production for each ensemble member: the coefficients of the
    # ensemble-mean fit are re-labelled with the member's column names.
    base_names = [v + '24hdiff' for v in weathervars] + ['prod24h_before']
    coefs = [res.params[name] for name in base_names]
    ens_prods = np.zeros((len(all_ts), n_members))
    for i in range(n_members):
        ens_cols = [v + '24hdiff' + str(i) for v in weathervars]
        ens_cols.append('prod24h_before')
        ens_params = pd.Series(coefs, index=ens_cols)
        ens_prods[:, i] = linear_map(all_ens_data, ens_params, ens_cols)

    # calculate combined confint
    ens_std = ens_prods.std(axis=1)
    vali_ens_std = ens_std[n_fit:]
    vali_model = linear_map(vali_data, res.params, cols)
    vali_resid = vali_model - vali_data['prod']
    # Shrink each residual towards zero by the ensemble band before taking
    # the residual quantiles.
    vali_resid_corrig = vali_resid - np.sign(vali_resid) * z95 * vali_ens_std
    mean_conf_int_spread = (vali_resid_corrig.quantile(0.95) -
                            vali_resid_corrig.quantile(0.05)) / 2

    ens_band = z95 * ens_std
    combined_conf_int = mean_conf_int_spread + ens_band
    all_prod_model = np.concatenate([res.fittedvalues, vali_model])
    combined_ub95 = all_prod_model + combined_conf_int
    combined_lb95 = all_prod_model - combined_conf_int

    # plot confint
    ax1.fill_between(all_ts,
                     combined_lb95,
                     combined_ub95,
                     label='Combined 95% conf. int.')
    ax1.fill_between(all_ts,
                     all_prod_model - ens_band,
                     all_prod_model + ens_band,
                     facecolor='grey',
                     label='Ensemble 95% conf. int.')

    # plot ensemble models
    ax1.plot_date(all_ts, ens_prods, '-', lw=0.5)

    ax1.plot_date(ts1, y, 'k-', lw=2, label='Actual production')
    ax1.plot_date(ts1,
                  res.fittedvalues,
                  'r-',
                  lw=2,
                  label='Model on ensemble mean')

    ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='')
    ax1.plot_date(ts2, vali_model, 'r-', lw=2)
    ax1.set_ylabel('[MW]')
    ax1.legend(loc=2)
    ax1.set_ylim([0, 1100])

    ax2.plot_date(ts1, res.resid, '-', label='Residual, fitted data')
    ax2.plot_date(ts2, vali_resid, '-', label='Residual, validation data')
    ax2.set_ylabel('[MW]')
    ax2.legend(loc=2)
    ax2.set_ylim([-550, 550])

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("MAE = " + str(mae(vali_resid)))
    print("MAPE = " + str(mape(vali_resid, vali_data['prod'])))
    print("RMSE = " + str(rmse(vali_resid)))
    print("ME = " + str(np.mean(vali_resid)))

    print("MAE (fit) = " + str(mae(res.resid)))
    print("MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])))
    print("RMSE (fit)= " + str(rmse(res.resid)))
    print("ME (fit)= " + str(np.mean(res.resid)))

    fig.savefig('figures/ens_prod_models_v2.pdf', dpi=600)
    plt.figure()
    plt.plot_date(all_ts, ens_std)
    plt.ylabel('Std. of ensemble production models [MW]')
    plt.savefig('figures/std_ens_prod_models.pdf', dpi=600)

    sns.jointplot(x=pd.Series(vali_ens_std), y=np.abs(vali_resid))
    sns.jointplot(x=vali_data['prod'], y=pd.Series(vali_model))

    # Compare against the EO3 midnight forecast for both periods.
    EO3_fc1 = sq.fetch_EO3_midnight_forecast(ts1[0], ts1[-1])
    EO3_fc2 = sq.fetch_EO3_midnight_forecast(ts2[0], ts2[-1])
    plt.figure()
    plt.plot_date(ts1, fit_data['prod'], 'k-', label='Actual production')
    plt.plot_date(ts2, vali_data['prod'], 'k-')
    plt.plot_date(ts1, EO3_fc1, 'r-', label='EO3 forecast')
    plt.plot_date(ts2, EO3_fc2, 'r-')
    EO3_err = EO3_fc2 - vali_data['prod']
    EO3_err_fit = EO3_fc1 - fit_data['prod']
    print("MAE (EO3) = " + str(mae(EO3_err)))
    print("MAPE (EO3) = " + str(mape(EO3_err, vali_data['prod'])))
    print("RMSE (EO3)= " + str(rmse(EO3_err)))
    print("ME (EO3)= " + str(np.mean(EO3_err)))

    print("MAE (EO3_fit) = " + str(mae(EO3_err_fit)))
    print("MAPE (EO3_fit) = " + str(mape(EO3_err_fit, fit_data['prod'])))
    print("RMSE (EO3_fit)= " + str(rmse(EO3_err_fit)))
    print("ME (EO3_fit)= " + str(np.mean(EO3_err_fit)))

    sns.jointplot(x=pd.Series(vali_ens_std), y=np.abs(EO3_err))

    # Normalised uncertainty and the pump setpoint derived from it.
    conf_int_max = combined_conf_int.max()
    plt.figure(figsize=(20, 10))
    plt.subplot(2, 1, 1)
    plt.plot_date(all_ts, combined_conf_int / conf_int_max, '-')
    plt.ylabel('Model + ensemble uncertainty \n [normalized]')
    plt.ylim(0, 1)
    plt.subplot(2, 1, 2)
    plt.plot_date(all_ts, (1 - 0.2 * combined_conf_int / conf_int_max),
                  '-',
                  label='Dynamic setpoint')
    plt.plot_date(all_ts,
                  0.8 * np.ones(len(all_ts)),
                  '--',
                  label='Static setpoint')
    plt.ylabel(
        'Setpoint for pump massflow \n temperature [fraction of max pump cap]')
    plt.legend()
    plt.ylim(.7, 1)
    plt.savefig('figures/setpoint.pdf')

    return vali_data, fit_data, res, ens_std, vali_resid
# Weather variables for which 24-hour differences are computed in the loop
# that follows this section.
weathervars=['Tout', 'vWind', 'sunRad', 'hum']

# One (initially empty) frame per data split; columns are added below.
fit_data = pd.DataFrame()
vali_data = pd.DataFrame()            
test_data = pd.DataFrame()
                
# Production over each split's time range shifted one day back.
# fit_ts, vali_ts and test_ts are defined outside this section.
fit_data['prod24h_before'] = sq.fetch_production(fit_ts[0]+dt.timedelta(days=-1), fit_ts[-1]+dt.timedelta(days=-1))
vali_data['prod24h_before'] = sq.fetch_production(vali_ts[0]+dt.timedelta(days=-1), vali_ts[-1]+dt.timedelta(days=-1))
test_data['prod24h_before'] = sq.fetch_production(test_ts[0]+dt.timedelta(days=-1), test_ts[-1]+dt.timedelta(days=-1))

# Production over each split's own time range.
fit_data['prod'] = sq.fetch_production(fit_ts[0], fit_ts[-1])
vali_data['prod'] = sq.fetch_production(vali_ts[0], vali_ts[-1])
test_data['prod'] = sq.fetch_production(test_ts[0], test_ts[-1])
for v in weathervars:
    fit_data['%s24hdiff'%v] = ens.load_ens_timeseries_as_df(\
                                ts_start=fit_ts[0],\
                                ts_end=fit_ts[-1], \
                                weathervars=[v]).mean(axis=1) \
                              - ens.load_ens_timeseries_as_df(\
                                ts_start=fit_ts[0]+dt.timedelta(days=-1),\
                                ts_end=fit_ts[-1]+dt.timedelta(days=-1), \
                                weathervars=[v]).mean(axis=1)
    vali_data['%s24hdiff'%v] = ens.load_ens_timeseries_as_df(\
                                ts_start=vali_ts[0],\
                                ts_end=vali_ts[-1], \
                                weathervars=[v]).mean(axis=1) \
                              - ens.load_ens_timeseries_as_df(\
                                ts_start=vali_ts[0]+dt.timedelta(days=-1),\
                                ts_end=vali_ts[-1]+dt.timedelta(days=-1), \
                                weathervars=[v]).mean(axis=1)
    test_data['%s24hdiff'%v] = ens.load_ens_timeseries_as_df(\
                                ts_start=test_ts[0],\
def production_model():  # figure 3
    """Fit the 24-hour-difference production model and plot figure 3.

    A linear regression (without a constant term) of the production on the
    production 24 hours before and on the 24-hour change in outside
    temperature, wind speed and solar radiation is fitted on ensemble-mean
    data for the first period (ts1) and evaluated on the second period
    (ts2).  The fitted coefficients are applied to each of the 25 weather
    ensemble members; the spread across members, together with quantiles of
    the fit residuals, gives the prediction intervals that are plotted for
    the second period.

    Side effects:
        Closes all open matplotlib figures, saves the figure to
        'Q:/Projekter/Ens Article 1/figures/production_model.pdf', stores
        the upper combined interval and the time steps with np.savez under
        the name 'combined_conf_int', and prints error metrics, correlation
        coefficients and interval coverage for the model and for the EO3
        midnight forecast.

    Returns:
        tuple: (res, fit_data), where res is the object returned by
        mlin_regression and fit_data is the frame the model was fitted on.
    """

    plt.close('all')
    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
    weathervars = ['Tout', 'vWind', 'sunRad']
    n_members = 25
    # Multiplier used for every 95% band below (approximately the 97.5%
    # quantile of the standard normal distribution).
    z95 = 1.9599
    day_back = dt.timedelta(days=-1)

    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                   dt.datetime(2016, 1, 15, 0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                   dt.datetime(2016, 2, 5, 0))
    n_fit = len(ts1)

    def _report(*items):
        """Print items separated by single spaces (Python 2 and 3 alike)."""
        print(" ".join(str(item) for item in items))

    def _add_24h_features(data, ts):
        """Add 'prod24h_before' and the '<var>24hdiff' columns to data."""
        start, end = ts[0] + day_back, ts[-1] + day_back
        data['prod24h_before'] = sq.fetch_production(start, end)
        for v in weathervars:
            mean_before = ens.load_ens_timeseries_as_df(
                ts_start=start, ts_end=end, weathervars=[v]).mean(axis=1)
            data[v + '24hdiff'] = data[v] - mean_before

    def _load_ens_with_diffs(ts, prod24h_before):
        """Load every ensemble member for ts and add its 24h differences."""
        data = ens.load_ens_timeseries_as_df(ts_start=ts[0],
                                             ts_end=ts[-1],
                                             weathervars=list(weathervars))
        data['prod24h_before'] = prod24h_before
        before = ens.load_ens_timeseries_as_df(ts_start=ts[0] + day_back,
                                               ts_end=ts[-1] + day_back,
                                               weathervars=list(weathervars))
        for i in range(n_members):
            for v in weathervars:
                key_raw = v + str(i)
                data[v + '24hdiff' + str(i)] = data[key_raw] - before[key_raw]
        return data

    # load the data
    # NOTE(review): repack_ens_mean_as_df() is called with its defaults here;
    # presumably they correspond to the ts1 period -- confirm.
    fit_data = ens.repack_ens_mean_as_df()
    _add_24h_features(fit_data, ts1)

    vali_data = ens.repack_ens_mean_as_df(ts2[0], ts2[-1])
    _add_24h_features(vali_data, ts2)

    # correct error in production:
    # Single .loc assignments are used instead of chained indexing
    # (vali_data['prod'][116] = ...), which may write to a temporary copy.
    # Rows are addressed by position; this coincides with the label for a
    # default integer index (assumed here -- TODO confirm).
    # NOTE(review): the value below averages sample 116 with itself, so the
    # "corrected" value is simply the original sample 116.  Averaging with a
    # neighbouring sample may have been intended; behaviour is kept as is.
    rows = vali_data.index
    new_val = (vali_data.loc[rows[116], 'prod'] +
               vali_data.loc[rows[116], 'prod']) / 2
    vali_data.loc[rows[116], 'prod'] = new_val
    vali_data.loc[rows[117], 'prod'] = new_val
    vali_data.loc[rows[116 + 24], 'prod24h_before'] = new_val
    vali_data.loc[rows[117 + 24], 'prod24h_before'] = new_val

    # do the fit
    X = fit_data[cols]
    y = fit_data['prod']
    res = mlin_regression(y, X, add_const=False)

    fig, (ax1, ax2) = plt.subplots(2,
                                   1,
                                   sharex=True,
                                   figsize=(dcolwidth, 0.57 * dcolwidth),
                                   gridspec_kw={'height_ratios': [4, 1]})

    # load ensemble data
    ens_data1 = _load_ens_with_diffs(ts1, fit_data['prod24h_before'])
    ens_data2 = _load_ens_with_diffs(ts2, vali_data['prod24h_before'])

    all_ens_data = pd.concat([ens_data1, ens_data2])
    all_ts = ts1 + ts2

    # calculate production for each ensemble member: the coefficients of the
    # ensemble-mean fit are re-labelled with the member's column names.
    base_names = [v + '24hdiff' for v in weathervars] + ['prod24h_before']
    coefs = [res.params[name] for name in base_names]
    ens_prods = np.zeros((len(all_ts), n_members))
    for i in range(n_members):
        ens_cols = [v + '24hdiff' + str(i) for v in weathervars]
        ens_cols.append('prod24h_before')
        ens_params = pd.Series(coefs, index=ens_cols)
        ens_prods[:, i] = linear_map(all_ens_data, ens_params, ens_cols)

    # calculate combined confint
    ens_std = ens_prods.std(axis=1)
    vali_band = z95 * ens_std[n_fit:]
    vali_model = linear_map(vali_data, res.params, cols)
    vali_resid = vali_model - vali_data['prod']

    # Constant part of the intervals: quantiles of the fit residuals after
    # shrinking each residual towards zero by the ensemble band.
    fit_resid = res.resid
    fit_resid_corrig = fit_resid - np.sign(fit_resid) * z95 * ens_std[0:n_fit]
    conf_int_spread_lower = -fit_resid_corrig.quantile(0.025)
    conf_int_spread_higher = fit_resid_corrig.quantile(0.975)

    combined_conf_ints = (conf_int_spread_lower + conf_int_spread_higher +
                          2 * z95 * ens_std)
    all_prod_model = np.concatenate([res.fittedvalues, vali_model])
    combined_ub95 = all_prod_model + conf_int_spread_higher + z95 * ens_std
    combined_lb95 = all_prod_model - (conf_int_spread_lower + z95 * ens_std)

    # plot confint (second period only)
    ax1.fill_between(all_ts[n_fit:],
                     combined_lb95[n_fit:],
                     combined_ub95[n_fit:],
                     label='95% prediction intervals')
    ax1.fill_between(all_ts[n_fit:],
                     all_prod_model[n_fit:] - vali_band,
                     all_prod_model[n_fit:] + vali_band,
                     facecolor='grey',
                     label='Weather ensemble 95% conf. int.')

    # plot ensemble models
    ax1.plot_date(all_ts[n_fit:], ens_prods[n_fit:], '-', lw=0.5)

    ax1.plot_date(ts2,
                  vali_data['prod'],
                  'k-',
                  lw=2,
                  label='Historical production')
    ax1.plot_date(ts2,
                  vali_model,
                  '-',
                  c=red,
                  lw=2,
                  label='Production model')
    ax1.set_ylabel('Production [MW]', size=8)
    ax1.tick_params(axis='both', which='major', labelsize=8)
    ax1.xaxis.set_major_formatter(DateFormatter('%b %d'))
    ax1.legend(loc=1, prop={'size': 8})
    ax1.set_ylim([300, 1100])

    # Lower panel: the interval half-widths, normalised by the largest upper
    # half-width N in the second period.
    N = conf_int_spread_higher + z95 * ens_std[n_fit:].max()
    ax2.fill_between(ts2,
                     -(vali_band + conf_int_spread_lower) / N,
                     -vali_band / N,
                     alpha=0.5)
    ax2.fill_between(ts2,
                     -vali_band / N,
                     np.zeros(len(ts2)),
                     facecolor='grey',
                     alpha=0.5)
    ax2.fill_between(ts2, vali_band / N, facecolor='grey')
    ax2.fill_between(ts2, vali_band / N,
                     (conf_int_spread_higher + vali_band) / N)
    ax2.set_ylabel('Prediction intervals \n[normalized]', size=8)
    ax2.tick_params(axis='y', which='major', labelsize=8)
    ax2.set_xlim(dt.datetime(2016, 1, 20, 0), dt.datetime(2016, 2, 5, 0))
    fig.tight_layout()
    _report("Min_normalized pos conf bound. ",
            np.min(vali_band / N + conf_int_spread_higher / N))

    print("MAE = " + str(mae(vali_resid)))
    print("MAPE = " + str(mape(vali_resid, vali_data['prod'])))
    print("RMSE = " + str(rmse(vali_resid)))
    print("ME = " + str(np.mean(vali_resid)))

    print("MAE (fit) = " + str(mae(res.resid)))
    print("MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])))
    print("RMSE (fit)= " + str(rmse(res.resid)))
    print("ME (fit)= " + str(np.mean(res.resid)))

    _report("Width of const blue bands (MW)", conf_int_spread_lower,
            conf_int_spread_higher)

    # NOTE: absolute, machine-specific output path.
    fig.savefig('Q:/Projekter/Ens Article 1/figures/production_model.pdf',
                dpi=400)

    # Compare against the EO3 midnight forecast for both periods.
    EO3_fc1 = sq.fetch_EO3_midnight_forecast(ts1[0], ts1[-1])
    EO3_fc2 = sq.fetch_EO3_midnight_forecast(ts2[0], ts2[-1])
    EO3_err = EO3_fc2 - vali_data['prod']
    EO3_err_fit = EO3_fc1 - fit_data['prod']
    print("MAE (EO3) = " + str(mae(EO3_err)))
    print("MAPE (EO3) = " + str(mape(EO3_err, vali_data['prod'])))
    print("RMSE (EO3)= " + str(rmse(EO3_err)))
    print("ME (EO3)= " + str(np.mean(EO3_err)))

    print("MAE (EO3_fit) = " + str(mae(EO3_err_fit)))
    print("MAPE (EO3_fit) = " + str(mape(EO3_err_fit, fit_data['prod'])))
    print("RMSE (EO3_fit)= " + str(rmse(EO3_err_fit)))
    print("ME (EO3_fit)= " + str(np.mean(EO3_err_fit)))

    print(np.min(combined_conf_ints[n_fit:] / combined_conf_ints.max()))
    np.savez('combined_conf_int',
             combined_conf_int=(conf_int_spread_higher + z95 * ens_std),
             timesteps=all_ts)

    _report("Corr coeff: vali ",
            np.corrcoef(vali_data['prod'], vali_model)[0, 1])
    _report("Corr coeff: vali EO3 ",
            np.corrcoef(vali_data['prod'], EO3_fc2)[0, 1])
    _report("Corr coeff: fit ",
            np.corrcoef(fit_data['prod'], res.fittedvalues)[0, 1])
    _report("Corr coeff: fit EO3 ",
            np.corrcoef(fit_data['prod'], EO3_fc1)[0, 1])

    # Coverage of the prediction intervals in the second period.
    # NOTE: despite the "%" in the labels, the printed values are fractions
    # of the ts2 time steps (count / len(ts2)), not percentages.
    above_upper = vali_data['prod'] > (conf_int_spread_higher + vali_band +
                                       vali_model)
    _report("% of actual production in vali period above upper",
            float(len(np.where(above_upper)[0])) / len(ts2))
    _report("plus minus: ", 0.5 / len(ts2))

    below_lower = vali_data['prod'] < (vali_model -
                                       (conf_int_spread_lower + vali_band))
    _report("% of actual production in vali period below lower",
            float(len(np.where(below_lower)[0])) / len(ts2))
    _report("plus minus: ", 0.5 / len(ts2))

    return res, fit_data
import numpy as np
import matplotlib.pyplot as plt

import pandas as pd

# Build the regressor table: ensemble-mean data plus interaction terms,
# the production 24 hours before and 24-hour differences.
all_data = ens.repack_ens_mean_as_df()

# Hour-of-day value (0-23) for hourly steps 1..696.
hours = [np.mod(h, 24) for h in range(1, 697)]

all_data['prod24h_before'] = sq.fetch_production(dt.datetime(2015, 12, 16, 1),
                                                 dt.datetime(2016, 1, 14, 0))
all_data['(Tout-17)*vWind*hum'] = all_data['(Tout-17)*vWind'] * all_data['hum']
# NOTE(review): the source column is spelled '(Toutavg-17)*vWindavg24'
# (no "24" after Toutavg) while the new column uses 'Toutavg24' -- confirm
# this matches the column name delivered by repack_ens_mean_as_df().
all_data['(Toutavg24-17)*vWindavg24*humavg24'] = all_data[
    '(Toutavg-17)*vWindavg24'] * all_data['humavg24']

# Ensemble means over the period 2015-12-16 01:00 .. 2016-01-14 00:00, used
# as the "24 hours before" values.  (An earlier np.roll-based assignment of
# 'Tout24hdiff' was removed here: it was overwritten below before being
# read.)
Tout24h_before = ens.load_ens_timeseries_as_df(
    ts_start=dt.datetime(2015, 12, 16, 1),
    ts_end=dt.datetime(2016, 1, 14, 0),
    weathervars=['Tout']).mean(axis=1)
vWind24h_before = ens.load_ens_timeseries_as_df(
    ts_start=dt.datetime(2015, 12, 16, 1),
    ts_end=dt.datetime(2016, 1, 14, 0),
    weathervars=['vWind']).mean(axis=1)
sunRad24h_before = ens.load_ens_timeseries_as_df(
    ts_start=dt.datetime(2015, 12, 16, 1),
    ts_end=dt.datetime(2016, 1, 14, 0),
    weathervars=['sunRad']).mean(axis=1)
hum24h_before = ens.load_ens_timeseries_as_df(
    ts_start=dt.datetime(2015, 12, 16, 1),
    ts_end=dt.datetime(2016, 1, 14, 0),
    weathervars=['hum']).mean(axis=1)

all_data['Tout24hdiff'] = all_data['Tout'] - Tout24h_before
all_data['vWind24hdiff'] = all_data['vWind'] - vWind24h_before
all_data['sunRad24hdiff'] = all_data['sunRad'] - sunRad24h_before
# NOTE: np.roll wraps around, so the first 24 entries of this difference
# are taken against the last 24 entries of the series.
all_data['sunRadavg2424hdiff'] = all_data['sunRadavg24'] - np.roll(
    all_data['sunRadavg24'], 24)
all_data['hum24hdiff'] = all_data['hum'] - hum24h_before

for c in all_data.columns: