def load_cons_model_ens_dfs(df):
    """Build one consumption-model DataFrame per weather ensemble member.

    For each of three hard-coded periods (fit, validation, test) and each of
    the 25 ensemble members, a frame is assembled with the columns ``cons``,
    ``cons24hbefore`` and ``<var>24hdiff`` for every variable in ``Tout``,
    ``vWind``, ``sunRad`` and ``hum``.  Each period frame is indexed by its
    hourly time steps, and the three frames of a member are concatenated.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a ``cons`` column and an index that can be label-sliced
        with datetimes, including the 24 hours before each period.

    Returns
    -------
    list of pandas.DataFrame
        25 frames (one per ensemble member), each holding the fit,
        validation and test rows in that order.
    """
    n_members = 25  # ensemble columns are addressed as '<var>0' .. '<var>24'
    one_day = dt.timedelta(days=1)
    weathervars = ['Tout', 'vWind', 'sunRad', 'hum']
    periods = [
        # fit
        ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                 dt.datetime(2016, 1, 15, 0)),
        # validation
        ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                 dt.datetime(2016, 2, 5, 0)),
        # test
        ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1),
                                 dt.datetime(2016, 3, 1, 0)),
    ]

    frames_per_period = []
    for ts in periods:
        start, end = ts[0], ts[-1]

        # The consumption columns are identical for every ensemble member,
        # so slice them once per period.  np.array() strips the index so the
        # values are assigned positionally instead of being index-aligned.
        cons = np.array(df.loc[start:end]['cons'])
        cons_before = np.array(df.loc[start - one_day:end - one_day]['cons'])

        members = []
        for _ in range(n_members):
            member = pd.DataFrame()
            member['cons'] = cons
            member['cons24hbefore'] = cons_before
            members.append(member)

        for v in weathervars:
            # NOTE(review): this subtraction (and the column assignment
            # below) aligns on the index of the frames returned by
            # ens.load_ens_timeseries_as_df; presumably that is a positional
            # index rather than timestamps - confirm.
            diff = ens.load_ens_timeseries_as_df(
                ts_start=start, ts_end=end, weathervars=[v]) \
                - ens.load_ens_timeseries_as_df(
                    ts_start=start - one_day, ts_end=end - one_day,
                    weathervars=[v])
            for i, member in enumerate(members):
                member['%s24hdiff' % v] = diff[v + str(i)]

        for member in members:
            member.set_index(pd.DatetimeIndex(ts), inplace=True)
        frames_per_period.append(members)

    return [pd.concat([members[i] for members in frames_per_period])
            for i in range(n_members)]
def load_cons_model_dfs(df):
    """Build fit, validation and test frames for the consumption model.

    Takes the data frame with the already calculated consumptions.  For each
    of three hard-coded periods a frame is built with the columns ``cons``,
    ``cons24hbefore`` and ``<var>24hdiff`` (ensemble mean minus the ensemble
    mean 24 hours earlier) for ``Tout``, ``vWind``, ``sunRad`` and ``hum``.
    Each frame is indexed by the hourly time steps of its period.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a ``cons`` column and an index that can be label-sliced
        with datetimes, including the 24 hours before each period.

    Returns
    -------
    tuple
        ``(fit_data, vali_data, test_data, all_data)`` where ``all_data`` is
        the concatenation of the first three.
    """
    one_day = dt.timedelta(days=1)
    weathervars = ['Tout', 'vWind', 'sunRad', 'hum']
    periods = [
        # fit
        ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                 dt.datetime(2016, 1, 15, 0)),
        # validation
        ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                 dt.datetime(2016, 2, 5, 0)),
        # test
        ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1),
                                 dt.datetime(2016, 3, 1, 0)),
    ]

    frames = []
    for ts in periods:
        start, end = ts[0], ts[-1]
        frame = pd.DataFrame()

        # np.array() strips the index so the values are assigned positionally
        # instead of being aligned on df's index.
        frame['cons'] = np.array(df.loc[start:end]['cons'])
        frame['cons24hbefore'] = np.array(
            df.loc[start - one_day:end - one_day]['cons'])

        for v in weathervars:
            # NOTE(review): the subtraction and the column assignment align
            # on the index returned by ens.load_ens_timeseries_as_df;
            # presumably that is positional rather than timestamps - confirm.
            current = ens.load_ens_timeseries_as_df(
                ts_start=start, ts_end=end, weathervars=[v]).mean(axis=1)
            day_before = ens.load_ens_timeseries_as_df(
                ts_start=start - one_day, ts_end=end - one_day,
                weathervars=[v]).mean(axis=1)
            frame['%s24hdiff' % v] = current - day_before

        frame.set_index(pd.DatetimeIndex(ts), inplace=True)
        frames.append(frame)

    fit_data, vali_data, test_data = frames
    all_data = pd.concat([fit_data, vali_data, test_data])

    return fit_data, vali_data, test_data, all_data
def plot_best_model():
    """Fit and plot the Tout/Toutavg24/vWind/vWindavg24 regression model.

    Regresses the module-level ``y`` on four columns of the module-level
    ``all_data`` via ``mlin_regression``, plots actual values, fitted values,
    the prediction interval and the fitted values corrected by the mean
    residual of each hour-of-day slot, then prints error metrics and the
    regression summary.

    Returns
    -------
    The regression result object returned by ``mlin_regression``.
    """
    plt.close('all')
    columns = ['Tout', 'Toutavg24', 'vWind', 'vWindavg24']
    X = all_data[columns]
    res = mlin_regression(y, X)
    timesteps = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                         dt.datetime(2016, 1, 15, 0))

    # Mean residual for each of the 24 hourly slots, repeated once per day
    # in the window (derived from the window length instead of hard-coded).
    mean_day_resid = [res.resid[i::24].mean() for i in range(24)]
    n_days = len(timesteps) // 24
    mean_resid_series = np.tile(mean_day_resid, n_days)
    with_profile = res.fittedvalues + mean_resid_series

    plt.subplot(2, 1, 1)
    plt.plot_date(timesteps, y, 'b', label='Actual prodution')
    plt.plot_date(timesteps, res.fittedvalues, 'r', label='Weather model')
    _, iv_l, iv_u = wls_prediction_std(res)
    plt.plot_date(timesteps, iv_u, 'r--', label='95% conf. int.')
    plt.plot_date(timesteps, iv_l, 'r--')
    plt.plot_date(timesteps, with_profile, 'g',
                  label='Weather model + avg daily profile')
    plt.ylabel('MW')
    plt.legend(loc=2)

    plt.subplot(2, 1, 2)
    plt.plot_date(timesteps, res.resid, '-', label='Residual')
    plt.plot_date(timesteps, mean_resid_series)
    plt.ylabel('MW')
    plt.legend()

    # Local names chosen so they do not shadow the module-level mape()/mae()
    # helpers used by other functions in this file.
    mape_with_profile = np.mean(np.abs((with_profile - y) / y))
    mape_plain = np.mean(np.abs((res.resid) / y))
    mae_with_profile = np.mean(np.abs(with_profile - y))

    print("%s %s %s" % (mape_with_profile, mape_plain, mae_with_profile))

    # The summary was previously computed and thrown away; print it.
    print(res.summary())
    return res
def fetch_BrabrandSydWeather(weathervar, from_time, to_time):
    """Fetch an hourly weather series from the Brabrand Syd weather station.

    ``weathervar`` is a key of ``BBSyd_pi_dict``; ``from_time`` and
    ``to_time`` are the first and last time step (datetime objects).
    The values are returned as a float array.  An AssertionError is raised
    when the returned time stamps differ from the expected hourly steps.
    Note that this data has not been validated!

    """
    connection = connect()
    pi_number = BBSyd_pi_dict[weathervar]
    query_template = """USE [DM_VLP]
                    SELECT 
                       [TimeStamp],
                       [Value],
                       [Beskrivelse]
                    FROM [dbo].[Meteorologi]
                        WHERE PInr=%s 
                            AND TimeStamp BETWEEN '%s' AND  '%s'
                        ORDER BY TimeStamp"""
    sql_query = query_template % (pi_number, str(from_time), str(to_time))

    rows = extractdata(connection, sql_query)
    # each row is (timestamp, value, description); the description is unused
    timestamps, values, _description = zip(*rows)

    expected_steps = ens.gen_hourly_timesteps(from_time, to_time)
    assert list(timestamps) == expected_steps, "Timesteps are not hour by hour"

    return np.array(values, dtype=float)
def weather_forecast_ensemble():  # figure 2
    """Plot the 25-member forecast ensemble for three weather variables.

    Draws outside temperature, wind speed and solar irradiance in three
    stacked panels sharing the x axis, saves the figure as a PDF and returns
    ``(ens_data, axes)``.
    """
    plt.close('all')
    variables = ['Tout', 'vWind', 'sunRad']
    color_shifts = (15, 23, 6)  # per-panel rotation of the colour cycle

    ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                  dt.datetime(2016, 2, 5, 0))
    ens_data = ens.load_ens_timeseries_as_df(ts_start=ts[0],
                                             ts_end=ts[-1],
                                             weathervars=list(variables))

    fig, axes = plt.subplots(3, 1, sharex=True,
                             figsize=(colwidth, 1.65 * colwidth))
    plt.xticks(size=5)

    ylabels = [
        u'Outside temperature [%sC]' % uni_degree,
        'Wind speed [m/s]',
        u'Solar irradiance [W/m%s]' % uni_squared,
    ]

    for panel, var, shift, label in zip(axes, variables, color_shifts,
                                        ylabels):
        positions = np.roll(np.linspace(0, 1, 25), shift)
        panel.set_prop_cycle(cycler('color', plt.cm.Dark2(positions)))

        member_columns = [var + str(member) for member in range(25)]
        panel.plot_date(ts, ens_data[member_columns], '-', lw=0.5)
        panel.set_ylabel(label, size=8)
        panel.tick_params(axis='y', which='major', labelsize=8)
        plt.box(True)

    plt.tight_layout()
    bottom_axis = axes[-1]
    bottom_axis.xaxis.set_major_formatter(DateFormatter('%b %d'))
    bottom_axis.set_xlim(dt.datetime(2016, 1, 20, 0),
                         dt.datetime(2016, 2, 5, 0))
    fig.savefig('figures/first_articlefigs/weather_forecast_ensemble.pdf')
    return ens_data, axes
def try_prod24h_before(
        columns=['Tout', 'vWind', 'vWindavg24', 'prod24h_before'],
        add_const=False,
        y=y):
    """Fit a regression of ``y`` on the given ``all_data`` columns and plot it.

    The upper panel shows actual values, fitted values and the prediction
    interval; the lower panel shows the residual.  MAE, MAPE, RMSE and the
    regression summary are printed.  Returns the regression result.
    """
    plt.close('all')
    res = mlin_regression(y, all_data[columns], add_const=add_const)
    timesteps = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                         dt.datetime(2016, 1, 15, 0))

    # upper panel: data, fit and prediction interval
    plt.subplot(2, 1, 1)
    for series, fmt, label in ((y, 'b', 'Actual prodution'),
                               (res.fittedvalues, 'r', 'Weather model')):
        plt.plot_date(timesteps, series, fmt, label=label)
    _, lower_bound, upper_bound = wls_prediction_std(res)
    plt.plot_date(timesteps, upper_bound, 'r--', label='95% conf. int.')
    plt.plot_date(timesteps, lower_bound, 'r--')
    plt.ylabel('MW')
    plt.legend(loc=2)

    # lower panel: residual
    plt.subplot(2, 1, 2)
    plt.plot_date(timesteps, res.resid, '-', label='Residual')
    plt.ylabel('MW')
    plt.legend()

    print("MAE = " + str(mae(res.resid)))
    print("MAPE = " + str(mape(res.resid, y)))
    print("RMSE = " + str(rmse(res.resid)))

    print(res.summary())

    return res
def validate_ToutToutavg24vWindvWindavg24_model():
    """Apply stored regression parameters to a validation week and plot it.

    Loads ``daily_profile.npy`` and ``lin_reg_fit_params.pkl``, maps the
    ensemble-mean validation data through ``linear_map`` and plots the
    actual ``prod`` column, the weather model and the weather model plus the
    daily profile.  Returns the validation data frame.
    """
    plt.close('all')

    ts_start = dt.datetime(2016, 1, 19, 1)
    ts_end = dt.datetime(2016, 1, 26, 0)
    model_columns = ['Tout', 'Toutavg24', 'vWind', 'vWindavg24']

    daily_profile = np.load('daily_profile.npy')
    params = pd.read_pickle('lin_reg_fit_params.pkl')
    validation_data = ens.repack_ens_mean_as_df(ts_start, ts_end)

    weather_model = linear_map(validation_data, params, model_columns)
    timesteps = ens.gen_hourly_timesteps(ts_start, ts_end)

    plt.plot_date(timesteps, validation_data['prod'], 'b-')
    plt.plot_date(timesteps, weather_model, 'r-')

    weather_model_wdailyprofile = []
    for step, model_value in zip(timesteps, weather_model):
        print(step.hour)
        # hour h is looked up at profile slot h-1 (hour 0 wraps to slot 23)
        profile_slot = np.mod(step.hour - 1, 24)
        weather_model_wdailyprofile.append(
            model_value + daily_profile[profile_slot])

    plt.plot_date(timesteps, weather_model_wdailyprofile, 'g-')

    return validation_data
def try_prod24h_before(columns=['Tout', 'vWind', 'vWindavg24', 'prod24h_before'], add_const=False, y=y):
    """Regress ``y`` on selected ``all_data`` columns, plot and report the fit.

    Plots actual values, fitted values and the prediction interval in the
    upper panel and the residual in the lower panel, then prints MAE, MAPE,
    RMSE and the regression summary.  Returns the regression result.
    """
    plt.close('all')
    predictors = all_data[columns]
    res = mlin_regression(y, predictors, add_const=add_const)
    window_start = dt.datetime(2015, 12, 17, 1)
    window_end = dt.datetime(2016, 1, 15, 0)
    timesteps = ens.gen_hourly_timesteps(window_start, window_end)

    # upper panel: measured series, model and prediction interval
    plt.subplot(2, 1, 1)
    plt.plot_date(timesteps, y, 'b', label='Actual prodution')
    plt.plot_date(timesteps, res.fittedvalues, 'r', label='Weather model')
    _, interval_low, interval_high = wls_prediction_std(res)
    plt.plot_date(timesteps, interval_high, 'r--', label='95% conf. int.')
    plt.plot_date(timesteps, interval_low, 'r--')
    plt.ylabel('MW')
    plt.legend(loc=2)

    # lower panel: residual of the fit
    plt.subplot(2, 1, 2)
    plt.plot_date(timesteps, res.resid, '-', label='Residual')
    plt.ylabel('MW')
    plt.legend()

    print("MAE = " + str(mae(res.resid)))
    print("MAPE = " + str(mape(res.resid, y)))
    print("RMSE = " + str(rmse(res.resid)))

    print(res.summary())

    return res
def most_recent_ens_timeseries(start_stop=(dt.datetime(2015,12,16,0), dt.datetime(2016,1,19,0)), pointcode=71699, shift_steno_one=False):
    """Plot stored ensemble time series against two measured series.

    start_stop is a tuple with two datetime objects: the first and the last
    time step of the time series.  For every variable in the module-level
    ``weathervars`` one figure is saved under ``figures/``: the upper panel
    shows the ensemble together with the Brabrand Syd and Steno Museum
    measurements, the lower panel the ensemble std and spread.  With
    ``shift_steno_one`` the Steno series is rolled by one step first.

    """
    plt.close('all')
    ylabels = ['[$\degree $C]', '[m/s]', '[%]', '[W/m$^2$]']

    first_step, last_step = start_stop[0], start_stop[1]
    suffix = ('_geo' + str(pointcode) + '_' + ens.timestamp_str(first_step)
              + '_to_' + ens.timestamp_str(last_step) + '.npy')
    timesteps = ens.gen_hourly_timesteps(first_step, last_step)

    Steno_data = np.load('Q:/Weatherdata/Steno_weatherstation/Steno_hourly_2015120111_to_2016011800.npz')
    Steno_Tvhs = Steno_data['Tout_vWind_hum_sunRad']
    Steno_timesteps = Steno_data['timesteps']

    for v, ylab in zip(weathervars, ylabels):
        plt.figure(figsize=(15, 20))
        plt.grid(True)

        # upper panel: ensemble members and both measured series
        plt.subplot(2, 1, 1)
        ens_data = np.load('time_series/' + v + suffix)
        BBSYD_measured = sq.fetch_BrabrandSydWeather(v, first_step, last_step)
        Steno_measured = Steno_Tvhs[:, weathervars.index(v)]
        if shift_steno_one:
            Steno_measured = np.roll(Steno_measured, -1)

        # unit conversion of the stored ensemble data
        if v == 'Tout':
            ens_data = ens.Kelvin_to_Celcius(ens_data)
        elif v == 'hum':
            ens_data = ens.frac_to_percent(ens_data)

        plt.plot_date(timesteps, ens_data, '-')
        plt.plot_date(timesteps, BBSYD_measured, 'k-', lw=2,
                      label='Measured: Brabrand Syd')
        plt.plot_date(Steno_timesteps, Steno_measured, 'r-', lw=2,
                      label='Measured: Steno Museum')
        plt.ylabel(ylab)
        plt.grid(True)
        plt.xlim(start_stop)
        plt.title(v)
        plt.legend()

        # lower panel: ensemble dispersion measures
        plt.subplot(2, 1, 2)
        plt.plot_date(timesteps, ens.ensemble_std(ens_data), '-',
                      label='Ensemble std')
        plt.plot_date(timesteps, ens.ensemble_abs_spread(ens_data), '-',
                      label='Max ensemble spread')
        plt.ylabel(ylab)
        plt.legend()
        plt.grid(True)
        plt.tight_layout()

        plt.savefig('figures/' + v + '_most_recent_ens_timeseries.pdf')
def check_ens_mean_data():
    """Plot the stored ensemble-mean series for every weather variable.

    For each entry of the module-level ``weathervars`` a new figure shows the
    hourly file and the ``avg24`` file loaded from ``time_series/ens_means/``.
    """
    plt.close('all')
    timesteps = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                         dt.datetime(2016, 1, 15, 0))
    folder = 'time_series/ens_means/'
    hourly_suffix = '_geo71699_2015121701_to_2016011500.npy'
    avg_suffix = 'avg24_geo71699_2015121701_to_2016011500.npy'

    for v in weathervars:
        hourly_data = np.load(folder + v + hourly_suffix)
        daily_avg_data = np.load(folder + v + avg_suffix)

        plt.figure()
        plt.title(v)
        for series, label in ((hourly_data, 'Hourly'),
                              (daily_avg_data, 'Average over last 24h')):
            plt.plot_date(timesteps, series, '-', label=label)
        plt.legend()
def fetch_hourly_vals_from_PIno(PIno, from_time, to_time):
    """Fetch the hourly values of one PI number between two time stamps.

    Parameters
    ----------
    PIno : value interpolated into the ``[Pinr]='%s'`` filter of the query.
    from_time, to_time : first and last time step (passed through ``str()``
        into the ``BETWEEN`` clause and to ``ens.gen_hourly_timesteps``).

    Returns
    -------
    numpy.ndarray
        The ``dValue`` column as floats.

    Raises
    ------
    AssertionError
        If the returned time stamps are not exactly the expected hourly
        steps.
    """
    conn = connect()
    # ORDER BY is required: without it the database is free to return the
    # rows in any order, and the check below expects chronological order.
    sql_query = """USE [EDW_Stage]
                    SELECT [Pinr]
                    ,[TimeStamp]
                    ,[dValue]
                    FROM [sro].[vHourSerier_Udtræk]
                    WHERE [Pinr]='%s' AND TimeStamp BETWEEN '%s' AND '%s'
                    ORDER BY [TimeStamp]"""%(PIno, str(from_time), str(to_time))
    data = extractdata(conn, sql_query)
    # each row is (PI number, timestamp, value); the PI number is unused
    _pi, timestamps, vals = zip(*data)
    assert(list(timestamps)==ens.gen_hourly_timesteps(from_time, to_time)), "Timesteps are not hour by hour"

    return np.array(vals, dtype=float)
def fetch_consumption(Forbrugssted_Key, from_time, to_time):
    """Fetch the ``ForbrugMWh`` series of one ``Forbrugssted_Key``.

    The time range is given by ``from_time`` and ``to_time``; the result is
    returned as a float array.  An AssertionError is raised when the
    returned ``Tid_Key`` values differ from the expected hourly steps.
    """
    connection = connect()
    query_template = """ USE [DM_VT]
                    SELECT
                      [Tid_Key]
                      ,[ForbrugMWh]
                      FROM [DM_VT].[dbo].[vForbrug_Doegn]
                      WHERE Forbrugssted_Key = %i AND Tid_Key BETWEEN '%s' AND  '%s'
                      ORDER BY Tid_Key"""
    sql_query = query_template % (Forbrugssted_Key,
                                  ens.timestamp_str(from_time),
                                  ens.timestamp_str(to_time))
    rows = extractdata(connection, sql_query)
    timestamps, consumption = zip(*rows)

    # Tid_Key comes back as an integer, so compare against integer keys
    expected_keys = [int(ens.timestamp_str(step))
                     for step in ens.gen_hourly_timesteps(from_time, to_time)]
    assert list(timestamps) == expected_keys, "Timesteps are not hour by hour"

    return np.array(consumption, dtype=float)
def check_ens_mean_data():
    """Visual sanity check of the stored ensemble-mean files.

    One figure per entry of the module-level ``weathervars``, showing the
    hourly series and the ``avg24`` series from ``time_series/ens_means/``.
    """
    plt.close('all')
    first_step = dt.datetime(2015, 12, 17, 1)
    last_step = dt.datetime(2016, 1, 15, 0)
    timesteps = ens.gen_hourly_timesteps(first_step, last_step)

    period_tag = '_geo71699_2015121701_to_2016011500.npy'

    for v in weathervars:
        hourly_path = 'time_series/ens_means/' + v + period_tag
        avg_path = 'time_series/ens_means/' + v + 'avg24' + period_tag
        hourly_data = np.load(hourly_path)
        daily_avg_data = np.load(avg_path)

        plt.figure()
        plt.title(v)
        plt.plot_date(timesteps, hourly_data, '-', label='Hourly')
        plt.plot_date(timesteps, daily_avg_data, '-',
                      label='Average over last 24h')
        plt.legend()
def fetch_price(from_time, to_time, price_name='Timenspris'):
    """Fetch one price column between two time stamps as a float array.

    price_name should be either "Timenspris", "VariabelTimenspris"
    or "TimensprisMovingAVG"; it is inserted as the column name of the
    query.  An AssertionError is raised when the returned ``Tid_Key``
    values differ from the expected hourly steps.

    """
    connection = connect()
    query_template = """ USE [DM_VT]
                    SELECT [Tid_Key]
                          ,[%s]
                      FROM [dbo].[vFact_Timepris_Doegn]
                      WHERE Tid_Key BETWEEN '%s' AND  '%s'
                      ORDER BY Tid_Key"""
    sql_query = query_template % (price_name,
                                  ens.timestamp_str(from_time),
                                  ens.timestamp_str(to_time))

    rows = extractdata(connection, sql_query)
    timestamps, price = zip(*rows)

    # Tid_Key comes back as an integer, so compare against integer keys
    expected_keys = [int(ens.timestamp_str(step))
                     for step in ens.gen_hourly_timesteps(from_time, to_time)]
    assert list(timestamps) == expected_keys, "Timesteps are not hour by hour"

    return np.array(price, dtype=float)
def validate_prod24h_before_and_diffsmodel():
    """Fit the prod24h_before + 24h-diff model and apply it to validation data.

    The model is fitted on the module-level ``all_data`` (columns
    ``prod24h_before``, ``Tout24hdiff``, ``vWind24hdiff``, ``sunRad24hdiff``)
    and applied through ``linear_map`` to ensemble-mean data for
    2016-01-20 01:00 to 2016-01-31 00:00.  Actual and modelled production
    are plotted.

    Returns
    -------
    tuple
        ``(validation_data, res, residual)`` where ``residual`` is the model
        output minus the ``prod`` column.
    """
    plt.close('all')

    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
    ts_start = dt.datetime(2016, 1, 20, 1)
    ts_end = dt.datetime(2016, 1, 31, 0)
    one_day = dt.timedelta(days=1)
    prev_start, prev_end = ts_start - one_day, ts_end - one_day

    validation_data = ens.repack_ens_mean_as_df(ts_start, ts_end)

    # Correct error in production at rows 116 and 117.
    # NOTE(review): the expression averages row 116 with itself, so both rows
    # end up with the original value of row 116; possibly row 117 (or a
    # neighbouring row) was meant - confirm before changing.
    # NOTE(review): rows are addressed by position; this assumes the frame
    # index is positional (0, 1, 2, ...) or non-integer - confirm.
    bad_rows = [116, 117]
    new_val = (validation_data['prod'].iloc[116]
               + validation_data['prod'].iloc[116]) / 2
    # Write through a single .loc call instead of chained indexing
    # (df['col'][i] = v), which may silently modify a temporary copy.
    validation_data.loc[validation_data.index[bad_rows], 'prod'] = new_val

    validation_data['prod24h_before'] = sq.fetch_production(prev_start,
                                                            prev_end)
    # the same two hours show up 24 rows later in the lagged column
    lagged_rows = [row + 24 for row in bad_rows]
    validation_data.loc[validation_data.index[lagged_rows],
                        'prod24h_before'] = new_val

    # 24 h differences of the ensemble means
    for v in ['Tout', 'vWind', 'sunRad']:
        day_before = ens.load_ens_timeseries_as_df(
            prev_start, prev_end, weathervars=[v]).mean(axis=1)
        validation_data['%s24hdiff' % v] = validation_data[v] - day_before

    # fit on fit area
    X = all_data[cols]
    res = mlin_regression(all_data['prod'], X, add_const=False)

    # apply to validation area
    weather_model = linear_map(validation_data, res.params, cols)
    timesteps = ens.gen_hourly_timesteps(ts_start, ts_end)

    plt.plot_date(timesteps, validation_data['prod'], 'b-')
    plt.plot_date(timesteps, weather_model, 'r-')
    residual = weather_model - validation_data['prod']

    return validation_data, res, residual
def fetch_production(from_time, to_time):
    """Fetch the ``SamletProduktionMWh`` series between two time stamps.

    Returns a float array.  Values at the time keys 2016032702 and
    2016032703 are doubled (with a printed notice) when they occur in the
    result.  An AssertionError is raised when the returned ``Tid_Key``
    values differ from the expected hourly steps.
    """
    connection = connect()
    query_template = """ USE [DM_VT]
                    SELECT [Tid_Key]
                          ,[SamletProduktionMWh]
                      FROM [dbo].[vFact_Timepris_Doegn]
                      WHERE Tid_Key BETWEEN '%s' AND  '%s'
                      ORDER BY Tid_Key"""
    sql_query = query_template % (ens.timestamp_str(from_time),
                                  ens.timestamp_str(to_time))

    rows = extractdata(connection, sql_query)
    timestamps, production = zip(*rows)

    # Tid_Key comes back as an integer, so compare against integer keys
    expected_keys = [int(ens.timestamp_str(step))
                     for step in ens.gen_hourly_timesteps(from_time, to_time)]
    assert list(timestamps) == expected_keys, "Timesteps are not hour by hour"

    prod_array = np.array(production, dtype=float)
    for dst_key in (2016032702, 2016032703):
        if dst_key not in timestamps:
            continue
        print("Correcting error in production by transition to daylight savings on timestamp %s" % dst_key)
        position = timestamps.index(dst_key)
        prod_array[position] = 2 * prod_array[position]

    return prod_array
def validate_prod24h_before_and_diffsmodel():
    """Validate the prod24h_before + 24h-difference regression model.

    Fits the model on the module-level ``all_data`` using the columns
    ``prod24h_before``, ``Tout24hdiff``, ``vWind24hdiff`` and
    ``sunRad24hdiff``, applies it through ``linear_map`` to ensemble-mean
    data for 2016-01-20 01:00 to 2016-01-31 00:00 and plots actual versus
    modelled production.

    Returns
    -------
    tuple
        ``(validation_data, res, residual)``; ``residual`` is the model
        output minus the ``prod`` column.
    """
    plt.close('all')

    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
    ts_start = dt.datetime(2016,1,20,1)
    ts_end = dt.datetime(2016,1,31,0)
    day_back = dt.timedelta(days=-1)

    validation_data = ens.repack_ens_mean_as_df(ts_start, ts_end)

    # correct error in production:
    # NOTE(review): row 116 is averaged with itself, so rows 116 and 117 both
    # receive the original value of row 116; possibly another row was meant
    # as the second operand - confirm before changing.
    # NOTE(review): rows are addressed by position; this assumes the frame
    # index is positional (0, 1, 2, ...) or non-integer - confirm.
    new_val = (validation_data['prod'].iloc[116] + validation_data['prod'].iloc[116])/2
    # Single .loc writes replace the chained df['col'][i] = v assignments,
    # which may silently write to a temporary copy.
    validation_data.loc[validation_data.index[[116, 117]], 'prod'] = new_val
    validation_data['prod24h_before'] = sq.fetch_production(ts_start + day_back, ts_end + day_back)
    # in the 24 h lagged column the same two hours sit 24 rows further down
    validation_data.loc[validation_data.index[[116+24, 117+24]], 'prod24h_before'] = new_val

    Tout24h_before = ens.load_ens_timeseries_as_df(
        ts_start + day_back, ts_end + day_back, weathervars=['Tout']).mean(axis=1)
    vWind24h_before = ens.load_ens_timeseries_as_df(
        ts_start + day_back, ts_end + day_back, weathervars=['vWind']).mean(axis=1)
    sunRad24h_before = ens.load_ens_timeseries_as_df(
        ts_start + day_back, ts_end + day_back, weathervars=['sunRad']).mean(axis=1)
    validation_data['Tout24hdiff'] = validation_data['Tout'] - Tout24h_before
    validation_data['vWind24hdiff'] = validation_data['vWind'] - vWind24h_before
    validation_data['sunRad24hdiff'] = validation_data['sunRad'] - sunRad24h_before

    # fit on fit area
    X = all_data[cols]
    res = mlin_regression(all_data['prod'], X, add_const=False)

    # apply to validation area
    weather_model = linear_map(validation_data, res.params, cols)
    timesteps = ens.gen_hourly_timesteps(ts_start, ts_end)

    plt.plot_date(timesteps, validation_data['prod'],'b-')
    plt.plot_date(timesteps, weather_model,'r-')
    residual = weather_model - validation_data['prod']

    return validation_data, res, residual
def plot_best_model():
    """Fit, plot and report the Tout/Toutavg24/vWind/vWindavg24 model.

    The module-level ``y`` is regressed on four columns of the module-level
    ``all_data`` via ``mlin_regression``.  The upper panel shows actual
    values, fitted values, the prediction interval and the fit corrected by
    the mean residual of each hour-of-day slot; the lower panel shows the
    residual and that mean residual profile.  Error metrics and the
    regression summary are printed.

    Returns
    -------
    The regression result object returned by ``mlin_regression``.
    """
    plt.close('all')
    columns = [
        'Tout', 'Toutavg24', 'vWind', 'vWindavg24'
    ]
    X = all_data[columns]
    res = mlin_regression(y, X)
    timesteps = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                         dt.datetime(2016, 1, 15, 0))

    plt.subplot(2, 1, 1)
    plt.plot_date(timesteps, y, 'b', label='Actual prodution')
    plt.plot_date(timesteps, res.fittedvalues, 'r', label='Weather model')
    _, iv_l, iv_u = wls_prediction_std(res)
    plt.plot_date(timesteps, iv_u, 'r--', label='95% conf. int.')
    plt.plot_date(timesteps, iv_l, 'r--')

    # Mean residual per hourly slot, repeated for every day of the window;
    # the day count follows from the window length instead of a literal 29.
    mean_day_resid = [res.resid[i::24].mean() for i in range(24)]
    mean_resid_series = np.tile(mean_day_resid, len(timesteps) // 24)
    profile_corrected = res.fittedvalues + mean_resid_series

    plt.plot_date(timesteps,
                  profile_corrected,
                  'g',
                  label='Weather model + avg daily profile')
    plt.ylabel('MW')
    plt.legend(loc=2)
    plt.subplot(2, 1, 2)
    plt.plot_date(timesteps, res.resid, '-', label='Residual')

    plt.plot_date(timesteps, mean_resid_series)
    plt.ylabel('MW')
    plt.legend()

    # Names differ from the module-level mape()/mae() helpers on purpose so
    # those are not shadowed inside this function.
    mape_corrected = np.mean(np.abs((profile_corrected - y) / y))
    mape_raw = np.mean(np.abs((res.resid) / y))
    mae_corrected = np.mean(np.abs(profile_corrected - y))

    print("%s %s %s" % (mape_corrected, mape_raw, mae_corrected))

    # The summary used to be built and discarded; show it.
    print(res.summary())
    return res
def check_for_timeshift():
    """Check for a time shift between two weather stations.

    Compares data from the Brabrand Syd weather station with data from the
    Steno Museum one.  For every variable in the module-level ``weathervars``
    a figure with five panels is drawn, one per Steno offset from -2 to +2
    steps, each titled with the mean absolute error between the two series.
    It appears that Steno data is one hour fast.

    Raises
    ------
    AssertionError
        If the selected Steno time steps differ from the expected hourly
        time steps.
    """

    plt.close('all')
    start_stop = (dt.datetime(2015, 12, 16, 0), dt.datetime(2016, 1, 16, 0))

    timesteps = ens.gen_hourly_timesteps(start_stop[0], start_stop[1])

    Steno_data = np.load(
        'Q:/Weatherdata/Steno_weatherstation/Steno_hourly_2015120111_to_2016011800.npz'
    )
    Steno_Tvhs = Steno_data['Tout_vWind_hum_sunRad']
    Steno_timesteps = Steno_data['timesteps']
    # np.where returns index arrays; take the first match as a plain int so
    # the slice bounds are scalars.
    start_index = int(np.where(Steno_timesteps == start_stop[0])[0][0])
    end_index = int(np.where(Steno_timesteps == start_stop[1])[0][0]) + 1
    Steno_Tvhs_short = Steno_Tvhs[start_index:end_index, :]
    Steno_timesteps_new = Steno_timesteps[start_index:end_index]
    assert (all(Steno_timesteps_new == timesteps))

    for v in weathervars:
        plt.figure()

        # Both series are independent of the offset: fetch them once per
        # variable instead of once per panel.
        BBSYD_measured = sq.fetch_BrabrandSydWeather(
            v, start_stop[0], start_stop[1])
        Steno_measured = Steno_Tvhs_short[:, weathervars.index(v)]

        for offset in range(-2, 3, 1):
            plt.subplot(5, 1, offset + 3)

            Steno_with_offset = np.roll(Steno_measured, offset)
            mean_abs_error = np.mean(
                np.abs((Steno_with_offset - BBSYD_measured)))
            plt.title('offset %i, MAE = %2.4f ' % (offset, mean_abs_error))
            plt.plot_date(timesteps, BBSYD_measured, 'k')
            plt.plot_date(timesteps, Steno_with_offset, 'r')

        plt.tight_layout()
        plt.suptitle(v)
def weather_forecast_ensemble(): # figure 2
    """Draw and save the three-panel weather forecast ensemble figure.

    Each panel shows the 25 ensemble members of one variable (outside
    temperature, wind speed, solar irradiance) over the period starting
    2016-01-20.  Returns ``(ens_data, axes)``.
    """
    plt.close('all')
    period_start = dt.datetime(2016, 1, 20, 1)
    period_end = dt.datetime(2016, 2, 5, 0)
    ts = ens.gen_hourly_timesteps(period_start, period_end)
    ens_data = ens.load_ens_timeseries_as_df(
        ts_start=ts[0], ts_end=ts[-1],
        weathervars=['Tout', 'vWind', 'sunRad'])
    fig, axes = plt.subplots(3, 1, sharex=True,
                             figsize=(colwidth, 1.65*colwidth))
    plt.xticks(size=5)

    # (variable, colour-cycle shift, y label) for each panel, top to bottom
    panel_specs = [
        ('Tout', 15, u'Outside temperature [%sC]'%uni_degree),
        ('vWind', 23, 'Wind speed [m/s]'),
        ('sunRad', 6, u'Solar irradiance [W/m%s]'%uni_squared),
    ]

    for ax, (v, cshift, ylab) in zip(axes, panel_specs):
        color_list = plt.cm.Dark2(np.roll(np.linspace(0, 1, 25), cshift))
        ax.set_prop_cycle(cycler('color', color_list))
        member_names = [v + str(i) for i in range(25)]
        v_ens_data = ens_data[member_names]
        ax.plot_date(ts, v_ens_data, '-', lw=0.5)
        ax.set_ylabel(ylab, size=8)
        ax.tick_params(axis='y', which='major', labelsize=8)
        plt.box(True)

    plt.tight_layout()
    last_axis = axes[-1]
    last_axis.xaxis.set_major_formatter(DateFormatter('%b %d'))
    last_axis.set_xlim(dt.datetime(2016,1,20,0), dt.datetime(2016,2,5,0))
    fig.savefig('figures/first_articlefigs/weather_forecast_ensemble.pdf')
    return ens_data, axes
def check_for_timeshift():
    """Check whether the two weather stations are shifted in time.

    Data from the Brabrand Syd weather station is compared with data from
    the Steno Museum one.  For every variable in the module-level
    ``weathervars`` one figure is drawn with a panel per Steno offset
    (-2 to +2 steps); each panel title shows the mean absolute error
    between the two series.  It appears that Steno data is one hour fast.

    Raises
    ------
    AssertionError
        If the selected Steno time steps differ from the expected hourly
        time steps.
    """

    plt.close('all')
    start_stop=(dt.datetime(2015,12,16,0), dt.datetime(2016,1,16,0))

    timesteps = ens.gen_hourly_timesteps(start_stop[0], start_stop[1])

    Steno_data = np.load('Q:/Weatherdata/Steno_weatherstation/Steno_hourly_2015120111_to_2016011800.npz')
    Steno_Tvhs = Steno_data['Tout_vWind_hum_sunRad']
    Steno_timesteps = Steno_data['timesteps']
    # np.where yields index arrays; use the first hit as a scalar slice bound
    first_row = int(np.where(Steno_timesteps==start_stop[0])[0][0])
    stop_row = int(np.where(Steno_timesteps==start_stop[1])[0][0]) + 1
    Steno_Tvhs_short = Steno_Tvhs[first_row:stop_row, :]
    Steno_timesteps_new = Steno_timesteps[first_row:stop_row]
    assert(all(Steno_timesteps_new==timesteps))

    for v in weathervars:
        plt.figure()

        # Neither series depends on the offset, so load them once per
        # variable rather than inside the offset loop.
        BBSYD_measured = sq.fetch_BrabrandSydWeather(v, start_stop[0], start_stop[1])
        Steno_measured = Steno_Tvhs_short[:, weathervars.index(v)]

        for panel, offset in enumerate(range(-2,3,1), 1):
            plt.subplot(5,1,panel)

            Steno_with_offset = np.roll(Steno_measured, offset)
            abs_error_mean = np.mean(np.abs((Steno_with_offset-BBSYD_measured)))
            plt.title('offset %i, MAE = %2.4f '%(offset,abs_error_mean))
            plt.plot_date(timesteps, BBSYD_measured, 'k')
            plt.plot_date(timesteps, Steno_with_offset, 'r')

        plt.tight_layout()
        plt.suptitle(v)
def validate_ToutToutavg24vWindvWindavg24_model():
    """Plot stored model parameters applied to one validation week.

    Reads ``daily_profile.npy`` and ``lin_reg_fit_params.pkl``, evaluates
    ``linear_map`` on the ensemble-mean data for 2016-01-19 01:00 to
    2016-01-26 00:00 and plots the ``prod`` column, the weather model and
    the weather model with the daily profile added.  Returns the validation
    data frame.
    """
    plt.close('all')

    ts_start, ts_end = dt.datetime(2016,1,19,1), dt.datetime(2016,1,26,0)

    daily_profile = np.load('daily_profile.npy')
    params = pd.read_pickle('lin_reg_fit_params.pkl')
    validation_data = ens.repack_ens_mean_as_df(ts_start, ts_end)

    predictors = ['Tout', 'Toutavg24', 'vWind', 'vWindavg24']
    weather_model = linear_map(validation_data, params, predictors)
    timesteps = ens.gen_hourly_timesteps(ts_start, ts_end)

    plt.plot_date(timesteps, validation_data['prod'],'b-')
    plt.plot_date(timesteps, weather_model,'r-')

    weather_model_wdailyprofile = []
    for when, prediction in zip(timesteps, weather_model):
        print(when.hour)
        # profile slot is the hour shifted back by one, wrapping at midnight
        slot = np.mod(when.hour-1, 24)
        weather_model_wdailyprofile.append(prediction + daily_profile[slot])

    plt.plot_date(timesteps, weather_model_wdailyprofile, 'g-')

    return validation_data
                                            h_hoursbefore(ts_start, timeshift),\
                                            h_hoursbefore(ts_end, timeshift),\
                                            pointcode=71699)
            df['%s%ihdiff'%(v,timeshift)] = ens_mean - ens_mean_before
    
    
    return df        

# Build the lagged regression data set.  all_data (and therefore y) is only
# defined here when reload_data is True.
reload_data = True
if reload_data:        
    # time lags handed to gen_fit_df (presumably hours, cf. the
    # '48h60h168h' file name below - confirm)
    timelags = [48, 60, 168]
    all_data = gen_fit_df(dt.datetime(2016,1,26,1), dt.datetime(2016,4,1,0), ['Tout', 'vWind', 'hum', 'sunRad'], timelags)
y = all_data['prod']

#%%
ts = ens.gen_hourly_timesteps(dt.datetime(2016,1,26,1), dt.datetime(2016,4,1,0))
# every column except the target; .loc replaces the removed .ix indexer
X = all_data.loc[:, all_data.columns !='prod']

# store predictors and target for later use
X.to_pickle('48h60h168h_lagged_X.pkl')
y.to_pickle('prod_to_gowith.pkl')


#%%
# linear model without intercept, evaluated with 25-fold cross validation
lr = linear_model.LinearRegression(fit_intercept=False)


predicted = cross_val_predict(lr, X, y, cv=25)
plt.figure()
plt.plot(y)
plt.plot(predicted, 'r')
sns.jointplot(pd.Series(predicted), y)
def production_model(): # figure 3
    """Fit, validate and plot the 24h-difference production model (figure 3).

    Steps performed, in order:

    1. Build a fit data set (ts1: 2015-12-17 01:00 .. 2016-01-15 00:00) and a
       validation data set (ts2: 2016-01-20 01:00 .. 2016-02-05 00:00) with
       the regressors in ``cols``: production 24 h earlier and the change in
       ensemble-mean Tout, vWind and sunRad relative to 24 h earlier.
    2. Fit a linear regression without constant (``mlin_regression``) of
       'prod' on those regressors using the fit data.
    3. Push each of the 25 weather ensemble members through the fitted
       coefficients to get 25 production curves, and use their spread
       together with quantiles of the corrected fit residuals to build
       combined upper/lower bounds.
    4. Plot the validation period (bounds, ensemble curves, historical
       production, model) plus a normalized-interval panel, and save the
       figure to a hard-coded absolute PDF path.
    5. Print error statistics for the model and for the EO3 midnight
       forecast, and save the upper interval half-width together with the
       timesteps to 'combined_conf_int' via ``np.savez``.

    Relies on names defined elsewhere in the file/module: ``ens``, ``sq``,
    ``mlin_regression``, ``linear_map``, ``mae``, ``mape``, ``rmse``,
    ``dcolwidth``, ``red`` and ``DateFormatter``.

    Returns
    -------
    (res, fit_data): the regression result object and the fit data frame.
    """
    
    plt.close('all')
    # Regressors of the production model.
    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
        
    # ts1 = fit period, ts2 = validation period (hourly timesteps).
    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016,1,20,1), dt.datetime(2016,2,5,0))
    
    #load the data
    # NOTE(review): called without arguments - presumably its defaults cover
    # the ts1 period; confirm against ens.repack_ens_mean_as_df.
    fit_data = ens.repack_ens_mean_as_df()
    # Production shifted one day back, i.e. the value 24 h before each timestep.
    fit_data['prod24h_before'] = sq.fetch_production(ts1[0]+dt.timedelta(days=-1), ts1[-1]+dt.timedelta(days=-1))
    
    # 24 h differences: value now minus the ensemble mean (mean over columns)
    # of the same variable one day earlier.
    fit_data['Tout24hdiff'] = fit_data['Tout'] \
                                - ens.load_ens_timeseries_as_df(\
                                    ts_start=ts1[0]+dt.timedelta(days=-1),\
                                    ts_end=ts1[-1]+dt.timedelta(days=-1), \
                                    weathervars=['Tout']).mean(axis=1)
    fit_data['vWind24hdiff'] = fit_data['vWind'] \
                                - ens.load_ens_timeseries_as_df(\
                                    ts_start=ts1[0]+dt.timedelta(days=-1),\
                                    ts_end=ts1[-1]+dt.timedelta(days=-1), \
                                    weathervars=['vWind']).mean(axis=1)
    fit_data['sunRad24hdiff'] = fit_data['sunRad'] \
                                - ens.load_ens_timeseries_as_df(\
                                    ts_start=ts1[0]+dt.timedelta(days=-1),\
                                    ts_end=ts1[-1]+dt.timedelta(days=-1), \
                                    weathervars=['sunRad']).mean(axis=1)
                                    
    # Same construction for the validation period.
    vali_data = ens.repack_ens_mean_as_df(ts2[0], ts2[-1])
    vali_data['prod24h_before'] = sq.fetch_production(ts2[0]+dt.timedelta(days=-1), ts2[-1]+dt.timedelta(days=-1))
    vali_data['Tout24hdiff'] = vali_data['Tout'] \
                                - ens.load_ens_timeseries_as_df(\
                                    ts_start=ts2[0]+dt.timedelta(days=-1),\
                                    ts_end=ts2[-1]+dt.timedelta(days=-1), \
                                    weathervars=['Tout']).mean(axis=1)
    vali_data['vWind24hdiff'] = vali_data['vWind'] \
                                - ens.load_ens_timeseries_as_df(\
                                    ts_start=ts2[0]+dt.timedelta(days=-1),\
                                    ts_end=ts2[-1]+dt.timedelta(days=-1), \
                                    weathervars=['vWind']).mean(axis=1)
    vali_data['sunRad24hdiff'] = vali_data['sunRad'] \
                                - ens.load_ens_timeseries_as_df(\
                                    ts_start=ts2[0]+dt.timedelta(days=-1),\
                                    ts_end=ts2[-1]+dt.timedelta(days=-1), \
                                    weathervars=['sunRad']).mean(axis=1)
    
    # correct error in production:
    # Overwrites validation rows 116 and 117 of 'prod', and the matching rows
    # one day later (offset +24) of 'prod24h_before', with a single value.
    # NOTE(review): both operands below are index 116, so new_val is simply
    # prod[116]; if an average of two different samples (e.g. the neighbours
    # of the faulty rows) was intended, one index is wrong - confirm.
    # NOTE(review): these are chained assignments (df[col][i] = ...); whether
    # they write through to vali_data depends on the pandas version - confirm.
    new_val = (vali_data['prod'][116] +vali_data['prod'][116])/2
    vali_data['prod'][116] = new_val
    vali_data['prod'][117] = new_val
    vali_data['prod24h_before'][116+24] = new_val
    vali_data['prod24h_before'][117+24] = new_val
    
    
 
    # do the fit
    X = fit_data[cols]
    y = fit_data['prod']
    res = mlin_regression(y, X, add_const=False)    

    # Two stacked panels sharing the x axis; the upper one is 4x as tall.
    fig, [ax1, ax2] = plt.subplots(2, 1, sharex=True, figsize=(dcolwidth, 0.57*dcolwidth), gridspec_kw={'height_ratios':[4,1]})

    # load ensemble data
    # Per-member weather columns are named <var><member>, e.g. 'Tout0'.
    # The "_24h_before" frames hold the same variables one day earlier.
    ens_data1 = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1],\
                                             weathervars=['Tout', 'vWind', 'sunRad'])
    ens_data1['prod24h_before'] = fit_data['prod24h_before']
    ens_data1_24h_before =  ens.load_ens_timeseries_as_df(\
                                    ts_start=ts1[0]+dt.timedelta(days=-1),\
                                    ts_end=ts1[-1]+dt.timedelta(days=-1), \
                                    weathervars=['Tout', 'vWind', 'sunRad']) 
    ens_data2 = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1],\
                                             weathervars=['Tout', 'vWind', 'sunRad'])
    ens_data2['prod24h_before'] = vali_data['prod24h_before']
    ens_data2_24h_before = ens.load_ens_timeseries_as_df(\
                                    ts_start=ts2[0]+dt.timedelta(days=-1),\
                                    ts_end=ts2[-1]+dt.timedelta(days=-1), \
                                    weathervars=['Tout', 'vWind', 'sunRad']) 
    # Per-member 24 h differences, stored as <var>24hdiff<member>.
    for i in range(25):
        for v in ['Tout', 'vWind', 'sunRad']:
            key_raw = v + str(i)
            key_diff = v + '24hdiff' + str(i)
            ens_data1[key_diff] = ens_data1[key_raw] - ens_data1_24h_before[key_raw]
            ens_data2[key_diff] = ens_data2[key_raw] - ens_data2_24h_before[key_raw]

    
    # Fit period followed by validation period; rows [0:len(ts1)] are the fit
    # period and rows [len(ts1):] the validation period in everything below.
    all_ens_data = pd.concat([ens_data1, ens_data2])
    all_ts = ts1 + ts2    
    # calculate production for each ensemble member
    # The fitted coefficients are re-keyed to the member-specific column names
    # so that linear_map can be applied to each member's columns.
    ens_prods = np.zeros((len(all_ts), 25))
    for i in range(25):
        ens_cols = ['Tout24hdiff' + str(i), 'vWind24hdiff' + str(i),\
                    'sunRad24hdiff' + str(i), 'prod24h_before']
        ens_params = pd.Series({'Tout24hdiff' + str(i):res.params['Tout24hdiff'],
                                'vWind24hdiff' + str(i):res.params['vWind24hdiff'],
                                'sunRad24hdiff' + str(i):res.params['sunRad24hdiff'],
                                'prod24h_before':res.params['prod24h_before']})
        ens_prods[:,i] = linear_map(all_ens_data, ens_params, ens_cols)    
    
    
       
    # calculate combined confint
    # Spread across the 25 members at each timestep; 1.9599 is the multiplier
    # used for the bands labelled "95%" in the plot below.
    ens_std = ens_prods.std(axis=1)
    # Validation residual = model minus actual production.
    vali_resid = linear_map(vali_data, res.params, cols) - vali_data['prod']
    # NOTE: vali_resid_corrig is only referenced by the commented-out line below.
    vali_resid_corrig = vali_resid - np.sign(vali_resid)*1.9599*ens_std[len(ts1):]
    #mean_conf_int_spread = (vali_resid_corrig.quantile(0.95) - vali_resid_corrig.quantile(0.05))/2 # this conf_int is not used anymore


    # Fit residuals with the ensemble band subtracted in the direction of the
    # residual's sign; their 2.5% / 97.5% quantiles give the constant part of
    # the lower / upper bound.
    fit_resid = res.resid
    fit_resid_corrig = fit_resid - np.sign(fit_resid)*1.9599*ens_std[0:len(ts1)]
    conf_int_spread_lower = - fit_resid_corrig.quantile(0.025)
    conf_int_spread_higher = fit_resid_corrig.quantile(0.975) 
    
    # Total band width, and upper/lower bounds around the model for both periods.
    combined_conf_ints = conf_int_spread_lower + conf_int_spread_higher + 2*1.9599*ens_std
    all_prod_model = np.concatenate([res.fittedvalues, linear_map(vali_data, res.params, cols)])
    combined_ub95 = all_prod_model + conf_int_spread_higher + 1.9599*ens_std
    combined_lb95 = all_prod_model - (conf_int_spread_lower + 1.9599*ens_std)
    
    # plot confint
    # Only the validation part ([len(ts1):]) is drawn.
    ax1.fill_between(all_ts[len(ts1):], combined_lb95[len(ts1):], combined_ub95[len(ts1):], label='95% prediction intervals')
    ax1.fill_between(all_ts[len(ts1):], all_prod_model[len(ts1):] - 1.9599*ens_std[len(ts1):], all_prod_model[len(ts1):] + 1.9599*ens_std[len(ts1):], facecolor='grey', label='Weather ensemble 95% conf. int.')
    
    # plot ensemble models
    ax1.plot_date(all_ts[len(ts1):], ens_prods[len(ts1):], '-', lw=0.5)    

    ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='Historical production')
    ax1.plot_date(ts2, linear_map(vali_data, res.params, cols), '-', c=red, lw=2, label='Production model')
    ax1.set_ylabel('Production [MW]', size=8)
    ax1.tick_params(axis='both', which='major', labelsize=8)
    ax1.xaxis.set_major_formatter(DateFormatter('%b %d') )    
    ax1.legend(loc=1, prop={'size':8})
    ax1.set_ylim([300,1100])
    
    # Lower panel: bounds normalized by N, the constant upper spread plus
    # 1.9599 times the largest ensemble std in the validation period.
    N = conf_int_spread_higher + 1.9599*ens_std[len(ts1):].max()
    ax2.fill_between(ts2, -(1.9599*ens_std[len(ts1):]+conf_int_spread_lower)/N, -1.9599*ens_std[len(ts1):]/N, alpha=0.5)
    ax2.fill_between(ts2, -1.9599*ens_std[len(ts1):]/N, np.zeros(len(ts2)), facecolor='grey',alpha=0.5)
    # No second curve given here, so fill_between uses its default baseline.
    ax2.fill_between(ts2, 1.9599*ens_std[len(ts1):]/N, facecolor='grey')
    ax2.fill_between(ts2, 1.9599*ens_std[len(ts1):]/N, (conf_int_spread_higher+1.9599*ens_std[len(ts1):])/N) 
    ax2.set_ylabel('Prediction intervals \n[normalized]', size=8)
    ax2.tick_params(axis='y', which='major', labelsize=8)
    ax2.set_xlim(dt.datetime(2016,1,20,0), dt.datetime(2016,2,5,0))
    fig.tight_layout()
    print "Min_normalized pos conf bound. ", np.min(1.9599*ens_std[len(ts1):]/N+conf_int_spread_higher/N)
    
    # Error statistics for the validation period ...
    print "MAE = " + str(mae(vali_resid))
    print "MAPE = " + str(mape(vali_resid, vali_data['prod']))
    print "RMSE = " + str(rmse(vali_resid))
    print "ME = " + str(np.mean(vali_resid))
    
    # ... and for the fit period.
    print "MAE (fit) = " + str(mae(res.resid))
    print "MAPE (fit) = " + str(mape(res.resid, fit_data['prod']))
    print "RMSE (fit)= " + str(rmse(res.resid))
    print "ME (fit)= " + str(np.mean(res.resid))
    
    # hoerning_pump_model() hard-codes a number taken from this printout.
    print "Width of const blue bands (MW)", conf_int_spread_lower, conf_int_spread_higher

    # Hard-coded absolute output path.
    plt.savefig('Q:/Projekter/Ens Article 1/figures/production_model.pdf', dpi=400) 

   
    # Same error statistics for the EO3 midnight forecast, as a benchmark.
    EO3_fc1 = sq.fetch_EO3_midnight_forecast(ts1[0], ts1[-1])
    EO3_fc2 = sq.fetch_EO3_midnight_forecast(ts2[0], ts2[-1])
    EO3_err = EO3_fc2-vali_data['prod']
    EO3_err_fit = EO3_fc1-fit_data['prod']
    print "MAE (EO3) = " + str(mae(EO3_err))
    print "MAPE (EO3) = " + str(mape(EO3_err, vali_data['prod']))
    print "RMSE (EO3)= " + str(rmse(EO3_err))
    print "ME (EO3)= " + str(np.mean(EO3_err))
    
    print "MAE (EO3_fit) = " + str(mae(EO3_err_fit))
    print "MAPE (EO3_fit) = " + str(mape(EO3_err_fit, fit_data['prod']))
    print "RMSE (EO3_fit)= " + str(rmse(EO3_err_fit))
    print "ME (EO3_fit)= " + str(np.mean(EO3_err_fit))
    
    # Smallest validation-period band width relative to the overall maximum.
    print np.min(combined_conf_ints[len(ts1):]/combined_conf_ints.max())
    # Persist the upper half-width (constant upper spread + ensemble band) and
    # the timesteps; hoerning_pump_model() reads 'combined_conf_int.npz'.
    np.savez('combined_conf_int', combined_conf_int=(conf_int_spread_higher+1.9599*ens_std), timesteps=all_ts)

    print "Corr coeff: vali ", np.corrcoef(vali_data['prod'],linear_map(vali_data, res.params, cols))[0,1]
    print "Corr coeff: vali EO3 ", np.corrcoef(vali_data['prod'], EO3_fc2)[0,1]
    print "Corr coeff: fit ", np.corrcoef(fit_data['prod'],res.fittedvalues)[0,1]
    print "Corr coeff: fit EO3 ", np.corrcoef(fit_data['prod'], EO3_fc1)[0,1]
    
    # Coverage check. The printed values are fractions (count / len(ts2)),
    # not percentages, despite the "%" in the label.
    print "% of actual production in vali period above upper", float(len(np.where(vali_data['prod']>(conf_int_spread_higher+1.9599*ens_std[len(ts1):]+linear_map(vali_data, res.params, cols)))[0]))/len(ts2)
    print "plus minus: ", 0.5/len(ts2)
    
    print "% of actual production in vali period below lower", float(len(np.where(vali_data['prod']<(linear_map(vali_data, res.params, cols)-(conf_int_spread_lower+1.9599*ens_std[len(ts1):])))[0]))/len(ts2)
    print "plus minus: ", 0.5/len(ts2)
    
    return res, fit_data
Exemple #25
0
def load_cons_model_ens_dfs(df):
    """Build one consumption-model data frame per weather ensemble member.

    For each of the 25 ensemble members a frame is assembled for the fit
    (2015-12-17 01:00 .. 2016-01-15 00:00), validation (2016-01-20 01:00 ..
    2016-02-05 00:00) and test (2016-02-05 01:00 .. 2016-03-01 00:00)
    periods and the three are concatenated.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a 'cons' column and an index that can be label-sliced
        with datetimes covering all three periods and the day before each.

    Returns
    -------
    list of pandas.DataFrame
        25 frames (one per ensemble member), each indexed by a
        ``DatetimeIndex`` of the period timesteps, with the columns
        'cons', 'cons24hbefore' and '<var>24hdiff' for each of
        Tout, vWind, sunRad and hum.
    """
    n_members = 25
    day_back = dt.timedelta(days=-1)
    weathervars = ['Tout', 'vWind', 'sunRad', 'hum']

    fit_ts = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                      dt.datetime(2016, 1, 15, 0))
    vali_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                       dt.datetime(2016, 2, 5, 0))
    test_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1),
                                       dt.datetime(2016, 3, 1, 0))
    periods = [fit_ts, vali_ts, test_ts]

    # The measured consumption is identical for every ensemble member, so
    # slice it once per period instead of once per member.  `.loc` is the
    # label-based replacement for the deprecated `.ix` indexer.
    cons_now = [df.loc[steps[0]:steps[-1]]['cons'] for steps in periods]
    cons_before = [
        df.loc[steps[0] + day_back:steps[-1] + day_back]['cons']
        for steps in periods
    ]

    # period_frames[p][i] is the frame of member i for period p
    # (p: 0 = fit, 1 = validation, 2 = test).
    period_frames = [[pd.DataFrame() for _ in range(n_members)]
                     for _ in periods]

    for frames, now, before in zip(period_frames, cons_now, cons_before):
        for frame in frames:
            # Casting to a plain array drops the pandas index, so the values
            # are stored positionally instead of being index-aligned; it
            # also gives every frame its own copy of the data.
            frame['cons'] = np.array(now)
            frame['cons24hbefore'] = np.array(before)

    for v in weathervars:
        for frames, steps in zip(period_frames, periods):
            # Change of the variable relative to 24 h earlier, one column
            # per ensemble member, named <var><member>.
            ens_diff = ens.load_ens_timeseries_as_df(
                ts_start=steps[0],
                ts_end=steps[-1],
                weathervars=[v]) - ens.load_ens_timeseries_as_df(
                    ts_start=steps[0] + day_back,
                    ts_end=steps[-1] + day_back,
                    weathervars=[v])
            # NOTE(review): this assigns a Series, so pandas aligns it on the
            # frame's index - presumably the loader returns a default
            # integer index; confirm.
            for i, frame in enumerate(frames):
                frame['%s24hdiff' % v] = ens_diff[v + str(i)]

    all_data = []
    for i in range(n_members):
        member_frames = [frames[i] for frames in period_frames]
        for frame, steps in zip(member_frames, periods):
            frame.set_index(pd.DatetimeIndex(steps), inplace=True)
        all_data.append(pd.concat(member_frames))

    return all_data
def hoerning_pump_model():  # figure 4
    """Simulate and plot supply temperature vs. flow for two flow limits.

    Historical supply temperature and flow are fetched for two periods
    (2015-12-17 01:00 .. 2016-01-15 00:00 and 2016-01-20 01:00 ..
    2016-02-05 00:00) and converted to delivered heat.  For every hour the
    operating point (T_sup, Q) needed to deliver that heat is computed
    twice:

    * Scenario 1: constant upper flow limit of 360,
    * Scenario 2: upper flow limit between 360 and 410, reduced linearly
      with the model uncertainty read from 'combined_conf_int.npz'
      (written by ``production_model``).

    Both point clouds are drawn as scatter plots and saved to
    'figures/first_articlefigs/hoerning_pump_model.pdf'.  Finally the
    Scenario 2 calculation is repeated with the uncertainty reduced by
    half of a hard-coded statistical interval width.

    Relies on names defined elsewhere in the file/module: ``ens``, ``sq``,
    ``plt``, ``mpl``, ``red``, ``yellow``, ``colwidth``, ``uni_degree`` and
    ``uni_tothethird``.

    Returns
    -------
    tuple
        (T_sup_const_cap, T_sup_dyn_cap, Q_const_cap, Q_dyn_cap,
        model_conf_int, T_sup_dyn_cap_half_model_unc)

    Raises
    ------
    AssertionError
        If the timesteps stored with the confidence intervals differ from
        the timesteps used here.
    """
    # Constants read by the nested helpers below through their closure.
    specific_heat_water = 1.17e-6  # MWh/kgKelvin
    density_water = 980  # kg/m3 at 65 deg C
    T_ret = 36.5

    # simple model: piecewise-linear relation Q = a * T + b between supply
    # temperature and flow, with the pieces meeting at Q = 295 and Q = 340.
    T1 = 68.5
    a2 = 15.5
    a3 = 2.1
    b2 = 295 - a2 * T1
    b3 = 340 - a3 * 71.4

    const_Q_ub = 360  # fixed upper flow limit (Scenario 1)
    Q_pump_max = 410  # drawn as 'Maximum pump capacity'
    dT = 0.1  # horizontal inset of the shaded bands

    def Q_from_cons_lin_piece(cons, a, b):
        """Return the flow on the line Q = a*T + b that delivers `cons`.

        Substituting T = (Q - b) / a into
        cons = specific_heat_water * density_water * Q * (T - T_ret)
        gives the quadratic A*Q**2 + B*Q + C = 0; the root with the positive
        square root is returned.
        """
        B = -(b + a * T_ret) / a
        C = -cons / (specific_heat_water * density_water)
        A = 1 / a

        Qplus = (-B + np.sqrt(B**2 - 4 * A * C)) / (2 * A)

        return Qplus

    def get_Tsup_and_Q(cons, Q_ub):
        """Return (T_sup, Q) delivering `cons` with the flow limited to Q_ub.

        The branches are tried in order of increasing supply temperature.
        If none of the comparisons holds (e.g. `cons` is NaN) the function
        falls through and returns None.
        """
        # try lowest possible T
        Q = cons / (specific_heat_water * density_water * (T1 - T_ret))
        if Q <= 295:
            return T1, Q
        elif Q > 295:
            # first linear piece
            Q = Q_from_cons_lin_piece(cons, a2, b2)
            if Q <= Q_ub * (340. / 360):
                T = (Q - b2) / a2
                return T, Q
            elif Q >= Q_ub * (340. / 360):
                # second linear piece, shifted so that it scales with Q_ub
                b3_adjusted = b3 + (Q_ub * (340. / 360) - 340)
                Q = Q_from_cons_lin_piece(cons, a3, b3_adjusted)
                if Q <= Q_ub:
                    T = (Q - b3_adjusted) / a3
                    return T, Q
                elif Q > Q_ub:
                    # flow is capped; the temperature follows from the
                    # heat balance
                    Q = Q_ub
                    T = cons / (specific_heat_water * density_water *
                                Q) + T_ret
                    return T, Q

    def shade_operating_regions(ax):
        """Draw the red/yellow flow bands and the grey model region on `ax`."""
        ax.fill_between([65 + dT, 95 - dT], [Q_pump_max, Q_pump_max],
                        [const_Q_ub, const_Q_ub],
                        facecolor=red,
                        alpha=0.25)
        ax.fill_between([65 + dT, 95 - dT], [const_Q_ub, const_Q_ub],
                        [340, 340],
                        facecolor=yellow,
                        alpha=0.25)
        ax.fill_between([T1, 71.4, 80.9, 100], [295, 340, 360, 360],
                        color='k',
                        edgecolor='k',
                        alpha=0.2,
                        linewidth=1)

    def mark_max_pump_capacity(ax):
        """Draw and label the dashed maximum-pump-capacity line on `ax`."""
        ax.plot([65 + dT, 95 - dT], [Q_pump_max, Q_pump_max],
                '--',
                c=red,
                lw=2)
        ax.text(79, 415, 'Maximum pump capacity', size=8)

    plt.close('all')

    fig, [ax1, ax2] = plt.subplots(2, 1, sharex=True, sharey=True)
    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                   dt.datetime(2016, 1, 15, 0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                   dt.datetime(2016, 2, 5, 0))
    all_ts = ts1 + ts2
    PI_T_sup = '4.146.120.29.HA.101'
    PI_Q = 'K.146A.181.02.HA.101'

    df = pd.DataFrame()
    df['T_sup'] = np.concatenate([
        sq.fetch_hourly_vals_from_PIno(PI_T_sup, ts1[0], ts1[-1]),
        sq.fetch_hourly_vals_from_PIno(PI_T_sup, ts2[0], ts2[-1])
    ])
    df['Q'] = np.concatenate([
        sq.fetch_hourly_vals_from_PIno(PI_Q, ts1[0], ts1[-1]),
        sq.fetch_hourly_vals_from_PIno(PI_Q, ts2[0], ts2[-1])
    ])
    df['ts'] = all_ts
    # Delivered heat from the heat balance of flow and temperature drop.
    df['cons'] = specific_heat_water * density_water * df['Q'] * (df['T_sup'] -
                                                                  T_ret)

    # Read the archive once and close it again; both arrays are fully
    # loaded inside the `with` block.
    # NOTE(review): if 'timesteps' was saved from datetime objects it is an
    # object array, which newer NumPy versions only load with
    # allow_pickle=True - confirm before upgrading NumPy.
    with np.load('combined_conf_int.npz') as archive:
        model_conf_int = archive['combined_conf_int']
        conf_int_timesteps = list(archive['timesteps'])
    assert (conf_int_timesteps == all_ts
            ), "confidence intervals don't have matching time steps"

    max_conf_int = np.max(model_conf_int)

    Q_const_cap = []
    T_sup_const_cap = []
    Q_dyn_cap = []
    T_sup_dyn_cap = []
    for c, model_uncertainty in zip(df['cons'], model_conf_int):
        # Scenario 1: fixed flow limit.
        T_const, Q_const = get_Tsup_and_Q(c, const_Q_ub)
        Q_const_cap.append(Q_const)
        T_sup_const_cap.append(T_const)

        # Scenario 2: the limit equals Q_pump_max at zero uncertainty and
        # const_Q_ub at the largest uncertainty.
        Q_ub = Q_pump_max - (Q_pump_max - const_Q_ub) * (model_uncertainty /
                                                         max_conf_int)
        T_dyn, Q_dyn = get_Tsup_and_Q(c, Q_ub)
        Q_dyn_cap.append(Q_dyn)
        T_sup_dyn_cap.append(T_dyn)

    # Same drawing order as before: bands on both axes first, then the
    # pump-capacity line and the scatter points per axis.
    shade_operating_regions(ax1)
    shade_operating_regions(ax2)
    mark_max_pump_capacity(ax1)
    im = ax1.scatter(T_sup_const_cap,
                     Q_const_cap,
                     c=df['cons'],
                     cmap=plt.cm.BuPu)

    ax2.scatter(T_sup_dyn_cap, Q_dyn_cap, c=df['cons'], cmap=plt.cm.BuPu)
    mark_max_pump_capacity(ax2)

    # One colorbar shared by both panels.
    cax, kw = mpl.colorbar.make_axes([ax1, ax2])
    fig.colorbar(im, cax=cax)
    cax.set_ylabel('Delivered heat [MW]', size=8)

    ax2.set_xlabel(u'Supply temperature [%sC]' % uni_degree, size=8)
    ax1.set_ylabel(u'Water flow rate [m%s/h]' % uni_tothethird, size=8)
    ax2.set_ylabel(u'Water flow rate [m%s/h]' % uni_tothethird, size=8)
    ax1.tick_params(axis='both', which='major', labelsize=8)
    ax2.tick_params(axis='both', which='major', labelsize=8)
    cax.tick_params(axis='y', which='major', labelsize=8)
    ax1.set_title('Scenario 1', size=10)
    ax2.set_title('Scenario 2', size=10)

    ax1.set_xlim((65, 95))
    ax1.set_ylim((150, 450))

    fig.set_size_inches(1.15 * colwidth, 1.6 * colwidth)

    fig.savefig('figures/first_articlefigs/hoerning_pump_model.pdf')

    # This is a theoretical calculation in case the model uncertainty was 50% of what it is
    statistical_conf_int = 50.90285  # this number is printed when production_model() is run (Width of const blue band (MW) ...)
    T_sup_dyn_cap_half_model_unc = []
    reduced_model_conf_int = model_conf_int - 0.5 * statistical_conf_int
    for c, model_uncertainty in zip(df['cons'], reduced_model_conf_int):
        # Still normalized by the maximum of the unreduced intervals.
        Q_ub = Q_pump_max - (Q_pump_max - const_Q_ub) * (model_uncertainty /
                                                         max_conf_int)
        T_dyn, Q_dyn = get_Tsup_and_Q(c, Q_ub)
        T_sup_dyn_cap_half_model_unc.append(T_dyn)

    return T_sup_const_cap, T_sup_dyn_cap, Q_const_cap, Q_dyn_cap, model_conf_int, T_sup_dyn_cap_half_model_unc
Exemple #27
0
def main(argv):
    plt.close('all')

    try:
        station = argv[0]
        no_sigma = argv[1]
        if not station in PI_T_sup_dict.keys():
            print "Use rundhoej, holme or hoerning and a float for the uncertainty bound"
            return
    except:
        print "No station provided. Defaults to holme, no_sigma=2"
        station = 'holme'
        no_sigma = 2

    print station, no_sigma
    # old tsstart dt.datetime(2014,12,17,1)
    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 3, 1, 1),
                                   dt.datetime(2016, 1, 15, 0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 19, 1),
                                   dt.datetime(2016, 3, 1, 0))
    all_ts = ts1 + ts2

    df = pd.DataFrame(index=all_ts)
    if station == 'holme':
        PI_Q1 = PI_Q_dict[station]
        PI_Q2 = PI_Q_dict2[station]
        df['Q1']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_Q1, ts1[0], ts1[-1]),\
                    sq.fetch_hourly_vals_from_PIno(PI_Q1, ts2[0], ts2[-1])])
        df['Q2']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_Q2, ts1[0], ts1[-1]),\
                    sq.fetch_hourly_vals_from_PIno(PI_Q2, ts2[0], ts2[-1])])
        df['Q'] = df['Q1'] + df['Q2']
    else:
        PI_Q = PI_Q_dict[station]
        df['Q']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_Q, ts1[0], ts1[-1]),\
                    sq.fetch_hourly_vals_from_PIno(PI_Q, ts2[0], ts2[-1])])

    PI_T_sup = PI_T_sup_dict[station]
    df['T_sup']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_T_sup, ts1[0], \
                    ts1[-1]),sq.fetch_hourly_vals_from_PIno(PI_T_sup, ts2[0], ts2[-1])])
    PI_T_ret = PI_T_ret_dict[station]
    df['T_ret']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_T_ret, ts1[0], \
                    ts1[-1]),sq.fetch_hourly_vals_from_PIno(PI_T_ret, ts2[0], ts2[-1])])
    df['ts'] = all_ts
    df['cons'] = specific_heat_water * density_water * df['Q'] * (df['T_sup'] -
                                                                  df['T_ret'])
    Tout1 = sq.fetch_BrabrandSydWeather('Tout', ts1[0], ts1[-1])
    Tout2 = sq.fetch_BrabrandSydWeather('Tout', ts2[0], ts2[-1])
    Tout = np.concatenate([Tout1, Tout2])
    Tout_low_pass = [
        Tout[range(i - 23, i + 1)].mean() for i in range(len(Tout))
    ]
    df['Toutsmooth'] = Tout_low_pass

    Tsup_vs_Tout(df, station)

    #%% fitting and testing consumption prediction
    fit_data, vali_data, test_data, all_data = load_cons_model_dfs(df)
    fit_y = fit_data['cons']
    columns = ['cons24hbefore', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
    X = fit_data[columns]
    res = mlin_regression(fit_y, X, add_const=False)

    fiterr = res.fittedvalues - fit_y
    print "Errors fit period: ", rmse(fiterr), mae(fiterr), mape(fiterr, fit_y)

    vali_pred = linear_map(vali_data, res.params, columns)
    valierr = vali_pred - vali_data['cons']
    print "Errors validation period: ", rmse(valierr), mae(valierr), mape(
        valierr, vali_data['cons'])

    test_pred = linear_map(test_data, res.params, columns)
    testerr = test_pred - test_data['cons']
    print "Errors test period: ", rmse(testerr), mae(testerr), mape(
        testerr, test_data['cons'])

    plt.figure()

    ens_dfs = load_cons_model_ens_dfs(df)
    ens_preds = np.empty((len(ens_dfs[0]), len(ens_dfs)))
    for edf, i in zip(ens_dfs, range(len(ens_dfs))):
        ens_pred = linear_map(edf, res.params, columns)
        ens_preds[:, i] = ens_pred
        plt.plot_date(all_data.index, ens_pred, 'grey', lw=0.5)

    ens_preds = pd.DataFrame(ens_preds, index=all_data.index)
    plt.plot_date(all_data.index, all_data['cons'], 'k-', lw=2)
    plt.plot_date(all_data.index,
                  np.concatenate([res.fittedvalues, vali_pred, test_pred]),
                  'r-',
                  lw=2)
    plt.title(station + ' forecasts of consumption')
    nonfit_errors = pd.concat([valierr, testerr])

    all_pred = np.concatenate([res.fittedvalues, vali_pred, test_pred])
    all_pred = pd.Series(all_pred, index=all_data.index)
    print res.summary()

    #%%
    TminofTout_fun = get_TminofTout_func(df, station, frac_below=0.005)

    sim_input = df.ix[all_data.index]
    sim_input['T_ret1hbefore'] = np.roll(sim_input['T_ret'], 1)
    sim_input['cons_pred'] = all_pred

    sc2_errormargin = pd.Series(no_sigma * np.ones(len(sim_input)) *
                                nonfit_errors.std(),
                                index=sim_input.index)

    nonfit_ts_start = vali_data.index[0]
    nonfit_ts_end = test_data.index[-1]

    quantile_sc2 = 1. - percent_above_forecasterrormargin(\
                    sc2_errormargin.loc[nonfit_ts_start:nonfit_ts_end], \
                    sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \
                    sim_input.loc[nonfit_ts_start:nonfit_ts_end,'cons'])
    sc3_model_uncert = model_based_uncertainty_alaGorm(\
                            ens_preds.loc[nonfit_ts_start:nonfit_ts_end], \
                            sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \
                            sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons'], no_sigma, quantile_sc2)
    sc3_errormargin = pd.Series(no_sigma * ens_preds.std(axis=1) +
                                sc3_model_uncert,
                                index=sim_input.index)

    sig_m = model_based_sigma_alaChi2(
        ens_preds.loc[nonfit_ts_start:nonfit_ts_end],
        sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'],
        sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons'])
    sig_t = np.sqrt(ens_preds.std(axis=1)**2 + sig_m**2)
    sc35scale = total_uncertainty_scale_alaChi2(\
                                ens_preds.loc[nonfit_ts_start:nonfit_ts_end],\
                                sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'],\
                                sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons'],\
                                quantile_sc2)
    print sig_m
    #sc35_errormargin = pd.Series(no_sigma*np.sqrt(ens_preds.std(axis=1)**2+sig_m**2), index=sim_input.index)
    sc35_errormargin = pd.Series(sc35scale * sig_t, index=sim_input.index)

    use_sc35 = False
    if use_sc35:
        sc3_errormargin = sc35_errormargin

    sim_results_sc2 = simulate_operation(sim_input, sc2_errormargin,
                                         TminofTout_fun, station)
    sim_results_sc3 = simulate_operation(sim_input, sc3_errormargin,
                                         TminofTout_fun, station)

    #%% synthetic consumption, controlled variable model uncertainty

    model_stds = [
        0.5 * sim_input['cons'].std(), 0.1 * sim_input['cons'].std(),
        0.05 * sim_input['cons'].std()
    ]  # sim_input['cons'].std()*np.linspace(0,1,10)
    sc2_synth_results = []
    sc3_synth_results = []
    model_uncerts = []
    for model_std in model_stds:
        synth_cons = gen_synthetic_cons(ens_preds, sim_input['cons_pred'],
                                        model_std)
        sim_input_synth = sim_input.copy(deep=True)
        sim_input_synth['cons'] = synth_cons
        synth_resid = sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end,
                                          'cons_pred'] - sim_input_synth.loc[
                                              nonfit_ts_start:nonfit_ts_end,
                                              'cons']
        sc2_errormargin_synth = pd.Series(
            no_sigma * np.ones(len(sim_input_synth)) * synth_resid.std(),
            index=sim_input_synth.index)
        quantile_sc2_synth = 1. - percent_above_forecasterrormargin(\
                        sc2_errormargin_synth.loc[nonfit_ts_start:nonfit_ts_end], \
                        sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \
                        sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end,'cons'])
        print "Sc2 q: ", quantile_sc2_synth
        sc3_model_uncert_synth = model_based_uncertainty_alaGorm(\
                                ens_preds.loc[nonfit_ts_start:nonfit_ts_end], \
                                sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \
                                sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons'], no_sigma, quantile_sc2_synth)
        model_uncerts.append(sc3_model_uncert_synth)
        sc3_errormargin_synth = pd.Series(no_sigma * ens_preds.std(axis=1) +
                                          sc3_model_uncert_synth,
                                          index=sim_input_synth.index)

        sim_results_sc2_synth = simulate_operation(sim_input_synth,
                                                   sc2_errormargin_synth,
                                                   TminofTout_fun, station)
        sim_results_sc3_synth = simulate_operation(sim_input_synth,
                                                   sc3_errormargin_synth,
                                                   TminofTout_fun, station)
        sc2_synth_results.append(sim_results_sc2_synth)
        sc3_synth_results.append(sim_results_sc3_synth)

    mean_Tsupdiff = []
    mean_heatlossreduced = []
    for sc2_res, sc3_res in zip(sc2_synth_results, sc3_synth_results):
        mean_Tsupdiff.append(np.mean(sc2_res['T_sup'] - sc3_res['T_sup']))
        mean_heatlossreduced.append(
            np.mean(100 * (1 - (sc3_res['T_sup'] - T_grnd) /
                           (sc2_res['T_sup'] - T_grnd))))

    plt.figure()
    plt.plot(model_uncerts, mean_Tsupdiff, 'k.')
    plt.title('Mean temp reduction vs model uncert.')

    print "Perc above errormargin, sc2: ", percent_above_forecasterrormargin(\
                    sc2_errormargin.loc[nonfit_ts_start:nonfit_ts_end], \
                    sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \
                    sim_input.loc[nonfit_ts_start:nonfit_ts_end,'cons'])
    print "Perc above errormargin, sc3: ", percent_above_forecasterrormargin(sc3_errormargin.loc[nonfit_ts_start:nonfit_ts_end], \
                    sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \
                    sim_input.loc[nonfit_ts_start:nonfit_ts_end,'cons'])
    print "mean errormargin, sc2: ", sc2_errormargin.mean()
    print "mean errormargin, sc3: ", sc3_errormargin.mean()
    print "rms errormargin, sc2: ", rmse(sc2_errormargin)
    print "rms errormargin, sc3: ", rmse(sc3_errormargin)

    print "Synth Perc above errormargin, sc2: ", percent_above_forecasterrormargin(\
                    sc2_errormargin_synth.loc[nonfit_ts_start:nonfit_ts_end], \
                    sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \
                    sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end,'cons'])
    print "Synth  Perc above errormargin, sc3: ", percent_above_forecasterrormargin(sc3_errormargin_synth.loc[nonfit_ts_start:nonfit_ts_end], \
                    sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \
                    sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end,'cons'])
    print "Synth mean errormargin, sc2: ", sc2_errormargin_synth.mean()
    print "Synth mean errormargin, sc3: ", sc3_errormargin_synth.mean()
    print "Synth rms errormargin, sc2: ", rmse(sc2_errormargin_synth)
    print "Synth rms errormargin, sc3: ", rmse(sc3_errormargin_synth)

    #% error margins:
    fig_error_margins(sc2_errormargin, sc3_errormargin, sim_input,
                      sc3_model_uncert, station, no_sigma)
    fig_error_margins(sc2_errormargin_synth, sc3_errormargin_synth,
                      sim_input_synth, sc3_model_uncert_synth, station,
                      no_sigma)

    sns.jointplot(np.abs(nonfit_errors),
                  ens_preds.loc[nonfit_ts_start:nonfit_ts_end].std(axis=1))
    sns.jointplot(np.abs(synth_resid),
                  ens_preds.loc[nonfit_ts_start:nonfit_ts_end].std(axis=1))

    #% T Q scatter plots
    fig, axes = plt.subplots(3, 1, figsize=(10, 16), sharex=True, sharey=True)
    axes[0].scatter(sim_input['T_sup'], sim_input['Q'], c=sim_input['cons'])
    axes[0].set_title(station + ': ' + 'Scenario 1')

    axes[1].scatter(sim_results_sc2['T_sup'],
                    sim_results_sc2['Q'],
                    c=sim_results_sc2['cons'])
    axes[1].set_title(station + ': Scenario 2: ' + str(no_sigma) + r'$\sigma$')
    axes[2].scatter(sim_results_sc3['T_sup'],
                    sim_results_sc3['Q'],
                    c=sim_results_sc3['cons'])
    axes[2].set_title(station + ': Scenario 3: ' + str(no_sigma) + r'$\sigma$')
    axes[1].set_ylabel(u'Water flow rate [m%s/h]' % uni_tothethird, size=8)
    axes[2].set_xlabel(u'Supply temperature [%sC]' % uni_degree, size=8)
    fig.tight_layout()
    fig.savefig(figpath + 'TQscatter_%2.2f' % (no_sigma) + 'sigma_' + station +
                '.pdf')

    # T_sup time series fig
    fig, axes = plt.subplots(3, 1, figsize=(15, 15), sharex=True)
    axes[0].plot_date(sim_input.index,
                      sim_input['T_sup'],
                      'k-',
                      label='Scenario 1')
    axes[0].plot_date(sim_input.index,
                      sim_results_sc2['T_sup'],
                      'r-',
                      lw=3,
                      label='Scenario 2')
    axes[0].plot_date(sim_input.index,
                      sim_results_sc2['T_sup'],
                      'g-',
                      label='Scenario 3')
    axes[0].set_title(station + ', ' + str(no_sigma) + r'$\sigma$' +
                      ': Supply temperature')
    axes[0].set_ylabel(u'Supply temperature [%sC]' % uni_degree, size=8)
    axes[0].legend()
    axes[1].plot_date(sim_input.index,
                      sim_input['Q'],
                      'k-',
                      label='Scenario 1')
    axes[1].plot_date(sim_input.index,
                      sim_results_sc2['Q'],
                      'r-',
                      label='Scenario 2')
    axes[1].plot_date(sim_input.index,
                      sim_results_sc2['Q_ref'],
                      'b-',
                      lw=1,
                      label=r'$Q_{ref}$' + 'Scenario 2')
    axes[1].set_ylabel(u'Water flow rate [m%s/h]' % uni_tothethird, size=8)
    axes[1].legend()
    axes[2].plot_date(sim_input.index,
                      sim_input['Q'],
                      'k-',
                      label='Scenario 1')
    axes[2].plot_date(sim_input.index,
                      sim_results_sc3['Q'],
                      'g-',
                      label='Scenario 3')
    axes[2].plot_date(sim_input.index,
                      sim_results_sc3['Q_ref'],
                      'b-',
                      lw=1,
                      label=r'$Q_{ref}$' + 'Scenario 3')
    axes[2].set_ylabel(u'Water flow rate [m%s/h]' % uni_tothethird, size=8)
    axes[2].legend()
    fig.savefig(figpath + 'TQtimeseries_%2.2f' % (no_sigma) + 'sigma_' +
                station + '.pdf')

    # Differencen in supply temperature between the scenarios
    fig_heat_loss(sim_input, sim_results_sc2, sim_results_sc3, station,
                  no_sigma)
    fig_heat_loss(sim_input_synth,
                  sim_results_sc2_synth,
                  sim_results_sc3_synth,
                  station,
                  no_sigma,
                  save=False)

    return

    #%% The below section only runs if we view Tmin as a function of Q (the old way)
    # note: SOME OF THIS USES CONSTANT TRET!!
    TminofQ = False
    if TminofQ:
        # outlierdetection
        X = df[['T_sup', 'Q']]
        outlier_detection = False
        if outlier_detection:
            detect_outliers(X, station)
        else:
            inlierpred = np.ones(len(df), dtype=bool)

        fig, ax1 = plt.subplots()
        ax2 = ax1.twinx()
        cond_df = df
        ax1.plot_date(np.array(cond_df['ts']), np.array(cond_df['Q']), 'b')

        ax2.plot_date(np.array(cond_df['ts']), np.array(cond_df['T_sup']),
                      'r-')

        plt.figure()
        plt.plot_date(df['ts'], df['cons'], 'g-')
        plt.title(station)

        plt.figure()
        plt.scatter(df['T_sup'], df['Q'], c=df['cons'], alpha=0.25)
        plt.colorbar()
        plt.title(station)

        outliers = df[np.logical_not(inlierpred)]

        plt.plot(np.array(outliers['T_sup']), np.array(outliers['Q']), 'ko')

        #%%
        #plot_Tmin_Q_quantiles(df, inlierpred)
        Q = np.linspace(df[inlierpred]['Q'].min(), df[inlierpred]['Q'].max(),
                        500)
        qs = [0.001, 0.005, 0.01, 0.02275, 0.05, 0.1]
        for q in qs:
            T_min_func, Q_quantiles = get_Tmin_func(df[inlierpred],
                                                    T_min_q=q,
                                                    N_Q_q=21)
            plt.plot(T_min_func(Q), Q, label=str(q), lw=2)
        plt.legend()
        for Q_qua in Q_quantiles:
            plt.axhline(y=Q_qua)

        #%% P vs Q (T=Tmin(Q))
        T_min_func, Q_quantiles = get_Tmin_func(df, T_min_q=0.02275, N_Q_q=21)

        plt.figure()
        plt.plot(Q, T_min_func(Q), 'r', label='Tmin')
        P = specific_heat_water * density_water * Q * (T_min_func(Q) - T_ret)
        plt.plot(Q, P, 'b', label='Cons')
        plt.xlabel('Q')
        plt.legend()

        plt.figure()
        simP = df['cons']
        res = [
            op_model(cons, T_min_func, Q_max=Q_max_dict[station], T_ret=T_ret)
            for cons in simP
        ]
        simT, simQ = zip(*res)
        plt.scatter(df['T_sup'], df['Q'], c='k', alpha=0.1)
        plt.scatter(simT, simQ, c=simP)
        plt.colorbar()
Exemple #28
0
Created on Thu Feb 11 12:30:28 2016

@author: Magnus Dahl
"""

import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pandas as pd
import datetime as dt
import gurobipy as gb
import ensemble_tools as ens
import sql_tools as sq

# Module-level setup: defines the two time periods, the physical constants and
# the measured supply temperature / flow data used further down in this script.
plt.close('all')
# Two separate periods of timesteps; the days between 2016-01-15 and
# 2016-01-20 are not covered by either of them.
# NOTE(review): gen_hourly_timesteps presumably returns a list of hourly
# datetimes (the lists are joined with + below) - confirm in ensemble_tools.
ts1 = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0))
ts2 = ens.gen_hourly_timesteps(dt.datetime(2016,1,20,1), dt.datetime(2016,2,5,0))
all_ts = ts1 + ts2

specific_heat_water = 1.17e-6 # MWh/kgKelvin
density_water = 980 # kg/m3 at 65 deg C
# Constant return temperature used by this script.
# NOTE(review): presumably in deg C, like the density comment above - confirm.
T_ret = 36.5

# Identifiers handed to sq.fetch_hourly_vals_from_PIno for the supply
# temperature and the flow rate.
PI_T_sup = '4.146.120.29.HA.101'
PI_Q = 'K.146A.181.02.HA.101'

# Each column is fetched once per period and the two results are concatenated
# in period order (ts1 first, then ts2).
df = pd.DataFrame()
df['T_sup']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_T_sup, ts1[0], \
            ts1[-1]),sq.fetch_hourly_vals_from_PIno(PI_T_sup, ts2[0], ts2[-1])])
df['Q']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_Q, ts1[0], ts1[-1]),\
            sq.fetch_hourly_vals_from_PIno(PI_Q, ts2[0], ts2[-1])])
def Q_T_heatloss_timeseries():  # figure 5
    """Plot flow rate, supply temperature and heat loss reduction over time.

    Runs hoerning_pump_model() and draws a three-panel figure (shared x axis):
    flow rate with shaded bands and margin lines (top), supply temperature
    (middle) and the percentage heat loss reduction of Scenario 2 relative to
    Scenario 1 (bottom). The figure is saved as a PDF to a hard-coded path.

    Relies on names that are not defined in this function: dcolwidth,
    darkgrey, red, yellow, green, blue, uni_tothethird, uni_degree, operator,
    DateFormatter and mpl.

    Returns
    -------
    tuple
        (heat_loss_reduction, heat_loss_reduction_half_model_unc), both in
        percent, computed from the dynamic-capacity supply temperatures
        relative to the constant-capacity supply temperature.
    """
    T_sup_const_cap, T_sup_dyn_cap, Q_const_cap, Q_dyn_cap, model_conf_int, T_sup_dyn_cap_half_model_unc = hoerning_pump_model(
    )
    plt.close('all')

    fig, [ax1, ax2,
          ax3] = plt.subplots(3,
                              1,
                              sharex=True,
                              figsize=(dcolwidth, 0.65 * dcolwidth),
                              gridspec_kw={'height_ratios': [3, 1, 1]})
    #fig, [ax1, ax2, ax3] = plt.subplots(3, 1, sharex=True, figsize=(dcolwidth, 0.55*dcolwidth))

    # The data covers two separate periods; every series below is split at
    # len(ts1) and plotted per period, so no line is drawn across the gap.
    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                   dt.datetime(2016, 1, 15, 0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                   dt.datetime(2016, 2, 5, 0))

    # Margin line: equals 410 where model_conf_int is zero and drops to 360
    # where model_conf_int is at its maximum.
    # NOTE(review): 410/360/340 look like flow limits in m3/h (they are drawn
    # on the flow-rate axis) - confirm their meaning with the pump model.
    red_area_lb1 = 410 - (410 - 360) * (model_conf_int[0:len(ts1)] /
                                        np.max(model_conf_int))
    red_area_lb2 = 410 - (410 - 360) * (model_conf_int[len(ts1):] /
                                        np.max(model_conf_int))
    # The second margin line is the first one scaled by 340/360.
    yellow_area_lb1 = (340. / 360) * red_area_lb1
    yellow_area_lb2 = (340. / 360) * red_area_lb2
    limlw = 0.75
    # Only the first segment of each series carries a label, so each series
    # gets a single legend entry.
    ax1.plot_date(ts1,
                  red_area_lb1,
                  '-',
                  c=darkgrey,
                  lw=limlw,
                  label='Scenario 2 security margins')
    ax1.plot_date(ts2, red_area_lb2, '-', c=darkgrey, lw=limlw)
    ax1.plot_date(ts1, yellow_area_lb1, '-', c=darkgrey, lw=limlw)
    ax1.plot_date(ts2, yellow_area_lb2, '-', c=darkgrey, lw=limlw)
    # Fixed shaded bands: 360-410 in red and 340-360 in yellow.
    ax1.fill_between(ts1,
                     360 * np.ones(len(ts1)),
                     410 * np.ones(len(ts1)),
                     facecolor=red,
                     alpha=0.25)
    ax1.fill_between(ts2,
                     360 * np.ones(len(ts2)),
                     410 * np.ones(len(ts2)),
                     facecolor=red,
                     alpha=0.25)
    ax1.fill_between(ts1,
                     340 * np.ones(len(ts1)),
                     360 * np.ones(len(ts1)),
                     facecolor=yellow,
                     alpha=0.25)
    ax1.fill_between(ts2,
                     340 * np.ones(len(ts2)),
                     360 * np.ones(len(ts2)),
                     facecolor=yellow,
                     alpha=0.25)
    ax1.plot_date(ts1, Q_const_cap[0:len(ts1)], '-', c=red, label='Scenario 1')
    ax1.plot_date(ts2, Q_const_cap[len(ts1):], '-', c=red)
    ax1.plot_date(ts1,
                  Q_dyn_cap[0:len(ts1)],
                  '-',
                  c=green,
                  lw=1,
                  label='Scenario 2')
    ax1.plot_date(ts2, Q_dyn_cap[len(ts1):], '-', c=green, lw=1)
    # Dashed horizontal line at 410 across both periods.
    ax1.plot_date(ts1 + ts2, 410 * np.ones(len(ts1 + ts2)), '--', c=red, lw=1)
    # Sort the legend entries alphabetically by label instead of by draw order.
    handles, labels = ax1.get_legend_handles_labels()
    hl = sorted(zip(handles, labels), key=operator.itemgetter(1))
    handles2, labels2 = zip(*hl)

    ax1.legend(handles2, labels2, loc=0, prop={'size': 8})

    ax2.plot_date(ts1,
                  T_sup_const_cap[0:len(ts1)],
                  '-',
                  c=red,
                  label='Scenario 1')
    ax2.plot_date(ts2, T_sup_const_cap[len(ts1):], '-', c=red)
    ax2.plot_date(ts1,
                  T_sup_dyn_cap[0:len(ts1)],
                  '-',
                  c=green,
                  lw=1,
                  label='Scenario 2')
    ax2.plot_date(ts2, T_sup_dyn_cap[len(ts1):], '-', c=green, lw=1)
    ax2.legend(loc=6, prop={'size': 8})

    # NOTE(review): T_grnd is presumably the ground temperature in deg C -
    # confirm the source of the value 6.4.
    T_grnd = 6.4
    # Percentage reduction: 100 * (1 - (T_dyn - T_grnd) / (T_const - T_grnd)).
    heat_loss_reduction = 100 * (1 - (np.array(T_sup_dyn_cap) - T_grnd) /
                                 (np.array(T_sup_const_cap) - T_grnd))
    # Same formula for the "half model uncertainty" supply temperature; this
    # series is returned but not plotted.
    heat_loss_reduction_half_model_unc = 100 * (
        1 - (np.array(T_sup_dyn_cap_half_model_unc) - T_grnd) /
        (np.array(T_sup_const_cap) - T_grnd))

    redu_heat_loss1 = heat_loss_reduction[0:len(ts1)]
    redu_heat_loss2 = heat_loss_reduction[len(ts1):]
    ax3.plot_date(ts1, redu_heat_loss1, '-', c=blue, lw=1)
    ax3.plot_date(ts2, redu_heat_loss2, '-', c=blue, lw=1)
    ax3.xaxis.set_major_formatter(DateFormatter('%b %d \n %Y'))
    ax1.tick_params(axis='y', which='major', labelsize=8)
    ax1.set_ylim(150, 450)
    ax2.tick_params(axis='y', which='major', labelsize=8)
    ax3.tick_params(axis='y', which='major', labelsize=8)
    ax1.set_ylabel(u'Flow rate  [m%s/h]' % uni_tothethird, size=8)
    ax2.set_ylabel(u'Supply\ntemperature [%sC]' % uni_degree, size=8)
    ax3.set_ylabel('Heat loss\nreduction [%]', size=8)

    # One major tick per percentage point on the heat loss axis.
    mjloc = mpl.ticker.MultipleLocator(1)
    ax3.yaxis.set_major_locator(mjloc)
    ax3.set_xlim(dt.datetime(2015, 12, 17, 0), dt.datetime(2016, 2, 5, 0))
    fig.tight_layout()

    # NOTE: absolute, machine-specific output path.
    fig.savefig(
        'Q:/Projekter/Ens Article 1/figures/Q_T_heatloss_timeseries.pdf')

    return heat_loss_reduction, heat_loss_reduction_half_model_unc
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 28 11:04:19 2016

@author: azfv1n8
"""
import datetime as dt
import numpy as np
import pandas as pd

import ensemble_tools as ens
import sql_tools as sq
from model_selection import linear_map, mlin_regression, gen_all_combinations, summary_to_file, mae, mape, rmse

#%%
# Three consecutive-in-time periods used for fitting, validating and testing.
# Note the gap between the end of the fit period (2016-01-15) and the start
# of the validation period (2016-01-20).
fit_ts = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0))
vali_ts = ens.gen_hourly_timesteps(dt.datetime(2016,1,20,1), dt.datetime(2016,2,5,0))
test_ts = ens.gen_hourly_timesteps(dt.datetime(2016,2,5,1), dt.datetime(2016,3,1,0))

all_ts = fit_ts + vali_ts + test_ts

# Weather variable names used with the ensemble data elsewhere in this script.
weathervars=['Tout', 'vWind', 'sunRad', 'hum']

# One (initially empty) frame per period; columns are added below.
fit_data = pd.DataFrame()
vali_data = pd.DataFrame()            
test_data = pd.DataFrame()
                
# Production fetched over each period shifted back by one day, i.e. the
# value 24 hours before each timestep of the period.
fit_data['prod24h_before'] = sq.fetch_production(fit_ts[0]+dt.timedelta(days=-1), fit_ts[-1]+dt.timedelta(days=-1))
vali_data['prod24h_before'] = sq.fetch_production(vali_ts[0]+dt.timedelta(days=-1), vali_ts[-1]+dt.timedelta(days=-1))
test_data['prod24h_before'] = sq.fetch_production(test_ts[0]+dt.timedelta(days=-1), test_ts[-1]+dt.timedelta(days=-1))
def main(argv):
    plt.close('all')
    
    try:
        station = argv[0]
        if not station in PI_T_sup_dict.keys():
            print "Wrong station, use rundhoej, holme or hoerning"
            return
    except:
        print "No station provided. Defaults to holme."
        station = 'holme'
        
    print station
    
    plt.close('all')
    #%%
    fit_ts = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0))
    vali_ts = ens.gen_hourly_timesteps(dt.datetime(2016,1,20,1), dt.datetime(2016,2,5,0))
    test_ts = ens.gen_hourly_timesteps(dt.datetime(2016,2,5,1), dt.datetime(2016,4,1,0))
    
    all_ts = fit_ts + vali_ts + test_ts
    
    weathervars=['Tout', 'vWind', 'sunRad', 'hum']
    
    fit_data = pd.DataFrame()
    vali_data = pd.DataFrame()            
    test_data = pd.DataFrame()
    
    cons_key = sq.consumption_place_key_dict[station]
    fit_data['cons24h_before'] = sq.fetch_consumption(cons_key, fit_ts[0]+dt.timedelta(days=-1), fit_ts[-1]+dt.timedelta(days=-1))
    vali_data['cons24h_before'] = sq.fetch_consumption(cons_key, vali_ts[0]+dt.timedelta(days=-1), vali_ts[-1]+dt.timedelta(days=-1))
    test_data['cons24h_before'] = sq.fetch_consumption(cons_key, test_ts[0]+dt.timedelta(days=-1), test_ts[-1]+dt.timedelta(days=-1))
    
    fit_data['cons'] = sq.fetch_consumption(cons_key, fit_ts[0], fit_ts[-1])
    vali_data['cons'] = sq.fetch_consumption(cons_key, vali_ts[0], vali_ts[-1])
    test_data['cons'] = sq.fetch_consumption(cons_key, test_ts[0], test_ts[-1])
    for v in weathervars:
        fit_data['%s24hdiff'%v] = ens.load_ens_timeseries_as_df(\
                                    ts_start=fit_ts[0],\
                                    ts_end=fit_ts[-1], \
                                    weathervars=[v]).mean(axis=1) \
                                  - ens.load_ens_timeseries_as_df(\
                                    ts_start=fit_ts[0]+dt.timedelta(days=-1),\
                                    ts_end=fit_ts[-1]+dt.timedelta(days=-1), \
                                    weathervars=[v]).mean(axis=1)
        vali_data['%s24hdiff'%v] = ens.load_ens_timeseries_as_df(\
                                    ts_start=vali_ts[0],\
                                    ts_end=vali_ts[-1], \
                                    weathervars=[v]).mean(axis=1) \
                                  - ens.load_ens_timeseries_as_df(\
                                    ts_start=vali_ts[0]+dt.timedelta(days=-1),\
                                    ts_end=vali_ts[-1]+dt.timedelta(days=-1), \
                                    weathervars=[v]).mean(axis=1)
        test_data['%s24hdiff'%v] = ens.load_ens_timeseries_as_df(\
                                    ts_start=test_ts[0],\
                                    ts_end=test_ts[-1], \
                                    weathervars=[v]).mean(axis=1) \
                                  - ens.load_ens_timeseries_as_df(\
                                    ts_start=test_ts[0]+dt.timedelta(days=-1),\
                                    ts_end=test_ts[-1]+dt.timedelta(days=-1), \
                                    weathervars=[v]).mean(axis=1)
                                    
                                    
    #%%
    all_data = pd.concat([fit_data, vali_data, test_data])
    no_blind_data = pd.concat([fit_data, vali_data])
    
    corr = no_blind_data.corr()
    
    fit_y = fit_data['cons']
    columns = ['cons24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
    X = fit_data[columns]
    res = mlin_regression(fit_y,X, add_const=False)
    
    fiterr = res.fittedvalues - fit_y
    print "Errors fit period: ", rmse(fiterr), mae(fiterr), mape(fiterr, fit_y)
    
    vali_pred = linear_map(vali_data, res.params, columns)
    valierr = vali_pred - vali_data['cons']
    print "Errors validation period: ", rmse(valierr), mae(valierr), mape(valierr, vali_data['cons'])
    
    test_pred = linear_map(test_data, res.params, columns)
    testerr = test_pred - test_data['cons']
    print "Errors test period: ", rmse(testerr), mae(testerr), mape(testerr, test_data['cons'])
    
    plt.figure()
    plt.plot_date(all_ts, all_data['cons'], 'k-')
    plt.plot_date(all_ts, np.concatenate([res.fittedvalues, vali_pred, test_pred]), 'r-')
Exemple #32
0
def autocorr(x):
    """Return the non-negative-lag half of the raw autocorrelation of x.

    The full cross-correlation of ``x`` with itself has ``2*len(x) - 1``
    entries and its zero-lag term sits in the middle; this returns the entries
    from that middle element onwards (lags 0, 1, ..., len(x) - 1). The values
    are not normalised.

    Parameters
    ----------
    x : 1-D array-like of numbers

    Returns
    -------
    numpy.ndarray of length ``len(x)``
    """
    result = np.correlate(x, x, mode='full')
    # Floor division: with true division size/2 is a float, which is not a
    # valid slice index.
    return result[result.size // 2:]
    
 
def autocorr2(x, lag=1):
    """Correlation coefficient between x and x circularly shifted by lag.

    The shift is done with np.roll, so the elements pushed off one end
    re-enter at the other end and take part in the correlation.
    """
    shifted = np.roll(x, lag)
    corr_matrix = np.corrcoef(x, shifted)
    # Off-diagonal entry of the 2x2 matrix: x versus its shifted copy.
    return corr_matrix[0, 1]
    

def my_diff(x, lag=24):
    """Return x minus a copy of x circularly shifted by lag positions.

    Because np.roll wraps around, the first ``lag`` entries of the result are
    differences against the last ``lag`` entries of x.
    """
    shifted = np.roll(x, lag)
    return x - shifted

# Production over 2013-01-01 01:00 to 2016-01-01 00:00.
ts = ens.gen_hourly_timesteps(dt.datetime(2013, 1, 1, 1), dt.datetime(2016,1,1,0))
prod = sq.fetch_production(ts[0], ts[-1])

# Standardise to zero mean and unit standard deviation.
norm_prod = (prod-prod.mean())/prod.std()

plt.plot_date(ts, prod, '-')


# Raw (unnormalised) autocorrelation of the standardised series.
auto_c = autocorr(norm_prod)

# Correlation coefficient of the raw production with itself at each lag
# 0..335 (2*168 lags; two weeks if the series is hourly).
rho_i = [autocorr2(prod, i) for i in range(2*168)]

# Production minus production 24 steps earlier (my_diff's default lag).
prod_24h_diff = my_diff(prod)

# Same lag scan on the 24-step differenced series.
rho2 =  [autocorr2(prod_24h_diff, i) for i in range(2*168)]
prod_48h_diff = my_diff(prod, 48)
def main(argv):
    plt.close('all')
    
    try:
        station = argv[0]
        no_sigma = argv[1]
        if not station in PI_T_sup_dict.keys():
            print "Use rundhoej, holme or hoerning and a float for the uncertainty bound"
            return
    except:
        print "No station provided. Defaults to holme, no_sigma=2"
        station = 'holme'
        no_sigma=2
        
    print station, no_sigma
    # old tsstart dt.datetime(2014,12,17,1)
    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015,3,1,1), dt.datetime(2016,1,15,0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016,1,19,1), dt.datetime(2016,3,1,0))
    all_ts = ts1 + ts2
       
    
    
    df = pd.DataFrame(index=all_ts)
    if station == 'holme':
        PI_Q1 = PI_Q_dict[station]
        PI_Q2 = PI_Q_dict2[station]
        df['Q1']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_Q1, ts1[0], ts1[-1]),\
                    sq.fetch_hourly_vals_from_PIno(PI_Q1, ts2[0], ts2[-1])])
        df['Q2']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_Q2, ts1[0], ts1[-1]),\
                    sq.fetch_hourly_vals_from_PIno(PI_Q2, ts2[0], ts2[-1])])
        df['Q'] = df['Q1']+df['Q2']    
    else:
        PI_Q = PI_Q_dict[station]
        df['Q']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_Q, ts1[0], ts1[-1]),\
                    sq.fetch_hourly_vals_from_PIno(PI_Q, ts2[0], ts2[-1])])
    
    PI_T_sup = PI_T_sup_dict[station]
    df['T_sup']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_T_sup, ts1[0], \
                    ts1[-1]),sq.fetch_hourly_vals_from_PIno(PI_T_sup, ts2[0], ts2[-1])])    
    PI_T_ret = PI_T_ret_dict[station]
    df['T_ret']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_T_ret, ts1[0], \
                    ts1[-1]),sq.fetch_hourly_vals_from_PIno(PI_T_ret, ts2[0], ts2[-1])]) 
    df['ts'] = all_ts
    df['cons'] = specific_heat_water*density_water*df['Q']*(df['T_sup']-df['T_ret'])
    Tout1 = sq.fetch_BrabrandSydWeather('Tout', ts1[0], ts1[-1])
    Tout2 = sq.fetch_BrabrandSydWeather('Tout', ts2[0], ts2[-1])
    Tout = np.concatenate([Tout1, Tout2])
    Tout_low_pass = [Tout[range(i-23,i+1)].mean() for i in range(len(Tout))]
    df['Toutsmooth'] = Tout_low_pass

    Tsup_vs_Tout(df, station)

   
    
    #%% fitting and testing consumption prediction
    fit_data, vali_data, test_data, all_data = load_cons_model_dfs(df)
    fit_y = fit_data['cons']
    columns = ['cons24hbefore', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
    X = fit_data[columns]
    res = mlin_regression(fit_y,X, add_const=False)
    
    fiterr = res.fittedvalues - fit_y
    print "Errors fit period: ", rmse(fiterr), mae(fiterr), mape(fiterr, fit_y)
    
    vali_pred = linear_map(vali_data, res.params, columns)
    valierr = vali_pred - vali_data['cons']
    print "Errors validation period: ", rmse(valierr), mae(valierr), mape(valierr, vali_data['cons'])
    
    test_pred = linear_map(test_data, res.params, columns)
    testerr = test_pred - test_data['cons']
    print "Errors test period: ", rmse(testerr), mae(testerr), mape(testerr, test_data['cons'])
    
    plt.figure()
    
    ens_dfs = load_cons_model_ens_dfs(df)
    ens_preds = np.empty((len(ens_dfs[0]), len(ens_dfs)))
    for edf, i in zip(ens_dfs, range(len(ens_dfs))):
        ens_pred = linear_map(edf, res.params, columns)
        ens_preds[:,i] = ens_pred
        plt.plot_date(all_data.index, ens_pred, 'grey', lw=0.5)
        
    ens_preds = pd.DataFrame(ens_preds, index=all_data.index)
    plt.plot_date(all_data.index, all_data['cons'], 'k-', lw=2)
    plt.plot_date(all_data.index, np.concatenate([res.fittedvalues, vali_pred, test_pred]), 'r-', lw=2)
    plt.title(station + ' forecasts of consumption')
    nonfit_errors = pd.concat([valierr, testerr])
    
    all_pred = np.concatenate([res.fittedvalues, vali_pred, test_pred])
    all_pred = pd.Series(all_pred, index=all_data.index)
    print res.summary()
    
    #%% 
    TminofTout_fun = get_TminofTout_func(df, station, frac_below = 0.005)    

    sim_input = df.ix[all_data.index]
    sim_input['T_ret1hbefore'] = np.roll(sim_input['T_ret'], 1)
    sim_input['cons_pred'] = all_pred
    
    
    
    sc2_errormargin = pd.Series(no_sigma*np.ones(len(sim_input))*nonfit_errors.std(), index=sim_input.index)
    
    nonfit_ts_start = vali_data.index[0]
    nonfit_ts_end = test_data.index[-1]
    
    quantile_sc2 = 1. - percent_above_forecasterrormargin(\
                    sc2_errormargin.loc[nonfit_ts_start:nonfit_ts_end], \
                    sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \
                    sim_input.loc[nonfit_ts_start:nonfit_ts_end,'cons'])
    sc3_model_uncert = model_based_uncertainty_alaGorm(\
                            ens_preds.loc[nonfit_ts_start:nonfit_ts_end], \
                            sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \
                            sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons'], no_sigma, quantile_sc2)    
    sc3_errormargin = pd.Series(no_sigma*ens_preds.std(axis=1) + sc3_model_uncert,  index=sim_input.index)

    sig_m = model_based_sigma_alaChi2(ens_preds.loc[nonfit_ts_start:nonfit_ts_end], sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons'])    
    sig_t = np.sqrt(ens_preds.std(axis=1)**2+sig_m**2)
    sc35scale = total_uncertainty_scale_alaChi2(\
                                ens_preds.loc[nonfit_ts_start:nonfit_ts_end],\
                                sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'],\
                                sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons'],\
                                quantile_sc2)    
    print sig_m    
    #sc35_errormargin = pd.Series(no_sigma*np.sqrt(ens_preds.std(axis=1)**2+sig_m**2), index=sim_input.index)
    sc35_errormargin = pd.Series(sc35scale*sig_t, index=sim_input.index)
    
        
    use_sc35 = False
    if use_sc35:
        sc3_errormargin = sc35_errormargin
     
    sim_results_sc2 = simulate_operation(sim_input, sc2_errormargin, TminofTout_fun, station)
    sim_results_sc3 = simulate_operation(sim_input, sc3_errormargin, TminofTout_fun, station)    
    
    #%% synthetic consumption, controlled variable model uncertainty
    
    model_stds = [0.5*sim_input['cons'].std(), 0.1*sim_input['cons'].std(), 0.05*sim_input['cons'].std()]# sim_input['cons'].std()*np.linspace(0,1,10)
    sc2_synth_results = []
    sc3_synth_results = []
    model_uncerts = []
    for model_std in model_stds:
        synth_cons = gen_synthetic_cons(ens_preds, sim_input['cons_pred'], model_std)
        sim_input_synth = sim_input.copy(deep=True)
        sim_input_synth['cons'] = synth_cons
        synth_resid = sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'] - sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons']
        sc2_errormargin_synth = pd.Series(no_sigma*np.ones(len(sim_input_synth))*synth_resid.std(), index=sim_input_synth.index)
        quantile_sc2_synth = 1. - percent_above_forecasterrormargin(\
                        sc2_errormargin_synth.loc[nonfit_ts_start:nonfit_ts_end], \
                        sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \
                        sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end,'cons'])
        print "Sc2 q: ", quantile_sc2_synth
        sc3_model_uncert_synth = model_based_uncertainty_alaGorm(\
                                ens_preds.loc[nonfit_ts_start:nonfit_ts_end], \
                                sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \
                                sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons'], no_sigma, quantile_sc2_synth)
        model_uncerts.append(sc3_model_uncert_synth)
        sc3_errormargin_synth = pd.Series(no_sigma*ens_preds.std(axis=1) + sc3_model_uncert_synth,  index=sim_input_synth.index)
    
        sim_results_sc2_synth = simulate_operation(sim_input_synth, sc2_errormargin_synth, TminofTout_fun, station)
        sim_results_sc3_synth = simulate_operation(sim_input_synth, sc3_errormargin_synth, TminofTout_fun, station)
        sc2_synth_results.append(sim_results_sc2_synth)
        sc3_synth_results.append(sim_results_sc3_synth)

    mean_Tsupdiff = []
    mean_heatlossreduced = []
    for sc2_res, sc3_res in zip(sc2_synth_results, sc3_synth_results):
        mean_Tsupdiff.append(np.mean(sc2_res['T_sup'] - sc3_res['T_sup']))
        mean_heatlossreduced.append(np.mean(100*(1-(sc3_res['T_sup']-T_grnd)/(sc2_res['T_sup'] - T_grnd))))
        
    plt.figure()
    plt.plot(model_uncerts, mean_Tsupdiff, 'k.')
    plt.title('Mean temp reduction vs model uncert.')
        
    print "Perc above errormargin, sc2: ", percent_above_forecasterrormargin(\
                    sc2_errormargin.loc[nonfit_ts_start:nonfit_ts_end], \
                    sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \
                    sim_input.loc[nonfit_ts_start:nonfit_ts_end,'cons'])
    print "Perc above errormargin, sc3: ", percent_above_forecasterrormargin(sc3_errormargin.loc[nonfit_ts_start:nonfit_ts_end], \
                    sim_input.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \
                    sim_input.loc[nonfit_ts_start:nonfit_ts_end,'cons'])
    print "mean errormargin, sc2: ", sc2_errormargin.mean()
    print "mean errormargin, sc3: ", sc3_errormargin.mean()
    print "rms errormargin, sc2: ", rmse(sc2_errormargin)
    print "rms errormargin, sc3: ", rmse(sc3_errormargin)
    
    print "Synth Perc above errormargin, sc2: ", percent_above_forecasterrormargin(\
                    sc2_errormargin_synth.loc[nonfit_ts_start:nonfit_ts_end], \
                    sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \
                    sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end,'cons'])
    print "Synth  Perc above errormargin, sc3: ", percent_above_forecasterrormargin(sc3_errormargin_synth.loc[nonfit_ts_start:nonfit_ts_end], \
                    sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end, 'cons_pred'], \
                    sim_input_synth.loc[nonfit_ts_start:nonfit_ts_end,'cons'])
    print "Synth mean errormargin, sc2: ", sc2_errormargin_synth.mean()
    print "Synth mean errormargin, sc3: ", sc3_errormargin_synth.mean()
    print "Synth rms errormargin, sc2: ", rmse(sc2_errormargin_synth)
    print "Synth rms errormargin, sc3: ", rmse(sc3_errormargin_synth)

    
    #% error margins:
    fig_error_margins(sc2_errormargin, sc3_errormargin, sim_input, sc3_model_uncert, station, no_sigma)
    fig_error_margins(sc2_errormargin_synth, sc3_errormargin_synth, sim_input_synth, sc3_model_uncert_synth, station, no_sigma)
    
    sns.jointplot(np.abs(nonfit_errors), ens_preds.loc[nonfit_ts_start:nonfit_ts_end].std(axis=1))
    sns.jointplot(np.abs(synth_resid), ens_preds.loc[nonfit_ts_start:nonfit_ts_end].std(axis=1))


    #% T Q scatter plots
    fig, axes = plt.subplots(3,1, figsize=(10,16), sharex=True, sharey=True)
    axes[0].scatter(sim_input['T_sup'], sim_input['Q'], c=sim_input['cons'])
    axes[0].set_title(station + ': ' + 'Scenario 1')
    
    axes[1].scatter(sim_results_sc2['T_sup'], sim_results_sc2['Q'], c=sim_results_sc2['cons'])
    axes[1].set_title(station + ': Scenario 2: ' + str(no_sigma) + r'$\sigma$' )
    axes[2].scatter(sim_results_sc3['T_sup'], sim_results_sc3['Q'], c=sim_results_sc3['cons'])
    axes[2].set_title(station + ': Scenario 3: ' + str(no_sigma) + r'$\sigma$')
    axes[1].set_ylabel(u'Water flow rate [m%s/h]'%uni_tothethird, size=8)
    axes[2].set_xlabel(u'Supply temperature [%sC]'%uni_degree, size=8)
    fig.tight_layout()
    fig.savefig(figpath + 'TQscatter_%2.2f'%(no_sigma)  + 'sigma_' + station + '.pdf')

    # T_sup time series fig
    fig, axes = plt.subplots(3,1, figsize=(15,15), sharex=True)
    axes[0].plot_date(sim_input.index, sim_input['T_sup'], 'k-', label='Scenario 1')
    axes[0].plot_date(sim_input.index, sim_results_sc2['T_sup'], 'r-', lw=3, label='Scenario 2')
    axes[0].plot_date(sim_input.index, sim_results_sc2['T_sup'], 'g-', label='Scenario 3')
    axes[0].set_title(station + ', ' + str(no_sigma) + r'$\sigma$' + ': Supply temperature')
    axes[0].set_ylabel(u'Supply temperature [%sC]'%uni_degree, size=8)    
    axes[0].legend()
    axes[1].plot_date(sim_input.index, sim_input['Q'], 'k-', label='Scenario 1' )
    axes[1].plot_date(sim_input.index, sim_results_sc2['Q'], 'r-', label='Scenario 2')
    axes[1].plot_date(sim_input.index, sim_results_sc2['Q_ref'], 'b-', lw=1, label=r'$Q_{ref}$' + 'Scenario 2')
    axes[1].set_ylabel(u'Water flow rate [m%s/h]'%uni_tothethird, size=8)
    axes[1].legend()
    axes[2].plot_date(sim_input.index, sim_input['Q'], 'k-', label='Scenario 1' )
    axes[2].plot_date(sim_input.index, sim_results_sc3['Q'], 'g-', label='Scenario 3')
    axes[2].plot_date(sim_input.index, sim_results_sc3['Q_ref'], 'b-', lw=1, label=r'$Q_{ref}$' + 'Scenario 3')
    axes[2].set_ylabel(u'Water flow rate [m%s/h]'%uni_tothethird, size=8)
    axes[2].legend()
    fig.savefig(figpath + 'TQtimeseries_%2.2f'%(no_sigma) + 'sigma_' + station + '.pdf')
    
    # Differencen in supply temperature between the scenarios
    fig_heat_loss(sim_input, sim_results_sc2, sim_results_sc3, station, no_sigma)
    fig_heat_loss(sim_input_synth, sim_results_sc2_synth, sim_results_sc3_synth, station, no_sigma, save=False)
        
    
    return 
    
    #%% The below section only runs if we view Tmin as a function of Q (the old way)
    # note: SOME OF THIS USES CONSTANT TRET!!
    TminofQ = False
    if TminofQ:    
        # outlierdetection
        X = df[['T_sup','Q']]
        outlier_detection = False
        if outlier_detection: 
            detect_outliers(X, station)
        else:
            inlierpred = np.ones(len(df), dtype=bool)
              
    
        fig, ax1 = plt.subplots()
        ax2 = ax1.twinx()
        cond_df = df
        ax1.plot_date(np.array(cond_df['ts']), np.array(cond_df['Q']), 'b')
        
        ax2.plot_date(np.array(cond_df['ts']), np.array(cond_df['T_sup']), 'r-')
        
        plt.figure()
        plt.plot_date(df['ts'], df['cons'], 'g-')
        plt.title(station)
        
        plt.figure()
        plt.scatter(df['T_sup'], df['Q'], c=df['cons'], alpha=0.25)
        plt.colorbar()
        plt.title(station)        
        
        outliers = df[np.logical_not(inlierpred)]
    
        plt.plot(np.array(outliers['T_sup']), np.array(outliers['Q']), 'ko')
        
      
        #%%
        #plot_Tmin_Q_quantiles(df, inlierpred)
        Q = np.linspace(df[inlierpred]['Q'].min(), df[inlierpred]['Q'].max(), 500)
        qs = [0.001, 0.005, 0.01, 0.02275, 0.05, 0.1]
        for q in qs:
            T_min_func, Q_quantiles = get_Tmin_func(df[inlierpred],T_min_q=q, N_Q_q=21)
            plt.plot(T_min_func(Q), Q, label=str(q), lw=2)
        plt.legend()
        for Q_qua in Q_quantiles:
            plt.axhline(y=Q_qua)
            
        #%% P vs Q (T=Tmin(Q))      
        T_min_func, Q_quantiles = get_Tmin_func(df, T_min_q=0.02275, N_Q_q=21)
        
        plt.figure()
        plt.plot(Q, T_min_func(Q), 'r', label='Tmin')
        P = specific_heat_water*density_water*Q*(T_min_func(Q)-T_ret)   
        plt.plot(Q, P, 'b', label='Cons')
        plt.xlabel('Q')
        plt.legend()
        
        
        plt.figure()
        simP = df['cons']
        res = [op_model(cons, T_min_func, Q_max=Q_max_dict[station], T_ret=T_ret) for cons in simP]
        simT, simQ = zip(*res)
        plt.scatter(df['T_sup'], df['Q'], c='k', alpha=0.1)
        plt.scatter(simT,simQ,c=simP)
        plt.colorbar()
Exemple #34
0
                                            pointcode=71699)
            df['%s%ihdiff' % (v, timeshift)] = ens_mean - ens_mean_before

    return df


# Switch for rebuilding the data frame. NOTE: all_data is only assigned inside
# this branch, so with reload_data = False it must already exist (e.g. from an
# earlier interactive run) for the lines below to work.
reload_data = True
if reload_data:
    # Lags passed to gen_fit_df.
    # NOTE(review): presumably in hours, judging by the pickle file name
    # below - confirm in gen_fit_df.
    timelags = [48, 60, 168]
    all_data = gen_fit_df(dt.datetime(2016, 1, 26, 1),
                          dt.datetime(2016, 4, 1, 0),
                          ['Tout', 'vWind', 'hum', 'sunRad'], timelags)
# Regression target: the 'prod' column.
y = all_data['prod']

#%%
ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 26, 1),
                              dt.datetime(2016, 4, 1, 0))
# Regressors: every column except 'prod'.
X = all_data.ix[:, all_data.columns != 'prod']

# Persist the regressors and the target to the current working directory.
X.to_pickle('48h60h168h_lagged_X.pkl')
y.to_pickle('prod_to_gowith.pkl')

#%%
# Linear regression without an intercept term.
lr = linear_model.LinearRegression(fit_intercept=False)

# Out-of-fold predictions from 25-fold cross validation.
predicted = cross_val_predict(lr, X, y, cv=25)
plt.figure()
plt.plot(y)
plt.plot(predicted, 'r')
sns.jointplot(pd.Series(predicted), y)
# Per-fold score using the 'mean_absolute_error' scorer.
score = cross_val_score(lr, X, y, cv=25, scoring='mean_absolute_error')
def hoerning_pump_model(): # figure 4
    """Simulate the pump operating points and draw the operation diagram.

    The delivered heat is reconstructed from the fetched supply temperature
    and flow as ``specific_heat_water*density_water*Q*(T_sup - T_ret)``.  For
    every time step the (supply temperature, flow) pair that delivers this
    heat on the piecewise linear operation curve is then derived twice:

    * Scenario 1: constant upper flow limit ``const_Q_ub``.
    * Scenario 2: upper flow limit moved between ``const_Q_ub`` and 410 in
      proportion to the confidence interval stored in
      ``combined_conf_int.npz``.

    Both scenarios are drawn as scatter plots in a two-panel figure which is
    saved to ``figures/first_articlefigs/hoerning_pump_model.pdf``.  A third
    set of supply temperatures is computed (not plotted) with the confidence
    interval reduced by half of ``statistical_conf_int``.

    Returns:
        Tuple ``(T_sup_const_cap, T_sup_dyn_cap, Q_const_cap, Q_dyn_cap,
        model_conf_int, T_sup_dyn_cap_half_model_unc)``.  ``model_conf_int``
        is the array loaded from the npz file; the other entries are lists
        with one value per time step.

    Raises:
        AssertionError: if the time steps stored with the confidence
            intervals differ from the time steps used here.
        ValueError: if no operating point can be derived for a heat value
            (only reachable when every comparison fails, e.g. for NaN).
    """
    specific_heat_water = 1.17e-6 # MWh/kgKelvin
    density_water = 980 # kg/m3 at 65 deg C
    T_ret = 36.5

    # simple model: Q(T) is piecewise linear with slopes a2 and a3; the
    # pieces are anchored at (T1, 295) and (71.4, 340)
    T1 = 68.5
    a2 = 15.5
    a3 = 2.1
    b2 = 295-a2*T1
    b3 = 340-a3*71.4

    def Q_from_cons_lin_piece(cons, a, b):
        """Flow on the line Q = a*T + b that delivers the heat `cons`.

        Inserting T = (Q - b)/a into cons = c*rho*Q*(T - T_ret) gives the
        quadratic A*Q**2 + B*Q + C = 0 solved here; the '+' root is returned.
        """
        B = -(b+a*T_ret)/a
        C = -cons/(specific_heat_water*density_water)
        A = 1/a

        return (-B+np.sqrt(B**2 - 4*A*C))/(2*A)

    def get_Tsup_and_Q(cons, Q_ub):
        """Return (T_sup, Q) delivering `cons` with the flow capped at `Q_ub`."""
        # try lowest possible T
        Q = cons/(specific_heat_water*density_water*(T1 - T_ret))
        if Q <= 295:
            return T1, Q
        elif Q > 295:
            Q = Q_from_cons_lin_piece(cons, a2, b2)
            if Q <= Q_ub*(340./360):
                T = (Q - b2)/a2
                return T, Q
            elif Q >= Q_ub*(340./360):
                # shift the third piece so that it starts where the second
                # piece reaches the scaled limit
                b3_adjusted = b3 + (Q_ub*(340./360) - 340)
                Q = Q_from_cons_lin_piece(cons, a3, b3_adjusted)
                if Q <= Q_ub:
                    T = (Q - b3_adjusted)/a3
                    return T, Q
                elif Q > Q_ub:
                    # flow is capped; the temperature has to deliver the rest
                    Q = Q_ub
                    T = cons/(specific_heat_water*density_water*Q) + T_ret
                    return T, Q
        # Only reached when all comparisons above are False (NaN input).
        # Previously this fell through and returned None, which crashed the
        # caller with an unrelated unpacking error.
        raise ValueError("cannot derive an operating point for cons=%r" % (cons,))

    plt.close('all')

    fig, [ax1, ax2] = plt.subplots(2,1,sharex=True, sharey=True)
    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016,1,20,1), dt.datetime(2016,2,5,0))
    all_ts = ts1 + ts2
    PI_T_sup = '4.146.120.29.HA.101'
    PI_Q = 'K.146A.181.02.HA.101'
    df = pd.DataFrame()
    df['T_sup']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_T_sup, ts1[0], \
            ts1[-1]),sq.fetch_hourly_vals_from_PIno(PI_T_sup, ts2[0], ts2[-1])])
    df['Q']=np.concatenate([sq.fetch_hourly_vals_from_PIno(PI_Q, ts1[0], ts1[-1]),\
            sq.fetch_hourly_vals_from_PIno(PI_Q, ts2[0], ts2[-1])])
    df['ts'] = all_ts
    df['cons'] = specific_heat_water*density_water*df['Q']*(df['T_sup']-T_ret)

    # Open the archive once and close it again after both arrays are read.
    with np.load('combined_conf_int.npz') as conf_int_file:
        model_conf_int = conf_int_file['combined_conf_int']
        conf_int_timesteps = list(conf_int_file['timesteps'])
    assert conf_int_timesteps == all_ts, "confidence intervals don't have matching time steps"

    const_Q_ub = 360
    # Hoisted: the normalisation constant is the same for every time step.
    max_conf_int = np.max(model_conf_int)

    def dynamic_Q_ub(model_uncertainty):
        """Flow limit: 410 for zero uncertainty, const_Q_ub for the largest one."""
        return 410 - (410-const_Q_ub)*(model_uncertainty/max_conf_int)

    Q_const_cap = []
    T_sup_const_cap = []
    Q_dyn_cap = []
    T_sup_dyn_cap = []
    for c, model_uncertainty in zip(df['cons'], model_conf_int):
        T_const, Q_const = get_Tsup_and_Q(c, const_Q_ub)
        Q_const_cap.append(Q_const)
        T_sup_const_cap.append(T_const)

        T_dyn, Q_dyn = get_Tsup_and_Q(c, dynamic_Q_ub(model_uncertainty))
        Q_dyn_cap.append(Q_dyn)
        T_sup_dyn_cap.append(T_dyn)

    dT=0.1
    # Same background in both panels: red and yellow bands plus the grey area
    # under the operation curve.
    for ax in (ax1, ax2):
        ax.fill_between([65+dT,95-dT], [410, 410], [360, 360], facecolor=red, alpha=0.25)
        ax.fill_between([65+dT,95-dT], [360, 360],[340, 340], facecolor=yellow, alpha=0.25)
        ax.fill_between([T1, 71.4, 80.9, 100], [295, 340, 360, 360], color='k', edgecolor='k', alpha=0.2, linewidth=1)
    ax1.plot([65+dT,95-dT], [410, 410], '--', c=red, lw=2)
    ax1.text(79,415, 'Maximum pump capacity', size=8)
    im = ax1.scatter(T_sup_const_cap, Q_const_cap, c=df['cons'], cmap=plt.cm.BuPu)

    ax2.scatter(T_sup_dyn_cap, Q_dyn_cap, c=df['cons'], cmap=plt.cm.BuPu)
    ax2.plot([65+dT,95-dT], [410, 410], '--', c=red, lw=2)
    ax2.text(79,415, 'Maximum pump capacity', size=8)

    # One colour bar shared by both panels, taken from the first scatter plot.
    cax, _ = mpl.colorbar.make_axes([ax1, ax2])
    fig.colorbar(im, cax=cax)
    cax.set_ylabel('Delivered heat [MW]',size=8)

    ax2.set_xlabel(u'Supply temperature [%sC]'%uni_degree, size=8)
    for ax in (ax1, ax2):
        ax.set_ylabel(u'Water flow rate [m%s/h]'%uni_tothethird, size=8)
        ax.tick_params(axis='both', which='major', labelsize=8)
    cax.tick_params(axis='y', which='major', labelsize=8)
    ax1.set_title('Scenario 1', size=10)
    ax2.set_title('Scenario 2', size=10)

    # The panels share both axes, so the limits are only set on the first one.
    ax1.set_xlim((65,95))
    ax1.set_ylim((150,450))

    fig.set_size_inches(1.15*colwidth,1.6*colwidth)

    fig.savefig('figures/first_articlefigs/hoerning_pump_model.pdf')

    # This is a theoretical calculation in case the model uncertainty was 50% of what it is
    statistical_conf_int = 50.90285 # this number is printed when production_model() is run (Width of const blue band (MW) ...)
    T_sup_dyn_cap_half_model_unc = []
    reduced_model_conf_int =  model_conf_int-0.5*statistical_conf_int
    for c, model_uncertainty in zip(df['cons'], reduced_model_conf_int):
        # still normalised by the maximum of the unreduced confidence interval
        T_dyn, _Q_dyn = get_Tsup_and_Q(c, dynamic_Q_ub(model_uncertainty))
        T_sup_dyn_cap_half_model_unc.append(T_dyn)

    return T_sup_const_cap, T_sup_dyn_cap, Q_const_cap, Q_dyn_cap, model_conf_int, T_sup_dyn_cap_half_model_unc
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 28 11:04:19 2016

@author: azfv1n8
"""
import datetime as dt
import numpy as np
import pandas as pd

import ensemble_tools as ens
import sql_tools as sq
from model_selection import linear_map, mlin_regression, gen_all_combinations, summary_to_file, mae, mape, rmse

#%%
# Three consecutive hourly periods: fit, validation and test.
fit_ts = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                  dt.datetime(2016, 1, 15, 0))
vali_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                   dt.datetime(2016, 2, 5, 0))
test_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1),
                                   dt.datetime(2016, 3, 1, 0))

# All time steps in chronological order (the `+` assumes gen_hourly_timesteps
# returns lists -- TODO confirm).
all_ts = fit_ts + vali_ts + test_ts

weathervars = ['Tout', 'vWind', 'sunRad', 'hum']

# One (initially empty) frame per period; columns are added below.
fit_data = pd.DataFrame()
vali_data = pd.DataFrame()
test_data = pd.DataFrame()

# Production fetched for the fit window shifted one day back.
fit_data['prod24h_before'] = sq.fetch_production(
    fit_ts[0] + dt.timedelta(days=-1), fit_ts[-1] + dt.timedelta(days=-1))
def Q_T_heatloss_timeseries(): # figure 5
    """Plot flow rate, supply temperature and heat-loss reduction over time.

    The two scenarios returned by hoerning_pump_model() are drawn for the two
    hourly periods in three stacked panels sharing the time axis.  The figure
    is saved to 'Q:/Projekter/Ens Article 1/figures/Q_T_heatloss_timeseries.pdf'.

    Returns:
        Tuple ``(heat_loss_reduction, heat_loss_reduction_half_model_unc)``:
        the percentage arrays computed from the supply temperatures of the
        dynamic scenarios relative to the constant scenario, both measured
        from ``T_grnd``.
    """
    (supply_T_const, supply_T_dyn, flow_const, flow_dyn,
     conf_int, supply_T_dyn_half_unc) = hoerning_pump_model()
    plt.close('all')

    fig, panels = plt.subplots(3, 1, sharex=True, figsize=(dcolwidth, 0.65*dcolwidth), gridspec_kw={'height_ratios':[3,1,1]})
    flow_ax, temp_ax, loss_ax = panels

    period1 = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0))
    period2 = ens.gen_hourly_timesteps(dt.datetime(2016,1,20,1), dt.datetime(2016,2,5,0))
    split = len(period1)
    # Each period together with its slice of the concatenated per-hour series.
    spans = [(period1, slice(0, split)), (period2, slice(split, None))]

    def draw_series(ax, values, label=None, **style):
        # One line per period; only the first line carries the legend label.
        for k, (steps, span) in enumerate(spans):
            line_kw = dict(style)
            if label is not None and k == 0:
                line_kw['label'] = label
            ax.plot_date(steps, values[span], '-', **line_kw)

    # Lower edges of the red and yellow areas in scenario 2.
    red_margin = 410 - (410-360)*(conf_int/np.max(conf_int))
    yellow_margin = (340./360)*red_margin
    margin_lw = 0.75
    draw_series(flow_ax, red_margin, label='Scenario 2 security margins', c=darkgrey, lw=margin_lw)
    draw_series(flow_ax, yellow_margin, c=darkgrey, lw=margin_lw)

    # Constant bands: 360-410 in red, 340-360 in yellow.
    for lower, upper, colour in [(360, 410, red), (340, 360, yellow)]:
        for steps, _ in spans:
            flat = np.ones(len(steps))
            flow_ax.fill_between(steps, lower*flat, upper*flat, facecolor=colour, alpha=0.25)

    draw_series(flow_ax, flow_const, label='Scenario 1', c=red)
    draw_series(flow_ax, flow_dyn, label='Scenario 2', c=green, lw=1)
    all_steps = period1 + period2
    flow_ax.plot_date(all_steps, 410*np.ones(len(all_steps)), '--', c=red, lw=1)

    # Legend entries in alphabetical order of their labels.
    handles, labels = flow_ax.get_legend_handles_labels()
    by_label = sorted(zip(handles, labels), key=lambda pair: pair[1])
    sorted_handles, sorted_labels = zip(*by_label)
    flow_ax.legend(sorted_handles, sorted_labels, loc=0, prop={'size':8})

    draw_series(temp_ax, supply_T_const, label='Scenario 1', c=red)
    draw_series(temp_ax, supply_T_dyn, label='Scenario 2', c=green, lw=1)
    temp_ax.legend(loc=6, prop={'size':8})

    T_grnd = 6.4
    const_excess = np.array(supply_T_const) - T_grnd
    heat_loss_reduction = 100*(1 - (np.array(supply_T_dyn) - T_grnd)/const_excess)
    heat_loss_reduction_half_model_unc = 100*(1 - (np.array(supply_T_dyn_half_unc) - T_grnd)/const_excess)

    draw_series(loss_ax, heat_loss_reduction, c=blue, lw=1)
    loss_ax.xaxis.set_major_formatter(DateFormatter('%b %d \n %Y') )

    for ax in (flow_ax, temp_ax, loss_ax):
        ax.tick_params(axis='y', which='major', labelsize=8)
    flow_ax.set_ylim(150,450)
    flow_ax.set_ylabel(u'Flow rate  [m%s/h]'%uni_tothethird, size=8)
    temp_ax.set_ylabel(u'Supply\ntemperature [%sC]'%uni_degree, size=8)
    loss_ax.set_ylabel('Heat loss\nreduction [%]', size=8)

    loss_ax.yaxis.set_major_locator(mpl.ticker.MultipleLocator(1))
    loss_ax.set_xlim(dt.datetime(2015,12,17,0), dt.datetime(2016,2,5,0))
    fig.tight_layout()

    fig.savefig('Q:/Projekter/Ens Article 1/figures/Q_T_heatloss_timeseries.pdf')

    return heat_loss_reduction, heat_loss_reduction_half_model_unc
    
def second_ens_prod_fig():
    """ This plot is based on a production model taking into account:
        the production 24 hours before as well as the change in
        temperature, wind speed and solar radiation from 24 hours ago to now.

        The model is fitted (without constant) on the first period and
        applied to the second (validation) period.  The fitted parameters
        are then applied to each of the 25 ensemble members, and the spread
        of these ensemble productions is combined with the validation
        residuals into a confidence interval.

        Side effects: prints error measures for the model and for the EO3
        forecast, and saves 'figures/ens_prod_models_v2.pdf',
        'figures/std_ens_prod_models.pdf' and 'figures/setpoint.pdf'.

        Returns the tuple (vali_data, fit_data, res, ens_std, vali_resid).
        """

    plt.close('all')
    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
    day_back = dt.timedelta(days=-1)

    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                   dt.datetime(2016, 1, 15, 0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                   dt.datetime(2016, 2, 5, 0))

    def add_24h_predictors(data, ts):
        """Add 'prod24h_before' and the '<var>24hdiff' columns for period ts."""
        data['prod24h_before'] = sq.fetch_production(ts[0] + day_back,
                                                     ts[-1] + day_back)
        for v in ['Tout', 'vWind', 'sunRad']:
            # ensemble mean now minus ensemble mean 24 hours earlier
            ens_before = ens.load_ens_timeseries_as_df(
                ts_start=ts[0] + day_back,
                ts_end=ts[-1] + day_back,
                weathervars=[v])
            data[v + '24hdiff'] = data[v] - ens_before.mean(axis=1)

    #load the data
    fit_data = ens.repack_ens_mean_as_df()
    add_24h_predictors(fit_data, ts1)

    vali_data = ens.repack_ens_mean_as_df(ts2[0], ts2[-1])
    add_24h_predictors(vali_data, ts2)

    # correct error in production: hours 116 and 117 both get the mean of the
    # two.  The original averaged hour 116 with itself, which is a no-op and
    # overwrote hour 117 with the value of hour 116.
    new_val = (vali_data['prod'][116] + vali_data['prod'][117]) / 2
    vali_data['prod'][116] = new_val
    vali_data['prod'][117] = new_val
    # the same two hours show up 24 rows later in the lagged column
    vali_data['prod24h_before'][116 + 24] = new_val
    vali_data['prod24h_before'][117 + 24] = new_val

    # do the fit
    X = fit_data[cols]
    y = fit_data['prod']
    res = mlin_regression(y, X, add_const=False)

    fig, [ax1, ax2] = plt.subplots(2, 1, figsize=(40, 20))

    # load ensemble data
    ens_data1 = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1],\
                                             weathervars=['Tout', 'vWind', 'sunRad'])
    ens_data1['prod24h_before'] = fit_data['prod24h_before']
    ens_data1_24h_before =  ens.load_ens_timeseries_as_df(\
                                    ts_start=ts1[0]+day_back,\
                                    ts_end=ts1[-1]+day_back, \
                                    weathervars=['Tout', 'vWind', 'sunRad'])
    ens_data2 = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1],\
                                             weathervars=['Tout', 'vWind', 'sunRad'])
    ens_data2['prod24h_before'] = vali_data['prod24h_before']
    ens_data2_24h_before = ens.load_ens_timeseries_as_df(\
                                    ts_start=ts2[0]+day_back,\
                                    ts_end=ts2[-1]+day_back, \
                                    weathervars=['Tout', 'vWind', 'sunRad'])
    # per-member 24 hour differences, columns named '<var>24hdiff<member>'
    for i in range(25):
        for v in ['Tout', 'vWind', 'sunRad']:
            key_raw = v + str(i)
            key_diff = v + '24hdiff' + str(i)
            ens_data1[
                key_diff] = ens_data1[key_raw] - ens_data1_24h_before[key_raw]
            ens_data2[
                key_diff] = ens_data2[key_raw] - ens_data2_24h_before[key_raw]

    all_ens_data = pd.concat([ens_data1, ens_data2])
    all_ts = ts1 + ts2
    #
    #
    # calculate production for each ensemble member
    ens_prods = np.zeros((len(all_ts), 25))
    for i in range(25):
        ens_cols = ['Tout24hdiff' + str(i), 'vWind24hdiff' + str(i),\
                    'sunRad24hdiff' + str(i), 'prod24h_before']
        # the parameters fitted on the ensemble mean, renamed to the columns
        # of member i
        ens_params = pd.Series({
            'Tout24hdiff' + str(i):
            res.params['Tout24hdiff'],
            'vWind24hdiff' + str(i):
            res.params['vWind24hdiff'],
            'sunRad24hdiff' + str(i):
            res.params['sunRad24hdiff'],
            'prod24h_before':
            res.params['prod24h_before']
        })
        ens_prods[:, i] = linear_map(all_ens_data, ens_params, ens_cols)

    # calculate combined confint
    ens_std = ens_prods.std(axis=1)
    # model prediction on the validation period, computed once and reused
    # (assumes linear_map has no side effects -- it was called with these
    # exact arguments four times before)
    vali_pred = linear_map(vali_data, res.params, cols)
    vali_resid = vali_pred - vali_data['prod']
    vali_resid_corrig = vali_resid - np.sign(
        vali_resid) * 1.9599 * ens_std[len(ts1):]
    mean_conf_int_spread = (vali_resid_corrig.quantile(0.95) -
                            vali_resid_corrig.quantile(0.05)) / 2

    combined_conf_int = mean_conf_int_spread + 1.9599 * ens_std
    all_prod_model = np.concatenate([res.fittedvalues, vali_pred])
    combined_ub95 = all_prod_model + combined_conf_int
    combined_lb95 = all_prod_model - combined_conf_int

    # plot confint
    ax1.fill_between(all_ts,
                     combined_lb95,
                     combined_ub95,
                     label='Combined 95% conf. int.')
    ax1.fill_between(all_ts,
                     all_prod_model - 1.9599 * ens_std,
                     all_prod_model + 1.9599 * ens_std,
                     facecolor='grey',
                     label='Ensemble 95% conf. int.')

    # plot ensemble models
    ax1.plot_date(all_ts, ens_prods, '-', lw=0.5)

    ax1.plot_date(ts1, y, 'k-', lw=2, label='Actual production')
    ax1.plot_date(ts1,
                  res.fittedvalues,
                  'r-',
                  lw=2,
                  label='Model on ensemble mean')

    ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='')
    ax1.plot_date(ts2, vali_pred, 'r-', lw=2)
    ax1.set_ylabel('[MW]')
    ax1.legend(loc=2)
    ax1.set_ylim([0, 1100])

    ax2.plot_date(ts1, res.resid, '-', label='Residual, fitted data')
    ax2.plot_date(ts2, vali_resid, '-', label='Residual, validation data')
    ax2.set_ylabel('[MW]')
    ax2.legend(loc=2)
    ax2.set_ylim([-550, 550])
    # Single-argument print(...) gives the same output as the Python 2 print
    # statement and is also valid Python 3.
    print("MAE = " + str(mae(vali_resid)))
    print("MAPE = " + str(mape(vali_resid, vali_data['prod'])))
    print("RMSE = " + str(rmse(vali_resid)))
    print("ME = " + str(np.mean(vali_resid)))

    print("MAE (fit) = " + str(mae(res.resid)))
    print("MAPE (fit) = " + str(mape(res.resid, fit_data['prod'])))
    print("RMSE (fit)= " + str(rmse(res.resid)))
    print("ME (fit)= " + str(np.mean(res.resid)))

    plt.savefig('figures/ens_prod_models_v2.pdf', dpi=600)
    plt.figure()
    plt.plot_date(all_ts, ens_std)
    plt.ylabel('Std. of ensemble production models [MW]')
    plt.savefig('figures/std_ens_prod_models.pdf', dpi=600)
    #

    vali_ens_std = ens_std[len(ts1):]
    sns.jointplot(x=pd.Series(vali_ens_std), y=np.abs(vali_resid))
    sns.jointplot(x=vali_data['prod'],
                  y=pd.Series(vali_pred))

    # compare with the EO3 forecast for the same periods
    EO3_fc1 = sq.fetch_EO3_midnight_forecast(ts1[0], ts1[-1])
    EO3_fc2 = sq.fetch_EO3_midnight_forecast(ts2[0], ts2[-1])
    plt.figure()
    plt.plot_date(ts1, fit_data['prod'], 'k-', label='Actual production')
    plt.plot_date(ts2, vali_data['prod'], 'k-')
    plt.plot_date(ts1, EO3_fc1, 'r-', label='EO3 forecast')
    plt.plot_date(ts2, EO3_fc2, 'r-')
    EO3_err = EO3_fc2 - vali_data['prod']
    EO3_err_fit = EO3_fc1 - fit_data['prod']
    print("MAE (EO3) = " + str(mae(EO3_err)))
    print("MAPE (EO3) = " + str(mape(EO3_err, vali_data['prod'])))
    print("RMSE (EO3)= " + str(rmse(EO3_err)))
    print("ME (EO3)= " + str(np.mean(EO3_err)))

    print("MAE (EO3_fit) = " + str(mae(EO3_err_fit)))
    print("MAPE (EO3_fit) = " + str(mape(EO3_err_fit, fit_data['prod'])))
    print("RMSE (EO3_fit)= " + str(rmse(EO3_err_fit)))
    print("ME (EO3_fit)= " + str(np.mean(EO3_err_fit)))

    sns.jointplot(x=pd.Series(vali_ens_std), y=np.abs(EO3_err))

    # normalized uncertainty and the setpoint derived from it
    plt.figure(figsize=(20, 10))
    plt.subplot(2, 1, 1)
    plt.plot_date(all_ts, combined_conf_int / combined_conf_int.max(), '-')
    plt.ylabel('Model + ensemble uncertainty \n [normalized]')
    plt.ylim(0, 1)
    plt.subplot(2, 1, 2)
    plt.plot_date(all_ts,
                  (1 - 0.2 * combined_conf_int / combined_conf_int.max()),
                  '-',
                  label='Dynamic setpoint')
    plt.plot_date(all_ts,
                  0.8 * np.ones(len(all_ts)),
                  '--',
                  label='Static setpoint')
    plt.ylabel(
        'Setpoint for pump massflow \n temperature [fraction of max pump cap]')
    plt.legend()
    plt.ylim(.7, 1)
    plt.savefig('figures/setpoint.pdf')

    return vali_data, fit_data, res, ens_std, vali_resid
Exemple #39
0
"""

import pandas as pd
import datetime as dt
import numpy as np
from sklearn import linear_model
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import cross_val_predict
from model_selection import gen_all_combinations, rmse, mae, mape
import sql_tools as sq
import ensemble_tools as ens

#%% SVR experiment

# Hourly time steps of the period the pickled data is expected to cover.
ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 26, 1),
                              dt.datetime(2016, 4, 1, 0))
# Predictors and target are read from pickle files in the working directory.
X = pd.read_pickle('48h60h168h_lagged_X.pkl'
                   )  # run model_selection_ext_horizon to generate these files
y = pd.read_pickle('prod_to_gowith.pkl')
# add more predictor data:
# one extra column per weather variable, loaded for the same period and point

for v in ['Tout', 'vWind', 'hum', 'sunRad']:
    X[v] = ens.load_ens_mean_avail_at10_series(v,
                                               ts[0],
                                               ts[-1],
                                               pointcode=71699)


#X['weekdays'] = [t.weekday() for t in ts]
def h_hoursbefore(timestamp, h):
    """Return `timestamp` moved `h` hours back in time."""
    offset = dt.timedelta(hours=-h)
    return timestamp + offset
def most_recent_ens_timeseries(start_stop=(dt.datetime(2015, 12, 16, 0),
                                           dt.datetime(2016, 1, 19, 0)),
                               pointcode=71699,
                               shift_steno_one=False):
    """ Plot ensemble time series against measurements, one figure per
        weather variable, and save each figure as a pdf in 'figures/'.

        start_stop is a tuple with 2 datetime objects. The first
        is the first time step in the time series, the second is the last.
        pointcode is only used to build the name of the ensemble data file.
        If shift_steno_one is True, the Steno measurements are rolled one
        step towards earlier time steps before plotting.

        """
    plt.close('all')
    # y axis labels, paired with `weathervars` in the loop below
    ylabels = ['[$\degree $C]', '[m/s]', '[%]', '[W/m$^2$]']

    # common tail of the ensemble file names in 'time_series/'
    suffix = ''.join(['_geo', str(pointcode), '_', ens.timestamp_str(start_stop[0]), \
                        '_to_', ens.timestamp_str(start_stop[1]), '.npy'])
    timesteps = ens.gen_hourly_timesteps(start_stop[0], start_stop[1])

    # Steno measurements come from one fixed file, independent of start_stop
    Steno_data = np.load(
        'Q:/Weatherdata/Steno_weatherstation/Steno_hourly_2015120111_to_2016011800.npz'
    )
    Steno_Tvhs = Steno_data['Tout_vWind_hum_sunRad']
    Steno_timesteps = Steno_data['timesteps']

    # NOTE: `weathervars` is not defined in this function; it is read from the
    # enclosing (module) scope.
    for v, ylab in zip(weathervars, ylabels):
        plt.figure(figsize=(15, 20))
        plt.grid(True)
        # upper panel: ensemble data and the two measured series
        plt.subplot(2, 1, 1)
        ens_data = np.load('time_series/' + v + suffix)
        BBSYD_measured = sq.fetch_BrabrandSydWeather(v, start_stop[0],
                                                     start_stop[1])
        # column of the Steno array is picked by the position of v in weathervars
        Steno_measured = Steno_Tvhs[:, weathervars.index(v)]
        if shift_steno_one:
            # np.roll wraps around: the first value ends up last
            Steno_measured = np.roll(Steno_measured, -1)

        # unit conversion of the ensemble data for temperature and humidity
        if v == 'Tout':
            ens_data = ens.Kelvin_to_Celcius(ens_data)
        elif v == 'hum':
            ens_data = ens.frac_to_percent(ens_data)  # convert to percentage

        plt.plot_date(timesteps, ens_data, '-')

        plt.plot_date(timesteps,
                      BBSYD_measured,
                      'k-',
                      lw=2,
                      label='Measured: Brabrand Syd')
        plt.plot_date(Steno_timesteps,
                      Steno_measured,
                      'r-',
                      lw=2,
                      label='Measured: Steno Museum')
        plt.ylabel(ylab)
        plt.grid(True)
        plt.xlim(start_stop)
        plt.title(v)
        plt.legend()

        # lower panel: spread measures of the ensemble
        plt.subplot(2, 1, 2)
        plt.plot_date(timesteps,
                      ens.ensemble_std(ens_data),
                      '-',
                      label='Ensemble std')
        plt.plot_date(timesteps,
                      ens.ensemble_abs_spread(ens_data),
                      '-',
                      label='Max ensemble spread')
        plt.ylabel(ylab)
        plt.legend()
        plt.grid(True)
        plt.tight_layout()

        figfilename = v + '_most_recent_ens_timeseries.pdf'
        plt.savefig('figures/' + figfilename)
def first_ens_prod_fig():
    """ This plot is based on a production model taking into account:
        Tout, vWind and the production 24 hours before

        The model is fitted (with constant) on the first period and applied
        to the second (validation) period and to each of the 25 ensemble
        members. Error measures are printed and the figures are saved as
        'figures/ens_prod_models.pdf' and 'figures/std_ens_prod_models.pdf'.

        Returns the tuple
        (res, all_ens_data, all_ts, fit_data['prod'], vali_data['prod']).

        """

    plt.close('all')
    cols = ['Tout', 'vWind', 'prod24h_before']

    # ts1: fit period, ts2: validation period
    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                   dt.datetime(2016, 1, 15, 0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                   dt.datetime(2016, 1, 28, 0))

    #load the data
    # NOTE(review): repack_ens_mean_as_df() is called with its defaults here;
    # presumably they match ts1 -- confirm in ensemble_tools.
    fit_data = ens.repack_ens_mean_as_df()
    # production for the fit window shifted one day back
    fit_data['prod24h_before'] = sq.fetch_production(
        dt.datetime(2015, 12, 16, 1), dt.datetime(2016, 1, 14, 0))

    vali_data = ens.repack_ens_mean_as_df(dt.datetime(2016, 1, 20, 1),
                                          dt.datetime(2016, 1, 28, 0))
    # production for the validation window shifted one day back
    vali_data['prod24h_before'] = sq.fetch_production(
        dt.datetime(2016, 1, 19, 1), dt.datetime(2016, 1, 27, 0))

    # do the fit
    X = fit_data[cols]
    y = fit_data['prod']
    res = mlin_regression(y, X, add_const=True)

    fig, [ax1, ax2] = plt.subplots(2, 1, figsize=(40, 20))

    # load ensemble data
    ens_data1 = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1])
    ens_data1['prod24h_before'] = fit_data['prod24h_before']
    ens_data2 = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1])
    ens_data2['prod24h_before'] = vali_data['prod24h_before']

    all_ens_data = pd.concat([ens_data1, ens_data2])
    all_ts = ts1 + ts2

    # calculate production for each ensemble member
    ens_prods = np.zeros((len(all_ts), 25))
    for i in range(25):
        ens_cols = ['Tout' + str(i), 'vWind' + str(i), 'prod24h_before']
        # the parameters fitted on the ensemble mean, renamed to the columns
        # of member i
        ens_params = pd.Series({
            'Tout' + str(i): res.params['Tout'],
            'vWind' + str(i): res.params['vWind'],
            'const': res.params['const'],
            'prod24h_before': res.params['prod24h_before']
        })
        ens_prods[:, i] = linear_map(all_ens_data, ens_params, ens_cols)

    # calculate combined confint
    # NOTE(review): wls_prediction_std is not imported in the visible part of
    # the file; presumably the statsmodels helper -- confirm.
    prstd, iv_l, iv_u = wls_prediction_std(res)
    mean_conf_int_spread = np.mean(res.fittedvalues - iv_l)
    # fit period: per-step prediction std; validation period: one constant
    # value derived from the mean interval half-width of the fit period
    model_std = np.concatenate(
        [prstd, (1. / 1.9599) * mean_conf_int_spread * np.ones(len(ts2))])
    ens_std = ens_prods.std(axis=1)
    # model and ensemble spread are added in quadrature
    combined_std = np.sqrt(model_std**2 + ens_std**2)
    all_prod_model = np.concatenate(
        [res.fittedvalues,
         linear_map(vali_data, res.params, cols)])
    combined_ub95 = all_prod_model + 1.9599 * combined_std
    combined_lb95 = all_prod_model - 1.9599 * combined_std

    # plot confint
    ax1.fill_between(all_ts,
                     combined_lb95,
                     combined_ub95,
                     label='Combined 95% conf. int.')
    ax1.fill_between(all_ts,
                     all_prod_model - 1.9599 * ens_std,
                     all_prod_model + 1.9599 * ens_std,
                     facecolor='grey',
                     label='Ensemble 95% conf. int.')

    # plot ensemble models
    ax1.plot_date(all_ts, ens_prods, '-', lw=0.5)

    ax1.plot_date(ts1, y, 'k-', lw=2, label='Actual production')
    ax1.plot_date(ts1,
                  res.fittedvalues,
                  'r-',
                  lw=2,
                  label='Model on ensemble mean')

    # validation period: the empty / missing labels keep these lines out of
    # the legend
    ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='')
    ax1.plot_date(ts2, linear_map(vali_data, res.params, cols), 'r-', lw=2)
    ax1.set_ylabel('[MW]')
    ax1.legend(loc=2)

    # residual = model minus actual production
    vali_resid = linear_map(vali_data, res.params, cols) - vali_data['prod']
    ax2.plot_date(ts1, res.resid, '-', label='Residual, fitted data')
    ax2.plot_date(ts2, vali_resid, '-', label='Residual, validation data')
    ax2.set_ylabel('[MW]')
    ax2.legend(loc=2)
    # error measures, first for the validation period, then for the fit period
    print "MAE = " + str(mae(vali_resid))
    print "MAPE = " + str(mape(vali_resid, vali_data['prod']))
    print "RMSE = " + str(rmse(vali_resid))
    print "ME = " + str(np.mean(vali_resid))

    print "MAE (fit) = " + str(mae(res.resid))
    print "MAPE (fit) = " + str(mape(res.resid, fit_data['prod']))
    print "RMSE (fit)= " + str(rmse(res.resid))
    print "ME (fit)= " + str(np.mean(res.resid))

    # plt.savefig acts on the current figure
    plt.savefig('figures/ens_prod_models.pdf', dpi=600)
    plt.figure()
    plt.plot_date(all_ts, ens_std)
    plt.ylabel('Std. of ensemble production models [MW]')
    # NOTE(review): second_ens_prod_fig() saves to this same file name.
    plt.savefig('figures/std_ens_prod_models.pdf', dpi=600)

    # ensemble spread against the residuals of both periods
    sns.jointplot(x=ens_std, y=np.concatenate([res.resid, vali_resid]))

    return res, all_ens_data, all_ts, fit_data['prod'], vali_data['prod']
import pandas as pd
import datetime as dt
import numpy as np
from sklearn import linear_model
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.cross_validation import cross_val_predict
from model_selection import gen_all_combinations, rmse, mae, mape
import sql_tools as sq
import ensemble_tools as ens


#%% SVR experiment


# Hourly time steps of the period the pickled data is expected to cover.
ts = ens.gen_hourly_timesteps(dt.datetime(2016,1,26,1), dt.datetime(2016,4,1,0))
# Predictors and target are read from pickle files in the working directory.
X = pd.read_pickle('48h60h168h_lagged_X.pkl') # run model_selection_ext_horizon to generate these files
y = pd.read_pickle('prod_to_gowith.pkl') 
# add more predictor data:
# one extra column per weather variable, loaded for the same period and point


for v in ['Tout', 'vWind', 'hum', 'sunRad']:
    X[v] = ens.load_ens_mean_avail_at10_series(v, ts[0], ts[-1], pointcode=71699)

#X['weekdays'] = [t.weekday() for t in ts]
def h_hoursbefore(timestamp, h):
    """Return `timestamp` moved `h` hours back in time."""
    offset = dt.timedelta(hours=-h)
    return timestamp + offset
# Production fetched for the window of `ts` shifted 24 hours back.
most_recent_avail_prod = sq.fetch_production(h_hoursbefore(ts[0], 24),\
                                                          h_hoursbefore(ts[-1], 24))

for i, t, p48 in zip(range(len(most_recent_avail_prod)), ts, X['prod48hbefore']):
def main(argv):
    """Fit and evaluate the 24-hour-difference consumption model for a station.

    The consumption is modelled linearly (no constant) from the consumption
    24 hours before and the 24 hour changes in the ensemble mean of Tout,
    vWind and sunRad.  The model is fitted on the fit period and applied to
    the validation and test periods; rmse, mae and mape are printed for each
    period, and actual versus modelled consumption is plotted.

    Args:
        argv: sequence whose first element is the station name (a key of
            PI_T_sup_dict).  If it is empty (or None) the station defaults
            to 'holme'.  An unknown station prints a message and returns.
    """
    plt.close('all')

    # Only a missing argument falls back to the default station.  The former
    # bare `except:` also swallowed unrelated errors (for example a missing
    # PI_T_sup_dict) and silently continued with 'holme'.
    try:
        station = argv[0]
    except (IndexError, TypeError):
        print("No station provided. Defaults to holme.")
        station = 'holme'
    else:
        if station not in PI_T_sup_dict:
            print("Wrong station, use rundhoej, holme or hoerning")
            return

    print(station)

    #%%
    fit_ts = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                      dt.datetime(2016, 1, 15, 0))
    vali_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                       dt.datetime(2016, 2, 5, 0))
    test_ts = ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1),
                                       dt.datetime(2016, 4, 1, 0))

    all_ts = fit_ts + vali_ts + test_ts

    weathervars = ['Tout', 'vWind', 'sunRad', 'hum']

    fit_data = pd.DataFrame()
    vali_data = pd.DataFrame()
    test_data = pd.DataFrame()
    # every period gets the same columns, so fill them in lockstep
    periods = [(fit_data, fit_ts), (vali_data, vali_ts), (test_data, test_ts)]
    day_back = dt.timedelta(days=-1)

    cons_key = sq.consumption_place_key_dict[station]
    for data, ts in periods:
        data['cons24h_before'] = sq.fetch_consumption(
            cons_key, ts[0] + day_back, ts[-1] + day_back)

    for data, ts in periods:
        data['cons'] = sq.fetch_consumption(cons_key, ts[0], ts[-1])

    # ensemble mean now minus ensemble mean 24 hours earlier
    for v in weathervars:
        for data, ts in periods:
            ens_now = ens.load_ens_timeseries_as_df(
                ts_start=ts[0],
                ts_end=ts[-1],
                weathervars=[v]).mean(axis=1)
            ens_before = ens.load_ens_timeseries_as_df(
                ts_start=ts[0] + day_back,
                ts_end=ts[-1] + day_back,
                weathervars=[v]).mean(axis=1)
            data['%s24hdiff'%v] = ens_now - ens_before

    #%%
    all_data = pd.concat([fit_data, vali_data, test_data])

    fit_y = fit_data['cons']
    columns = [
        'cons24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff'
    ]
    X = fit_data[columns]
    res = mlin_regression(fit_y, X, add_const=False)

    def report_errors(header, err, actual):
        # Same text as the Python 2 statement `print header, rmse, mae, mape`
        # (items converted with str() and separated by one space).
        print(" ".join([header, str(rmse(err)), str(mae(err)),
                        str(mape(err, actual))]))

    fiterr = res.fittedvalues - fit_y
    report_errors("Errors fit period: ", fiterr, fit_y)

    vali_pred = linear_map(vali_data, res.params, columns)
    valierr = vali_pred - vali_data['cons']
    report_errors("Errors validation period: ", valierr, vali_data['cons'])

    test_pred = linear_map(test_data, res.params, columns)
    testerr = test_pred - test_data['cons']
    report_errors("Errors test period: ", testerr, test_data['cons'])

    # actual consumption (black) against the model (red) over all periods
    plt.figure()
    plt.plot_date(all_ts, all_data['cons'], 'k-')
    plt.plot_date(all_ts,
                  np.concatenate([res.fittedvalues, vali_pred, test_pred]),
                  'r-')
def first_ens_prod_fig():
    """ Fit a linear production model and plot it with ensemble-based bands.

        This plot is based on a production model taking into account:
        Tout, vWind and the production 24 hours before

        The regression (with a constant term) is fitted on the ts1 period
        (2015-12-17 01:00 to 2016-01-15 00:00) and then applied to the ts2
        period (2016-01-20 01:00 to 2016-01-28 00:00).  The fitted
        coefficients are also applied to each of the 25 ensemble members;
        the spread of those 25 series is combined with the model's own
        prediction std to draw the bands labelled '95% conf. int.'.

        Side effects: closes all open figures, prints error measures for
        both periods, and writes 'figures/ens_prod_models.pdf' and
        'figures/std_ens_prod_models.pdf'.

        Returns the tuple
        (res, all_ens_data, all_ts, fit_data['prod'], vali_data['prod']).
        """
        
    plt.close('all')
    # Regressors used for the fit on the ensemble-mean data.
    cols = ['Tout', 'vWind', 'prod24h_before']
        
    # ts1 is the fit period, ts2 the validation period.
    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016,1,20,1), dt.datetime(2016,1,28,0))
    
    # load the data
    # NOTE(review): repack_ens_mean_as_df() is called without arguments here;
    # presumably its defaults match the ts1 window - confirm in ens.
    fit_data = ens.repack_ens_mean_as_df()
    # Production for the fit window shifted one day back.
    fit_data['prod24h_before'] = sq.fetch_production(dt.datetime(2015,12,16,1), dt.datetime(2016,1,14,0))

    vali_data = ens.repack_ens_mean_as_df(dt.datetime(2016,1,20,1), dt.datetime(2016,1,28,0))
    # Production for the validation window shifted one day back.
    vali_data['prod24h_before'] = sq.fetch_production(dt.datetime(2016,1,19,1), dt.datetime(2016,1,27,0))   
    
 
    # do the fit
    X = fit_data[cols]
    y = fit_data['prod']
    res = mlin_regression(y, X, add_const=True)    
    
    fig, [ax1, ax2] = plt.subplots(2,1, figsize=(40,20))
    
    # load ensemble data (per-member columns are addressed below as
    # 'Tout<i>' and 'vWind<i>') and attach the lagged production to it
    ens_data1 = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1])
    ens_data1['prod24h_before'] = fit_data['prod24h_before']    
    ens_data2 = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1])
    ens_data2['prod24h_before'] = vali_data['prod24h_before']
    
    all_ens_data = pd.concat([ens_data1, ens_data2])
    all_ts = ts1 + ts2    
    
    
    # calculate production for each ensemble member: one column per member,
    # one row per timestep of both periods
    ens_prods = np.zeros((len(all_ts), 25))
    for i in range(25):
        ens_cols = ['Tout' + str(i), 'vWind' + str(i), 'prod24h_before']
        # Re-key the fitted coefficients to the column names of member i.
        # NOTE(review): 'const' is in ens_params but not in ens_cols;
        # presumably linear_map handles the constant itself - confirm.
        ens_params = pd.Series({'Tout' + str(i):res.params['Tout'],
                                'vWind' + str(i):res.params['vWind'],
                                'const':res.params['const'],
                                'prod24h_before':res.params['prod24h_before']})
        ens_prods[:,i] = linear_map(all_ens_data, ens_params, ens_cols)    
    
    
       
    # calculate combined confint
    # NOTE(review): presumably statsmodels' wls_prediction_std (prediction
    # std, lower bound, upper bound) - confirm against the file's imports.
    prstd, iv_l, iv_u = wls_prediction_std(res)
    mean_conf_int_spread = np.mean(res.fittedvalues - iv_l)
    # Fit period: per-point prstd.  Validation period: a constant, namely the
    # mean distance from fitted value to lower bound divided by 1.9599.
    model_std = np.concatenate([prstd, (1./1.9599)*mean_conf_int_spread*np.ones(len(ts2))])
    # Spread across the 25 member-wise production series at each timestep.
    ens_std = ens_prods.std(axis=1)
    # Model and ensemble contributions are added in quadrature.
    combined_std = np.sqrt(model_std**2 + ens_std**2)
    all_prod_model = np.concatenate([res.fittedvalues, linear_map(vali_data, res.params, cols)])
    combined_ub95 = all_prod_model + 1.9599*combined_std
    combined_lb95 = all_prod_model - 1.9599*combined_std 
    
    # plot confint
    ax1.fill_between(all_ts, combined_lb95, combined_ub95, label='Combined 95% conf. int.')
    ax1.fill_between(all_ts, all_prod_model - 1.9599*ens_std, all_prod_model + 1.9599*ens_std, facecolor='grey', label='Ensemble 95% conf. int.')
    
    # plot ensemble models
    ax1.plot_date(all_ts, ens_prods, '-', lw=0.5)    
    
    ax1.plot_date(ts1, y, 'k-', lw=2, label='Actual production')
    ax1.plot_date(ts1, res.fittedvalues,'r-', lw=2, label='Model on ensemble mean')
         
    # Validation period uses the same line styles; the empty label keeps a
    # second 'Actual production' entry out of the legend.
    ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='')
    ax1.plot_date(ts2, linear_map(vali_data, res.params, cols), 'r-', lw=2)
    ax1.set_ylabel('[MW]')
    ax1.legend(loc=2)
    
    # Lower panel: residuals of the fit and of the validation prediction.
    vali_resid = linear_map(vali_data, res.params, cols) - vali_data['prod']
    ax2.plot_date(ts1, res.resid, '-', label='Residual, fitted data')
    ax2.plot_date(ts2, vali_resid, '-', label='Residual, validation data')
    ax2.set_ylabel('[MW]')
    ax2.legend(loc=2)
    # Error measures on the validation period ...
    print "MAE = " + str(mae(vali_resid))
    print "MAPE = " + str(mape(vali_resid, vali_data['prod']))
    print "RMSE = " + str(rmse(vali_resid))
    print "ME = " + str(np.mean(vali_resid))
    
    # ... and on the fit period.
    print "MAE (fit) = " + str(mae(res.resid))
    print "MAPE (fit) = " + str(mape(res.resid, fit_data['prod']))
    print "RMSE (fit)= " + str(rmse(res.resid))
    print "ME (fit)= " + str(np.mean(res.resid))

    # savefig acts on the current figure, so it must run before plt.figure()
    # opens the next one.
    plt.savefig('figures/ens_prod_models.pdf', dpi=600) 
    plt.figure()
    plt.plot_date(all_ts, ens_std)
    plt.ylabel('Std. of ensemble production models [MW]')
    plt.savefig('figures/std_ens_prod_models.pdf', dpi=600) 
    
    
    # Ensemble spread against the residuals of both periods.
    sns.jointplot(x=ens_std, y=np.concatenate([res.resid, vali_resid]))
   
        
    return res, all_ens_data, all_ts, fit_data['prod'], vali_data['prod']
def production_model():  # figure 3
    """Fit the 24h-difference production model and draw the figure-3 plot.

    Regresses 'prod' (without a constant term) on the production 24 hours
    earlier and on the 24-hour change of the ensemble-mean Tout, vWind and
    sunRad, using the ts1 period (2015-12-17 01:00 to 2016-01-15 00:00).
    The coefficients are then applied to the ensemble mean and to each of
    the 25 ensemble members over ts1 and ts2 (2016-01-20 01:00 to
    2016-02-05 00:00).  Only the ts2 part is plotted.

    Side effects: closes all open figures, prints error statistics for the
    model and for the EO3 midnight forecast, saves the figure to
    'Q:/Projekter/Ens Article 1/figures/production_model.pdf' and writes an
    array file via np.savez('combined_conf_int', ...).

    Returns:
        (res, fit_data): the regression result and the fit-period frame.
    """

    plt.close('all')
    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']

    # ts1 is the fit period, ts2 the validation period.
    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                   dt.datetime(2016, 1, 15, 0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                   dt.datetime(2016, 2, 5, 0))

    # load the data
    # NOTE(review): repack_ens_mean_as_df() is called without arguments here;
    # presumably its defaults match the ts1 window - confirm in ens.
    fit_data = ens.repack_ens_mean_as_df()
    fit_data['prod24h_before'] = sq.fetch_production(
        ts1[0] + dt.timedelta(days=-1), ts1[-1] + dt.timedelta(days=-1))

    # 24h differences: current ensemble mean minus the ensemble mean of the
    # same window shifted one day back.
    fit_data['Tout24hdiff'] = fit_data['Tout'] \
                                - ens.load_ens_timeseries_as_df(\
                                    ts_start=ts1[0]+dt.timedelta(days=-1),\
                                    ts_end=ts1[-1]+dt.timedelta(days=-1), \
                                    weathervars=['Tout']).mean(axis=1)
    fit_data['vWind24hdiff'] = fit_data['vWind'] \
                                - ens.load_ens_timeseries_as_df(\
                                    ts_start=ts1[0]+dt.timedelta(days=-1),\
                                    ts_end=ts1[-1]+dt.timedelta(days=-1), \
                                    weathervars=['vWind']).mean(axis=1)
    fit_data['sunRad24hdiff'] = fit_data['sunRad'] \
                                - ens.load_ens_timeseries_as_df(\
                                    ts_start=ts1[0]+dt.timedelta(days=-1),\
                                    ts_end=ts1[-1]+dt.timedelta(days=-1), \
                                    weathervars=['sunRad']).mean(axis=1)

    # Same columns for the validation period.
    vali_data = ens.repack_ens_mean_as_df(ts2[0], ts2[-1])
    vali_data['prod24h_before'] = sq.fetch_production(
        ts2[0] + dt.timedelta(days=-1), ts2[-1] + dt.timedelta(days=-1))
    vali_data['Tout24hdiff'] = vali_data['Tout'] \
                                - ens.load_ens_timeseries_as_df(\
                                    ts_start=ts2[0]+dt.timedelta(days=-1),\
                                    ts_end=ts2[-1]+dt.timedelta(days=-1), \
                                    weathervars=['Tout']).mean(axis=1)
    vali_data['vWind24hdiff'] = vali_data['vWind'] \
                                - ens.load_ens_timeseries_as_df(\
                                    ts_start=ts2[0]+dt.timedelta(days=-1),\
                                    ts_end=ts2[-1]+dt.timedelta(days=-1), \
                                    weathervars=['vWind']).mean(axis=1)
    vali_data['sunRad24hdiff'] = vali_data['sunRad'] \
                                - ens.load_ens_timeseries_as_df(\
                                    ts_start=ts2[0]+dt.timedelta(days=-1),\
                                    ts_end=ts2[-1]+dt.timedelta(days=-1), \
                                    weathervars=['sunRad']).mean(axis=1)

    # correct error in production:
    # NOTE(review): both operands below are element 116, so new_val is simply
    # prod[116] and no averaging happens; possibly neighbouring samples were
    # intended - confirm before relying on this correction.
    new_val = (vali_data['prod'][116] + vali_data['prod'][116]) / 2
    vali_data['prod'][116] = new_val
    vali_data['prod'][117] = new_val
    # The same two samples reappear 24 rows later in the lagged column.
    vali_data['prod24h_before'][116 + 24] = new_val
    vali_data['prod24h_before'][117 + 24] = new_val

    # do the fit
    X = fit_data[cols]
    y = fit_data['prod']
    res = mlin_regression(y, X, add_const=False)

    # dcolwidth is not defined in this function; presumably a module-level
    # figure-width constant - confirm.
    fig, [ax1, ax2] = plt.subplots(2,
                                   1,
                                   sharex=True,
                                   figsize=(dcolwidth, 0.57 * dcolwidth),
                                   gridspec_kw={'height_ratios': [4, 1]})

    # load ensemble data: the current window and the window one day earlier,
    # for both periods
    ens_data1 = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1],\
                                             weathervars=['Tout', 'vWind', 'sunRad'])
    ens_data1['prod24h_before'] = fit_data['prod24h_before']
    ens_data1_24h_before =  ens.load_ens_timeseries_as_df(\
                                    ts_start=ts1[0]+dt.timedelta(days=-1),\
                                    ts_end=ts1[-1]+dt.timedelta(days=-1), \
                                    weathervars=['Tout', 'vWind', 'sunRad'])
    ens_data2 = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1],\
                                             weathervars=['Tout', 'vWind', 'sunRad'])
    ens_data2['prod24h_before'] = vali_data['prod24h_before']
    ens_data2_24h_before = ens.load_ens_timeseries_as_df(\
                                    ts_start=ts2[0]+dt.timedelta(days=-1),\
                                    ts_end=ts2[-1]+dt.timedelta(days=-1), \
                                    weathervars=['Tout', 'vWind', 'sunRad'])
    # Per-member 24h differences, stored as '<var>24hdiff<i>'.
    for i in range(25):
        for v in ['Tout', 'vWind', 'sunRad']:
            key_raw = v + str(i)
            key_diff = v + '24hdiff' + str(i)
            ens_data1[
                key_diff] = ens_data1[key_raw] - ens_data1_24h_before[key_raw]
            ens_data2[
                key_diff] = ens_data2[key_raw] - ens_data2_24h_before[key_raw]

    all_ens_data = pd.concat([ens_data1, ens_data2])
    all_ts = ts1 + ts2
    #
    #
    # calculate production for each ensemble member: one column per member,
    # one row per timestep of both periods
    ens_prods = np.zeros((len(all_ts), 25))
    for i in range(25):
        ens_cols = ['Tout24hdiff' + str(i), 'vWind24hdiff' + str(i),\
                    'sunRad24hdiff' + str(i), 'prod24h_before']
        # Re-key the fitted coefficients to the column names of member i.
        ens_params = pd.Series({
            'Tout24hdiff' + str(i):
            res.params['Tout24hdiff'],
            'vWind24hdiff' + str(i):
            res.params['vWind24hdiff'],
            'sunRad24hdiff' + str(i):
            res.params['sunRad24hdiff'],
            'prod24h_before':
            res.params['prod24h_before']
        })
        ens_prods[:, i] = linear_map(all_ens_data, ens_params, ens_cols)

    # calculate combined confint
    # Spread across the 25 member-wise production series at each timestep.
    ens_std = ens_prods.std(axis=1)
    vali_resid = linear_map(vali_data, res.params, cols) - vali_data['prod']
    # NOTE(review): vali_resid_corrig is only referenced by the commented-out
    # line below and is otherwise unused in this function.
    vali_resid_corrig = vali_resid - np.sign(
        vali_resid) * 1.9599 * ens_std[len(ts1):]
    #mean_conf_int_spread = (vali_resid_corrig.quantile(0.95) - vali_resid_corrig.quantile(0.05))/2 # this conf_int is not used anymore

    # Fit residuals with 1.9599 * ens_std subtracted in the direction of each
    # residual's sign; the 2.5% / 97.5% quantiles of the result give the
    # constant lower / upper band widths.
    fit_resid = res.resid
    fit_resid_corrig = fit_resid - np.sign(
        fit_resid) * 1.9599 * ens_std[0:len(ts1)]
    conf_int_spread_lower = -fit_resid_corrig.quantile(0.025)
    conf_int_spread_higher = fit_resid_corrig.quantile(0.975)

    # Total band width (lower + upper), used only for the printed ratio below.
    combined_conf_ints = conf_int_spread_lower + conf_int_spread_higher + 2 * 1.9599 * ens_std
    all_prod_model = np.concatenate(
        [res.fittedvalues,
         linear_map(vali_data, res.params, cols)])
    combined_ub95 = all_prod_model + conf_int_spread_higher + 1.9599 * ens_std
    combined_lb95 = all_prod_model - (conf_int_spread_lower + 1.9599 * ens_std)

    # plot confint (validation part only: everything from index len(ts1) on)
    ax1.fill_between(all_ts[len(ts1):],
                     combined_lb95[len(ts1):],
                     combined_ub95[len(ts1):],
                     label='95% prediction intervals')
    ax1.fill_between(all_ts[len(ts1):],
                     all_prod_model[len(ts1):] - 1.9599 * ens_std[len(ts1):],
                     all_prod_model[len(ts1):] + 1.9599 * ens_std[len(ts1):],
                     facecolor='grey',
                     label='Weather ensemble 95% conf. int.')

    # plot ensemble models
    ax1.plot_date(all_ts[len(ts1):], ens_prods[len(ts1):], '-', lw=0.5)

    ax1.plot_date(ts2,
                  vali_data['prod'],
                  'k-',
                  lw=2,
                  label='Historical production')
    # `red` is not defined in this function; presumably a module-level colour
    # - confirm.
    ax1.plot_date(ts2,
                  linear_map(vali_data, res.params, cols),
                  '-',
                  c=red,
                  lw=2,
                  label='Production model')
    ax1.set_ylabel('Production [MW]', size=8)
    ax1.tick_params(axis='both', which='major', labelsize=8)
    ax1.xaxis.set_major_formatter(DateFormatter('%b %d'))
    ax1.legend(loc=1, prop={'size': 8})
    ax1.set_ylim([300, 1100])

    # Lower panel: band widths normalised by N, the upper constant width plus
    # 1.9599 times the largest validation-period ens_std.
    N = conf_int_spread_higher + 1.9599 * ens_std[len(ts1):].max()
    ax2.fill_between(ts2,
                     -(1.9599 * ens_std[len(ts1):] + conf_int_spread_lower) /
                     N,
                     -1.9599 * ens_std[len(ts1):] / N,
                     alpha=0.5)
    ax2.fill_between(ts2,
                     -1.9599 * ens_std[len(ts1):] / N,
                     np.zeros(len(ts2)),
                     facecolor='grey',
                     alpha=0.5)
    ax2.fill_between(ts2, 1.9599 * ens_std[len(ts1):] / N, facecolor='grey')
    ax2.fill_between(ts2, 1.9599 * ens_std[len(ts1):] / N,
                     (conf_int_spread_higher + 1.9599 * ens_std[len(ts1):]) /
                     N)
    ax2.set_ylabel('Prediction intervals \n[normalized]', size=8)
    ax2.tick_params(axis='y', which='major', labelsize=8)
    ax2.set_xlim(dt.datetime(2016, 1, 20, 0), dt.datetime(2016, 2, 5, 0))
    fig.tight_layout()
    print "Min_normalized pos conf bound. ", np.min(1.9599 *
                                                    ens_std[len(ts1):] / N +
                                                    conf_int_spread_higher / N)

    # Error measures on the validation period ...
    print "MAE = " + str(mae(vali_resid))
    print "MAPE = " + str(mape(vali_resid, vali_data['prod']))
    print "RMSE = " + str(rmse(vali_resid))
    print "ME = " + str(np.mean(vali_resid))

    # ... and on the fit period.
    print "MAE (fit) = " + str(mae(res.resid))
    print "MAPE (fit) = " + str(mape(res.resid, fit_data['prod']))
    print "RMSE (fit)= " + str(rmse(res.resid))
    print "ME (fit)= " + str(np.mean(res.resid))

    print "Width of const blue bands (MW)", conf_int_spread_lower, conf_int_spread_higher

    # Hard-coded absolute output path.
    plt.savefig('Q:/Projekter/Ens Article 1/figures/production_model.pdf',
                dpi=400)

    # Same error measures for the EO3 midnight forecast, for comparison.
    EO3_fc1 = sq.fetch_EO3_midnight_forecast(ts1[0], ts1[-1])
    EO3_fc2 = sq.fetch_EO3_midnight_forecast(ts2[0], ts2[-1])
    EO3_err = EO3_fc2 - vali_data['prod']
    EO3_err_fit = EO3_fc1 - fit_data['prod']
    print "MAE (EO3) = " + str(mae(EO3_err))
    print "MAPE (EO3) = " + str(mape(EO3_err, vali_data['prod']))
    print "RMSE (EO3)= " + str(rmse(EO3_err))
    print "ME (EO3)= " + str(np.mean(EO3_err))

    print "MAE (EO3_fit) = " + str(mae(EO3_err_fit))
    print "MAPE (EO3_fit) = " + str(mape(EO3_err_fit, fit_data['prod']))
    print "RMSE (EO3_fit)= " + str(rmse(EO3_err_fit))
    print "ME (EO3_fit)= " + str(np.mean(EO3_err_fit))

    # Smallest validation-period band width relative to the overall maximum.
    print np.min(combined_conf_ints[len(ts1):] / combined_conf_ints.max())
    # NOTE(review): the saved array holds only the upper half-width
    # (conf_int_spread_higher + 1.9599 * ens_std), not combined_conf_ints.
    np.savez('combined_conf_int',
             combined_conf_int=(conf_int_spread_higher + 1.9599 * ens_std),
             timesteps=all_ts)

    print "Corr coeff: vali ", np.corrcoef(
        vali_data['prod'], linear_map(vali_data, res.params, cols))[0, 1]
    print "Corr coeff: vali EO3 ", np.corrcoef(vali_data['prod'], EO3_fc2)[0,
                                                                           1]
    print "Corr coeff: fit ", np.corrcoef(fit_data['prod'],
                                          res.fittedvalues)[0, 1]
    print "Corr coeff: fit EO3 ", np.corrcoef(fit_data['prod'], EO3_fc1)[0, 1]

    # Share of validation samples outside the bands.
    # NOTE(review): the labels say "%" but the printed value is a fraction
    # (count / len(ts2)); it is not multiplied by 100.
    print "% of actual production in vali period above upper", float(
        len(
            np.where(vali_data['prod'] >
                     (conf_int_spread_higher + 1.9599 * ens_std[len(ts1):] +
                      linear_map(vali_data, res.params, cols)))[0])) / len(ts2)
    print "plus minus: ", 0.5 / len(ts2)

    print "% of actual production in vali period below lower", float(
        len(
            np.where(vali_data['prod'] <
                     (linear_map(vali_data, res.params, cols) -
                      (conf_int_spread_lower + 1.9599 * ens_std[len(ts1):])))
            [0])) / len(ts2)
    print "plus minus: ", 0.5 / len(ts2)

    return res, fit_data
def second_ens_prod_fig():
    """ This plot is based on a production model taking into account:
        the production 24 hours before as well as the change in
        temperature, wind speed and solar radiation from 24 hours ago to now.

        The regression (no constant term) is fitted on the ts1 period
        (2015-12-17 01:00 to 2016-01-15 00:00) and applied to the ts2 period
        (2016-01-20 01:00 to 2016-02-05 00:00), for the ensemble mean and
        for each of the 25 ensemble members.

        Side effects: closes all open figures, prints error measures for the
        model and for the EO3 midnight forecast, and writes
        'figures/ens_prod_models_v2.pdf', 'figures/std_ens_prod_models.pdf'
        and 'figures/setpoint.pdf'.

        Returns the tuple (vali_data, fit_data, res, ens_std, vali_resid).
        """
        
    plt.close('all')
    cols = ['prod24h_before', 'Tout24hdiff', 'vWind24hdiff', 'sunRad24hdiff']
        
    # ts1 is the fit period, ts2 the validation period.
    ts1 = ens.gen_hourly_timesteps(dt.datetime(2015,12,17,1), dt.datetime(2016,1,15,0))
    ts2 = ens.gen_hourly_timesteps(dt.datetime(2016,1,20,1), dt.datetime(2016,2,5,0))
    
    # load the data
    # NOTE(review): repack_ens_mean_as_df() is called without arguments here;
    # presumably its defaults match the ts1 window - confirm in ens.
    fit_data = ens.repack_ens_mean_as_df()
    fit_data['prod24h_before'] = sq.fetch_production(ts1[0]+dt.timedelta(days=-1), ts1[-1]+dt.timedelta(days=-1))
    
    # 24h differences: current ensemble mean minus the ensemble mean of the
    # same window shifted one day back.
    fit_data['Tout24hdiff'] = fit_data['Tout'] \
                                - ens.load_ens_timeseries_as_df(\
                                    ts_start=ts1[0]+dt.timedelta(days=-1),\
                                    ts_end=ts1[-1]+dt.timedelta(days=-1), \
                                    weathervars=['Tout']).mean(axis=1)
    fit_data['vWind24hdiff'] = fit_data['vWind'] \
                                - ens.load_ens_timeseries_as_df(\
                                    ts_start=ts1[0]+dt.timedelta(days=-1),\
                                    ts_end=ts1[-1]+dt.timedelta(days=-1), \
                                    weathervars=['vWind']).mean(axis=1)
    fit_data['sunRad24hdiff'] = fit_data['sunRad'] \
                                - ens.load_ens_timeseries_as_df(\
                                    ts_start=ts1[0]+dt.timedelta(days=-1),\
                                    ts_end=ts1[-1]+dt.timedelta(days=-1), \
                                    weathervars=['sunRad']).mean(axis=1)
                                    
    # Same columns for the validation period.
    vali_data = ens.repack_ens_mean_as_df(ts2[0], ts2[-1])
    vali_data['prod24h_before'] = sq.fetch_production(ts2[0]+dt.timedelta(days=-1), ts2[-1]+dt.timedelta(days=-1))
    vali_data['Tout24hdiff'] = vali_data['Tout'] \
                                - ens.load_ens_timeseries_as_df(\
                                    ts_start=ts2[0]+dt.timedelta(days=-1),\
                                    ts_end=ts2[-1]+dt.timedelta(days=-1), \
                                    weathervars=['Tout']).mean(axis=1)
    vali_data['vWind24hdiff'] = vali_data['vWind'] \
                                - ens.load_ens_timeseries_as_df(\
                                    ts_start=ts2[0]+dt.timedelta(days=-1),\
                                    ts_end=ts2[-1]+dt.timedelta(days=-1), \
                                    weathervars=['vWind']).mean(axis=1)
    vali_data['sunRad24hdiff'] = vali_data['sunRad'] \
                                - ens.load_ens_timeseries_as_df(\
                                    ts_start=ts2[0]+dt.timedelta(days=-1),\
                                    ts_end=ts2[-1]+dt.timedelta(days=-1), \
                                    weathervars=['sunRad']).mean(axis=1)
    
    # correct error in production:
    # NOTE(review): both operands below are element 116, so new_val is simply
    # prod[116] and no averaging happens; possibly neighbouring samples were
    # intended - confirm before relying on this correction.
    new_val = (vali_data['prod'][116] +vali_data['prod'][116])/2
    vali_data['prod'][116] = new_val
    vali_data['prod'][117] = new_val
    # The same two samples reappear 24 rows later in the lagged column.
    vali_data['prod24h_before'][116+24] = new_val
    vali_data['prod24h_before'][117+24] = new_val
    
    
 
    # do the fit
    X = fit_data[cols]
    y = fit_data['prod']
    res = mlin_regression(y, X, add_const=False)    
    
    fig, [ax1, ax2] = plt.subplots(2,1, figsize=(40,20))
 
    # load ensemble data: the current window and the window one day earlier,
    # for both periods
    ens_data1 = ens.load_ens_timeseries_as_df(ts_start=ts1[0], ts_end=ts1[-1],\
                                             weathervars=['Tout', 'vWind', 'sunRad'])
    ens_data1['prod24h_before'] = fit_data['prod24h_before']
    ens_data1_24h_before =  ens.load_ens_timeseries_as_df(\
                                    ts_start=ts1[0]+dt.timedelta(days=-1),\
                                    ts_end=ts1[-1]+dt.timedelta(days=-1), \
                                    weathervars=['Tout', 'vWind', 'sunRad']) 
    ens_data2 = ens.load_ens_timeseries_as_df(ts_start=ts2[0], ts_end=ts2[-1],\
                                             weathervars=['Tout', 'vWind', 'sunRad'])
    ens_data2['prod24h_before'] = vali_data['prod24h_before']
    ens_data2_24h_before = ens.load_ens_timeseries_as_df(\
                                    ts_start=ts2[0]+dt.timedelta(days=-1),\
                                    ts_end=ts2[-1]+dt.timedelta(days=-1), \
                                    weathervars=['Tout', 'vWind', 'sunRad']) 
    # Per-member 24h differences, stored as '<var>24hdiff<i>'.
    for i in range(25):
        for v in ['Tout', 'vWind', 'sunRad']:
            key_raw = v + str(i)
            key_diff = v + '24hdiff' + str(i)
            ens_data1[key_diff] = ens_data1[key_raw] - ens_data1_24h_before[key_raw]
            ens_data2[key_diff] = ens_data2[key_raw] - ens_data2_24h_before[key_raw]

    
    all_ens_data = pd.concat([ens_data1, ens_data2])
    all_ts = ts1 + ts2    
#    
#    
    # calculate production for each ensemble member: one column per member,
    # one row per timestep of both periods
    ens_prods = np.zeros((len(all_ts), 25))
    for i in range(25):
        ens_cols = ['Tout24hdiff' + str(i), 'vWind24hdiff' + str(i),\
                    'sunRad24hdiff' + str(i), 'prod24h_before']
        # Re-key the fitted coefficients to the column names of member i.
        ens_params = pd.Series({'Tout24hdiff' + str(i):res.params['Tout24hdiff'],
                                'vWind24hdiff' + str(i):res.params['vWind24hdiff'],
                                'sunRad24hdiff' + str(i):res.params['sunRad24hdiff'],
                                'prod24h_before':res.params['prod24h_before']})
        ens_prods[:,i] = linear_map(all_ens_data, ens_params, ens_cols)    
    
    
       
    # calculate combined confint
    # Spread across the 25 member-wise production series at each timestep.
    ens_std = ens_prods.std(axis=1)
    vali_resid = linear_map(vali_data, res.params, cols) - vali_data['prod']
    # Validation residuals with 1.9599 * ens_std subtracted in the direction
    # of each residual's sign; half the 5%-95% quantile range of the result is
    # used as a constant model band width.
    vali_resid_corrig = vali_resid - np.sign(vali_resid)*1.9599*ens_std[len(ts1):]
    mean_conf_int_spread = (vali_resid_corrig.quantile(0.95) - vali_resid_corrig.quantile(0.05))/2
    
    
    combined_conf_int = mean_conf_int_spread + 1.9599*ens_std
    all_prod_model = np.concatenate([res.fittedvalues, linear_map(vali_data, res.params, cols)])
    combined_ub95 = all_prod_model + combined_conf_int
    combined_lb95 = all_prod_model - combined_conf_int 
    
    # plot confint
    ax1.fill_between(all_ts, combined_lb95, combined_ub95, label='Combined 95% conf. int.')
    ax1.fill_between(all_ts, all_prod_model - 1.9599*ens_std, all_prod_model + 1.9599*ens_std, facecolor='grey', label='Ensemble 95% conf. int.')
    
    # plot ensemble models
    ax1.plot_date(all_ts, ens_prods, '-', lw=0.5)    
    
    ax1.plot_date(ts1, y, 'k-', lw=2, label='Actual production')
    ax1.plot_date(ts1, res.fittedvalues,'r-', lw=2, label='Model on ensemble mean')
         
    # Validation period uses the same line styles; the empty label keeps a
    # second 'Actual production' entry out of the legend.
    ax1.plot_date(ts2, vali_data['prod'], 'k-', lw=2, label='')
    ax1.plot_date(ts2, linear_map(vali_data, res.params, cols), 'r-', lw=2)
    ax1.set_ylabel('[MW]')
    ax1.legend(loc=2)
    ax1.set_ylim([0,1100])
    
    
    # Lower panel: residuals of the fit and of the validation prediction.
    ax2.plot_date(ts1, res.resid, '-', label='Residual, fitted data')
    ax2.plot_date(ts2, vali_resid, '-', label='Residual, validation data')
    ax2.set_ylabel('[MW]')
    ax2.legend(loc=2)
    ax2.set_ylim([-550, 550])
    # Error measures on the validation period ...
    print "MAE = " + str(mae(vali_resid))
    print "MAPE = " + str(mape(vali_resid, vali_data['prod']))
    print "RMSE = " + str(rmse(vali_resid))
    print "ME = " + str(np.mean(vali_resid))
    
    # ... and on the fit period.
    print "MAE (fit) = " + str(mae(res.resid))
    print "MAPE (fit) = " + str(mape(res.resid, fit_data['prod']))
    print "RMSE (fit)= " + str(rmse(res.resid))
    print "ME (fit)= " + str(np.mean(res.resid))

    # savefig acts on the current figure, so it must run before plt.figure()
    # opens the next one.
    plt.savefig('figures/ens_prod_models_v2.pdf', dpi=600) 
    plt.figure()
    plt.plot_date(all_ts, ens_std)
    plt.ylabel('Std. of ensemble production models [MW]')
    # NOTE(review): this is the same path literal that first_ens_prod_fig
    # saves its std figure to - confirm whether a distinct name was intended.
    plt.savefig('figures/std_ens_prod_models.pdf', dpi=600) 
    # 
    
    # Validation-period ensemble spread against absolute model error, and
    # actual production against the model prediction.
    vali_ens_std = ens_std[len(ts1):]
    sns.jointplot(x=pd.Series(vali_ens_std), y=np.abs(vali_resid))
    sns.jointplot(x=vali_data['prod'], y=pd.Series(linear_map(vali_data, res.params, cols)))
   
    # Same comparison for the EO3 midnight forecast.
    EO3_fc1 = sq.fetch_EO3_midnight_forecast(ts1[0], ts1[-1])
    EO3_fc2 = sq.fetch_EO3_midnight_forecast(ts2[0], ts2[-1])
    plt.figure()
    plt.plot_date(ts1, fit_data['prod'], 'k-', label='Actual production')
    plt.plot_date(ts2, vali_data['prod'], 'k-')
    plt.plot_date(ts1, EO3_fc1, 'r-', label='EO3 forecast')
    plt.plot_date(ts2, EO3_fc2, 'r-')
    EO3_err = EO3_fc2-vali_data['prod']
    EO3_err_fit = EO3_fc1-fit_data['prod']
    print "MAE (EO3) = " + str(mae(EO3_err))
    print "MAPE (EO3) = " + str(mape(EO3_err, vali_data['prod']))
    print "RMSE (EO3)= " + str(rmse(EO3_err))
    print "ME (EO3)= " + str(np.mean(EO3_err))
    
    print "MAE (EO3_fit) = " + str(mae(EO3_err_fit))
    print "MAPE (EO3_fit) = " + str(mape(EO3_err_fit, fit_data['prod']))
    print "RMSE (EO3_fit)= " + str(rmse(EO3_err_fit))
    print "ME (EO3_fit)= " + str(np.mean(EO3_err_fit))
     
    sns.jointplot(x=pd.Series(vali_ens_std), y=np.abs(EO3_err))
    
    # Setpoint figure: the normalised combined band on top, and below it a
    # setpoint of 1 - 0.2 * (normalised band) next to a constant 0.8 line.
    plt.figure(figsize=(20,10))
    plt.subplot(2,1,1)
    plt.plot_date(all_ts, combined_conf_int/combined_conf_int.max(), '-')
    plt.ylabel('Model + ensemble uncertainty \n [normalized]')
    plt.ylim(0,1)    
    plt.subplot(2,1,2)
    plt.plot_date(all_ts, (1-0.2*combined_conf_int/combined_conf_int.max()), '-', label='Dynamic setpoint')
    plt.plot_date(all_ts, 0.8*np.ones(len(all_ts)), '--', label='Static setpoint')
    plt.ylabel('Setpoint for pump massflow \n temperature [fraction of max pump cap]')
    plt.legend()
    plt.ylim(.7,1)
    plt.savefig('figures/setpoint.pdf')

    
    return vali_data, fit_data, res, ens_std, vali_resid
        if res.pvalues[var] > 0.03:
            print res.pvalues[var], var
            return False, var
        elif correct_signs[var]*res.params[var] < 0:
            return False, var
                
    if np.abs(res.params['prod24h_before']-1) > 0.05:
        print "WARNING: prod24h_before is weighted with: " + str(res.params['prod24h_before'])
    if res.resid.mean()>5:
        print "WARNING: Bias in model: " + res.resid.mean()
    return True, None
    

# Hourly window used for the fit: 2015-10-17 01:00 through 2016-01-16 00:00.
ts_start = dt.datetime(2015, 10, 17, 1)
ts_end = dt.datetime(2016, 1, 16, 0)
timesteps = gen_hourly_timesteps(ts_start, ts_end)

# Production over the window and over the same window moved one day back.
df = pd.DataFrame()
df['prod'] = sq.fetch_production(ts_start, ts_end)
df['prod24h_before'] = sq.fetch_production(ts_start - dt.timedelta(days=1),
                                           ts_end - dt.timedelta(days=1))

# For each weather variable store the current series, the series one day
# earlier, and the difference between the two.
for v in ['Tout', 'vWind', 'sunRad', 'hum']:
    df[v] = sq.fetch_BrabrandSydWeather(v, ts_start, ts_end)
    df[v + '24h_before'] = sq.fetch_BrabrandSydWeather(
        v,
        ts_start - dt.timedelta(days=1),
        ts_end - dt.timedelta(days=1))
    df[v + '24hdiff'] = df[v] - df[v + '24h_before']

# Candidate regressors and the flag driving the fitting loop that follows.
cols = [
    'Tout24hdiff',
    'vWind24hdiff',
    'prod24h_before',
    'sunRad24hdiff',
    'hum24hdiff',
]
good_fit = False
while not good_fit:
Exemple #48
0
def load_cons_model_dfs(df):
    """Build the fit, validation and test frames for the consumption model.

    Takes the data frame with the already calculated consumptions: `df`
    must expose a 'cons' column and be sliceable by datetime through `.ix`.

    Three hourly periods are used:
        fit:        2015-12-17 01:00 .. 2016-01-15 00:00
        validation: 2016-01-20 01:00 .. 2016-02-05 00:00
        test:       2016-02-05 01:00 .. 2016-03-01 00:00

    Every frame gets the columns 'cons', 'cons24hbefore' (the same slice
    moved one day back) and, for each of Tout, vWind, sunRad and hum, a
    '<var>24hdiff' column holding the ensemble mean for the period minus
    the ensemble mean for the period moved one day back.

    Returns:
        (fit_data, vali_data, test_data, all_data), each indexed by a
        DatetimeIndex built from its timesteps; all_data is the
        concatenation of the three frames in that order.
    """
    one_day = dt.timedelta(days=1)

    periods = [
        ens.gen_hourly_timesteps(dt.datetime(2015, 12, 17, 1),
                                 dt.datetime(2016, 1, 15, 0)),
        ens.gen_hourly_timesteps(dt.datetime(2016, 1, 20, 1),
                                 dt.datetime(2016, 2, 5, 0)),
        ens.gen_hourly_timesteps(dt.datetime(2016, 2, 5, 1),
                                 dt.datetime(2016, 3, 1, 0)),
    ]
    frames = [pd.DataFrame() for _ in periods]

    # Going through np.array drops the source index, so the values are
    # stored positionally instead of being aligned on df's index.
    for frame, steps in zip(frames, periods):
        frame['cons'] = np.array(df.ix[steps[0]:steps[-1]]['cons'])

    for frame, steps in zip(frames, periods):
        lagged = df.ix[steps[0] - one_day:steps[-1] - one_day]
        frame['cons24hbefore'] = np.array(lagged['cons'])

    weathervars = ['Tout', 'vWind', 'sunRad', 'hum']
    for name in weathervars:
        diff_col = '%s24hdiff' % name
        for frame, steps in zip(frames, periods):
            mean_now = ens.load_ens_timeseries_as_df(
                ts_start=steps[0],
                ts_end=steps[-1],
                weathervars=[name]).mean(axis=1)
            mean_day_before = ens.load_ens_timeseries_as_df(
                ts_start=steps[0] - one_day,
                ts_end=steps[-1] - one_day,
                weathervars=[name]).mean(axis=1)
            frame[diff_col] = mean_now - mean_day_before

    # Only now switch each frame over to its datetime index.
    for frame, steps in zip(frames, periods):
        frame.set_index(pd.DatetimeIndex(steps), inplace=True)

    fit_data, vali_data, test_data = frames
    all_data = pd.concat(frames)

    return fit_data, vali_data, test_data, all_data