ts_end=test_ts[-1], \ weathervars=[v]).mean(axis=1) \ - ens.load_ens_timeseries_as_df(\ ts_start=test_ts[0]+dt.timedelta(days=-1),\ ts_end=test_ts[-1]+dt.timedelta(days=-1), \ weathervars=[v]).mean(axis=1) #%% all_data = pd.concat([fit_data, vali_data, test_data]) no_blind_data = pd.concat([fit_data, vali_data]) corr = no_blind_data.corr() #%% Try fitting all combinations all_combs = gen_all_combinations(all_data.drop(['prod', 'prod24h_before'], axis=1).columns) for c in all_combs: c.insert(0,'prod24h_before') all_combs.insert(0, ['prod24h_before']) check_AIC=False if check_AIC: for c in fit_data.columns: fit_data[c] = (fit_data[c]-fit_data[c].mean())/fit_data[c].std() fit_y = fit_data['prod'] results = [] for columns in all_combs: X = fit_data[columns] res = mlin_regression(fit_y,X, add_const=False) results.append(res)
ts_start=test_ts[0],\ ts_end=test_ts[-1], \ weathervars=[v]).mean(axis=1) \ - ens.load_ens_timeseries_as_df(\ ts_start=test_ts[0]+dt.timedelta(days=-1),\ ts_end=test_ts[-1]+dt.timedelta(days=-1), \ weathervars=[v]).mean(axis=1) #%% all_data = pd.concat([fit_data, vali_data, test_data]) no_blind_data = pd.concat([fit_data, vali_data]) corr = no_blind_data.corr() #%% Try fitting all combinations all_combs = gen_all_combinations( all_data.drop(['prod', 'prod24h_before'], axis=1).columns) for c in all_combs: c.insert(0, 'prod24h_before') all_combs.insert(0, ['prod24h_before']) check_AIC = False if check_AIC: for c in fit_data.columns: fit_data[c] = (fit_data[c] - fit_data[c].mean()) / fit_data[c].std() fit_y = fit_data['prod'] results = [] for columns in all_combs: X = fit_data[columns] res = mlin_regression(fit_y, X, add_const=False) results.append(res)
# Persist the design matrix and the matching target series.
X.to_pickle('48h60h168h_lagged_X.pkl')
y.to_pickle('prod_to_gowith.pkl')

#%%
# Linear regression without an intercept, evaluated by 25-fold
# cross-validation on all columns of X.
lr = linear_model.LinearRegression(fit_intercept=False)
predicted = cross_val_predict(lr, X, y, cv=25)

# Plot the observed series and the cross-validated predictions (red) in one
# figure, then a joint plot of predictions against observations.
plt.figure()
plt.plot(y)
plt.plot(predicted, 'r')
sns.jointplot(pd.Series(predicted), y)

# NOTE(review): newer scikit-learn releases name this scorer
# 'neg_mean_absolute_error' -- confirm against the installed version.
score = cross_val_score(lr, X, y, cv=25, scoring='mean_absolute_error')
lr.fit(X, y)
var_combs = gen_all_combinations(X.columns)

#%%
# this makes for fewer combinations
var_combs = [
    v for v in var_combs
    if 'Tout48hdiff' in v and 'prod48hbefore' in v
]

try_all_combs = True
if try_all_combs:
    # One error value per remaining column combination.
    maes = np.zeros(len(var_combs))
    rmses = np.zeros(len(var_combs))
    for i, v in enumerate(var_combs):
        # NOTE(review): columns are read from all_data although the
        # combinations were generated from X.columns -- confirm intended.
        predicted = cross_val_predict(lr, all_data[v], y, cv=10)
        maes[i] = mae(predicted - y)
        rmses[i] = rmse(predicted - y)

#%% EO3 benchmark
#%%
# Intercept-free linear regression; predictions for every row come from
# 25-fold cross-validation over the full column set of X.
lr = linear_model.LinearRegression(fit_intercept=False)
predicted = cross_val_predict(lr, X, y, cv=25)

# Observed values and cross-validated predictions (red) share one figure;
# the joint plot pairs each prediction with its observation.
plt.figure()
plt.plot(y)
plt.plot(predicted, 'r')
sns.jointplot(pd.Series(predicted), y)

# NOTE(review): this scorer name is spelled 'neg_mean_absolute_error' in
# newer scikit-learn releases -- check the installed version.
score = cross_val_score(lr, X, y, cv=25, scoring='mean_absolute_error')
lr.fit(X, y)
var_combs = gen_all_combinations(X.columns)

#%%
# this makes for fewer combinations
var_combs = [
    v for v in var_combs
    if 'Tout48hdiff' in v and 'prod48hbefore' in v
]

try_all_combs = True
if try_all_combs:
    # Error arrays are indexed in step with var_combs.
    maes = np.zeros(len(var_combs))
    rmses = np.zeros(len(var_combs))
    for i, v in enumerate(var_combs):
        # NOTE(review): the features come from all_data, while var_combs
        # was derived from X.columns -- verify this is deliberate.
        predicted = cross_val_predict(lr, all_data[v], y, cv=10)
        maes[i] = mae(predicted - y)
        rmses[i] = rmse(predicted - y)