def cv_fun(x_tr, y_tr, x_te, y_te):
    """Score MIMO/MISO baselines and an MBT sweep over ``n_harmonics`` on one fold.

    Both baselines are fitted on ``(x_tr, y_tr)`` and evaluated on
    ``(x_te, y_te)``. Then one MBT is fitted for every ``n_harmonics`` value
    in ``np.arange(5, 120, 5)``, starting each time from a deep copy of the
    module-level ``mbt_pars_0``.

    :param x_tr: training features, passed to every model's ``fit``
    :param y_tr: training targets, passed to every model's ``fit``
    :param x_te: test features, passed to every model's ``predict``
    :param y_te: test targets used to compute the scores
    :return: dict with keys ``'mape'`` and ``'rmse'`` (dicts keyed by the
        number of harmonics) and the scalar baseline scores ``'mape_mimo'``,
        ``'mape_miso'``, ``'rmse_mimo'``, ``'rmse_miso'``
    """

    def _rmse(prediction):
        # Root of the mean squared error along axis 0, then averaged.
        return np.mean(np.mean((prediction - y_te)**2, axis=0)**0.5)

    def _mape(prediction):
        # The 1e-3 offset keeps the ratio finite where y_te is zero.
        return np.mean(
            np.mean(np.abs(prediction - y_te) / (np.abs(y_te) + 1e-3), axis=0))

    baseline_names = ['mimo', 'miso']
    baselines = [MIMO(n_boosts, lgb_pars), MISO(n_boosts, lgb_pars)]

    # Fit all baselines first; scoring and printing happen afterwards.
    y_hat = {}
    for label, baseline in zip(baseline_names, baselines):
        baseline.fit(x_tr, y_tr)
        y_hat[label] = baseline.predict(x_te)

    rmse_base = {}
    mape_base = {}
    for label in baseline_names:
        rmse_base[label] = _rmse(y_hat[label])
        mape_base[label] = _mape(y_hat[label])
        print('{} rmse: {:0.2e}, mape: {:0.2e}'.format(
            label, rmse_base[label], mape_base[label]))

    # Sweep the number of harmonics used by the MBT.
    mape = {}
    rmse = {}
    for n_h in np.arange(5, 120, 5):
        sweep_pars = deepcopy(mbt_pars_0)
        sweep_pars['n_harmonics'] = n_h
        booster = MBT(**sweep_pars)
        booster.fit(x_tr, y_tr, do_plot=False)
        y_hat_mbt = booster.predict(x_te)
        rmse[n_h] = _rmse(y_hat_mbt)
        mape[n_h] = _mape(y_hat_mbt)
        print('rmse: {:0.4e}, mape: {:0.4e}, n_h {}'.format(
            rmse[n_h], mape[n_h], n_h))

    results = dict(
        mape=mape,
        rmse=rmse,
        mape_mimo=mape_base['mimo'],
        mape_miso=mape_base['miso'],
        rmse_mimo=rmse_base['mimo'],
        rmse_miso=rmse_base['miso'],
    )
    return results
def cv_fun(x_tr, y_tr, x_te, y_te, x_lingb_tr, x_lingb_te):
    """Fit linear, ridge and MBT models on first-differenced data and score them.

    All inputs are differenced along axis 0 with ``np.diff``. The
    ``x_lingb_*`` arrays are not differenced, only trimmed by their last row.
    The ``'mbt lin'`` model is evaluated through ``noise_vsc``; every other
    model is fitted on the differenced features and scored with
    ``get_scores``.

    :param x_tr: training features (differenced before use)
    :param y_tr: training targets (differenced before use)
    :param x_te: test features (differenced before use)
    :param y_te: test targets (differenced before use)
    :param x_lingb_tr: training features handed to ``noise_vsc`` as tree inputs
    :param x_lingb_te: test features handed to ``noise_vsc`` as tree inputs
    :return: dict mapping each model name to its scores
    """
    # Work on the first differences along axis 0.
    x_tr_d = np.diff(x_tr, axis=0)
    y_tr_d = np.diff(y_tr, axis=0)
    x_te_d = np.diff(x_te, axis=0)
    y_te_d = np.diff(y_te, axis=0)

    # np.diff yields one row less than its input; drop the last row here so
    # the row counts agree.
    x_lingb_tr = x_lingb_tr[:-1, :]
    x_lingb_te = x_lingb_te[:-1, :]

    # fit VSC canonical
    candidates = [
        ('linear', LinearRegression()),
        ('ridge', RidgeCV(alphas=10**np.linspace(-2, 8, 20))),
        ('mbt lin', MBT(**mbt_pars_lin)),
        ('mbt', MBT(**mbt_pars)),
    ]

    # Reference prediction: the per-column mean of the differenced test
    # targets, repeated on every row.
    y_mean = np.ones_like(y_te_d) * np.mean(y_te_d, axis=0)

    scores = {}
    for label, model in candidates:
        print('start fitting model {}'.format(model))
        if label == 'mbt lin':
            scores[label] = noise_vsc(x_lingb_tr, y_tr_d, x_tr_d, x_lingb_te,
                                      x_te_d, y_te_d, y_mean, model)
        else:
            model_fitted = model.fit(x_tr_d, y_tr_d)
            y_hat = model_fitted.predict(x_te_d)
            scores[label] = get_scores(y_hat, y_te_d, y_mean)
        print(scores[label])

    plt.close('all')
    return scores
def cv_fun(x_tr, y_tr, x_te, y_te):
    """Score MIMO/MISO baselines and one MBT model on a single fold.

    The MBT is built from a deep copy of the module-level ``mbt_pars_0``.
    The function prints each baseline's scores and the ratio between the MBT
    MAPE and the MAPE of the first baseline.

    :param x_tr: training features, passed to every model's ``fit``
    :param y_tr: training targets, passed to every model's ``fit``
    :param x_te: test features, passed to every model's ``predict``
    :param y_te: test targets used to compute the scores
    :return: dict with the scalar scores ``'mape'``, ``'rmse'`` (MBT) and
        ``'mape_mimo'``, ``'mape_miso'``, ``'rmse_mimo'``, ``'rmse_miso'``
    """

    def _rmse(prediction):
        # Root of the mean squared error along axis 0, then averaged.
        return np.mean(np.mean((prediction - y_te)**2, axis=0)**0.5)

    def _mape(prediction):
        # The 1e-3 offset keeps the ratio finite where y_te is zero.
        return np.mean(
            np.mean(np.abs(prediction - y_te) / (np.abs(y_te) + 1e-3), axis=0))

    names = ['mimo', 'miso']
    baselines = [MIMO(n_boosts, lgb_pars), MISO(n_boosts, lgb_pars)]

    # Fit all baselines first; scoring and printing happen afterwards.
    y_hat = {}
    for label, baseline in zip(names, baselines):
        baseline.fit(x_tr, y_tr)
        y_hat[label] = baseline.predict(x_te)

    rmse_base = {}
    mape_base = {}
    for label in names:
        rmse_base[label] = _rmse(y_hat[label])
        mape_base[label] = _mape(y_hat[label])
        print('{} rmse: {:0.2e}, mape: {:0.2e}'.format(
            label, rmse_base[label], mape_base[label]))

    booster = MBT(**deepcopy(mbt_pars_0))
    booster.fit(x_tr, y_tr, do_plot=False)
    y_hat_mbt = booster.predict(x_te)
    rmse = _rmse(y_hat_mbt)
    mape = _mape(y_hat_mbt)

    reference = names[0]
    print('MAPE w.r.t. {}: {:0.2e}'.format(reference,
                                           mape / mape_base[reference]))

    results = dict(
        mape=mape,
        rmse=rmse,
        mape_mimo=mape_base['mimo'],
        mape_miso=mape_base['miso'],
        rmse_mimo=rmse_base['mimo'],
        rmse_miso=rmse_base['miso'],
    )
    return results
def test_MSE_loss(self):
    """Fit a default MBT on the training split and check it built several trees.

    The first ``self.n_tr`` rows of ``self.x`` / ``self.y`` are used for
    fitting and the remaining rows for prediction. Predictions for the first
    two target columns are scattered against the observations, together with
    the identity line, and shown for one second.
    """
    split = self.n_tr
    booster = MBT()
    booster.fit(self.x[:split, :], self.y[:split, :])
    y_pred = booster.predict(self.x[split:, :])

    y_obs = self.y[split:]
    plt.figure()
    for col in (0, 1):
        plt.scatter(y_obs[:, col], y_pred[:, col])

    # Identity line spanning the range of the observed test targets.
    lower = np.min(y_obs)
    upper = np.max(y_obs)
    plt.plot([lower, upper], [lower, upper])
    plt.pause(1)
    plt.close('all')

    assert len(booster.trees) > 1
def cv_function(x_tr, y_tr, x_te, y_te, pars, do_refit):
    """Compare per-target quantile predictions of MISO boosters and MBT on one fold.

    For every target column ``t`` of ``y_tr``:

    * one ``MISO`` model is fitted for each value in the module-level
      ``alphas`` and the predictions are stacked horizontally;
    * one ``MBT`` model, built from ``pars``, is fitted on the same column.

    Per-target CRPS means are printed. The predictions of all targets are
    then stacked along a third axis and scored with ``quantile_scores``.

    Neither the caller's ``pars`` nor the module-level ``lgb_pars`` is
    modified: the ``'refit'`` and ``'alpha'`` entries are set on shallow
    copies.

    :param x_tr: training features
    :param y_tr: training targets; one model set is fitted per column
    :param x_te: test features
    :param y_te: test targets, same column layout as ``y_tr``
    :param pars: keyword arguments for ``MBT`` (left untouched)
    :param do_refit: value passed to ``MBT`` under the ``'refit'`` key
    :return: ``{'mimo': <scores of the MISO models>, 'mgb': <scores of MBT>}``
    """
    n_t = y_tr.shape[1]
    # Copy instead of writing into the dict owned by the caller.
    mbt_pars = dict(pars)
    mbt_pars['refit'] = do_refit

    q_hat, q_hat_mgb = [], []
    for t in range(n_t):
        # Indexing with [t] (a list) keeps the target two-dimensional.
        y_tr_t = y_tr[:, [t]]
        y_te_t = y_te[:, [t]]

        # fit LGBM model: one booster per quantile level
        q_hat_t = []
        for alpha in alphas:
            # Per-alpha copy, so the shared lgb_pars dict is never mutated.
            alpha_pars = dict(lgb_pars)
            alpha_pars['alpha'] = alpha
            m = MISO(n_boost, alpha_pars)
            m.fit(x_tr, y_tr_t)
            q_hat_t.append(m.predict(x_te))
        q_hat.append(np.hstack(q_hat_t))

        # fit MBT model
        m = MBT(**mbt_pars)
        m.fit(x_tr, y_tr_t, do_plot=False)
        q_hat_mgb.append(m.predict(x_te))

        # quantile_scores is fed 3-D predictions, hence the added axis.
        results_mimo_t = quantile_scores(np.expand_dims(q_hat[-1], 2), y_te_t,
                                         alphas)
        results_mgb_t = quantile_scores(np.expand_dims(q_hat_mgb[-1], 2),
                                        y_te_t, alphas)
        print('crsp mimo {:0.2e}, mbg {:0.2e}'.format(
            results_mimo_t['crps_mean'], results_mgb_t['crps_mean']))

    # Stack the per-target predictions along the third axis.
    q_hat = np.dstack(q_hat)
    q_hat_mgb = np.dstack(q_hat_mgb)
    results_mimo = quantile_scores(q_hat, y_te, alphas)
    results_mgb = quantile_scores(q_hat_mgb, y_te, alphas)
    results = {'mimo': results_mimo, 'mgb': results_mgb}
    plt.close('all')
    return results
def test_MBT_instantiations_own(self):
    """MBT can be instantiated with only its own (boosting-level) parameters."""
    mbt_pars = {
        "early_stopping_rounds": 2,
        "n_boosts": 30,
        "do_refit": True
    }
    m = MBT(**mbt_pars)
    # Check the constructed object instead of the vacuous `assert True`.
    assert isinstance(m, MBT)
def noise_vsc(x_lingb_tr, y_tr_d, x_tr_d, x_lingb_te, x_te_d, y_te_d, y_mean,
              model):
    """Fit an MBT with separate tree / linear inputs and score it under input noise.

    A fresh ``MBT`` is built from ``model.n_boosts``,
    ``model.early_stopping_rounds`` and ``model.tree_pars``. It is fitted with
    ``x_lingb_tr`` as tree features and ``x_tr_d`` as ``x_lr``, then scored on
    the test data. The test tree features are afterwards perturbed with ten
    increasing noise levels and the model is re-scored at each level.

    :param x_lingb_tr: training features used by the trees
    :param y_tr_d: training targets
    :param x_tr_d: training features passed as ``x_lr``
    :param x_lingb_te: test features used by the trees
    :param x_te_d: test features passed as ``x_lr``
    :param y_te_d: test targets
    :param y_mean: reference prediction forwarded to ``get_scores``
    :param model: object providing ``n_boosts``, ``early_stopping_rounds``
        and ``tree_pars``
    :return: the noise-free scores merged with ``'scores_noise_partial_cv'``
        (scores per noise level) and ``'x_scores'`` (MAPE / RMSE of the
        perturbed first feature column per noise level)
    """
    x_tree_tr, x_tree_te = x_lingb_tr, x_lingb_te

    # Rebuild the booster from the template's parameters; entries of
    # tree_pars take precedence on key clashes.
    rebuilt_pars = {
        'n_boosts': model.n_boosts,
        'early_stopping_rounds': model.early_stopping_rounds,
    }
    rebuilt_pars.update(model.tree_pars)
    model = MBT(**rebuilt_pars)

    model_fitted = model.fit(x_tree_tr, y_tr_d, x_lr=x_tr_d, do_plot=True)
    y_hat = model_fitted.predict(x_tree_te, x_lr=x_te_d)
    sc = get_scores(y_hat, y_te_d, y_mean)

    plt.figure()
    plt.scatter(y_te_d, y_hat, marker='.', alpha=0.2)
    plt.pause(0.1)
    print(sc)

    scores_noise = {}
    x_scores = {}
    # Every column except the last two receives noise.
    # NOTE(review): the original comment said noise is added only to the
    # first variable (the PCC power) and the others are deterministic --
    # confirm which is intended.
    n_noisy_cols = x_tree_te.shape[1] - 2
    for level in range(10):
        key = 'noise:{}'.format(level)

        # Noise amplitude: mean absolute value of the first *row* of the tree
        # features, scaled by level / 50.
        # NOTE(review): confirm that the first row, not the first column,
        # is meant.
        amplitude = np.mean(np.abs(x_tree_te[0, :])) * (level / 50)
        noise = np.hstack([
            truncnorm_rvs_recursive(x_te_d.shape[0], 3).reshape(-1, 1) *
            amplitude for _ in range(n_noisy_cols)
        ])
        x_hat = x_tree_te + np.hstack([noise, np.zeros((len(noise), 2))])

        # Error introduced on the first feature column.
        first_col = x_tree_te[:, 0]
        deviation = x_hat[:, 0] - first_col
        x_scores[key] = {
            'mape': np.mean(np.abs(deviation) / np.abs(first_col + 1e-2),
                            axis=0),
            'rmse': np.mean(deviation**2, axis=0)**0.5,
        }

        y_hat_noise = model_fitted.predict(x_hat, x_lr=x_te_d)
        scores_noise[key] = get_scores(y_hat_noise, y_te_d, y_mean)

    all_scores = dict(sc)
    all_scores['scores_noise_partial_cv'] = scores_noise
    all_scores['x_scores'] = x_scores
    return all_scores
def test_MBT_instantiations_tree_loss_and_own(self):
    """MBT accepts tree, boosting and loss parameters merged in one call."""
    tree_pars = {"n_q": 4, "min_leaf": 34}
    mbt_pars = {
        "early_stopping_rounds": 5,
        "n_boosts": 32,
        "do_refit": True
    }
    loss_pars = {
        "lambda_weights": 0.1,
        "lambda_leaves": 0.1,
        "loss_type": "linear-regression"
    }
    # The three groups have disjoint keys, so the merge order is irrelevant.
    pars = {**tree_pars, **mbt_pars, **loss_pars}
    m = MBT(**pars)
    # Check the constructed object instead of the vacuous `assert True`.
    assert isinstance(m, MBT)
for i in range(50): ax.cla() ax.plot(np.arange(24), x_tr[i, :], label='features') ax.scatter(25, y_tr[i, :], label='multivariate targets', marker='.') ax.set_xlabel('step ahead [h]') ax.set_ylabel('P [kW]') ax.legend(loc='upper right') ax.set_ylim(y_min, y_max) plt.pause(1e-6) plt.close('all') # --------------------------- Set up an MBT for quantiles prediction and train it ------------------------------------- alphas = np.linspace(0.05, 0.95, 7) m = MBT(loss_type='quantile', alphas=alphas, n_boosts=40, min_leaf=300, lambda_weights=1e-3).fit(x_tr, y_tr, do_plot=True) # --------------------------- Predict and plot ------------------------------------------------------------------------ y_hat = m.predict(x_te) fig, ax = set_figure((5, 4)) y_min = np.min(y_tr[:50, :]) * 0.9 y_max = np.max(y_tr[:50, :]) * 1.1 n_q = y_hat.shape[1] n_sa = y_te.shape[1] n_plot = 300 colors = plt.get_cmap('plasma', int(n_q)) for fl in np.arange(np.floor(n_q / 2), dtype=int):
print('start fitting')

# MBT configuration for the 'latent_variable' loss. S stacks A on top of an
# identity matrix with as many rows as A has columns.
pars = dict(
    n_q=10,
    min_leaf=400,
    lambda_leaves=0.1,
    lambda_weights=1,
    early_stopping_rounds=3,
    n_boosts=100,
    loss_type='latent_variable',
    S=np.vstack([A, np.eye(A.shape[1])]),
    precision=np.eye(y_hat_tr.shape[1]),
)

# predict the MGB model on y_hat_te
# The model is trained on the residual between the targets and the forecasts
# of the columns after the first A.shape[0], mapped through S.
m = MBT(**pars)
m.fit(
    np.hstack([y_hat_tr, err_tr, x_tr[0][:, -2:]]),
    y_tr - y_hat_tr[:, A.shape[0]:] @ pars['S'].T,
    do_plot=True,
)

# The same mapped forecast on the test set; it is added back to the
# predicted residual to obtain y_rec_mgb.
y_hat_te_bu = y_hat_te[:, A.shape[0]:] @ pars['S'].T
y_rec_mgb = m.predict(
    np.hstack([y_hat_te, err_te, x_te[0][:, -2:]])) + y_hat_te_bu

scores_baseline = pd.concat(
    [scores_baseline, hier_scores(y_te, y_hat_te, sa)], axis=0)
scores_bu = pd.concat(
    [scores_bu, hier_scores(y_te, y_hat_te_bu, sa)], axis=0)
# NOTE(review): this scores `y_rec`, which is not assigned in this fragment;
# `y_rec_mgb` computed above is not scored here -- confirm which is intended.
scores_rec = pd.concat(
    [scores_rec, hier_scores(y_te, y_rec, sa)], axis=0)
# Animate the first 50 training samples: features on steps 0-23, targets on
# steps 24-47.
for i in range(50):
    ax.cla()
    ax.plot(np.arange(24), x_tr[i, :], label='features')
    ax.plot(np.arange(24, 48), y_tr[i, :], label='multivariate targets')
    ax.set_xlabel('step ahead [h]')
    ax.set_ylabel('P [kW]')
    ax.legend(loc='upper right')
    ax.set_ylim(y_min, y_max)
    plt.pause(1e-2)
plt.close('all')

# --------------------------- Set up an MBT with smooth regularization and fit it -------------------------------------
print('{0} Fitting MBT with smooth loss {0}'.format('#' * 20))
m_sm = MBT(
    loss_type='time_smoother',
    lambda_smooth=1,
    n_boosts=30,
    min_leaf=300,
    lambda_weights=1e-3,
).fit(x_tr, y_tr, do_plot=True)
y_hat_sm = m_sm.predict(x_te)

# --------------------------- Set up 2 MBT with Fourier response ------------------------------------------------------
# The two models differ only in the number of harmonics (3 vs 5).
print('{0} Fitting MBT with Fourier loss and 3 harmonics {0}'.format('#' * 20))
m_fou_3 = MBT(
    loss_type='fourier',
    n_harmonics=3,
    n_boosts=30,
    min_leaf=300,
    lambda_weights=1e-3,
).fit(x_tr, y_tr, do_plot=True)
y_hat_fou_3 = m_fou_3.predict(x_te)

print('{0} Fitting MBT with Fourier loss and 5 harmonics {0}'.format('#' * 20))
m_fou_5 = MBT(
    loss_type='fourier',
    n_harmonics=5,
    n_boosts=30,
    min_leaf=300,
    lambda_weights=1e-3,
).fit(x_tr, y_tr, do_plot=True)
y_hat_fou_5 = m_fou_5.predict(x_te)
def test_MBT_instantiations_tree(self):
    """MBT can be instantiated with tree-level parameters only."""
    tree_pars = {"n_q": 4, "min_leaf": 322}
    m = MBT(**tree_pars)
    # Check the constructed object instead of the vacuous `assert True`.
    assert isinstance(m, MBT)
for i in range(50): ax.cla() ax.plot(np.arange(24), x_tr[i, :], label='features') ax.plot(np.arange(24) + 24, y_tr[i, :], label='multivariate targets') ax.set_xlabel('step ahead [h]') ax.set_ylabel('P [kW]') ax.legend(loc='upper right') ax.set_ylim(y_min, y_max) plt.pause(1e-6) plt.close('all') # --------------------------- Set up an MBT and fit it -------------------------------------------------------------- print('#' * 20 + ' Fitting MBT with mse loss ' + '#' * 20) m = MBT(n_boosts=30, min_leaf=100, lambda_weights=1e-3).fit(x_tr, y_tr, do_plot=True) y_hat = m.predict(x_te) # --------------------------- Set up 24 MISO LightGBM and fit it ------------------------------------------------------ print('#' * 20 + ' Fitting 24 MISO LightGBMs ' + '#' * 20) m_lgb = ut.LightGBMMISO(30).fit(x_tr, y_tr) y_hat_lgb = m_lgb.predict(x_te) # --------------------------- Set up a linear-MBT and fit it ---------------------------------------------------------- # The MBT chooses splits based on the previous day mean, min, max, first and last values. It then fits a linear # model inside the leaves print('#' * 20 + ' Fitting a linear-response MBT ' + '#' * 20) x_build = np.hstack([