# Average the signal over consecutive, non-overlapping groups of `samples_per_bin` samples.
# Trailing samples that do not fill a whole group are dropped so that the reshape is valid.
# NOTE(review): presumably this turns 10-minute readings into hourly means -- confirm against the data source.
samples_per_bin = 6
n_whole = len(total_power) - len(total_power) % samples_per_bin
total_power = np.mean(total_power[:n_whole].reshape(-1, samples_per_bin), axis=1, keepdims=True)

# Embed the signal in a lagged matrix with `window` columns (24 features + 1 target): row k holds
# `window` consecutive samples starting at sample k. The last rows run past the end of the signal
# and are filled from the zero vector passed as last row, so the final `window` rows are discarded.
n_lags = 24
window = n_lags + 1
total_power = hankel(total_power, np.zeros((window, 1)))[:-window, :]

# create feature matrix and target for the training and test sets (first 80% of the rows for training)
x = total_power[:, :n_lags]
y = total_power[:, n_lags:]
n_tr = int(len(x) * 0.8)
x_tr, y_tr, x_te, y_te = x[:n_tr, :], y[:n_tr, :], x[n_tr:, :], y[n_tr:, :]

# visual check on the first samples of features and targets; the number of previewed samples is capped
# by the size of the training set so that short signals do not raise an IndexError
n_preview = min(50, len(x_tr))
target_x = n_lags + 1  # abscissa of the target marker, right of the 24 feature points
lag_axis = np.arange(n_lags)
fig, ax = set_figure((5, 4))
# fixed y-limits (10% margin around the previewed targets) keep the axes stable between frames
y_min = np.min(y_tr[:n_preview, :]) * 0.9
y_max = np.max(y_tr[:n_preview, :]) * 1.1
for i in range(n_preview):
    ax.cla()
    ax.plot(lag_axis, x_tr[i, :], label='features')
    ax.scatter(target_x, y_tr[i, :], label='multivariate targets', marker='.')
    ax.set_xlabel('step ahead [h]')
    ax.set_ylabel('P [kW]')
    ax.legend(loc='upper right')
    ax.set_ylim(y_min, y_max)
    plt.pause(1e-6)
plt.close('all')

# --------------------------- Set up an MBT for quantiles prediction and train it -------------------------------------
def fit(self, x, y, do_plot=False, x_lr=None):
    """
    Fits an MBT using the features specified in the matrix :math:`x\\in\\mathbb{R}^{n_{obs} \\times n_{f}}`, in
    order to predict the targets in the matrix :math:`y\\in\\mathbb{R}^{n_{obs} \\times n_{t}}`,
    where :math:`n_{obs}` is the number of observations, :math:`n_{f}` the number of features and :math:`n_{t}`
    the dimension of the target.

    At most ``self.n_boosts`` trees are fitted. After each round the loss is evaluated on the validation
    split; fitting stops once more than ``self.early_stopping_rounds`` rounds have passed since the lowest
    loss was observed. Only the trees up to (and including) the best round are kept in ``self.trees``.

    :param x: feature matrix, np.ndarray.
    :param y: target matrix, np.ndarray.
    :param do_plot: if True, open a figure and redraw the current fit after every boosting round; the
                    figure is closed before returning.
    :param x_lr: features for fitting the linear response inside the leaves. This is only required if a
                 LinearLoss is being used.
    :return: self, with ``self.trees`` holding the fitted trees.
    """

    # divide in training and validation sets (has effect only if pars['val_ratio'] was set)
    x_tr, x_val, y_tr, y_val, x_lr_tr, x_lr_val = self._validation_split(x, y, x_lr)
    lowest_loss = np.inf
    best_iter = 0
    self.trees = []
    t_init = time()
    if do_plot:
        fig, ax = set_figure((5, 4))

    for boost_round in tqdm(range(self.n_boosts)):
        t0 = time()
        tree = Tree(**self.tree_pars)
        if boost_round == 0:
            # Starting point of the additive model and the first gradient/hessian.
            # NOTE(review): the initial guess and the last argument below use the full y / x rather than
            # the training split returned by _validation_split -- confirm this is intended.
            y_hat_val = self._fit_initial_guess(tree, y)
            y_hat = self._fit_initial_guess(tree, y)
            neg_grad, hessian = self._get_neg_grad_and_hessian_diags(tree, y_tr, y_hat, 0, x)

        # fit the tree on the current negative gradient
        tree.fit(x_tr, neg_grad, learning_rate=self.learning_rate, hessian=hessian, x_lr=x_lr_tr)

        # optionally refit the tree on the current residuals (enabled through self.refit)
        if self.refit:
            tree._refit(x_tr, y_tr - y_hat, self.learning_rate, x_lr=x_lr_tr)
        self.trees.append(tree)

        # accumulate the contribution of the new tree on both splits
        y_hat = y_hat + tree.predict(x_tr, x_lr_tr)
        y_hat_val = y_hat_val + tree.predict(x_val, x_lr_val)

        se, regularization = tree.loss.eval(y_val, y_hat_val, self.trees)
        loss = se + regularization

        if self.verbose == 1:
            # the leaf count is only needed for the message, so it is computed here;
            # 'best iter' is the value before this round's loss is taken into account
            terminal_leaves = len(nx.get_node_attributes(tree.g, 'response'))
            tqdm.write(
                ' Iteration: {} fitted in {:0.1e} sec, total loss: {:0.3e}, squared err: {:0.2e}, '
                'regularization: {:0.2e}, best iter: {}, terminal leaves: {}'
                .format(boost_round, time() - t0, loss, se, regularization, best_iter, terminal_leaves))

        if loss < lowest_loss:
            best_iter = boost_round
            lowest_loss = np.copy(loss)

        # early stopping: too many rounds without improving on the lowest loss
        if boost_round - best_iter > self.early_stopping_rounds:
            break

        # gradient and hessian for the next round
        neg_grad, hessian = self._get_neg_grad_and_hessian_diags(tree, y_tr, y_hat, boost_round + 1, x)

        if do_plot:
            self._plot_fit_progress(ax, tree, y, y_tr, y_hat)

    print('#---------------- Model fitted in {:0.2e} min ----------------'.
          format((time() - t_init) / 60))

    # keep trees up to best iteration
    self.trees = self.trees[:best_iter + 1]
    if do_plot:
        plt.close(fig)
    return self

def _plot_fit_progress(self, ax, tree, y, y_tr, y_hat):
    """
    Redraw the diagnostic plot for the current boosting round on ``ax``.

    For quantile losses the columns of ``y_hat`` are paired from the outside in (first with last, second
    with second-to-last, ...) and drawn as shaded bands over the first 200 samples, together with the
    target. For any other loss, predictions are scattered against observations for the first target
    dimension.

    :param ax: matplotlib axes to draw on; it is cleared first.
    :param tree: last fitted tree, only its ``loss`` attribute is inspected.
    :param y: full target matrix passed to ``fit``.
    :param y_tr: training targets.
    :param y_hat: current predictions on the training features.
    """
    ax.cla()
    if isinstance(tree.loss, (QuantileLoss, QuadraticQuantileLoss)):
        n_q = y_hat.shape[1]
        n_plot = 200
        colors = plt.get_cmap('plasma', int(n_q))
        for fl in range(n_q // 2):
            q_low = np.squeeze(y_hat[:n_plot, fl])
            q_up = np.squeeze(y_hat[:n_plot, n_q - fl - 1])
            x_plot = np.arange(len(q_low))
            # inner bands are drawn progressively more opaque
            ax.fill_between(x_plot, q_low, q_up, color=colors(fl), alpha=0.1 + 0.6 * fl / n_q,
                            linewidth=0.0)
        # NOTE(review): y_hat refers to the training split while the target drawn here is the full y;
        # the rows only line up if _validation_split keeps the leading rows in order -- confirm.
        ax.plot(y[:n_plot, :], linewidth=2, label='target')
        ax.legend(loc='upper right')
        plt.title('Quantiles on first {} samples'.format(n_plot))
    else:
        # dashed identity line as reference for a perfect fit
        obs_range = [np.min(y_tr[:, 0]), np.max(y_tr[:, 0])]
        ax.plot(obs_range, obs_range, '--')
        ax.scatter(y_tr[:, 0], y_hat[:, 0], marker='.', alpha=0.2)
        ax.set_xlabel('observations')
        ax.set_ylabel('predictions')
        ax.set_title('Fit on first y dimension')
    # give the GUI event loop time to refresh the figure
    plt.pause(0.1)