Example #1
import numpy as np
import matplotlib.pyplot as plt
from scipy.linalg import hankel
from mbtr.utils import set_figure  # plotting helper, assumed to live in mbtr.utils

# total_power: raw power signal, assumed already loaded upstream as an array
# average every 6 consecutive samples (e.g. 10-min data -> hourly resolution)
total_power = np.mean(total_power[:len(total_power) -
                                  len(total_power) % 6].reshape(-1, 6),
                      axis=1,
                      keepdims=True)

# embed the signal in a Hankel matrix: a 24-step feature window plus the next step
# (the trailing rows, which hankel pads with zeros, are dropped)
total_power = hankel(total_power, np.zeros((25, 1)))[:-25, :]

# create feature matrix and target for the training and test sets
x = total_power[:, :24]
y = total_power[:, 24:]
n_tr = int(len(x) * 0.8)
x_tr, y_tr, x_te, y_te = [x[:n_tr, :], y[:n_tr, :], x[n_tr:, :], y[n_tr:, :]]

# visual check on the first 50 samples of features and targets
fig, ax = set_figure((5, 4))
y_min = np.min(y_tr[:50, :]) * 0.9
y_max = np.max(y_tr[:50, :]) * 1.1

for i in range(50):
    ax.cla()
    ax.plot(np.arange(24), x_tr[i, :], label='features')
    ax.scatter(24, y_tr[i, :], label='target', marker='.')
    ax.set_xlabel('step ahead [h]')
    ax.set_ylabel('P [kW]')
    ax.legend(loc='upper right')
    ax.set_ylim(y_min, y_max)
    plt.pause(1e-6)
plt.close('all')

# --------------------------- Set up an MBT for quantiles prediction and train it -------------------------------------
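The header above announces the quantile-MBT setup, but the next example jumps straight into the library's fit method. Below is a minimal sketch of what that missing setup could look like, assuming the MBT class from the mbtr package; the keyword names (loss_type, alphas, n_boosts, early_stopping_rounds) are assumptions and should be checked against the installed mbtr version.

from mbtr.mbtr import MBT

# quantile levels to predict; y_tr has a single column here, so each
# predicted column will be one quantile of the one-step-ahead power
alphas = np.linspace(0.05, 0.95, 7)
m = MBT(loss_type='quantile',
        alphas=alphas,
        n_boosts=30,
        early_stopping_rounds=3)
m.fit(x_tr, y_tr, do_plot=True)  # do_plot animates the quantile fan while training
y_hat_q = m.predict(x_te)        # one column per quantile in alphas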
Example #2
    def fit(self, x, y, do_plot=False, x_lr=None):
        """
         Fits an MBT using the features specified in the matrix :math:`x\\in\\mathbb{R}^{n_{obs} \\times n_{f}}`, in
         order to predict the targets in the matrix :math:`y\\in\\mathbb{R}^{n_{obs} \\times n_{t}}`,
         where :math:`n_{obs}` is the number of observations, :math:`n_{f}` the number of features and :math:`n_{t}`
         the dimension of the target.

        :param x: feature matrix, np.ndarray.
        :param y: target matrix, np.ndarray.
        :param x_lr: features for fitting the linear response inside the leaves. This is only required if a LinearLoss
                      is being used.
        """
        # split into training and validation sets (only has an effect if pars['val_ratio'] was set)
        x_tr, x_val, y_tr, y_val, x_lr_tr, x_lr_val = self._validation_split(
            x, y, x_lr)
        lowest_loss = np.inf
        best_iter = 0
        self.trees = []
        t_init = time()
        if do_plot:
            fig, ax = set_figure((5, 4))

        for iter in tqdm(range(self.n_boosts)):
            t0 = time()
            tree = Tree(**self.tree_pars)
            if iter == 0:
                # initialize train and validation predictions with the
                # loss-specific initial guess (e.g. a constant response)
                y_hat_val = self._fit_initial_guess(tree, y)
                y_hat = self._fit_initial_guess(tree, y)
                neg_grad, hessian = self._get_neg_grad_and_hessian_diags(
                    tree, y_tr, y_hat, 0, x)

            # fit the tree
            tree.fit(x_tr,
                     neg_grad,
                     learning_rate=self.learning_rate,
                     hessian=hessian,
                     x_lr=x_lr_tr)

            # if loss function has an "exact_response" method, use it to refit the tree
            if self.refit:
                tree._refit(x_tr,
                            y_tr - y_hat,
                            self.learning_rate,
                            x_lr=x_lr_tr)

            self.trees.append(tree)
            y_hat = y_hat + tree.predict(x_tr, x_lr_tr)
            y_hat_val = y_hat_val + tree.predict(x_val, x_lr_val)
            se, regularization = tree.loss.eval(y_val, y_hat_val, self.trees)
            terminal_leaves = len(
                nx.get_node_attributes(tree.g, 'response').values())
            if self.verbose == 1:
                tqdm.write(
                    '   Iteration: {} fitted in {:0.1e} sec, total loss: {:0.3e}, squared err: {:0.2e},  '
                    'regularization: {:0.2e}, best iter: {}, terminal leaves: {}'
                    .format(iter,
                            time() - t0, se + regularization, se,
                            regularization, best_iter, terminal_leaves))
            loss = se + regularization

            if loss < lowest_loss:
                best_iter = iter
                lowest_loss = np.copy(loss)
            if iter - best_iter > self.early_stopping_rounds:
                break

            neg_grad, hessian = self._get_neg_grad_and_hessian_diags(
                tree, y_tr, y_hat, iter + 1, x)
            if do_plot:
                if type(tree.loss) in [QuantileLoss, QuadraticQuantileLoss]:
                    ax.cla()
                    n_q = y_hat.shape[1]
                    n_plot = 200
                    colors = plt.get_cmap('plasma', int(n_q))
                    for fl in np.arange(np.floor(n_q / 2), dtype=int):
                        q_low = np.squeeze(y_hat[:n_plot, fl])
                        q_up = np.squeeze(y_hat[:n_plot, n_q - fl - 1])
                        x_plot = np.arange(len(q_low))
                        ax.fill_between(x_plot,
                                        q_low,
                                        q_up,
                                        color=colors(fl),
                                        alpha=0.1 + 0.6 * fl / n_q,
                                        linewidth=0.0)

                    ax.plot(y_tr[:n_plot, :], linewidth=2, label='target')
                    ax.legend(loc='upper right')
                    ax.set_title('Quantiles on first {} samples'.format(n_plot))
                else:
                    ax.cla()
                    ax.plot([np.min(y_tr[:, 0]),
                             np.max(y_tr[:, 0])],
                            [np.min(y_tr[:, 0]),
                             np.max(y_tr[:, 0])], '--')
                    ax.scatter(y_tr[:, 0], y_hat[:, 0], marker='.', alpha=0.2)
                    ax.set_xlabel('observations')
                    ax.set_ylabel('predictions')
                    ax.set_title('Fit on first y dimension')

                plt.pause(0.1)

        print('#---------------- Model fitted in {:0.2e} min ----------------'.
              format((time() - t_init) / 60))

        # keep trees up to best iteration
        self.trees = self.trees[:best_iter + 1]
        if do_plot:
            plt.close(fig)

        return self
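
As a hedged usage sketch of the method above: per the docstring, x_lr is only needed when a LinearLoss fits a linear response inside the leaves. The keyword loss_type='linear_regression' and the names x_lr_tr / x_lr_te are illustrative assumptions, not taken from this snippet.

# hypothetical linear-response features, aligned row-by-row with x_tr / x_te
m = MBT(loss_type='linear_regression', n_boosts=30)
m.fit(x_tr, y_tr, x_lr=x_lr_tr)
y_hat = m.predict(x_te, x_lr_te)  # tree.predict above takes x_lr positionally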