Example #1
    def predict(self,
                x_test: np.ndarray,
                return_individual_predictions: bool = False):
        """
        Predicts the mean and variance at the given test points

        :param x_test: test datapoint
        :param return_individual_predictions: if True, the predictions of the individual models are also returned
        :return: mean and variance
        """

        x_test_ = np.asarray(x_test)

        if self.do_normalize_input:
            x_test_, *_ = self.normalize_input(x_test_, self.x_mean,
                                               self.x_std)

        def network_predict(x_test_, weights):
            with torch.no_grad():
                self.network_weights = weights
                if self.use_double_precision:
                    return self.model(
                        torch.from_numpy(x_test_).double()).numpy()
                else:
                    return self.model(
                        torch.from_numpy(x_test_).float()).numpy()

        logging.debug("Predicting with %d networks." %
                      len(self.sampled_weights))
        network_outputs = np.array([
            network_predict(x_test_, weights=weights)
            for weights in self.sampled_weights
        ])

        mean_prediction = np.mean(network_outputs[:, :, 0], axis=0)
        # variance_prediction = np.mean((network_outputs[:, :, 0] - mean_prediction) ** 2, axis=0)
        # Total variance = epistemic (spread of the per-network means) + aleatoric (mean predicted noise variance)
        variance_prediction = np.mean(
            (network_outputs[:, :, 0] - mean_prediction)**2 +
            np.exp(network_outputs[:, :, 1]),
            axis=0)

        if self.do_normalize_output:

            mean_prediction = zero_mean_unit_var_denormalization(
                mean_prediction, self.y_mean, self.y_std)
            variance_prediction *= self.y_std**2

            for i in range(len(network_outputs)):
                network_outputs[i] = zero_mean_unit_var_denormalization(
                    network_outputs[i], self.y_mean, self.y_std)

        if return_individual_predictions:
            return mean_prediction, variance_prediction, network_outputs[:, :, 0]

        return mean_prediction, variance_prediction
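The combination step above mixes an epistemic term (spread of the per-network means) with an aleatoric term (the exponentiated log-variance output). A minimal, self-contained sketch with synthetic stand-in data for `network_outputs` (only the shapes mirror the code above, the values are made up):

import numpy as np

# Synthetic stand-in for `network_outputs`: S weight samples, N test points,
# column 0 = predicted mean, column 1 = predicted log-variance.
rng = np.random.default_rng(0)
S, N = 50, 10
outputs = np.stack([
    np.stack([rng.normal(size=N), rng.normal(scale=0.1, size=N) - 2.0], axis=1)
    for _ in range(S)
])  # shape (S, N, 2)

mean_prediction = outputs[:, :, 0].mean(axis=0)
# Total variance = epistemic spread of the means + average aleatoric noise variance
total_variance = np.mean(
    (outputs[:, :, 0] - mean_prediction) ** 2 + np.exp(outputs[:, :, 1]),
    axis=0)

# Equivalent decomposition: variance of the means + mean of exp(log-variance)
assert np.allclose(
    total_variance,
    outputs[:, :, 0].var(axis=0) + np.exp(outputs[:, :, 1]).mean(axis=0))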
Example #2
    def predict_single(self, x_test: np.ndarray, sample_index: int):
        """
        Compute the prediction of a single weight sample

        :param x_test: test datapoint
        :param sample_index: specifies the index of the weight sample
        :return: the network output (predicted mean and log variance) for the selected weight sample
        """
        x_test_ = np.asarray(x_test)

        if self.do_normalize_input:
            x_test_, *_ = self.normalize_input(x_test_, self.x_mean,
                                               self.x_std)

        def network_predict(x_test_, weights):
            with torch.no_grad():
                self.network_weights = weights
                if self.use_double_precision:
                    return self.model(
                        torch.from_numpy(x_test_).double()).numpy()
                else:
                    return self.model(
                        torch.from_numpy(x_test_).float()).numpy()

        logging.debug("Predicting with %d networks." %
                      len(self.sampled_weights))
        function_value = np.array(
            network_predict(x_test_,
                            weights=self.sampled_weights[sample_index]))

        if self.do_normalize_output:
            function_value = zero_mean_unit_var_denormalization(
                function_value, self.y_mean, self.y_std)
        return function_value
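These snippets rely on `zero_mean_unit_var_normalization` / `zero_mean_unit_var_denormalization` without showing them. The sketch below is an assumption about what such helpers typically look like, inferred from how they are called here; it is not necessarily the library's implementation:

import numpy as np

def zero_mean_unit_var_normalization(X, mean=None, std=None):
    """Normalize columns to zero mean / unit variance; reuse given statistics if provided."""
    if mean is None:
        mean = np.mean(X, axis=0)
    if std is None:
        std = np.std(X, axis=0)
    return (X - mean) / std, mean, std

def zero_mean_unit_var_denormalization(X_normalized, mean, std):
    """Invert the normalization above."""
    return X_normalized * std + mean

# Round-trip check on random data
X = np.random.rand(20, 3)
X_norm, m, s = zero_mean_unit_var_normalization(X)
assert np.allclose(zero_mean_unit_var_denormalization(X_norm, m, s), X)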
Example #3
    def predict(self, X_test):
        r"""
        Returns the predictive mean and variance of the objective function at
        the given test points.

        Parameters
        ----------
        X_test: np.ndarray (N, D)
            N input test points

        Returns
        ----------
        np.array(N,)
            predictive mean
        np.array(N,)
            predictive variance

        """

        # Normalize inputs
        if self.normalize_input:
            X_, _, _ = zero_mean_unit_var_normalization(
                X_test, self.X_mean, self.X_std)
        else:
            X_ = X_test

        # Get features from the net
        if self.gpu:
            network = self.network.cpu()
        else:
            network = self.network

        theta = network.basis_funcs(torch.Tensor(X_)).data.numpy()

        # Marginalise predictions over hyperparameters of the BLR
        mu = np.zeros([len(self.models), X_test.shape[0]])
        var = np.zeros([len(self.models), X_test.shape[0]])

        for i, m in enumerate(self.models):
            mu[i], var[i] = m.predict(theta)

        # See the algorithm runtime prediction paper by Hutter et al.
        # for the derivation of the total variance.
        m = np.mean(mu, axis=0)
        v = np.mean(mu**2 + var, axis=0) - m**2

        # Clip negative variances to the smallest positive float value
        v = np.clip(v, np.finfo(v.dtype).eps, np.inf)

        if self.normalize_output:
            m = zero_mean_unit_var_denormalization(m, self.y_mean, self.y_std)
            v *= self.y_std**2

        return m, v
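The two lines `m = np.mean(mu, axis=0)` and `v = np.mean(mu**2 + var, axis=0) - m**2` implement the law of total variance for an equally weighted mixture of Gaussians. A short self-contained check that this equals the mean of the per-model variances plus the variance of the per-model means:

import numpy as np

rng = np.random.default_rng(1)
mu = rng.normal(size=(5, 8))         # per-model predictive means, shape (models, N)
var = rng.uniform(0.1, 1.0, (5, 8))  # per-model predictive variances

m = np.mean(mu, axis=0)
v = np.mean(mu**2 + var, axis=0) - m**2

# Same quantity, written as aleatoric + epistemic parts
assert np.allclose(v, np.mean(var, axis=0) + np.var(mu, axis=0))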
Example #4
    def predict(self, X_test):
        r"""
        Returns the predictive mean and variance of the objective function at
        the given test points.

        Parameters
        ----------
        X_test: np.ndarray (N, D)
            N input test points

        Returns
        ----------
        np.array(N,)
            predictive mean
        np.array(N,)
            predictive variance

        """
        # Normalize inputs
        if self.normalize_input:
            X_, _, _ = zero_mean_unit_var_normalization(
                X_test, self.X_mean, self.X_std)
        else:
            X_ = X_test

        # Perform MC dropout
        model = self.model
        T = self.T

        # Yt_hat: T x N x 1
        Yt_hat = np.array(
            [model(torch.Tensor(X_)).data.numpy() for _ in range(T)])
        # Yt_hat = Yt_hat * self.std_y_train + self.mean_y_train  # T x N TODO check with Adam

        MC_pred_mean = np.mean(Yt_hat, 0)  # N x 1

        Second_moment = np.mean(Yt_hat**2, 0)  # N x 1
        # MC_pred_var = Second_moment + np.eye(Yt_hat.shape[-1]) / self.tau - (MC_pred_mean ** 2)
        MC_pred_var = Second_moment - (MC_pred_mean**2)

        m = MC_pred_mean.flatten()

        # Clip negative variances to the smallest positive float value
        v = np.clip(MC_pred_var, np.finfo(MC_pred_var.dtype).eps, np.inf)

        if self.normalize_output:
            m = zero_mean_unit_var_denormalization(m, self.y_mean, self.y_std)
            v *= self.y_std**2

        m = m.flatten()
        v = v.flatten()

        return m, v
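Here `Second_moment - MC_pred_mean**2` is just the sample variance of the T stochastic forward passes; the commented-out line would additionally add a constant `1/tau` observation-noise term. A small sketch with synthetic data (the `tau` value below is hypothetical):

import numpy as np

rng = np.random.default_rng(2)
T, N = 100, 6
Yt_hat = rng.normal(size=(T, N, 1))  # T stochastic forward passes

MC_pred_mean = Yt_hat.mean(axis=0)
MC_pred_var = (Yt_hat**2).mean(axis=0) - MC_pred_mean**2

# Identical to the sample variance over the T passes
assert np.allclose(MC_pred_var, Yt_hat.var(axis=0))

# With the commented-out tau term, a constant 1/tau observation-noise variance
# would simply be added to every test point (hypothetical value shown here)
tau = 10.0
MC_pred_var_with_noise = MC_pred_var + 1.0 / tau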
Example #5
    def get_incumbent(self):
        """
        Returns the best observed point and its function value

        Returns
        ----------
        incumbent: ndarray (D,)
            current incumbent
        incumbent_value: float
            observed function value of the incumbent
        """

        inc, inc_value = super(LCCD, self).get_incumbent()
        if self.normalize_input:
            inc = zero_mean_unit_var_denormalization(inc, self.X_mean, self.X_std)

        if self.normalize_output:
            inc_value = zero_mean_unit_var_denormalization(inc_value, self.y_mean, self.y_std)

        return inc, inc_value
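The denormalization above simply maps the stored, normalized incumbent back to the original input and output scales. A minimal numeric sketch with made-up statistics, using the same `X * std + mean` mapping as `zero_mean_unit_var_denormalization`:

import numpy as np

# Normalization statistics and normalized incumbent (made-up values for illustration)
X_mean, X_std = np.array([2.0, -1.0]), np.array([0.5, 3.0])
y_mean, y_std = 10.0, 4.0

inc_normalized = np.array([0.2, -0.8])
inc_value_normalized = -1.5

inc = inc_normalized * X_std + X_mean            # back to the original input scale
inc_value = inc_value_normalized * y_std + y_mean  # back to the original output scale

print(inc, inc_value)  # [ 2.1 -3.4] 4.0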
Example #6
    def train(self,
              x_train: np.ndarray,
              y_train: np.ndarray,
              num_steps: int = 13000,
              keep_every: int = 100,
              num_burn_in_steps: int = 3000,
              lr: float = 1e-2,
              batch_size=20,
              epsilon: float = 1e-10,
              mdecay: float = 0.05,
              continue_training: bool = False,
              verbose: bool = False,
              **kwargs):
        """
        Train a BNN using input datapoints `x_train` with corresponding targets `y_train`.

        :param x_train: input training datapoints.
        :param y_train: input training targets.
        :param num_steps: Total number of sampling steps to perform, including burn-in.
            Roughly `(num_steps - num_burn_in_steps) // keep_every` network weights will be kept.
        :param keep_every: Number of sampling steps (after burn-in) between two kept weight samples.
        :param num_burn_in_steps: Number of burn-in steps to perform.
            This value is passed to the sampler if it supports special
            burn-in-specific behavior.
            Networks sampled during burn-in are discarded.
        :param lr: learning rate
        :param batch_size: batch size
        :param epsilon: epsilon for numerical stability
        :param mdecay: momentum decay
        :param continue_training: defines whether we want to continue from the last training run
        :param verbose: verbose output
        """
        logging.debug("Training started.")
        start_time = time.time()

        num_datapoints, input_dimensionality = x_train.shape
        logging.debug("Processing %d training datapoints "
                      " with %d dimensions each." %
                      (num_datapoints, input_dimensionality))
        assert batch_size >= 1, "Invalid batch size. Batches must contain at least a single sample."
        assert len(y_train.shape) == 1 or (len(y_train.shape) == 2 and y_train.shape[1] == 1), \
            "Targets need to be in vector format, i.e. (N,) or (N, 1)"

        if x_train.shape[0] < batch_size:
            logging.warning(
                "Not enough datapoints to form a full batch; using all datapoints in each batch."
            )
            batch_size = x_train.shape[0]

        self.X = x_train
        if len(y_train.shape) == 2:
            self.y = y_train[:, 0]
        else:
            self.y = y_train

        if self.do_normalize_input:
            logging.debug("Normalizing training datapoints to "
                          " zero mean and unit variance.")
            x_train_, self.x_mean, self.x_std = self.normalize_input(x_train)
            if self.use_double_precision:
                x_train_ = torch.from_numpy(x_train_).double()
            else:
                x_train_ = torch.from_numpy(x_train_).float()
        else:
            if self.use_double_precision:
                x_train_ = torch.from_numpy(x_train).double()
            else:
                x_train_ = torch.from_numpy(x_train).float()

        if self.do_normalize_output:
            logging.debug(
                "Normalizing training labels to zero mean and unit variance.")
            y_train_, self.y_mean, self.y_std = self.normalize_output(self.y)

            if self.use_double_precision:
                y_train_ = torch.from_numpy(y_train_).double()
            else:
                y_train_ = torch.from_numpy(y_train_).float()
        else:
            if self.use_double_precision:
                y_train_ = torch.from_numpy(y_train).double()
            else:
                y_train_ = torch.from_numpy(y_train).float()

        if self.use_double_precision:
            dtype = np.float64
        else:
            dtype = np.float32

        if not continue_training:
            logging.debug("Clearing list of sampled weights.")

            self.sampled_weights.clear()
            if self.use_double_precision:
                self.model = self.get_network(n_curves=num_datapoints).double()
            else:
                self.model = self.get_network(n_curves=num_datapoints).float()

            if self.sampling_method == "adaptive_sghmc":
                self.sampler = AdaptiveSGHMC(
                    self.model.parameters(),
                    scale_grad=dtype(num_datapoints),
                    num_burn_in_steps=num_burn_in_steps,
                    lr=dtype(lr),
                    mdecay=dtype(mdecay),
                    epsilon=dtype(epsilon))
            elif self.sampling_method == "sgld":
                self.sampler = SGLD(self.model.parameters(),
                                    lr=dtype(lr),
                                    scale_grad=num_datapoints)
            elif self.sampling_method == "preconditioned_sgld":
                self.sampler = PreconditionedSGLD(
                    self.model.parameters(),
                    lr=dtype(lr),
                    num_train_points=num_datapoints)
            elif self.sampling_method == "sghmc":
                self.sampler = SGHMC(self.model.parameters(),
                                     scale_grad=dtype(num_datapoints),
                                     mdecay=dtype(mdecay),
                                     lr=dtype(lr))

        data_loader = data_utils.DataLoader(data_utils.TensorDataset(
            x_train_, y_train_),
                                            batch_size=batch_size,
                                            shuffle=True)
        train_loader = infinite_dataloader(data_loader)
        batch_generator = islice(enumerate(train_loader), num_steps)

        for step, (x_batch, y_batch) in batch_generator:
            # print(step, (step - num_burn_in_steps) % keep_every, keep_every, num_burn_in_steps, flush=True)
            self.sampler.zero_grad()
            loss = self.likelihood_function(input=self.model(x_batch),
                                            target=y_batch)
            # Add the prior. Note that the gradient is computed as g_prior + N/n * sum_i grad_theta_xi,
            # see Eq. 4 in Welling and Teh (2011). Because of that we divide by N (the number of
            # datapoints) here, since the sampler rescales the gradient by N again.
            loss -= log_variance_prior(
                self.model(x_batch)[:, 1].view((-1, 1))) / num_datapoints
            loss -= weight_prior(self.model.parameters(),
                                 dtype=dtype) / num_datapoints
            loss.backward()
            self.sampler.step()
            if verbose and step > 0 and step % self.print_every_n_steps == 0:

                # compute the training performance of the ensemble
                if len(self.sampled_weights) > 1:
                    mu, var = self.predict(x_train)
                    total_nll = -np.mean(
                        norm.logpdf(y_train, loc=mu, scale=np.sqrt(var)))
                    total_mse = np.mean((y_train - mu)**2)
                # in case we do not have an ensemble we compute the performance of the last weight sample
                else:
                    f = self.model(x_train_)
                    if self.do_normalize_output:
                        mu = zero_mean_unit_var_denormalization(
                            f[:, 0], self.y_mean, self.y_std).data.numpy()
                        var = torch.exp(f[:, 1]) * self.y_std**2
                        var = var.data.numpy()
                    else:
                        mu = f[:, 0].data.numpy()
                        var = np.exp(f[:, 1].data.numpy())
                    total_nll = -np.mean(
                        norm.logpdf(y_train, loc=mu, scale=np.sqrt(var)))
                    total_mse = np.mean((y_train - mu)**2)

                t = time.time() - start_time
                if step < num_burn_in_steps:
                    print("Step {:8d} : NLL = {:11.4e} MSE = {:.4e} "
                          "Time = {:5.2f}".format(step, float(total_nll),
                                                  float(total_mse), t))

                if step > num_burn_in_steps:
                    print("Step {:8d} : NLL = {:11.4e} MSE = {:.4e} "
                          "Samples= {} Time = {:5.2f}".format(
                              step, float(total_nll), float(total_mse),
                              len(self.sampled_weights), t))

            if step > num_burn_in_steps and (
                    step - num_burn_in_steps) % keep_every == 0:
                # print('appending wts')
                weights = self.network_weights

                self.sampled_weights.append(weights)

        self.is_trained = True
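`infinite_dataloader` is called above but not shown. A typical implementation just cycles the `DataLoader` forever, so that `islice(..., num_steps)` alone controls how many sampler steps are taken. The sketch below is an assumption about that helper, not necessarily the library's version:

from itertools import islice

import torch
import torch.utils.data as data_utils


def infinite_dataloader(data_loader):
    """Yield minibatches from `data_loader` indefinitely, reshuffling each epoch."""
    while True:
        for batch in data_loader:
            yield batch


# Usage mirroring the training loop above: draw exactly five minibatches.
x = torch.randn(50, 3)
y = torch.randn(50)
loader = data_utils.DataLoader(data_utils.TensorDataset(x, y),
                               batch_size=20, shuffle=True)
for step, (x_batch, y_batch) in islice(enumerate(infinite_dataloader(loader)), 5):
    pass  # one sampler step per minibatch in the real training loop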
Example #7
    def predict(self, X_test):
        r"""
        Returns the predictive mean and variance of the objective function at
        the given test points.

        Parameters
        ----------
        X_test: np.ndarray (N, D)
            N input test points

        Returns
        ----------
        np.array(N,)
            predictive mean
        np.array(N,)
            predictive variance

        """
        # Normalize inputs
        if self.normalize_input:
            X_, _, _ = zero_mean_unit_var_normalization(X_test, self.X_mean, self.X_std)
        else:
            X_ = X_test

        # Perform MC dropout
        model = self.model
        model.eval()
        T = self.T
        # MC_samples : list T x N x 1
        # Yt_hat = np.array([model(torch.Tensor(X_)).data.numpy() for _ in range(T)])
        # start_mc=time.time()
        # Hard-coded to take the CPU branch below; the GPU branch is kept for reference
        gpu_test = False
        if gpu_test:
            X_tensor = Variable(torch.FloatTensor(X_)).to(self.device)
            MC_samples = [model(X_tensor) for _ in range(T)]
            means = torch.stack([tup[0] for tup in MC_samples]).view(T, X_.shape[0]).cpu().data.numpy()
            # logvar = torch.stack([tup[1] for tup in MC_samples]).view(T, X_.shape[0]).cpu().data.numpy()
        else:
            model.cpu()
            MC_samples = [model(Variable(torch.FloatTensor(X_))) for _ in range(T)]
            means = torch.stack([tup[0] for tup in MC_samples]).view(T, X_.shape[0]).data.numpy()
            # logvar = torch.stack([tup[1] for tup in MC_samples]).view(T, X_.shape[0]).data.numpy()

        # mc_time = time.time() - start_mc
        # print(f'mc_time={mc_time}')
        # logvar = np.mean(logvar,0)
        # aleatoric_uncertainty = np.exp(logvar).mean(0)
        # epistemic_uncertainty = np.var(means, 0).mean(0)
        aleatoric_uncertainty = self.aleatoric_uncertainty
        MC_pred_mean = np.mean(means, 0)  # N x 1
        means_var  = np.var(means, 0)
        MC_pred_var = means_var + aleatoric_uncertainty
        # MC_pred_var = means_var + np.mean(np.exp(logvar), 0)
        m = MC_pred_mean.flatten()

        # Clip negative variances to the smallest positive float value
        v = np.clip(MC_pred_var, np.finfo(MC_pred_var.dtype).eps, np.inf)

        if self.normalize_output:
            m = zero_mean_unit_var_denormalization(m, self.y_mean, self.y_std)
            v *= self.y_std ** 2

        m = m.flatten()
        v = v.flatten()

        return m, v
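Note that this snippet calls `model.eval()` and still expects the T forward passes to differ, which only works if the model's dropout remains stochastic in eval mode. A minimal sketch of one way to build such a layer in PyTorch (an assumption for illustration, not this library's model code):

import torch
import torch.nn as nn
import torch.nn.functional as F


class AlwaysOnDropout(nn.Module):
    """Dropout that remains stochastic even when the module is in eval() mode."""

    def __init__(self, p: float = 0.05):
        super().__init__()
        self.p = p

    def forward(self, x):
        return F.dropout(x, p=self.p, training=True)


net = nn.Sequential(nn.Linear(4, 32), nn.Tanh(), AlwaysOnDropout(0.1), nn.Linear(32, 1))
net.eval()
x = torch.randn(8, 4)
with torch.no_grad():
    samples = torch.stack([net(x) for _ in range(10)])  # still varies across passes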
Example #8
    def predict(self, X_test):
        r"""
        Returns the predictive mean and variance of the objective function at
        the given test points.

        Parameters
        ----------
        X_test: np.ndarray (N, D)
            N input test points

        Returns
        ----------
        np.array(N,)
            predictive mean
        np.array(N,)
            predictive variance

        """
        # Normalize inputs
        if self.normalize_input:
            X_, _, _ = zero_mean_unit_var_normalization(
                X_test, self.X_mean, self.X_std)
        else:
            X_ = X_test

        # Perform MC dropout
        model = self.model
        T = self.T
        model.eval()
        # MC_samples : list T x N x 1
        # Yt_hat = np.array([model(torch.Tensor(X_)).data.numpy() for _ in range(T)])
        MC_samples = [model(Variable(torch.FloatTensor(X_))) for _ in range(T)]
        means = torch.stack([tup[0] for tup in MC_samples]).view(T, X_.shape[0]).data.numpy()
        logvar = torch.stack([tup[1] for tup in MC_samples]).view(T, X_.shape[0]).data.numpy()
        # Yt_hat = Yt_hat * self.std_y_train + self.mean_y_train  # T x N TODO check with Adam
        aleatoric_uncertainty = np.exp(logvar).mean(0)
        epistemic_uncertainty = np.var(means, 0).mean(0)

        MC_pred_mean = np.mean(means, 0)  # N x 1

        Second_moment = np.mean(means**2, 0)  # N x 1
        # Total variance = epistemic (Second_moment - mean^2) + aleatoric (mean predicted noise variance)
        MC_pred_var = Second_moment - (MC_pred_mean**2) + aleatoric_uncertainty

        m = MC_pred_mean.flatten()

        # Clip negative variances to the smallest positive float value
        v = np.clip(MC_pred_var, np.finfo(MC_pred_var.dtype).eps, np.inf)

        if self.normalize_output:
            m = zero_mean_unit_var_denormalization(m, self.y_mean, self.y_std)
            v *= self.y_std**2

        m = m.flatten()
        v = v.flatten()

        return m, v
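With the per-pass log-variances stacked into `logvar`, the total predictive variance splits into an epistemic part (variance of the means over the T passes) and an aleatoric part (average predicted noise variance). A self-contained numeric check of that identity:

import numpy as np

rng = np.random.default_rng(3)
T, N = 40, 7
means = rng.normal(size=(T, N))
logvar = rng.normal(scale=0.2, size=(T, N)) - 1.0

MC_pred_mean = means.mean(axis=0)
aleatoric = np.exp(logvar).mean(axis=0)

# E[f^2] - E[f]^2 over the T passes is exactly the epistemic variance
total_var = (means**2).mean(axis=0) - MC_pred_mean**2 + aleatoric
assert np.allclose(total_var, means.var(axis=0) + aleatoric)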