예제 #1
0
    def train_model(self, dataset, epoch, optimizer, visualize_errors=False):
        """
        Run one training pass over ``dataset`` and report error metrics.

        For every mini-batch the ELBO loss returned by ``self.sample_elbo`` is
        back-propagated, ``optimizer`` takes one step and
        ``self.analytic_update()`` is called afterwards. The mean over dim 1
        of the returned outputs is fed to ``update_mse_mae`` to accumulate
        the squared and absolute errors.

        Args:
            dataset: object exposing ``features`` (its ``shape[0]`` is used as
                the dataset size); converted to a loader with
                ``dataset_to_dataloader``.
            epoch: step index used when writing scalars to
                ``self.train_writer``. It does not control how many passes
                are made; this method always performs exactly one.
            optimizer: optimizer whose ``step()`` is called once per batch.
            visualize_errors: if True, log the accumulated loss terms (and,
                unless ``self.hyperparams.classification`` is set, MAE/RMSE)
                to ``self.train_writer``.

        Returns:
            Tuple ``(loss, rmse, mae)`` where ``loss`` is the loss of the
            *last* processed batch (not the accumulated total), ``rmse`` is
            ``sqrt(mse / dataset_size)`` and ``mae`` is the accumulated
            absolute error divided by ``dataset_size``.
        """
        # Local import keeps this method self-contained; the module is already
        # loaded by the time a model exists, so this is only a cache lookup.
        import torch

        # NOTE(review): super().train() is called instead of self.train();
        # presumably this class overrides train() itself - confirm.
        super().train()

        dataset_size = dataset.features.shape[0]
        batch_size = self.hyperparams.batch_size

        # transform dataset to dataloader
        train_loader = dataset_to_dataloader(dataset, batch_size=batch_size)
        device_type = self.device.type

        mse = 0
        mae = 0
        total_loss = 0
        total_log_likelihood = 0
        total_kl_divergence = 0

        for data, target in train_loader:
            data, target = data.to(self.device), target.to(self.device)

            self.zero_grad()
            loss, log_prior, log_variational_posterior, log_likelihood, outputs = self.sample_elbo(
                data, target, dataset_size)

            loss.backward()
            optimizer.step()

            # perform analytic update of remaining model parameters
            self.analytic_update()

            # Given all model outputs, compute the mean output of the ensemble
            mean_output = outputs.mean(dim=1)
            mse, mae = update_mse_mae(mse, mae, device_type, mean_output, target)

            # Accumulate the statistics without recording autograd history;
            # otherwise every batch's graph stays referenced by the running
            # totals until the epoch finishes.
            with torch.no_grad():
                total_loss += loss
                total_log_likelihood += -log_likelihood
                # KL term weighted by this batch's share of the dataset
                total_kl_divergence += (
                    (log_variational_posterior - log_prior) * target.size()[0] / dataset_size
                )

        rmse = np.sqrt(mse / dataset_size)
        mae /= dataset_size

        if visualize_errors:
            self.train_writer.add_scalar('loss__training loss', total_loss.item(), epoch)
            self.train_writer.add_scalar('loss__kl term', total_kl_divergence.item(), epoch)
            self.train_writer.add_scalar('loss__log_likelihood term', total_log_likelihood.item(), epoch)
            if not self.hyperparams.classification:
                self.train_writer.add_scalar('errors__mae', mae, epoch)
                self.train_writer.add_scalar('errors__rmse', rmse, epoch)

        return loss, rmse, mae
예제 #2
0
    def predict(self, dataset, epoch=1, mean_y_train=0, std_y_train=1, visualize_errors=False):
        """
        Evaluates an ensemble of networks on a given test dataset.

        Because the Bayesian Neural Network has a distribution over the weights,
        we basically have an infinite number of different neural networks. We can
        take advantage of that by using an ensemble of networks during prediction.
        Each model in the ensemble performs a prediction. The different predictions
        are then averaged to give a final output.

        A model can be obtained by sampling weights from the distribution. Note: for
        each input batch, new models will be sampled.

        Args:
            dataset: object exposing ``features``; the whole dataset is
                evaluated as a single, unshuffled batch.
            epoch: step index used when writing scalars to ``self.test_writer``.
            mean_y_train: offset added to the network outputs.
            std_y_train: factor the network outputs are multiplied by; the
                noise variance is scaled by its square.
            visualize_errors: if True, log the metrics to ``self.test_writer``.

        Returns:
            Tuple ``(predicted_distr, rmse, mae, neg_loglike)``:
            ``predicted_distr`` is a ``PredictiveDistribution`` built from one
            per-sample distribution for every test point; ``rmse`` and ``mae``
            are the errors of the ensemble-mean prediction divided by the
            dataset size (both stay 0 in classification mode); ``neg_loglike``
            is the negated predictive log likelihood averaged over the dataset.
        """
        n_samples_testing = self.hyperparams.n_samples_testing
        is_classification = self.hyperparams.classification
        dataset_size = dataset.features.shape[0]
        test_batch_size = dataset_size  # evaluate everything in one batch

        # transform dataset to dataloader
        test_loader = dataset_to_dataloader(dataset, batch_size=test_batch_size, shuffle=False)

        super(GaussianBNN, self).eval()
        mse = 0
        mae = 0
        loglike = 0
        all_predicted_distributions = []

        # Observation-noise variance rescaled by std_y_train ** 2. It does not
        # depend on the batch, so compute it once. The tensor must be moved to
        # the CPU first: .numpy() raises for CUDA tensors.
        var_noise = np.exp(self.log_var_noise.detach().cpu().numpy()) * std_y_train ** 2

        with torch.no_grad():
            for data, target in test_loader:
                data, target = data.to(self.device), target.to(self.device)

                # Each batch is forwarded through each model in the ensemble
                # and the model outputs are saved; after the transpose every
                # row holds the n_samples_testing outputs for one test point.
                ensemble_outputs = self.forward(data, sample=True, n_samples=n_samples_testing) * std_y_train + mean_y_train
                ensemble_outputs = ensemble_outputs.reshape(n_samples_testing, test_batch_size).t()

                # calculation of the predictive log likelihood of a batch, see notes from 18.12.18:
                # per test point, the log of the likelihood averaged over the
                # ensemble samples (log-sum-exp minus log(n_samples_testing)).
                if is_classification:
                    loglike_factor = -F.binary_cross_entropy_with_logits(
                        ensemble_outputs,
                        target.reshape(-1, 1).repeat(1, n_samples_testing),
                        reduction='none')
                    # accumulate (rather than overwrite), matching the regression branch
                    loglike += torch.sum(torch.logsumexp(loglike_factor - math.log(n_samples_testing), 1))

                    # sigmoid of the per-sample outputs, one distribution per test point
                    distributions = [BinarySampleDistribution(1 / (1 + np.exp(-e)))
                                     for e in ensemble_outputs.cpu().detach().numpy()]
                else:
                    on_cuda = self.device.type == 'cuda'
                    if on_cuda:
                        # the NumPy computations below need host memory
                        target = target.cpu().numpy()
                        ensemble_outputs = ensemble_outputs.cpu().numpy()

                    # Gaussian log-density of the target under each sampled output
                    squared_error = (np.tile(target.reshape(-1, 1), (1, n_samples_testing))
                                     - np.array(ensemble_outputs)) ** 2
                    log_factor = -0.5 * np.log(2 * math.pi * var_noise) - squared_error / (2 * var_noise)
                    loglike += np.sum(logsumexp(log_factor - np.log(n_samples_testing), 1))

                    # Given all model outputs, compute the mean output of the ensemble
                    mean_output = ensemble_outputs.mean(1)
                    if on_cuda:
                        # back to (CPU) tensors for the torch loss functions
                        target = torch.from_numpy(target)
                        mean_output = torch.from_numpy(mean_output)

                    mse += F.mse_loss(mean_output, target, reduction='sum')
                    mae += F.l1_loss(mean_output, target, reduction='sum')

                    distributions = [SampleDistribution(ensemble_outputs[i], var_noise)
                                     for i in range(test_batch_size)]

                all_predicted_distributions.extend(distributions)

        predicted_distr = PredictiveDistribution(all_predicted_distributions)

        loglike /= dataset_size
        rmse = np.sqrt(mse / dataset_size)
        mae /= dataset_size

        if is_classification:
            # `target` is the batch from the loop above, i.e. the full dataset
            zero_one = AllMetrics.zero_one_loss.compute(target.cpu().detach().numpy(), predicted_distr)

        if visualize_errors:
            self.test_writer.add_scalar('errors__predictive log likelihood', loglike, epoch)
            if is_classification:
                self.test_writer.add_scalar('errors__zero_one', zero_one, epoch)
            else:
                self.test_writer.add_scalar('errors__mae', mae, epoch)
                self.test_writer.add_scalar('errors__rmse', rmse, epoch)

        return predicted_distr, rmse, mae, -loglike