Example #1
    def _initialize_models(self, ds_train):
        """
        Function to initialize the feature extractor, GP, and the optimizer before training.
        """

        # Initialize Feature Extractor (Residual Net)
        self.feature_extractor = FCResNet(input_dim=self.input_dim,
                                          features=self.features,
                                          depth=self.depth,
                                          spectral_normalization=True,
                                          coeff=self.coeff,
                                          n_power_iterations=FC_N_POWER_ITERATIONS,
                                          dropout_rate=FC_DROPOUT_RATE,
                                          )
        initial_inducing_points, initial_lengthscale = initial_values_for_GP(
            ds_train, self.feature_extractor, self.n_inducing_points
        )

        # Initialize Gaussian Process
        gp = GP(
            num_outputs=self.num_outputs,
            initial_lengthscale=initial_lengthscale,
            initial_inducing_points=initial_inducing_points,
            kernel=self.kernel,
        )

        # Initialize the overall Deep Kernel Learning (DKL) GP model
        self.model = DKL_GP(self.feature_extractor, gp)

        # Classification task with two classes
        self.likelihood = SoftmaxLikelihood(num_classes=NUM_OUTPUTS, mixing_weights=False)
        self.loss_fn = VariationalELBO(self.likelihood, gp, num_data=len(ds_train))

        # Initialize models' optimizer
        parameters = [
            {"params": self.model.feature_extractor.parameters(), "lr": self.lr},
            {"params": self.model.gp.parameters(), "lr": self.lr},
            {"params": self.likelihood.parameters(), "lr": self.lr},
        ]

        self.optimizer = torch.optim.Adam(parameters, weight_decay=OPTIMIZER_WEIGHT_DECAY)
Example #2
def main(hparams):
    results_dir = get_results_directory(hparams.output_dir)
    writer = SummaryWriter(log_dir=str(results_dir))

    ds = get_dataset(hparams.dataset, root=hparams.data_root)
    input_size, num_classes, train_dataset, test_dataset = ds

    hparams.seed = set_seed(hparams.seed)

    if hparams.n_inducing_points is None:
        hparams.n_inducing_points = num_classes

    print(f"Training with {hparams}")
    hparams.save(results_dir / "hparams.json")

    if hparams.ard:
        # Hardcoded to WRN output size
        ard = 640
    else:
        ard = None

    feature_extractor = WideResNet(
        spectral_normalization=hparams.spectral_normalization,
        dropout_rate=hparams.dropout_rate,
        coeff=hparams.coeff,
        n_power_iterations=hparams.n_power_iterations,
        batchnorm_momentum=hparams.batchnorm_momentum,
    )

    initial_inducing_points, initial_lengthscale = initial_values_for_GP(
        train_dataset, feature_extractor, hparams.n_inducing_points
    )

    gp = GP(
        num_outputs=num_classes,
        initial_lengthscale=initial_lengthscale,
        initial_inducing_points=initial_inducing_points,
        separate_inducing_points=hparams.separate_inducing_points,
        kernel=hparams.kernel,
        ard=ard,
        lengthscale_prior=hparams.lengthscale_prior,
    )

    model = DKL_GP(feature_extractor, gp)
    model = model.cuda()

    likelihood = SoftmaxLikelihood(num_classes=num_classes, mixing_weights=False)
    likelihood = likelihood.cuda()

    elbo_fn = VariationalELBO(likelihood, gp, num_data=len(train_dataset))

    parameters = [
        {"params": feature_extractor.parameters(), "lr": hparams.learning_rate},
        {"params": gp.parameters(), "lr": hparams.learning_rate},
        {"params": likelihood.parameters(), "lr": hparams.learning_rate},
    ]

    optimizer = torch.optim.SGD(
        parameters, momentum=0.9, weight_decay=hparams.weight_decay
    )

    milestones = [60, 120, 160]

    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=milestones, gamma=0.2
    )

    def step(engine, batch):
        model.train()
        likelihood.train()

        optimizer.zero_grad()

        x, y = batch
        x, y = x.cuda(), y.cuda()

        y_pred = model(x)
        elbo = -elbo_fn(y_pred, y)

        elbo.backward()
        optimizer.step()

        return elbo.item()

    def eval_step(engine, batch):
        model.eval()
        likelihood.eval()

        x, y = batch
        x, y = x.cuda(), y.cuda()

        with torch.no_grad():
            y_pred = model(x)

        return y_pred, y

    trainer = Engine(step)
    evaluator = Engine(eval_step)

    metric = Average()
    metric.attach(trainer, "elbo")

    def output_transform(output):
        y_pred, y = output

        # Sample softmax values independently for classification at test time
        y_pred = y_pred.to_data_independent_dist()

        # The mean here is over likelihood samples
        y_pred = likelihood(y_pred).probs.mean(0)

        return y_pred, y

    metric = Accuracy(output_transform=output_transform)
    metric.attach(evaluator, "accuracy")

    metric = Loss(lambda y_pred, y: -elbo_fn(y_pred, y))
    metric.attach(evaluator, "elbo")

    kwargs = {"num_workers": 4, "pin_memory": True}

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=hparams.batch_size,
        shuffle=True,
        drop_last=True,
        **kwargs,
    )

    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=512, shuffle=False, **kwargs
    )

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_results(trainer):
        metrics = trainer.state.metrics
        elbo = metrics["elbo"]

        print(f"Train - Epoch: {trainer.state.epoch} ELBO: {elbo:.2f} ")
        writer.add_scalar("Likelihood/train", elbo, trainer.state.epoch)

        if hparams.spectral_normalization:
            for name, layer in model.feature_extractor.named_modules():
                if isinstance(layer, torch.nn.Conv2d):
                    writer.add_scalar(
                        f"sigma/{name}", layer.weight_sigma, trainer.state.epoch
                    )

        if not hparams.ard:
            # Otherwise it's too much to submit to tensorboard
            length_scales = model.gp.covar_module.base_kernel.lengthscale.squeeze()
            for i in range(length_scales.shape[0]):
                writer.add_scalar(
                    f"length_scale/{i}", length_scales[i], trainer.state.epoch
                )

        if trainer.state.epoch > 150 and trainer.state.epoch % 5 == 0:
            _, auroc, aupr = get_ood_metrics(
                hparams.dataset, "SVHN", model, likelihood, hparams.data_root
            )
            print(f"OoD Metrics - AUROC: {auroc}, AUPR: {aupr}")
            writer.add_scalar("OoD/auroc", auroc, trainer.state.epoch)
            writer.add_scalar("OoD/auprc", aupr, trainer.state.epoch)

        evaluator.run(test_loader)
        metrics = evaluator.state.metrics
        acc = metrics["accuracy"]
        elbo = metrics["elbo"]

        print(
            f"Test - Epoch: {trainer.state.epoch} "
            f"Acc: {acc:.4f} "
            f"ELBO: {elbo:.2f} "
        )

        writer.add_scalar("Likelihood/test", elbo, trainer.state.epoch)
        writer.add_scalar("Accuracy/test", acc, trainer.state.epoch)

        scheduler.step()

    pbar = ProgressBar(dynamic_ncols=True)
    pbar.attach(trainer)

    trainer.run(train_loader, max_epochs=200)

    # Done training - time to evaluate
    results = {}

    evaluator.run(train_loader)
    train_acc = evaluator.state.metrics["accuracy"]
    train_elbo = evaluator.state.metrics["elbo"]
    results["train_accuracy"] = train_acc
    results["train_elbo"] = train_elbo

    evaluator.run(test_loader)
    test_acc = evaluator.state.metrics["accuracy"]
    test_elbo = evaluator.state.metrics["elbo"]
    results["test_accuracy"] = test_acc
    results["test_elbo"] = test_elbo

    _, auroc, aupr = get_ood_metrics(
        hparams.dataset, "SVHN", model, likelihood, hparams.data_root
    )
    results["auroc_ood_svhn"] = auroc
    results["aupr_ood_svhn"] = aupr

    print(f"Test - Accuracy {results['test_accuracy']:.4f}")

    results_json = json.dumps(results, indent=4, sort_keys=True)
    (results_dir / "results.json").write_text(results_json)

    torch.save(model.state_dict(), results_dir / "model.pt")
    torch.save(likelihood.state_dict(), results_dir / "likelihood.pt")

    writer.close()
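The script above only saves state dicts. Reloading them later requires rebuilding the model with the same constructors first, roughly as in the sketch below (the load_trained helper and its arguments are not part of the original script; initial_values_for_GP is re-run only so the inducing-point buffers get the right shapes before the saved state overwrites them):

def load_trained(results_dir, hparams, num_classes, train_dataset):
    # Rebuild the exact same architecture that was used for training
    feature_extractor = WideResNet(
        spectral_normalization=hparams.spectral_normalization,
        dropout_rate=hparams.dropout_rate,
        coeff=hparams.coeff,
        n_power_iterations=hparams.n_power_iterations,
        batchnorm_momentum=hparams.batchnorm_momentum,
    )
    initial_inducing_points, initial_lengthscale = initial_values_for_GP(
        train_dataset, feature_extractor, hparams.n_inducing_points
    )
    gp = GP(
        num_outputs=num_classes,
        initial_lengthscale=initial_lengthscale,
        initial_inducing_points=initial_inducing_points,
        separate_inducing_points=hparams.separate_inducing_points,
        kernel=hparams.kernel,
        ard=640 if hparams.ard else None,
        lengthscale_prior=hparams.lengthscale_prior,
    )
    model = DKL_GP(feature_extractor, gp).cuda()
    likelihood = SoftmaxLikelihood(num_classes=num_classes, mixing_weights=False).cuda()

    # Overwrite the freshly initialized parameters with the trained ones
    model.load_state_dict(torch.load(results_dir / "model.pt"))
    likelihood.load_state_dict(torch.load(results_dir / "likelihood.pt"))
    model.eval()
    likelihood.eval()
    return model, likelihood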
Example #3
    def forward(self, x):
        features = self.feature_extractor(x)
        features = scale_to_bounds(features, lower_bound=self.grid_bounds[0], upper_bound=self.grid_bounds[1])
        features = features.transpose(-1, -2).unsqueeze(-1)
        result = self.gp_layer(features)
        return result


train_loader, test_loader, num_classes = get_vision_data()

feature_extractor = DenseNetFeatureExtractor(block_config=(6, 6, 6), num_classes=num_classes)
num_features = feature_extractor.classifier.in_features

model = DeepKernelLearningModel(feature_extractor, num_dim=num_features)
likelihood = SoftmaxLikelihood(num_features=model.num_dim, num_classes=num_classes)

if torch.cuda.is_available():
    model = model.cuda()
    likelihood = likelihood.cuda()

n_epochs = 5
lr = 0.1

optimizer = SGD([{'params': model.feature_extractor.parameters(), 'weight_decay': 1e-4},
                 {'params': model.gp_layer.hyperparameters(), 'lr': lr * 0.01},
                 {'params': model.variational_parameters()},
                 {'params': likelihood.parameters()}],
                lr=lr, momentum=0.9, nesterov=True, weight_decay=0)

scheduler = MultiStepLR(optimizer, milestones=[0.5 * n_epochs, 0.75 * n_epochs], gamma=0.1)
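The forward method at the top of this example belongs to a DeepKernelLearningModel wrapper whose constructor and gp_layer are not shown. A rough sketch of what such a wrapper can look like, loosely following the GPyTorch SV-DKL tutorial (the classes below and their arguments are assumptions, not the original code):

import torch
import gpytorch


class GaussianProcessLayer(gpytorch.models.ApproximateGP):
    """Grid-interpolation variational GP, applied independently to each feature dimension."""

    def __init__(self, num_dim, grid_bounds=(-10., 10.), grid_size=64):
        variational_distribution = gpytorch.variational.CholeskyVariationalDistribution(
            num_inducing_points=grid_size, batch_shape=torch.Size([num_dim])
        )
        variational_strategy = gpytorch.variational.IndependentMultitaskVariationalStrategy(
            gpytorch.variational.GridInterpolationVariationalStrategy(
                self, grid_size=grid_size, grid_bounds=[grid_bounds],
                variational_distribution=variational_distribution,
            ),
            num_tasks=num_dim,
        )
        super().__init__(variational_strategy)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())
        self.grid_bounds = grid_bounds

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(self.mean_module(x), self.covar_module(x))


class DeepKernelLearningModel(gpytorch.Module):
    def __init__(self, feature_extractor, num_dim, grid_bounds=(-10., 10.)):
        super().__init__()
        self.feature_extractor = feature_extractor
        self.gp_layer = GaussianProcessLayer(num_dim=num_dim, grid_bounds=grid_bounds)
        self.grid_bounds = grid_bounds
        self.num_dim = num_dim

    # forward(self, x) is the method shown at the top of this example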
Example #4
    def create_likelihood(self):
        return SoftmaxLikelihood(num_features=6,
                                 num_classes=6,
                                 mixing_weights=False)
Example #5
    def create_likelihood(self):
        return SoftmaxLikelihood(num_features=6, num_classes=4)
Example #6
def main():
    parser = argparse.ArgumentParser(
        description='Deep Kernel Learning with synthetic data.')
    parser.add_argument('--datapath', type=str, help='Path to data directory.')
    parser.add_argument('--batchsize',
                        type=int,
                        default=10,
                        help='Batch size.')
    parser.add_argument('--n_epochs',
                        type=int,
                        default=10,
                        help='Number of epochs.')
    parser.add_argument('--lr',
                        type=float,
                        default=0.1,
                        help='Learning rate.')
    parser.add_argument('--seed', type=int, default=42, help='Random seed.')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    traindata = Synthetic(args.datapath, 'train', download=True)
    train_loader = DataLoader(traindata, batch_size=args.batchsize)
    num_classes = len(np.unique(traindata.targets))

    testdata = Synthetic(args.datapath, 'test')
    test_loader = DataLoader(testdata, batch_size=args.batchsize)

    feature_extractor = ConvFeatureExtractor().to(device)
    num_features = feature_extractor._filter_sum

    model = DKLModel(feature_extractor, num_dim=5).to(device)
    likelihood = SoftmaxLikelihood(num_features=model.num_dim,
                                   num_classes=num_classes).to(device)

    optimizer = SGD([
        {
            'params': model.feature_extractor.parameters()
        },
        {
            'params': model.gp_layer.hyperparameters(),
            'lr': args.lr * 0.01
        },
        {
            'params': model.gp_layer.variational_parameters()
        },
        {
            'params': likelihood.parameters()
        },
    ],
                    lr=args.lr,
                    momentum=0.9,
                    nesterov=True,
                    weight_decay=0)

    scheduler = MultiStepLR(
        optimizer,
        milestones=[0.5 * args.n_epochs, 0.75 * args.n_epochs],
        gamma=0.1)

    for epoch in range(1, args.n_epochs + 1):
        scheduler.step()
        with settings.use_toeplitz(False), settings.max_preconditioner_size(0):
            train(epoch, train_loader, optimizer, likelihood, model, device)
            test(test_loader, likelihood, model, device)

        state_dict = model.state_dict()
        likelihood_state_dict = likelihood.state_dict()
        torch.save({
            'model': state_dict,
            'likelihood': likelihood_state_dict
        }, 'dkl_synthetic_checkpoint.dat')
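The train and test helpers called in the loop above are not shown. A minimal sketch of what they can look like for this SV-DKL setup (assumed, not the original code; the ELBO is built from model.gp_layer, matching the optimizer setup above):

import torch
import gpytorch


def train(epoch, train_loader, optimizer, likelihood, model, device):
    model.train()
    likelihood.train()
    mll = gpytorch.mlls.VariationalELBO(likelihood, model.gp_layer, num_data=len(train_loader.dataset))
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        loss = -mll(model(data), target)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch} finished, last batch loss: {loss.item():.3f}')


def test(test_loader, likelihood, model, device):
    model.eval()
    likelihood.eval()
    correct = 0
    with torch.no_grad(), gpytorch.settings.num_likelihood_samples(16):
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            # Average the sampled softmax probabilities, then take the argmax as the prediction
            pred = likelihood(model(data)).probs.mean(0).argmax(-1)
            correct += pred.eq(target).sum().item()
    print(f'Test accuracy: {100. * correct / len(test_loader.dataset):.1f}%')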
Example #7
class DUE:
    """
    Deterministic Uncertainty Estimator (DUE), as implemented in https://github.com/y0ast/DUE. See the original
    paper by van Amersfoort et al., 2021: https://arxiv.org/abs/2102.11409/

    DUE consists of two parts: a distance-preserving feature extractor (a residual network) and a Gaussian process.
    Here, DUE is implemented for binary classification.
    """

    def __init__(self,
                 n_inducing_points: int = 20,
                 kernel: str = "Matern12",
                 coeff: float = 3,
                 features: int = 128,
                 depth: int = 4,
                 lr: float = 1e-3,
                 ):
        """
        Parameters
        ----------
        n_inducing_points: int
            Number of inducing points used to compute the covariance matrix. The inducing points live in the
            feature space, are learned by maximizing the ELBO, and reduce the cost of matrix inversion.
        kernel: str
            Defines the kernel of the last layer Gaussian Process.
            Options: "RFB", "Matern12", "Matern32", "Matern52", "RQ"
        lr: float
            Learning rate.
        coeff: float
            Lipschitz factor for the distance-preserving feature extractor.
        features: int
            Number of features (units) in the feature extractor.
        depth: int
            Number of layers in the feature extractor.
        """

        self.num_outputs = NUM_OUTPUTS
        self.kernel = kernel
        self.input_dim = None
        self.n_inducing_points = n_inducing_points
        self.lr = lr
        self.coeff = coeff
        self.features = features
        self.depth = depth

    def fit(self,
              X_train: np.ndarray,
              y_train: np.ndarray,
              X_val: Optional[np.ndarray] = None,
              y_val: Optional[np.ndarray] = None,
              n_epochs: int = 5,
              batch_size: int = 64,
              early_stopping: bool = True,
              ):
        """
        DUE is initialized during training since it uses the training data to compute the initial inducing points.

        Parameters
        ----------
        X_train: np.ndarray
            The training data.
        y_train: np.ndarray
            The labels corresponding to the training data.
        X_val: Optional[np.ndarray]
            The validation data.
        y_val: Optional[np.ndarray]
            The labels corresponding to the validation data.
        batch_size: int
            The batch size, default 64.
        n_epochs: int
            The number of training epochs, default 5.
        early_stopping: bool
            Whether to perform early stopping, default True
        """

        ds_train = torch.utils.data.TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train).float())
        dl_train = torch.utils.data.DataLoader(ds_train, batch_size=batch_size, shuffle=True, drop_last=True)

        prev_val_loss = float("inf")
        n_no_improvement = 0
        if X_val is not None and y_val is not None:
            X_val = torch.tensor(X_val).float()
            y_val = torch.tensor(y_val).float().view(-1, 1)

        self.input_dim = X_train.shape[1]

        # Initialize the GP and Feature Extractor. Training data is used for initial inducing points for GP.
        self._initialize_models(ds_train)

        self.model.train()
        self.likelihood.train()

        for epoch in tqdm(range(n_epochs)):
            losses = []
            for batch in dl_train:
                self.optimizer.zero_grad()
                x, y = batch
                y_pred = self.model(x)
                loss = - self.loss_fn(y_pred, y)
                loss.backward()
                self.optimizer.step()

                losses.append(loss.item())

            if X_val is not None and y_val is not None:
                with torch.no_grad():
                    y_pred = self.model(X_val)
                    val_loss = -self.loss_fn(y_pred, y_val)

                print(f"Epoch: {epoch}. Train loss: {np.round(np.mean(losses), 3)}. "
                      f"Validation loss: {np.round(val_loss.item(), 3)}.")

                if early_stopping:
                    if val_loss >= prev_val_loss:
                        n_no_improvement += 1
                    else:
                        n_no_improvement = 0
                        prev_val_loss = val_loss

                    if n_no_improvement >= EARLY_STOPPING_LIMIT:
                        print("Early stopping after", epoch, "epochs.")
                        break
            else:
                print(f"Epoch: {epoch}. Train loss: {np.round(np.mean(losses), 3)}.")

    def _get_likelihood(self,
                        X: np.ndarray):
        """
        Loop over batches of the array to compute the conditional distribution 𝑝(𝐲∣𝐟,…) that defines the likelihood
        (see https://docs.gpytorch.ai/en/v1.1.1/likelihoods.html). Returns a list of distributions obtained from the
        softmax likelihood forward pass on the model's outputs.
        """

        self.model.eval()
        self.likelihood.eval()

        ds = torch.utils.data.TensorDataset(torch.from_numpy(X).float())
        dl = torch.utils.data.DataLoader(ds, batch_size=512, shuffle=False, drop_last=False)

        # Loop over samples and compute likelihood of the model predictions
        with torch.no_grad(), gpytorch.settings.num_likelihood_samples(64):
            likelihood = [self.likelihood(
                self.model(data[0]).to_data_independent_dist()
            )
                for data in dl]

        return likelihood

    def predict(self,
                X: np.ndarray):
        """
        Returns probabilities for each class.

        Parameters
        ----------
        X: np.ndarray
        Returns
        -------
        proba: np.ndarray
            Probabilities for each class.
        """

        likelihood = self._get_likelihood(X)
        proba = [ol.probs.mean(0).detach().numpy()
                 for ol in likelihood]

        return np.concatenate(proba)

    def get_entropy(self,
                    X: np.ndarray):
        """
        Returns entropy of predictions.

        Parameters
        ----------
        X: np.ndarray

        Returns
        -------
        entropy: np.ndarray
            Entropy of predictions.
        """
        likelihood = self._get_likelihood(X)
        entropy = [(-(ol.probs.mean(0) * ol.probs.mean(0).log()).sum(1)).detach().numpy()
                   for ol in likelihood]
        return np.concatenate(entropy)

    def get_std(self,
                X: np.ndarray):
        """
        Returns the standard deviation of the predicted probability for class 1.

        Parameters
        ----------
        X: np.ndarray

        Returns
        -------
        std: np.ndarray
            Standard deviation of predictions for class 1.
        """

        likelihood = self._get_likelihood(X)
        std = [ol.probs.std(0).detach().numpy()
               for ol in likelihood]

        return np.concatenate(std)[:, 1]

    def _initialize_models(self, ds_train):
        """
        Function to initialize the feature extractor, GP, and the optimizer before training.
        """

        # Initialize Feature Extractor (Residual Net)
        self.feature_extractor = FCResNet(input_dim=self.input_dim,
                                          features=self.features,
                                          depth=self.depth,
                                          spectral_normalization=True,
                                          coeff=self.coeff,
                                          n_power_iterations=FC_N_POWER_ITERATIONS,
                                          dropout_rate=FC_DROPOUT_RATE,
                                          )
        initial_inducing_points, initial_lengthscale = initial_values_for_GP(
            ds_train, self.feature_extractor, self.n_inducing_points
        )

        # Initialize Gaussian Process
        gp = GP(
            num_outputs=self.num_outputs,
            initial_lengthscale=initial_lengthscale,
            initial_inducing_points=initial_inducing_points,
            kernel=self.kernel,
        )

        # Initialize the overall Deep Kernel Learning (DKL) GP model
        self.model = DKL_GP(self.feature_extractor, gp)

        # Classification task with two classes
        self.likelihood = SoftmaxLikelihood(num_classes=NUM_OUTPUTS, mixing_weights=False)
        self.loss_fn = VariationalELBO(self.likelihood, gp, num_data=len(ds_train))

        # Initialize models' optimizer
        parameters = [
            {"params": self.model.feature_extractor.parameters(), "lr": self.lr},
            {"params": self.model.gp.parameters(), "lr": self.lr},
            {"params": self.likelihood.parameters(), "lr": self.lr},
        ]

        self.optimizer = torch.optim.Adam(parameters, weight_decay=OPTIMIZER_WEIGHT_DECAY)
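A minimal usage sketch of the DUE wrapper defined above, on synthetic data purely for illustration (it assumes the module-level constants referenced by the class, such as NUM_OUTPUTS and the FC_* settings, are defined as in the original project):

import numpy as np

X_train = np.random.randn(512, 8).astype(np.float32)
y_train = (X_train[:, 0] > 0).astype(np.float32)   # binary labels in {0, 1}
X_test = np.random.randn(64, 8).astype(np.float32)

due = DUE(n_inducing_points=20, kernel="Matern12", features=128, depth=4, lr=1e-3)
due.fit(X_train, y_train, n_epochs=5, batch_size=64)

proba = due.predict(X_test)        # (64, 2) class probabilities
entropy = due.get_entropy(X_test)  # (64,) predictive entropy
std = due.get_std(X_test)          # (64,) std of the class-1 probability
print(proba.shape, entropy.shape, std.shape)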
Example #8
def load_train(trainloader, testloader):
    
    global net, likelihood, optimizer, depth, loss_mixing_ratio, gp_kernel_feature
    global current_epoch, lr_init, train_epoch, print_init_model_state, save_freq, ece_avg_list
    global acc, running_loss, last_epoch, last_lr, end_epoch, optim_SGD, ngpu, gp_weight_decay
    
    running_loss = 0.0
    
    if '+GP' not in model_type:
        net = BayesFCNet(device=device, num_classes=num_classes, depth=depth,
                        rendFeature_rank_reduction=rendFeature_rank_reduction, 
                        loss_mixing_ratio=loss_mixing_ratio, net_type=model_type,
                        fc_setup=fc_setup, trainloader=trainloader, feature_size=gp_kernel_feature)
        net.to(device)
        if optim_SGD:
            optimizer = optim.SGD(net.parameters(), lr=lr_init, weight_decay=weight_decay, momentum=momentum)
        else:
            optimizer = optim.Adam(net.parameters(), lr=lr_init, weight_decay=weight_decay)
        _ = net.train()
        likelihood = None
    else:
        net = GPNet(device=device, kernel_net_type=model_type, gp_feature_size=gp_kernel_feature, 
                    num_classes=num_classes, depth=depth, grid_size=grid_size)
        net.to(device)
        likelihood = SoftmaxLikelihood(gp_kernel_feature, num_classes)
        likelihood.to(device)
        if optim_SGD:
            optimizer = optim.SGD([
                {'params': net.feature_extractor.parameters(), 'weight_decay': weight_decay},
                {'params': net.gp_layer.hyperparameters(), 'lr': lr_init * 0.01, 'weight_decay': gp_weight_decay},
                {'params': net.gp_layer.variational_parameters(), 'weight_decay': gp_weight_decay},
                {'params': likelihood.parameters()},
            ], lr=lr_init, momentum=momentum, nesterov=True)  # , weight_decay=weight_decay)
        else:
            optimizer = optim.Adam([
                {'params': net.feature_extractor.parameters(), 'weight_decay': weight_decay},
                {'params': net.gp_layer.hyperparameters(), 'lr': lr_init * 0.01, 'weight_decay': gp_weight_decay},
                {'params': net.gp_layer.variational_parameters(), 'weight_decay': gp_weight_decay},
                {'params': likelihood.parameters(), 'weight_decay': gp_weight_decay},
            ], lr=lr_init)  # , weight_decay=weight_decay)
        _ = net.train()
        likelihood.train()
        mll = gpytorch.mlls.VariationalELBO(likelihood, net.gp_layer, num_data=len(trainloader.dataset))

    pytorch_total_params = sum(p.numel() for p in net.parameters())
    print("total number of parameters is", pytorch_total_params)

    # load model from disk
    if load_model:
        if os.path.exists(SAVED_MODEL_PATH + saved_checkpoint_name + '.chkpt'):
            checkpoint = torch.load(SAVED_MODEL_PATH + saved_checkpoint_name + '.chkpt', map_location=device)
        elif os.path.exists(SAVED_MODEL_PATH + saved_checkpoint_name + '.interim'):
            checkpoint = torch.load(SAVED_MODEL_PATH + saved_checkpoint_name + '.interim', map_location=device)
        else:
            print("Neither checkpoint nor iterim file found! check file name")
            sys.exit(-1)

        print("Model state loaded")
        if 'command_dict' in checkpoint:
            print('Command directory was as follows\n',
                  checkpoint['command_dict'].__repr__().replace(', ', ',\n'))

        current_state = net.state_dict()
        state_dict_to_load = checkpoint['model_state']
        current_state.update(state_dict_to_load)
        net.load_state_dict(current_state)

        if 'randFeature' in net.net_type:
            net.rand_W = checkpoint['rand_W'].to(device)
            net.rand_B = checkpoint['rand_B'].to(device)

        if '+GP' in net.net_type:
            likelihood.load_state_dict(checkpoint['likelihood_state'])
            print("Likelihood state loaded")

        print("Model is loaded! Loss = %.3f and accuracy = %.3f %%" %(checkpoint['loss'], checkpoint['acc'] \
                                                      if checkpoint['acc'] > 1 else 100*checkpoint['acc']))
        optimizer.load_state_dict(checkpoint['optim_state'])
        print("Optimizer is loaded!")
        current_epoch = checkpoint['epoch']
        lr_init = checkpoint['last_lr']
        print("Current lr is: %.3f and target lr is: %.3f" %(lr_init, lr_final))

    elif len(component_pretrained_mods) > 0:

        current_state = net.state_dict()
        state_dict_to_load = dict()

        for pretrained_mod in component_pretrained_mods:
            if os.path.exists(SAVED_MODEL_PATH + pretrained_mod + '.chkpt'):
                checkpoint = torch.load(SAVED_MODEL_PATH + pretrained_mod + '.chkpt', map_location=device)
            elif os.path.exists(SAVED_MODEL_PATH + pretrained_mod + '.interim'):
                checkpoint = torch.load(SAVED_MODEL_PATH + pretrained_mod + '.interim', map_location=device)
            else:
                print("Neither checkpoint nor iterim file found! check file name")
                sys.exit(-1)
            checkpoint_dict = checkpoint['model_state']
            for k, v in checkpoint_dict.items():
                if k in current_state:
                    state_dict_to_load[k] = v
                elif 'feature_extractor.' + k in current_state:
                    state_dict_to_load['feature_extractor.' + k] = v
            
            if '+GP' in net.net_type and 'likelihood_state' in checkpoint:
                likelihood.load_state_dict(checkpoint['likelihood_state'])
                print("Likelihood state loaded")

        current_state.update(state_dict_to_load)
        print("Partial loading will update only %d parameters out of %d parameters" % (len(state_dict_to_load),
                                                                                           len(current_state)))
        net.load_state_dict(current_state)
                 
    _ = net.train()

    # derived params
    epoch_count = current_epoch
    epoch_count += train_epoch * int(train_model)
    swa_start = epoch_count
    epoch_count += swa_epoch * int(perform_swa)
    end_epoch = swa_start if 'SWA' in model_type else epoch_count
    current_state = dict([(name, copy.deepcopy(param.data)) for name,param in net.named_parameters() if 'feature_extractor' in name])

    if print_init_model_state:
        for name, param in net.named_parameters():
            print(name, param.size(), torch.max(param.data), torch.min(param.data))

    # perform train/swa update
    # with gpytorch.settings.use_toeplitz(False), gpytorch.settings.max_preconditioner_size(0):
    for epoch in range(current_epoch, epoch_count):  # loop over the dataset multiple times

        if optim_SGD:
            factor = learning_rate_mod_factor(epoch, running_loss)
            for i, g in enumerate(optimizer.param_groups):
                print("Learning rate for param %d is currently %.4f" %(i, g['lr']))
                push_output("Learning rate for param %d is currently %.4f\n" %(i, g['lr']))
                g['lr'] = lr_init * factor
    #                 if i == 1 and '+GP' in model_type:
    #                     g['lr'] = lr_init * factor * 0.01
                print("Learning rate for param %d has been changed to %.4f" %(i, g['lr']))
                push_output("Learning rate for param %d has been changed to %.4f\n" %(i, g['lr']))

        for i, data in enumerate(trainloader, 0):

            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            if '+GP' not in model_type:
                loss = net.forward(inputs, labels, random_sample_train)
            else:
                output = net(inputs)
                loss = -mll(output, labels)

            loss.sum().backward()
            # net.modify_grad()
            optimizer.step()
            running_loss = 0.9*running_loss + 0.1*loss.item() if running_loss != 0 else loss.item()
             
            if i % (len(trainloader) // 4) == 0:
                print('[%d, %5d] loss: %.3f' %(epoch + 1, i + 1, running_loss))
                push_output('[%d, %5d] loss: %.3f\n' %(epoch + 1, i + 1, running_loss))

        last_lr = lr_init * factor if optim_SGD else lr_init
        last_epoch = epoch
        print("=== Accuracy using SGD params ===")
        push_output("=== Accuracy using SGD params ===\n")
        validate("test", testloader, accuracy_only=True)
        push_output('Overall accuracy : %2d %%\n' % (acc))
        if epoch % save_freq == 0 and epoch != 0:
            save_model(None, True)
        if stop_predef_acc:
            if (acc >= predef_test_acc) and epoch >= current_epoch + 0.7*(epoch_count - current_epoch): # or ece_avg_list[-1] <= 0.011
                print("Stopped because accuracy reached")
                push_output("Stopped because accuracy reached\n")
                break

    print('Model is ready')
Example #9
    def fit(self,
            epochs=75,
            train_loader=None,
            save_path=None,
            val_loader=None):
        initial_inducing_points, initial_lengthscale = initial_values_for_GP(
            train_loader.dataset, self.feature_extractor,
            self.n_inducing_points)

        self.gp = GP(
            num_outputs=self.num_classes,
            initial_lengthscale=initial_lengthscale,
            initial_inducing_points=initial_inducing_points,
            separate_inducing_points=self.separate_inducing_points,
            kernel=self.kernel,
            ard=self.ard,
            lengthscale_prior=self.lengthscale_prior,
        )

        self.model = DKL_GP(self.feature_extractor, self.gp)
        self.model.to(self.device)

        self.likelihood = SoftmaxLikelihood(num_classes=self.num_classes,
                                            mixing_weights=False)
        self.likelihood = self.likelihood.to(self.device)

        self.elbo_fn = VariationalELBO(self.likelihood,
                                       self.gp,
                                       num_data=len(train_loader.dataset))

        parameters = [
            {
                "params": self.feature_extractor.parameters(),
                "lr": self.learning_rate
            },
            {
                "params": self.gp.parameters(),
                "lr": self.learning_rate
            },
            {
                "params": self.likelihood.parameters(),
                "lr": self.learning_rate
            },
        ]

        self.optimizer = torch.optim.SGD(parameters,
                                         momentum=0.9,
                                         weight_decay=self.weight_decay)

        self.scheduler = torch.optim.lr_scheduler.MultiStepLR(
            self.optimizer, milestones=[25, 50, 75], gamma=0.2)

        self.model.train()
        for epoch in tqdm(range(epochs)):
            running_loss = 0
            for i, (x, y) in enumerate(train_loader):
                self.model.train()

                self.optimizer.zero_grad()

                x, y = x.to(self.device), y.to(self.device)

                y_pred = self.model(x)
                elbo = -self.elbo_fn(y_pred, y)
                running_loss += elbo.item()
                elbo.backward()
                self.optimizer.step()

                if i % 50 == 0:
                    print("Iteration: {}, Loss = {}".format(
                        i, running_loss / (i + 1)))

            if epoch % 1 == 0 and val_loader is not None:
                self.model.eval()
                test_loss = 0
                correct = 0
                total = 0
                with torch.no_grad():
                    for batch_idx, (inputs, targets) in enumerate(val_loader):
                        inputs, y = inputs.to(self.device), F.one_hot(
                            targets, self.num_classes).float().to(self.device)
                        y_pred = self.model(inputs).to_data_independent_dist()
                        output = self.likelihood(y_pred).probs.mean(0)
                        predicted = torch.argmax(output, dim=1)
                        loss = -self.likelihood.expected_log_prob(
                            y, y_pred).mean()
                        test_loss += loss.item()
                        targets = targets.to(self.device)
                        total += targets.size(0)
                        correct += predicted.eq(targets.to(
                            self.device)).sum().item()
                acc = 100. * correct / total
                print("Epoch: {}, test acc: {}, test loss {}".format(
                    epoch, acc, test_loss / total))

            self.scheduler.step()

        if save_path is not None:
            self.save(save_path)
Example #10
class DUEVarianceSource:
    def __init__(self, input_size, num_classes, spectral_normalization,
                 n_power_iterations, batchnorm_momentum, n_inducing_points,
                 learning_rate, weight_decay, ard, kernel,
                 separate_inducing_points, lengthscale_prior, coeff, device):
        self.device = device
        self.num_classes = num_classes
        self.n_inducing_points = n_inducing_points
        self.learning_rate = learning_rate
        self.weight_decay = weight_decay
        self.ard = ard
        self.kernel = kernel
        self.separate_inducing_points = separate_inducing_points
        self.lengthscale_prior = lengthscale_prior
        self.coeff = coeff

        self.feature_extractor = ResNet18Spec(input_size,
                                              spectral_normalization,
                                              n_power_iterations,
                                              batchnorm_momentum)

        self.postprocessor = MinMaxScaler()

    def save(self, path):
        torch.save(self.model.state_dict(), path + "model.pt")
        torch.save(self.likelihood.state_dict(), path + "likelihood.pt")

    def load(self, path):
        self.model.load_state_dict(torch.load(path + "model.pt"))
        self.likelihood.load_state_dict(torch.load(path + "likelihood.pt"))
        self.model.to(self.device)
        self.likelihood = self.likelihood.to(self.device)

    def fit(self,
            epochs=75,
            train_loader=None,
            save_path=None,
            val_loader=None):
        initial_inducing_points, initial_lengthscale = initial_values_for_GP(
            train_loader.dataset, self.feature_extractor,
            self.n_inducing_points)

        self.gp = GP(
            num_outputs=self.num_classes,
            initial_lengthscale=initial_lengthscale,
            initial_inducing_points=initial_inducing_points,
            separate_inducing_points=self.separate_inducing_points,
            kernel=self.kernel,
            ard=self.ard,
            lengthscale_prior=self.lengthscale_prior,
        )

        self.model = DKL_GP(self.feature_extractor, self.gp)
        self.model.to(self.device)

        self.likelihood = SoftmaxLikelihood(num_classes=self.num_classes,
                                            mixing_weights=False)
        self.likelihood = self.likelihood.to(self.device)

        self.elbo_fn = VariationalELBO(self.likelihood,
                                       self.gp,
                                       num_data=len(train_loader.dataset))

        parameters = [
            {
                "params": self.feature_extractor.parameters(),
                "lr": self.learning_rate
            },
            {
                "params": self.gp.parameters(),
                "lr": self.learning_rate
            },
            {
                "params": self.likelihood.parameters(),
                "lr": self.learning_rate
            },
        ]

        self.optimizer = torch.optim.SGD(parameters,
                                         momentum=0.9,
                                         weight_decay=self.weight_decay)

        self.scheduler = torch.optim.lr_scheduler.MultiStepLR(
            self.optimizer, milestones=[25, 50, 75], gamma=0.2)

        self.model.train()
        for epoch in tqdm(range(epochs)):
            running_loss = 0
            for i, (x, y) in enumerate(train_loader):
                self.model.train()

                self.optimizer.zero_grad()

                x, y = x.to(self.device), y.to(self.device)

                y_pred = self.model(x)
                elbo = -self.elbo_fn(y_pred, y)
                running_loss += elbo.item()
                elbo.backward()
                self.optimizer.step()

                if i % 50 == 0:
                    print("Iteration: {}, Loss = {}".format(
                        i, running_loss / (i + 1)))

            if epoch % 1 == 0 and val_loader is not None:
                self.model.eval()
                test_loss = 0
                correct = 0
                total = 0
                with torch.no_grad():
                    for batch_idx, (inputs, targets) in enumerate(val_loader):
                        inputs, y = inputs.to(self.device), F.one_hot(
                            targets, self.num_classes).float().to(self.device)
                        y_pred = self.model(inputs).to_data_independent_dist()
                        output = self.likelihood(y_pred).probs.mean(0)
                        predicted = torch.argmax(output, dim=1)
                        loss = -self.likelihood.expected_log_prob(
                            y, y_pred).mean()
                        test_loss += loss.item()
                        targets = targets.to(self.device)
                        total += targets.size(0)
                        correct += predicted.eq(targets.to(
                            self.device)).sum().item()
                acc = 100. * correct / total
                print("Epoch: {}, test acc: {}, test loss {}".format(
                    epoch, acc, test_loss / total))

            self.scheduler.step()

        if save_path is not None:
            self.save(save_path)

    def score_samples(self, data=None, loader=None, no_preprocess=False):
        self.model.eval()

        with torch.no_grad():
            scores = []

            if loader is None:
                data = data.to(self.device)
                y_pred = self.model(data).to_data_independent_dist()
                output = self.likelihood(y_pred).probs.mean(0)

                scores.append(-(output * output.log()).sum(1).cpu())

            else:
                for data, target in loader:
                    data = data.to(self.device)
                    # target = target.cuda()
                    y_pred = self.model(data).to_data_independent_dist()
                    output = self.likelihood(y_pred).probs.mean(0)

                    scores.append(-(output * output.log()).sum(1).cpu())

        scores = torch.cat(scores, dim=0)
        if no_preprocess:
            values = scores.numpy().ravel()
        else:
            values = self.postprocessor.transform(
                scores.unsqueeze(-1)).squeeze()
        values = torch.FloatTensor(values).unsqueeze(-1)
        return values
Example #11
from gpytorch.likelihoods import GaussianLikelihood, BernoulliLikelihood, SoftmaxLikelihood
import torch
import numpy as np
from gpytorch.settings import num_likelihood_samples
from torch.distributions import MultivariateNormal

#likelihood = GaussianLikelihood()
#likelihood = BernoulliLikelihood()
likelihood = SoftmaxLikelihood(num_classes=5, num_features=2)

# Observed class labels for two data points
observations = torch.from_numpy(np.array([1, 1]))
# Latent function values: one row per data point, one column per latent feature,
# so the event size (2) matches the 2 x 2 covariance below
mean = torch.from_numpy(np.array([[1.0, 2.0], [1.0, 2.0]])).type(torch.float32)
covar = torch.from_numpy(np.array([[1.0, 0.0], [0.0, 1.0]])).type(torch.float32)
multivariate_normal = MultivariateNormal(mean, covar)
with num_likelihood_samples(8000):
    explog = likelihood.expected_log_prob(observations, multivariate_normal)

print(explog)
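For intuition, expected_log_prob here is just a Monte Carlo average of the Categorical log-likelihood over samples of the latent functions. A hand-rolled estimate of the same quantity (up to sampling noise), reusing the objects defined above:

with torch.no_grad():
    f_samples = multivariate_normal.sample(torch.Size([8000]))  # (8000, 2 data points, 2 features)
    categorical = likelihood(f_samples)                         # Categorical over the 5 classes
    manual = categorical.log_prob(observations).mean(dim=0)     # average over the 8000 samples

print(manual)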