def update_hyp(self, hyperparameters, **subspace_kwargs):
    # Reset the collected SWAG statistics.
    self.weight_mean = torch.zeros(self.num_parameters)
    self.weight_variance = None
    self.sq_mean = torch.zeros(self.num_parameters)
    self.num_models_collected = torch.zeros(1, dtype=torch.long)
    self.burnt_in = False
    self.epochs_run = 0

    # Unpack the new hyperparameters.
    self.hyperparameters = hyperparameters
    self.burn_in_epochs = self.hyperparameters['burn_in_epochs']
    self.num_iterates = self.hyperparameters['num_iterates']
    self.num_samples = self.hyperparameters['num_samples']
    self.momentum = self.hyperparameters['momentum']
    self.lr_init = self.hyperparameters['lr_init']
    self.swag_lr = self.hyperparameters['swag_lr']
    self.swag_wd = self.hyperparameters['swag_wd']

    # Reset the models and rebuild the optimizer around the fresh parameters.
    self.model = reset_model(self.model)
    self.swag_model = reset_model(self.swag_model)
    self.optimizer = SGD(params=self.model.parameters(),
                         lr=self.lr_init,
                         momentum=self.momentum,
                         weight_decay=self.swag_wd)

    # Default to a PCA subspace unless another type is specified.
    self.subspace_type = hyperparameters.get('subspace_type', 'pca')
    if subspace_kwargs is None:
        subspace_kwargs = dict()
    self.subspace = Subspace.create(self.subspace_type,
                                    num_parameters=self.num_parameters,
                                    **subspace_kwargs)
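# A minimal usage sketch for the method above. The dictionary keys mirror the
# lookups performed in update_hyp; the numeric values are illustrative only,
# and `swag` stands for an already-constructed instance of this sampler class
# (hypothetical name). The 'max_rank'/'pca_rank' keyword arguments mirror the
# subspace_kwargs built by the subspace-inference wrapper further below.
swag_hyperparameters = {
    'burn_in_epochs': 100,
    'num_iterates': 20,
    'num_samples': 30,
    'momentum': 0.9,
    'lr_init': 1e-2,
    'swag_lr': 1e-3,
    'swag_wd': 1e-4,
    'subspace_type': 'pca',  # optional; defaults to 'pca' if omitted
}
swag.update_hyp(swag_hyperparameters, max_rank=20, pca_rank=10)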
def update_hyp(self, hyperparameters):
    # Unpack the subspace and SWAG hyperparameters.
    self.rank = hyperparameters['rank']
    self.max_rank = hyperparameters['max_rank']
    self.lr_init = hyperparameters['lr_init']
    self.swag_lr = hyperparameters['swag_lr']
    self.swag_burn_in_epochs = hyperparameters['swag_burn_in_epochs']
    self.num_samples = hyperparameters['num_samples']
    self.num_swag_iterates = hyperparameters['num_swag_iterates']
    self.swag_momentum = hyperparameters['swag_momentum']
    self.swag_wd = hyperparameters['swag_wd']
    self.prior_std = hyperparameters['prior_std']
    self.temperature = hyperparameters['temperature']
    self.subspace_constructed = False
    self.current_theta = None

    # Forward the relevant settings to the underlying SWAG model.
    swag_hyperparam_dict = {
        'burn_in_epochs': self.swag_burn_in_epochs,
        'num_iterates': self.num_swag_iterates,
        'momentum': self.swag_momentum,
        'lr_init': self.lr_init,
        'swag_lr': self.swag_lr,
        'swag_wd': self.swag_wd,
        'subspace_type': 'pca'
    }
    subspace_kwargs = {'max_rank': self.max_rank, 'pca_rank': self.rank}

    self.model = reset_model(self.model)
    self.swag_model.update_hyp(swag_hyperparam_dict, **subspace_kwargs)

    # Clear any previously constructed subspace and posterior moments.
    self.weight_mean = None
    self.weight_covariance = None
    self.subspace = None
def update_hyp(self, hyperparameters):
    # TODO: Check that the hyperparameters are of the right type.
    self.step_size = hyperparameters['step_size']
    self.num_samples = hyperparameters['num_samples']
    self.L = hyperparameters['L']
    self.tau = hyperparameters['tau']
    self.mass = hyperparameters['mass']
    self.burn = hyperparameters['burn']
    self.model = reset_model(self.model)
def update_hyp(self, hyperparameters):
    self.lr = hyperparameters['lr']
    self.num_samples = 1  # a single point estimate is kept
    self.epochs = hyperparameters['epochs']
    self.momentum = hyperparameters['momentum']
    self.weight_decay = hyperparameters['weight_decay']

    # Reset the model before constructing the optimizer so the optimizer
    # tracks the current parameters (mirrors the other samplers).
    self.model = reset_model(self.model).to(self.device)
    self.optimizer = torch.optim.SGD(self.model.parameters(),
                                     lr=self.lr,
                                     momentum=self.momentum,
                                     weight_decay=self.weight_decay)
    self.burnt_in = False
    self.epochs_run = 0

    # Anneal the learning rate down to half its initial value.
    # Note: self.burn_in_epochs is assumed to have been set elsewhere (e.g. in __init__).
    self.lr_final = self.lr / 2
    self.optimizer_scheduler = CosineAnnealingLR(optimizer=self.optimizer,
                                                 T_max=self.burn_in_epochs + self.num_samples,
                                                 eta_min=self.lr_final)
def update_hyp(self, hyperparameters):
    self.lr_0 = hyperparameters['lr_0']
    self.prior_std = hyperparameters['prior_std']
    self.num_samples_per_cycle = hyperparameters['num_samples_per_cycle']
    self.cycle_length = hyperparameters['cycle_length']
    self.alpha = hyperparameters['alpha']
    self.burn_in_epochs = hyperparameters['burn_in_epochs']
    self.num_cycles = hyperparameters['num_cycles']

    self.model = reset_model(self.model)
    # SGHMC with friction alpha; the Gaussian prior with std prior_std enters as weight decay.
    self.optimizer = optimSGHMC(params=self.model.parameters(),
                                lr=self.lr_0,
                                momentum=1 - self.alpha,
                                num_training_samples=self.dataset_size,
                                weight_decay=1 / (self.prior_std ** 2))
    self.burnt_in = False
    self.epochs_run = 0

    # Each cycle must be long enough to burn in and still collect the requested samples.
    assert (self.cycle_length - self.burn_in_epochs - self.num_samples_per_cycle) > 0
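# A hedged sketch of a hyperparameter dictionary that satisfies the per-cycle
# constraint asserted above (burn-in plus collected samples strictly shorter
# than the cycle). The keys match the lookups in the method; the values are
# illustrative, not recommendations.
cyclic_sghmc_hyperparameters = {
    'lr_0': 0.1,
    'prior_std': 1.0,
    'num_samples_per_cycle': 3,
    'cycle_length': 50,    # epochs per cycle
    'alpha': 0.1,          # momentum = 1 - alpha
    'burn_in_epochs': 40,  # per-cycle burn-in
    'num_cycles': 4,
}
# Constraint checked by the assert: 50 - 40 - 3 = 7 > 0.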
def update_hyp(self, hyperparameters):
    self.lr = hyperparameters['lr']
    self.num_samples = hyperparameters['num_samples']
    self.epochs = hyperparameters['epochs']
    self.dropout = hyperparameters['dropout']
    self.momentum = hyperparameters['momentum']

    # If no explicit weight decay is given, derive it from the lengthscale:
    # wd = lengthscale^2 * (1 - dropout) / (2 * dataset_size).
    if hyperparameters['weight_decay'] != 0:
        self.weight_decay = hyperparameters['weight_decay']
    else:
        self.weight_decay = (hyperparameters['lengthscale'] ** 2
                             * (1 - self.dropout) / (2. * self.dataset_size))

    # Reset the model before constructing the optimizer so the optimizer
    # tracks the current parameters (mirrors the other samplers).
    self.model = reset_model(self.model).to(self.device)
    self.optimizer = torch.optim.SGD(self.model.parameters(),
                                     lr=self.lr,
                                     momentum=self.momentum,
                                     weight_decay=self.weight_decay)
    self.burnt_in = False
    self.epochs_run = 0

    # Anneal the learning rate down to half its initial value.
    self.lr_final = self.lr / 2
    self.optimizer_scheduler = CosineAnnealingLR(optimizer=self.optimizer,
                                                 T_max=self.burn_in_epochs + self.num_samples,
                                                 eta_min=self.lr_final)
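# A small sketch making the lengthscale fallback above concrete. The numbers
# are arbitrary and only illustrate the formula used when
# hyperparameters['weight_decay'] == 0.
lengthscale = 1e-2
dropout = 0.1
dataset_size = 50_000
weight_decay = lengthscale ** 2 * (1 - dropout) / (2.0 * dataset_size)
print(weight_decay)  # 9e-10 for these illustrative values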
def update_hyp(self, hyperparameters):
    self.lr = hyperparameters['lr']
    self.prior_std = hyperparameters['prior_std']
    self.num_samples = hyperparameters['num_samples']
    self.alpha = hyperparameters['alpha']
    self.burn_in_epochs = hyperparameters['burn_in_epochs']

    self.model = reset_model(self.model)
    self.burnt_in = False
    self.epochs_run = 0
    self.optimizer = optimSGHMC(params=self.model.parameters(),
                                lr=self.lr,
                                momentum=1 - self.alpha,
                                num_training_samples=self.dataset_size,
                                weight_decay=1 / (self.prior_std ** 2))

    self.lr_final = self.lr / 2
    self.optimizer_scheduler = CosineAnnealingLR(optimizer=self.optimizer,
                                                 T_max=self.burn_in_epochs + self.num_samples,
                                                 eta_min=self.lr_final)