def _prior_gamma(self):
    self.fc1_prior = {
        'radius': ('Gamma', {
            'softplus_inv_shape': softplus_inv(GAMMA_SHAPE),
            'softplus_inv_rate': softplus_inv(GAMMA_RATE)
        })
    }
    self.fc2_prior = {
        'radius': ('Gamma', {
            'softplus_inv_shape': softplus_inv(GAMMA_SHAPE),
            'softplus_inv_rate': softplus_inv(GAMMA_RATE)
        })
    }

def _prior_weibull(self):
    self.fc1_prior = {
        'radius': ('Weibull', {
            'softplus_inv_shape': softplus_inv(WEIBULL_SHAPE),
            'softplus_inv_scale': softplus_inv(WEIBULL_SCALE)
        })
    }
    self.fc2_prior = {
        'radius': ('Weibull', {
            'softplus_inv_shape': softplus_inv(WEIBULL_SHAPE),
            'softplus_inv_scale': softplus_inv(WEIBULL_SCALE)
        })
    }

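# Hypothetical module-level constants assumed by the prior builders in this
# class; the repo defines its own values, these are placeholders for
# illustration only:
#
#     GAMMA_SHAPE, GAMMA_RATE = 2.0 ** 0.5, 1.0
#     WEIBULL_SHAPE, WEIBULL_SCALE = 1.0, 1.0
#     HALFCAUCHY_TAU = 1.0
#     PRIOR_EPSILON = 0.1
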
def reset_parameters(self, hyperparams={}):
    if 'LogNormal' in hyperparams.keys():
        # Override the Normal-initialization defaults set in __init__.
        if 'mu_normal_mean' in hyperparams['LogNormal'].keys():
            self.mu_normal_mean = hyperparams['LogNormal']['mu_normal_mean']
        if 'mu_normal_std' in hyperparams['LogNormal'].keys():
            self.mu_normal_std = hyperparams['LogNormal']['mu_normal_std']
        if 'softplus_inv_std_normal_mean' in hyperparams['LogNormal'].keys():
            self.softplus_inv_std_normal_mean = hyperparams['LogNormal']['softplus_inv_std_normal_mean']
        if 'softplus_inv_std_normal_std' in hyperparams['LogNormal'].keys():
            self.softplus_inv_std_normal_std = hyperparams['LogNormal']['softplus_inv_std_normal_std']
        torch.nn.init.normal_(self.mu, self.mu_normal_mean, self.mu_normal_std)
        torch.nn.init.normal_(self.softplus_inv_std,
                              self.softplus_inv_std_normal_mean,
                              self.softplus_inv_std_normal_std)
        # Fixed values, when given, take precedence over random initialization.
        if 'mu' in hyperparams['LogNormal'].keys():
            self.mu.data.copy_(hyperparams['LogNormal']['mu'])
            self.mu_init_type = 'fixed'
        if 'std' in hyperparams['LogNormal'].keys():
            self.softplus_inv_std.data.copy_(
                softplus_inv(hyperparams['LogNormal']['std']))
            self.std_init_type = 'fixed'

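# For reference, a hyperparams dict this method accepts, mirroring exactly the
# keys checked above (a usage sketch; values are illustrative):
#
#     sampler.reset_parameters({'LogNormal': {
#         'mu_normal_mean': 0.0, 'mu_normal_std': 1e-4,
#         'softplus_inv_std_normal_mean': softplus_inv(1e-4),
#         'softplus_inv_std_normal_std': 1e-4,
#     }})
#
# Passing 'mu' / 'std' tensors instead switches the corresponding
# *_init_type flags to 'fixed'.
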
def __init__(self, prior_type, n_in, n_hidden):
    super(UCIFCRadial, self).__init__()
    self.prior = UCIFCRadialPrior(prior_type, n_in, n_hidden)
    fc1_prior, fc2_prior = self.prior()
    # Learnable Gamma hyperparameters for the observation-noise precision.
    self.obs_precision_softplus_inv_shape = nn.Parameter(
        torch.Tensor([softplus_inv(NOISE_GAMMA_PRIOR_SHAPE_INIT)]))
    self.obs_precision_softplus_inv_rate = nn.Parameter(
        torch.Tensor([softplus_inv(NOISE_GAMMA_PRIOR_RATE_INIT)]))
    self.fc1 = DoubleRadialLinear(in_features=n_in, out_features=n_hidden,
                                  bias=True, prior=fc1_prior)
    self.nonlinear1 = nn.ReLU()
    self.fc2 = DoubleRadialLinear(in_features=n_hidden, out_features=1,
                                  bias=True, prior=fc2_prior)

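# Hypothetical instantiation for a UCI regression task (layer sizes are
# illustrative, not values from the repo):
#
#     model = UCIFCRadial(prior_type='Gamma', n_in=13, n_hidden=50)
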
def __init__(self, batch_shape):
    super(LognormalReparametrizedSample, self).__init__()
    self.batch_shape = batch_shape
    self.mu = Parameter(torch.Tensor(batch_shape))
    self.softplus_inv_std = Parameter(torch.Tensor(batch_shape))
    # Defaults for the random (Normal) initialization in reset_parameters.
    self.mu_normal_mean = 0.0
    self.mu_normal_std = 0.0001
    self.softplus_inv_std_normal_mean = softplus_inv(1e-4)
    self.softplus_inv_std_normal_std = 0.0001
    self.mu_init_type = 'random'
    self.std_init_type = 'random'

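# softplus_inv appears throughout as the inverse link for positive quantities.
# A minimal scalar sketch of the inverse of softplus(x) = log(1 + exp(x)),
# offered as an assumption for illustration -- the repo's own softplus_inv
# (torch_user.nn.utils) may instead operate on tensors.
import math

def softplus_inv_sketch(y):
    # log(expm1(y)) is the direct inverse; the y + log1p(-exp(-y)) form
    # avoids overflow for large y. Both equal log(exp(y) - 1).
    return y + math.log1p(-math.exp(-y)) if y > 1.0 else math.log(math.expm1(y))
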
def __init__(self, prior_type, n1, n2):
    self.fc1_prior = None
    self.fc2_prior = None
    if prior_type == 'Gamma':
        self._prior_gamma()
    elif prior_type == 'Weibull':
        self._prior_weibull()
    elif prior_type == 'HalfCauchy':
        self._prior_halfcauchy()
    else:
        raise NotImplementedError
    # Directional (vMF) priors for the rows and columns of each weight matrix.
    self.fc1_prior['direction'] = ('vMF', {
        'row_softplus_inv_concentration': softplus_inv(ml_kappa(dim=n1, eps=PRIOR_EPSILON)),
        'col_softplus_inv_concentration': softplus_inv(ml_kappa(dim=n2, eps=PRIOR_EPSILON))
    })
    self.fc2_prior['direction'] = ('vMF', {
        'row_softplus_inv_concentration': softplus_inv(ml_kappa(dim=n2, eps=PRIOR_EPSILON)),
        'col_softplus_inv_concentration': softplus_inv(ml_kappa(dim=1, eps=PRIOR_EPSILON))
    })

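# Usage sketch, following how UCIFCRadial.__init__ consumes this class
# (layer sizes are illustrative):
#
#     prior = UCIFCRadialPrior('Gamma', n1=13, n2=50)
#     fc1_prior, fc2_prior = prior()  # each dict holds 'radius' and 'direction' specs
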
def __init__(self, batch_shape, event_shape):
    self.batch_shape = batch_shape
    if isinstance(event_shape, Number):
        event_shape = torch.Size([event_shape])
    self.event_shape = event_shape
    assert len(event_shape) == 1
    self.dim = int(event_shape[0])
    super(VonMisesFisherReparametrizedSample, self).__init__()
    self.loc = Parameter(torch.Tensor(batch_shape + event_shape))
    self.softplus_inv_concentration = Parameter(torch.Tensor(torch.Size([1])))
    # Too large a kappa slows down rejection sampling, so we set an upper
    # bound, which is applied in the forward pass.
    self.softplus_inv_concentration_upper_bound = softplus_inv(
        ml_kappa(dim=float(event_shape[0]), eps=2e-3))
    self.beta_sample = None
    self.concentration = None
    self.gradient_correction_required = True
    self.softplus_inv_concentration_normal_mean = softplus_inv(
        ml_kappa(dim=float(event_shape[0]), eps=EPSILON))
    self.softplus_inv_concentration_normal_std = 0.001
    self.direction_init_method = None
    self.rsample = None
    self.loc_init_type = 'random'

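# ml_kappa is imported elsewhere and not defined in this section. One plausible
# reading, offered purely as an assumption: the Banerjee et al. (2005)
# maximum-likelihood approximation for the vMF concentration whose mean
# resultant length is r = 1 - eps; larger dim or smaller eps gives larger kappa.
def ml_kappa_sketch(dim, eps):
    r = 1.0 - eps
    return r * (dim - r ** 2) / (1.0 - r ** 2)
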
def _prior_halfcauchy(self):
    self.fc1_prior = {
        'radius': ('HalfCauchy', {
            'softplus_inv_shape': softplus_inv(0.5),
            'softplus_inv_rate': 2 * softplus_inv(HALFCAUCHY_TAU),
            'softplus_inv_shape1': softplus_inv(0.5),
            'softplus_inv_rate1': softplus_inv(1)
        })
    }
    self.fc2_prior = {
        'radius': ('HalfCauchy', {
            'softplus_inv_shape': softplus_inv(0.5),
            'softplus_inv_rate': 2 * softplus_inv(HALFCAUCHY_TAU),
            'softplus_inv_shape1': softplus_inv(0.5),
            'softplus_inv_rate1': softplus_inv(1)
        })
    }

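# The two (shape, rate) pairs above suggest the standard representation of a
# half-Cauchy prior as an inverse-Gamma scale mixture (Wand et al., 2011):
# with a ~ InvGamma(1/2, 1/tau^2) and x | a ~ InvGamma(1/2, 1/a), sqrt(x) is
# HalfCauchy(tau). A quick Monte Carlo sanity check of that identity -- an
# assumption about what this prior spec encodes, not code from the repo:
import torch

tau = 1.0
# InvGamma(shape, scale) sampled as the reciprocal of Gamma(shape, rate=scale).
a = 1.0 / torch.distributions.Gamma(0.5, 1.0 / tau ** 2).sample((100000,))
x = 1.0 / torch.distributions.Gamma(0.5, 1.0 / a).sample()
# Both medians should be close to tau, the median of HalfCauchy(tau).
print(x.sqrt().median(),
      torch.distributions.HalfCauchy(tau).sample((100000,)).median())
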
def reset_parameters(self, hyperparams={}):
    if 'vMF' in hyperparams.keys():
        if 'direction' in hyperparams['vMF'].keys():
            if type(hyperparams['vMF']['direction']) == str:
                if hyperparams['vMF']['direction'] == 'kaiming':
                    self.direction_init_method = torch.nn.init.kaiming_normal_
                elif hyperparams['vMF']['direction'] == 'kaiming_transpose':
                    self.direction_init_method = kaiming_transpose
                elif hyperparams['vMF']['direction'] == 'orthogonal':
                    self.direction_init_method = torch.nn.init.orthogonal_
                self.direction_init_method(self.loc)
            elif type(hyperparams['vMF']['direction']) == torch.Tensor:
                self.loc.data.copy_(hyperparams['vMF']['direction'])
                self.loc_init_type = 'fixed'
            else:
                raise NotImplementedError
            # Project the mean direction onto the unit sphere.
            self.loc.data /= torch.sum(self.loc.data ** 2, dim=-1, keepdim=True) ** 0.5
        if 'softplus_inv_concentration_normal_mean' in hyperparams['vMF'].keys():
            self.softplus_inv_concentration_normal_mean = hyperparams['vMF']['softplus_inv_concentration_normal_mean']
        if 'softplus_inv_concentration_normal_mean_via_epsilon' in hyperparams['vMF'].keys():
            epsilon = hyperparams['vMF']['softplus_inv_concentration_normal_mean_via_epsilon']
            self.softplus_inv_concentration_normal_mean = softplus_inv(
                ml_kappa(dim=float(self.event_shape[0]), eps=epsilon))
        if 'softplus_inv_concentration_normal_std' in hyperparams['vMF'].keys():
            self.softplus_inv_concentration_normal_std = hyperparams['vMF']['softplus_inv_concentration_normal_std']
        torch.nn.init.normal_(self.softplus_inv_concentration,
                              self.softplus_inv_concentration_normal_mean,
                              self.softplus_inv_concentration_normal_std)

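# Usage sketch matching exactly the keys checked above (the epsilon value is
# illustrative; compare the initialization dicts in the training scripts):
#
#     sampler.reset_parameters({'vMF': {
#         'direction': 'kaiming',
#         'softplus_inv_concentration_normal_mean_via_epsilon': 0.1,
#         'softplus_inv_concentration_normal_std': 0.1,
#     }})
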
def train_initiate(prior_type, data_type, split_id, output_normalize,
                   n_pred_samples, n_epoch, lr, batch_size=32, num_workers=4,
                   use_gpu=True):
    exp_info_dict = {
        'prior_type': prior_type,
        'data_type': data_type,
        'data_id': split_id,
        'output_normalize': output_normalize,
        'n_epoch': n_epoch,
        'lr': lr,
        'batch_size': batch_size
    }
    time_tag = datetime.now().strftime("%H:%M:%S:%f")
    exp_filename_prefix = '_'.join([
        'Radial-double', data_type, prior_type, 'E' + str(n_epoch).zfill(4),
        str(split_id).zfill(2), time_tag
    ])
    use_gpu = use_gpu and cuda.is_available()
    print(exp_filename_prefix)
    train_loader, test_loader, train_loader_eval, normalization_info = data_loader(
        data_type, split_id=split_id, batch_size=batch_size,
        num_workers=num_workers, output_normalize=output_normalize)
    model = load_model(prior_type, data_type, use_gpu)
    # Key names must match those checked in the reset_parameters methods
    # (softplus_inv_concentration..., softplus_inv_std...); mismatched keys
    # are silently ignored there.
    initialization_hyperparams = {
        'vMF': {
            'direction': 'kaiming',
            'softplus_inv_concentration_normal_mean_via_epsilon': 0.05,
            'softplus_inv_concentration_normal_std': 0.01
        },
        'LogNormal': {
            'mu_normal_mean': math.log(1.0),
            'mu_normal_std': 0.0001,
            'softplus_inv_std_normal_mean': softplus_inv(1e-4),
            'softplus_inv_std_normal_std': 0.0001
        },
        'Gamma': {
            'softplus_inv_shape_normal_mean': softplus_inv(2.0 ** 0.5),
            'softplus_inv_shape_normal_std': 0.1,
            'softplus_inv_rate_normal_mean': softplus_inv(1.0),
            'softplus_inv_rate_normal_std': 0.1
        }
    }
    model.reset_parameters(initialization_hyperparams)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # KL annealing: beta ramps linearly from 0 to 1 over the first 50 epochs.
    annealing_steps = float(50 * math.ceil(len(train_loader.dataset) / batch_size))
    beta_func = lambda s: min(s, annealing_steps) / annealing_steps
    train_log, n_steps = train(model=model, optimizer=optimizer,
                               train_loader=train_loader, begin_step=0,
                               epoch_begin=0, epoch_end=n_epoch,
                               beta_func=beta_func, use_gpu=use_gpu)
    eval_log = evaluate(model=model, train_loader_eval=train_loader_eval,
                        test_loader=test_loader,
                        normalization_info=normalization_info,
                        n_pred_samples=n_pred_samples)
    exp_info_dict['n_steps'] = n_steps
    exp_info_dict['beta_func'] = beta_func
    exp_filename = save_experiment(
        model=model, optimizer=optimizer,
        log_text='\n'.join([exp_filename_prefix, train_log, eval_log]),
        exp_info_dict=exp_info_dict,
        filename_prefix=exp_filename_prefix)
    return exp_filename

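# Hypothetical invocation for a single UCI split (the dataset name is an
# assumption; use whatever data_loader supports):
#
#     exp_filename = train_initiate(prior_type='Gamma', data_type='boston',
#                                   split_id=0, output_normalize=True,
#                                   n_pred_samples=100, n_epoch=400, lr=1e-3)
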
import math
import argparse

from BayesianNeuralNetwork.torch_user.nn.utils import softplus_inv
from BayesianNeuralNetwork.compress import train_continue, train_initiate, prior_info_from_json

INIT_HYPER = {
    'vMF': {
        'direction': 'kaiming',
        'softplus_inv_concentration_normal_mean_via_epsilon': 0.1,
        'softplus_inv_concentration_normal_std': 0.0001
    },
    'LogNormal': {
        'mu_normal_mean': None,
        'mu_normal_std': 0.0001,
        'softplus_inv_std_normal_mean': softplus_inv(0.0001),
        'softplus_inv_std_normal_std': 0.0001
    },
    'Normal': {
        'mu_normal_mean': 0.0,
        'mu_normal_std': 0.0001,
        'softplus_inv_std_normal_mean': softplus_inv(0.0001),
        'softplus_inv_std_normal_std': 0.0001
    }
}

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='MNIST Train script')
    parser.add_argument('--model_type', dest='model_type', type=str)

def train_initiate(model_type, prior_type, data_type, n_pred_samples, n_epoch,
                   lr, batch_size=32, num_workers=4, use_gpu=False):
    exp_info_dict = {
        'model_type': model_type,
        'prior_type': prior_type,
        'data_type': data_type,
        'n_epoch': n_epoch,
        'lr': lr,
        'batch_size': batch_size
    }
    time_tag = datetime.now().strftime("%H:%M:%S:%f")
    exp_filename_prefix = '_'.join([
        data_type, model_type, prior_type, 'E' + str(n_epoch).zfill(4), time_tag
    ])
    use_gpu = use_gpu and cuda.is_available()
    print(exp_filename_prefix)
    initialization_hyperparams = {
        'vMF': {
            'direction': 'kaiming',
            'softplus_inv_concentration_normal_mean_via_epsilon': 0.1,
            'softplus_inv_concentration_normal_std': 0.1
        },
        'LogNormal': {
            'mu_normal_mean': math.log(1.0),
            'mu_normal_std': 0.1,
            # math.log(1e-2) approximates softplus_inv(1e-2), since
            # softplus_inv(x) ~ log(x) for small x.
            'softplus_inv_std_normal_mean': math.log(1e-2),
            'softplus_inv_std_normal_std': 0.1
        },
        'Gamma': {
            'softplus_inv_shape_normal_mean': softplus_inv(2.0 ** 0.5),
            'softplus_inv_shape_normal_std': 0.1,
            'softplus_inv_rate_normal_mean': softplus_inv(1.0),
            'softplus_inv_rate_normal_std': 0.1
        }
    }
    model = load_model(model_type=model_type, prior_type=prior_type,
                       use_gpu=use_gpu)
    model.reset_parameters(initialization_hyperparams)
    # Re-initialize the direction samplers of each DoubleRadialLinear layer;
    # key names must match those checked in
    # VonMisesFisherReparametrizedSample.reset_parameters.
    for c in model.children():
        if c._get_name() == 'DoubleRadialLinear':
            if c.in_features > 1:
                c.row_direction_rsampler.reset_parameters({
                    'vMF': {
                        'direction': 'kaiming',
                        'softplus_inv_concentration_normal_mean_via_epsilon': 0.05,
                        'softplus_inv_concentration_normal_std': 0.5
                    }
                })
            if c.out_features > 1:
                c.col_direction_rsampler.reset_parameters({
                    'vMF': {
                        'direction': 'kaiming_transpose',
                        'softplus_inv_concentration_normal_mean_via_epsilon': 0.05,
                        'softplus_inv_concentration_normal_std': 0.5
                    }
                })
    train_loader, valid_loader, test_loader, train_loader_eval = load_data(
        data_type=data_type, batch_size=batch_size, num_workers=num_workers,
        use_gpu=use_gpu)
    eval_loaders = [train_loader_eval, valid_loader, test_loader]
    # KL annealing: beta ramps linearly from 0 to 1 over the first 50 epochs.
    annealing_steps = float(50.0 * math.ceil(len(train_loader.dataset) / batch_size))
    beta_func = lambda s: min(s, annealing_steps) / annealing_steps
    # beta_func = lambda s: 1.0
    optimizer = optim.Adam(model.parameters(), lr=lr)
    train_log, n_steps = train(model=model, optimizer=optimizer,
                               train_loader=train_loader, begin_step=0,
                               epoch_begin=0, epoch_end=n_epoch,
                               beta_func=beta_func,
                               filename_prefix=exp_filename_prefix,
                               eval_loaders=eval_loaders, use_gpu=use_gpu)
    eval_log = evaluate(model=model, train_loader_eval=train_loader_eval,
                        valid_loader=valid_loader, test_loader=test_loader,
                        n_pred_samples=n_pred_samples)
    exp_info_dict['n_steps'] = n_steps
    exp_info_dict['beta_func'] = beta_func
    exp_filename = save_experiment(
        model=model, optimizer=optimizer,
        log_text='\n'.join([exp_filename_prefix, train_log, eval_log]),
        exp_info_dict=exp_info_dict,
        filename_prefix=exp_filename_prefix)
    return exp_filename

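# Hypothetical invocation (model_type and data_type values are assumptions;
# use whatever load_model and load_data actually accept):
#
#     exp_filename = train_initiate(model_type='LeNet', prior_type='Gamma',
#                                   data_type='mnist', n_pred_samples=100,
#                                   n_epoch=200, lr=1e-3)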