def _prior_gamma(self):
     self.fc1_prior = {
         'radius': ('Gamma', {
             'softplus_inv_shape': softplus_inv(GAMMA_SHAPE),
             'softplus_inv_rate': softplus_inv(GAMMA_RATE)
         })
     }
     self.fc2_prior = {
         'radius': ('Gamma', {
             'softplus_inv_shape': softplus_inv(GAMMA_SHAPE),
             'softplus_inv_rate': softplus_inv(GAMMA_RATE)
         })
     }
 def _prior_weibull(self):
     self.fc1_prior = {
         'radius': ('Weibull', {
             'softplus_inv_shape': softplus_inv(WEIBULL_SHAPE),
             'softplus_inv_scale': softplus_inv(WEIBULL_SCALE)
         })
     }
     self.fc2_prior = {
         'radius': ('Weibull', {
             'softplus_inv_shape': softplus_inv(WEIBULL_SHAPE),
             'softplus_inv_scale': softplus_inv(WEIBULL_SCALE)
         })
     }
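# The prior dictionaries above pass every positive hyperparameter through
# softplus_inv. A minimal sketch of that helper (an assumption consistent with
# the torch_user.nn.utils import used later, not necessarily the repo's exact code):
import math
import torch

def softplus_inv(x):
    # Inverse of softplus: returns y with log(1 + exp(y)) == x,
    # written in the numerically stable form y = x + log(1 - exp(-x)).
    if isinstance(x, torch.Tensor):
        return x + torch.log(-torch.expm1(-x))
    return x + math.log(-math.expm1(-x))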
 def reset_parameters(self, hyperparams={}):
     if 'LogNormal' in hyperparams.keys():
         if 'mu_normal_mean' in hyperparams['LogNormal'].keys():
             self.mu_normal_mean = hyperparams['LogNormal'][
                 'mu_normal_mean']
         if 'mu_normal_std' in hyperparams['LogNormal'].keys():
             self.mu_normal_std = hyperparams['LogNormal']['mu_normal_std']
         if 'softplus_inv_std_normal_mean' in hyperparams['LogNormal'].keys(
         ):
             self.softplus_inv_std_normal_mean = hyperparams['LogNormal'][
                 'softplus_inv_std_normal_mean']
         if 'softplus_inv_std_normal_std' in hyperparams['LogNormal'].keys(
         ):
             self.softplus_inv_std_normal_std = hyperparams['LogNormal'][
                 'softplus_inv_std_normal_std']
         torch.nn.init.normal_(self.mu, self.mu_normal_mean,
                               self.mu_normal_std)
         torch.nn.init.normal_(self.softplus_inv_std,
                               self.softplus_inv_std_normal_mean,
                               self.softplus_inv_std_normal_std)
         if 'mu' in hyperparams['LogNormal'].keys():
             self.mu.data.copy_(hyperparams['LogNormal']['mu'])
             self.mu_init_type = 'fixed'
         if 'std' in hyperparams['LogNormal'].keys():
             self.softplus_inv_std.data.copy_(
                 softplus_inv(hyperparams['LogNormal']['std']))
             self.std_init_type = 'fixed'
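# Hedged usage sketch of the reset above (shapes and values are illustrative):
# supplying 'mu' and 'std' overrides the random normal initialization with
# fixed values and flips the *_init_type flags to 'fixed'.
import math
import torch

rsampler = LognormalReparametrizedSample(batch_shape=torch.Size([50]))
rsampler.reset_parameters({
    'LogNormal': {
        'mu': torch.full((50,), math.log(1.0)),
        'std': torch.full((50,), 1e-4)
    }
})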
    def __init__(self, prior_type, n_in, n_hidden):
        super(UCIFCRadial, self).__init__()
        self.prior = UCIFCRadialPrior(prior_type, n_in, n_hidden)
        fc1_prior, fc2_prior = self.prior()
        self.obs_precision_softplus_inv_shape = nn.Parameter(
            torch.Tensor([softplus_inv(NOISE_GAMMA_PRIOR_SHAPE_INIT)]))
        self.obs_precision_softplus_inv_rate = nn.Parameter(
            torch.Tensor([softplus_inv(NOISE_GAMMA_PRIOR_RATE_INIT)]))

        self.fc1 = DoubleRadialLinear(in_features=n_in,
                                      out_features=n_hidden,
                                      bias=True,
                                      prior=fc1_prior)
        self.nonlinear1 = nn.ReLU()
        self.fc2 = DoubleRadialLinear(in_features=n_hidden,
                                      out_features=1,
                                      bias=True,
                                      prior=fc2_prior)
 def __init__(self, batch_shape):
     super(LognormalReparametrizedSample, self).__init__()
     self.batch_shape = batch_shape
     self.mu = Parameter(torch.Tensor(batch_shape))
     self.softplus_inv_std = Parameter(torch.Tensor(batch_shape))
     self.mu_normal_mean = 0.0
     self.mu_normal_std = 0.0001
     self.softplus_inv_std_normal_mean = softplus_inv(1e-4)
     self.softplus_inv_std_normal_std = 0.0001
     self.mu_init_type = 'random'
     self.std_init_type = 'random'
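# What these parameters support, sketched as a reparametrized LogNormal draw.
# This forward pass is an assumption; the class's actual forward() is not shown here.
import torch
import torch.nn.functional as F

def lognormal_rsample(mu, softplus_inv_std, sample_shape=torch.Size()):
    std = F.softplus(softplus_inv_std)           # std kept positive via softplus
    eps = torch.randn(sample_shape + mu.shape)   # standard normal noise
    return torch.exp(mu + std * eps)             # exp of a reparametrized Normal draw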
 def __init__(self, prior_type, n1, n2):
     self.fc1_prior = None
     self.fc2_prior = None
     if prior_type == 'Gamma':
         self._prior_gamma()
     elif prior_type == 'Weibull':
         self._prior_weibull()
     elif prior_type == 'HalfCauchy':
         self._prior_halfcauchy()
     else:
         raise NotImplementedError
     self.fc1_prior['direction'] = ('vMF', {
         'row_softplus_inv_concentration':
         softplus_inv(ml_kappa(dim=n1, eps=PRIOR_EPSILON)),
         'col_softplus_inv_concentration':
         softplus_inv(ml_kappa(dim=n2, eps=PRIOR_EPSILON))
     })
     self.fc2_prior['direction'] = ('vMF', {
         'row_softplus_inv_concentration':
         softplus_inv(ml_kappa(dim=n2, eps=PRIOR_EPSILON)),
         'col_softplus_inv_concentration':
         softplus_inv(ml_kappa(dim=1, eps=PRIOR_EPSILON))
     })
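# Hedged usage sketch (hypothetical layer sizes; GAMMA_SHAPE, GAMMA_RATE,
# PRIOR_EPSILON, ml_kappa and softplus_inv are assumed module-level names, and
# the prior object is assumed callable, as in UCIFCRadial.__init__ above):
prior = UCIFCRadialPrior('Gamma', n1=13, n2=50)
fc1_prior, fc2_prior = prior()
# fc1_prior['radius'] -> ('Gamma', {...}); fc1_prior['direction'] -> ('vMF', {...})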
 def __init__(self, batch_shape, event_shape):
     self.batch_shape = batch_shape
     if isinstance(event_shape, Number):
         event_shape = torch.Size([event_shape])
     self.event_shape = event_shape
     assert len(event_shape) == 1
     self.dim = int(event_shape[0])
     super(VonMisesFisherReparametrizedSample, self).__init__()
     self.loc = Parameter(torch.Tensor(batch_shape + event_shape))
     self.softplus_inv_concentration = Parameter(
         torch.Tensor(torch.Size([1])))
     # Too large a kappa slows down rejection sampling, so we set an upper bound, which is applied in the forward pass
     self.softplus_inv_concentration_upper_bound = softplus_inv(
         ml_kappa(dim=float(event_shape[0]), eps=2e-3))
     self.beta_sample = None
     self.concentration = None
     self.gradient_correction_required = True
     self.softplus_inv_concentration_normal_mean = softplus_inv(
         ml_kappa(dim=float(event_shape[0]), eps=EPSILON))
     self.softplus_inv_concentration_normal_std = 0.001
     self.direction_init_method = None
     self.rsample = None
     self.loc_init_type = 'random'
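# Sketch of how the concentration upper bound above would be used in the
# forward pass (an assumption; the actual forward() is not shown in this snippet):
import torch
import torch.nn.functional as F

def clamped_concentration(softplus_inv_concentration, upper_bound):
    # Clamp in softplus-inverse space, then map back to a positive concentration.
    return F.softplus(torch.clamp(softplus_inv_concentration, max=upper_bound))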
 def _prior_halfcauchy(self):
     self.fc1_prior = {
         'radius': ('HalfCauchy', {
             'softplus_inv_shape': softplus_inv(0.5),
             'softplus_inv_rate': 2 * softplus_inv(HALFCAUCHY_TAU),
             'softplus_inv_shape1': softplus_inv(0.5),
             'softplus_inv_rate1': softplus_inv(1)
         })
     }
     self.fc2_prior = {
         'radius': ('HalfCauchy', {
             'softplus_inv_shape': softplus_inv(0.5),
             'softplus_inv_rate': 2 * softplus_inv(HALFCAUCHY_TAU),
             'softplus_inv_shape1': softplus_inv(0.5),
             'softplus_inv_rate1': softplus_inv(1)
         })
     }
    def reset_parameters(self, hyperparams={}):
        if 'vMF' in hyperparams.keys():
            if 'direction' in hyperparams['vMF'].keys():
                if isinstance(hyperparams['vMF']['direction'], str):
                    if hyperparams['vMF']['direction'] == 'kaiming':
                        self.direction_init_method = torch.nn.init.kaiming_normal_
                    elif hyperparams['vMF'][
                            'direction'] == 'kaiming_transpose':
                        self.direction_init_method = kaiming_transpose
                    elif hyperparams['vMF']['direction'] == 'orthogonal':
                        self.direction_init_method = torch.nn.init.orthogonal_
                    else:
                        raise NotImplementedError
                    self.direction_init_method(self.loc)
                elif isinstance(hyperparams['vMF']['direction'], torch.Tensor):
                    self.loc.data.copy_(hyperparams['vMF']['direction'])
                    self.loc_init_type = 'fixed'
                else:
                    raise NotImplementedError

                self.loc.data /= torch.sum(self.loc.data**2,
                                           dim=-1,
                                           keepdim=True)**0.5

            if 'softplus_inv_concentration_normal_mean' in hyperparams[
                    'vMF'].keys():
                self.softplus_inv_concentration_normal_mean = hyperparams[
                    'vMF']['softplus_inv_concentration_normal_mean']
            if 'softplus_inv_concentration_normal_mean_via_epsilon' in hyperparams[
                    'vMF'].keys():
                epsilon = hyperparams['vMF'][
                    'softplus_inv_concentration_normal_mean_via_epsilon']
                self.softplus_inv_concentration_normal_mean = softplus_inv(
                    ml_kappa(dim=float(self.event_shape[0]), eps=epsilon))
            if 'softplus_inv_concentration_normal_std' in hyperparams[
                    'vMF'].keys():
                self.softplus_inv_concentration_normal_std = hyperparams[
                    'vMF']['softplus_inv_concentration_normal_std']
        torch.nn.init.normal_(self.softplus_inv_concentration,
                              self.softplus_inv_concentration_normal_mean,
                              self.softplus_inv_concentration_normal_std)
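# Hedged usage sketch (hypothetical shapes): pass a fixed direction tensor,
# which reset_parameters row-normalizes, and set the concentration initializer
# indirectly through an epsilon value.
import torch

sampler = VonMisesFisherReparametrizedSample(batch_shape=torch.Size([50]),
                                             event_shape=13)
sampler.reset_parameters({
    'vMF': {
        'direction': torch.randn(50, 13),
        'softplus_inv_concentration_normal_mean_via_epsilon': 0.05,
        'softplus_inv_concentration_normal_std': 0.001
    }
})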
Example #10
def train_initiate(prior_type,
                   data_type,
                   split_id,
                   output_normalize,
                   n_pred_samples,
                   n_epoch,
                   lr,
                   batch_size=32,
                   num_workers=4,
                   use_gpu=True):
    exp_info_dict = {
        'prior_type': prior_type,
        'data_type': data_type,
        'data_id': split_id,
        'output_normalize': output_normalize,
        'n_epoch': n_epoch,
        'lr': lr,
        'batch_size': batch_size
    }
    time_tag = datetime.now().strftime("%H:%M:%S:%f")
    exp_filename_prefix = '_'.join([
        'Radial-double', data_type, prior_type, 'E' + str(n_epoch).zfill(4),
        str(split_id).zfill(2), time_tag
    ])
    use_gpu = use_gpu and cuda.is_available()
    print(exp_filename_prefix)

    train_loader, test_loader, train_loader_eval, normalization_info = data_loader(
        data_type,
        split_id=split_id,
        batch_size=batch_size,
        num_workers=num_workers,
        output_normalize=output_normalize)
    model = load_model(prior_type, data_type, use_gpu)
    initialization_hyperparams = {
        'vMF': {
            'direction': 'kaiming',
            'softplus_inv_concentration_normal_mean_via_epsilon': 0.05,
            'softplus_inv_concentration_normal_std': 0.01
        },
        'LogNormal': {
            'mu_normal_mean': math.log(1.0),
            'mu_normal_std': 0.0001,
            'softplus_inv_std_normal_mean': softplus_inv(1e-4),
            'softplus_inv_std_normal_std': 0.0001
        },
        'Gamma': {
            'softplus_inv_shape_normal_mean': softplus_inv(2.0**0.5),
            'softplus_inv_shape_normal_std': 0.1,
            'softplus_inv_rate_normal_mean': softplus_inv(1.0),
            'softplus_inv_rate_normal_std': 0.1
        }
    }
    model.reset_parameters(initialization_hyperparams)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    annealing_steps = float(50 *
                            math.ceil(len(train_loader.dataset) / batch_size))
    beta_func = lambda s: min(s, annealing_steps) / annealing_steps

    train_log, n_steps = train(model=model,
                               optimizer=optimizer,
                               train_loader=train_loader,
                               begin_step=0,
                               epoch_begin=0,
                               epoch_end=n_epoch,
                               beta_func=beta_func,
                               use_gpu=use_gpu)
    eval_log = evaluate(model=model,
                        train_loader_eval=train_loader_eval,
                        test_loader=test_loader,
                        normalization_info=normalization_info,
                        n_pred_samples=n_pred_samples)
    exp_info_dict['n_steps'] = n_steps
    exp_info_dict['beta_func'] = beta_func
    exp_filename = save_experiment(
        model=model,
        optimizer=optimizer,
        log_text='\n'.join([exp_filename_prefix, train_log, eval_log]),
        exp_info_dict=exp_info_dict,
        filename_prefix=exp_filename_prefix)
    return exp_filename
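# Quick check of the KL-annealing schedule used above: beta ramps linearly from
# 0 to 1 over the first 50 epochs' worth of optimizer steps, then stays at 1.
# (The dataset size of 1000 below is illustrative only.)
import math
annealing_steps = float(50 * math.ceil(1000 / 32))        # 50 * 32 = 1600.0
beta_func = lambda s: min(s, annealing_steps) / annealing_steps
assert beta_func(0) == 0.0
assert beta_func(800) == 0.5
assert beta_func(5000) == 1.0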
Example #11
import math
import argparse

from BayesianNeuralNetwork.torch_user.nn.utils import softplus_inv
from BayesianNeuralNetwork.compress import train_continue, train_initiate, prior_info_from_json

INIT_HYPER = {
    'vMF': {
        'direction': 'kaiming',
        'softplus_inv_concentration_normal_mean_via_epsilon': 0.1,
        'softplus_inv_concentration_normal_std': 0.0001
    },
    'LogNormal': {
        'mu_normal_mean': None,
        'mu_normal_std': 0.0001,
        'softplus_inv_std_normal_mean': softplus_inv(0.0001),
        'softplus_inv_std_normal_std': 0.0001
    },
    'Normal': {
        'mu_normal_mean': 0.0,
        'mu_normal_std': 0.0001,
        'softplus_inv_std_normal_mean': softplus_inv(0.0001),
        'softplus_inv_std_normal_std': 0.0001
    }
}

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='MNIST Train script')
    parser.add_argument('--model_type',
                        dest='model_type',
                        type=str,
Example #12
def train_initiate(model_type,
                   prior_type,
                   data_type,
                   n_pred_samples,
                   n_epoch,
                   lr,
                   batch_size=32,
                   num_workers=4,
                   use_gpu=False):
    exp_info_dict = {
        'model_type': model_type,
        'prior_type': prior_type,
        'data_type': data_type,
        'n_epoch': n_epoch,
        'lr': lr,
        'batch_size': batch_size
    }
    time_tag = datetime.now().strftime("%H:%M:%S:%f")
    exp_filename_prefix = '_'.join([
        data_type, model_type, prior_type, 'E' + str(n_epoch).zfill(4),
        time_tag
    ])
    use_gpu = use_gpu and cuda.is_available()
    print(exp_filename_prefix)

    initialization_hyperparams = {
        'vMF': {
            'direction': 'kaiming',
            'softplus_inv_concentration_normal_mean_via_epsilon': 0.1,
            'softplus_inv_concentration_normal_std': 0.1
        },
        'LogNormal': {
            'mu_normal_mean': math.log(1.0),
            'mu_normal_std': 0.1,
            'softplus_inv_std_normal_mean': math.log(1e-2),
            'softplus_inv_std_normal_std': 0.1
        },
        'Gamma': {
            'softplus_inv_shape_normal_mean': softplus_inv(2.0**0.5),
            'softplus_inv_shape_normal_std': 0.1,
            'softplus_inv_rate_normal_mean': softplus_inv(1.0),
            'softplus_inv_rate_normal_std': 0.1
        }
    }
    model = load_model(model_type=model_type,
                       prior_type=prior_type,
                       use_gpu=use_gpu)
    model.reset_parameters(initialization_hyperparams)
    for c in model.children():
        if c._get_name() == 'DoubleRadialLinear':
            if c.in_features > 1:
                c.row_direction_rsampler.reset_parameters({
                    'vMF': {
                        'direction': 'kaiming',
                        'softplus_inv_concentration_normal_mean_via_epsilon': 0.05,
                        'softplus_inv_concentration_normal_std': 0.5
                    }
                })
            if c.out_features > 1:
                c.col_direction_rsampler.reset_parameters({
                    'vMF': {
                        'direction': 'kaiming_transpose',
                        'softplus_inv_concentration_normal_mean_via_epsilon': 0.05,
                        'softplus_inv_concentration_normal_std': 0.5
                    }
                })

    train_loader, valid_loader, test_loader, train_loader_eval = load_data(
        data_type=data_type,
        batch_size=batch_size,
        num_workers=num_workers,
        use_gpu=use_gpu)
    eval_loaders = [train_loader_eval, valid_loader, test_loader]
    annealing_steps = float(50.0 *
                            math.ceil(len(train_loader.dataset) / batch_size))
    beta_func = lambda s: min(s, annealing_steps) / annealing_steps
    # beta_func = lambda s: 1.0
    optimizer = optim.Adam(model.parameters(), lr=lr)

    train_log, n_steps = train(model=model,
                               optimizer=optimizer,
                               train_loader=train_loader,
                               begin_step=0,
                               epoch_begin=0,
                               epoch_end=n_epoch,
                               beta_func=beta_func,
                               filename_prefix=exp_filename_prefix,
                               eval_loaders=eval_loaders,
                               use_gpu=use_gpu)
    eval_log = evaluate(model=model,
                        train_loader_eval=train_loader_eval,
                        valid_loader=valid_loader,
                        test_loader=test_loader,
                        n_pred_samples=n_pred_samples)

    exp_info_dict['n_steps'] = n_steps
    exp_info_dict['beta_func'] = beta_func
    exp_filename = save_experiment(
        model=model,
        optimizer=optimizer,
        log_text='\n'.join([exp_filename_prefix, train_log, eval_log]),
        exp_info_dict=exp_info_dict,
        filename_prefix=exp_filename_prefix)
    return exp_filename
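# Hedged usage sketch for the entry point above (argument values are
# illustrative; 'FC' and the other choices are assumptions, not the repo's defaults):
if __name__ == '__main__':
    exp_filename = train_initiate(model_type='FC',
                                  prior_type='Gamma',
                                  data_type='MNIST',
                                  n_pred_samples=20,
                                  n_epoch=100,
                                  lr=1e-3)
    print(exp_filename)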