def __init__(self,
             in_features,
             out_features,
             bias=True,
             prior_sigma_1=0.1,
             prior_sigma_2=0.4,
             prior_pi=1,
             posterior_mu_init=0,
             posterior_rho_init=-7.0,
             freeze=False,
             prior_dist=None):
        """Set up a Bayesian linear layer: variational (mu, rho) parameters
        and samplers for weight and bias, plus scale-mixture priors
        (as in the Bayes-by-Backprop paper)."""
        super().__init__()

        # layer configuration
        self.in_features = in_features
        self.out_features = out_features
        self.bias = bias
        self.freeze = freeze

        # posterior initialization settings
        self.posterior_mu_init = posterior_mu_init
        self.posterior_rho_init = posterior_rho_init

        # scale-mixture prior settings
        self.prior_sigma_1 = prior_sigma_1
        self.prior_sigma_2 = prior_sigma_2
        self.prior_pi = prior_pi
        self.prior_dist = prior_dist

        # variational weight parameters (mu, rho) and their sampler
        weight_shape = (out_features, in_features)
        self.weight_mu = nn.Parameter(
            torch.Tensor(*weight_shape).normal_(posterior_mu_init, 0.1))
        self.weight_rho = nn.Parameter(
            torch.Tensor(*weight_shape).normal_(posterior_rho_init, 0.1))
        self.weight_sampler = TrainableRandomDistribution(self.weight_mu,
                                                          self.weight_rho)

        # variational bias parameters (mu, rho) and their sampler
        self.bias_mu = nn.Parameter(
            torch.Tensor(out_features).normal_(posterior_mu_init, 0.1))
        self.bias_rho = nn.Parameter(
            torch.Tensor(out_features).normal_(posterior_rho_init, 0.1))
        self.bias_sampler = TrainableRandomDistribution(self.bias_mu,
                                                        self.bias_rho)

        # scale-mixture priors for weight and bias (as in the BBP paper)
        self.weight_prior_dist = PriorWeightDistribution(
            self.prior_pi, self.prior_sigma_1, self.prior_sigma_2,
            dist=self.prior_dist)
        self.bias_prior_dist = PriorWeightDistribution(
            self.prior_pi, self.prior_sigma_1, self.prior_sigma_2,
            dist=self.prior_dist)

        # accumulators updated elsewhere (e.g. during the forward pass)
        self.log_prior = 0
        self.log_variational_posterior = 0
# --- Example #2 ---
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 groups=1,
                 stride=1,
                 padding=0,
                 dilation=1,
                 bias=True,
                 prior_sigma_1=0.1,
                 prior_sigma_2=0.002,
                 prior_pi=1,
                 posterior_mu_init=0,
                 posterior_rho_init=-7.0,
                 freeze=False,
                 prior_dist=None):
        """Set up a Bayesian 1-D convolution layer: variational (mu, rho)
        parameters and samplers for weight and bias, plus scale-mixture
        priors (as in the Bayes-by-Backprop paper)."""
        super().__init__()

        # convolution configuration
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.freeze = freeze
        self.kernel_size = kernel_size
        self.groups = groups
        self.stride = stride
        self.padding = padding
        self.dilation = dilation
        self.bias = bias

        # posterior initialization settings
        self.posterior_mu_init = posterior_mu_init
        self.posterior_rho_init = posterior_rho_init

        # scale-mixture prior settings
        self.prior_sigma_1 = prior_sigma_1
        self.prior_sigma_2 = prior_sigma_2
        self.prior_pi = prior_pi
        self.prior_dist = prior_dist

        # variational weights: (out_channels, in_channels // groups, kernel_size)
        kernel_shape = (out_channels, in_channels // groups, kernel_size)
        self.weight_mu = nn.Parameter(
            torch.Tensor(*kernel_shape).normal_(posterior_mu_init, 0.1))
        self.weight_rho = nn.Parameter(
            torch.Tensor(*kernel_shape).normal_(posterior_rho_init, 0.1))
        self.weight_sampler = TrainableRandomDistribution(self.weight_mu,
                                                          self.weight_rho)

        # variational biases: one per output channel
        self.bias_mu = nn.Parameter(
            torch.Tensor(out_channels).normal_(posterior_mu_init, 0.1))
        self.bias_rho = nn.Parameter(
            torch.Tensor(out_channels).normal_(posterior_rho_init, 0.1))
        self.bias_sampler = TrainableRandomDistribution(self.bias_mu,
                                                        self.bias_rho)

        # scale-mixture priors for weight and bias (as in the BBP paper)
        self.weight_prior_dist = PriorWeightDistribution(
            self.prior_pi, self.prior_sigma_1, self.prior_sigma_2,
            dist=self.prior_dist)
        self.bias_prior_dist = PriorWeightDistribution(
            self.prior_pi, self.prior_sigma_1, self.prior_sigma_2,
            dist=self.prior_dist)

        # accumulators updated elsewhere (e.g. during the forward pass)
        self.log_prior = 0
        self.log_variational_posterior = 0
    def __init__(self,
                 num_embeddings,
                 embedding_dim,
                 padding_idx=None,
                 max_norm=None,
                 norm_type=2.,
                 scale_grad_by_freq=False,
                 sparse=False,
                 prior_sigma_1=0.1,
                 prior_sigma_2=0.002,
                 prior_pi=1,
                 posterior_mu_init=0,
                 posterior_rho_init=-6.0,
                 freeze=False,
                 prior_dist=None):
        """Set up a Bayesian embedding layer: a variational (mu, rho)
        weight table with its sampler and a scale-mixture prior
        (as in the Bayes-by-Backprop paper). No bias term."""
        super().__init__()

        self.freeze = freeze

        # scale-mixture prior and posterior-initialization settings
        self.prior_sigma_1 = prior_sigma_1
        self.prior_sigma_2 = prior_sigma_2
        self.posterior_mu_init = posterior_mu_init
        self.posterior_rho_init = posterior_rho_init

        self.prior_pi = prior_pi
        self.prior_dist = prior_dist

        # nn.Embedding-style configuration
        self.num_embeddings = num_embeddings
        self.embedding_dim = embedding_dim
        self.padding_idx = padding_idx
        self.max_norm = max_norm
        self.norm_type = norm_type
        self.scale_grad_by_freq = scale_grad_by_freq
        self.sparse = sparse

        # variational weight table: (num_embeddings, embedding_dim)
        table_shape = (num_embeddings, embedding_dim)
        self.weight_mu = nn.Parameter(
            torch.Tensor(*table_shape).normal_(posterior_mu_init, 0.1))
        self.weight_rho = nn.Parameter(
            torch.Tensor(*table_shape).normal_(posterior_rho_init, 0.1))
        self.weight_sampler = TrainableRandomDistribution(self.weight_mu,
                                                          self.weight_rho)

        # scale-mixture prior for the weight table (as in the BBP paper)
        self.weight_prior_dist = PriorWeightDistribution(
            self.prior_pi, self.prior_sigma_1, self.prior_sigma_2,
            dist=self.prior_dist)

        # accumulators updated elsewhere (e.g. during the forward pass)
        self.log_prior = 0
        self.log_variational_posterior = 0
    def test_gaussian_log_posterior(self):
        """log_posterior() of a drawn sample should be a real value (not NaN)."""
        mu = torch.Tensor(10, 10).uniform_(-1, 1)
        rho = torch.Tensor(10, 10).uniform_(-1, 1)
        dist = TrainableRandomDistribution(mu, rho)

        # draw a sample first — mirrors the intended usage before log_posterior()
        _ = dist.sample()
        log_posterior = dist.log_posterior()

        # NaN is the only value that compares unequal to itself
        self.assertEqual(log_posterior == log_posterior, torch.tensor(True))
    def test_gaussian_sample(self):
        """Two consecutive draws differ and each matches the parameter shape."""
        mu = torch.Tensor(10, 10).uniform_(-1, 1)
        rho = torch.Tensor(10, 10).uniform_(-1, 1)
        dist = TrainableRandomDistribution(mu, rho)

        first = dist.sample()
        second = dist.sample()

        # sampling is stochastic, so at least one entry should differ
        self.assertEqual((first != second).any(), torch.tensor(True))
        self.assertEqual(mu.shape, first.shape)
        self.assertEqual(rho.shape, first.shape)
    def test_scale_mixture_prior(self):
        """The scale-mixture prior log-density of a sample is a real value."""
        mu = torch.Tensor(10, 10).uniform_(-1, 1)
        rho = torch.Tensor(10, 10).uniform_(-1, 1)
        dist = TrainableRandomDistribution(mu, rho)

        sample = dist.sample()
        log_posterior = dist.log_posterior()

        prior_dist = PriorWeightDistribution(0.5, 1, .002)
        log_prior = prior_dist.log_prior(sample)

        # NaN is the only value that compares unequal to itself
        self.assertEqual(log_prior == log_prior, torch.tensor(True))
        # equivalent to asserting log_prior <= 0 (prior density at most 1)
        self.assertEqual(log_posterior <= log_posterior - log_prior,
                         torch.tensor(True))
    def __init__(self,
                 in_features,
                 out_features,
                 bias=True,
                 prior_sigma_1=0.1,
                 prior_sigma_2=0.002,
                 prior_pi=1,
                 posterior_mu_init=0,
                 posterior_rho_init=-6.0,
                 freeze=False,
                 prior_dist=None,
                 **kwargs):
        """Set up a Bayesian recurrent cell: variational (mu, rho) parameters
        and samplers for the input-hidden weights, hidden-hidden weights and
        bias — each sized out_features * 4 (LSTM-style gate stacking) — plus
        scale-mixture priors (as in the Bayes-by-Backprop paper)."""
        super().__init__(**kwargs)

        # cell configuration
        self.in_features = in_features
        self.out_features = out_features
        self.use_bias = bias
        self.freeze = freeze

        # posterior initialization settings
        self.posterior_mu_init = posterior_mu_init
        self.posterior_rho_init = posterior_rho_init

        # scale-mixture prior settings
        self.prior_sigma_1 = prior_sigma_1
        self.prior_sigma_2 = prior_sigma_2
        self.prior_pi = prior_pi
        self.prior_dist = prior_dist

        gate_width = out_features * 4  # four gates' worth of units

        # input-to-hidden variational weights and sampler
        self.weight_ih_mu = nn.Parameter(
            torch.Tensor(in_features, gate_width).normal_(
                posterior_mu_init, 0.1))
        self.weight_ih_rho = nn.Parameter(
            torch.Tensor(in_features, gate_width).normal_(
                posterior_rho_init, 0.1))
        self.weight_ih_sampler = TrainableRandomDistribution(
            self.weight_ih_mu, self.weight_ih_rho)
        self.weight_ih = None  # placeholder; set elsewhere

        # hidden-to-hidden variational weights and sampler
        self.weight_hh_mu = nn.Parameter(
            torch.Tensor(out_features, gate_width).normal_(
                posterior_mu_init, 0.1))
        self.weight_hh_rho = nn.Parameter(
            torch.Tensor(out_features, gate_width).normal_(
                posterior_rho_init, 0.1))
        self.weight_hh_sampler = TrainableRandomDistribution(
            self.weight_hh_mu, self.weight_hh_rho)
        self.weight_hh = None  # placeholder; set elsewhere

        # variational bias and sampler
        self.bias_mu = nn.Parameter(
            torch.Tensor(gate_width).normal_(posterior_mu_init, 0.1))
        self.bias_rho = nn.Parameter(
            torch.Tensor(gate_width).normal_(posterior_rho_init, 0.1))
        self.bias_sampler = TrainableRandomDistribution(
            self.bias_mu, self.bias_rho)
        self.bias = None  # placeholder; set elsewhere

        # scale-mixture priors, one per parameter group (as in the BBP paper)
        self.weight_ih_prior_dist = PriorWeightDistribution(
            self.prior_pi, self.prior_sigma_1, self.prior_sigma_2,
            dist=self.prior_dist)
        self.weight_hh_prior_dist = PriorWeightDistribution(
            self.prior_pi, self.prior_sigma_1, self.prior_sigma_2,
            dist=self.prior_dist)
        self.bias_prior_dist = PriorWeightDistribution(
            self.prior_pi, self.prior_sigma_1, self.prior_sigma_2,
            dist=self.prior_dist)

        self.init_sharpen_parameters()

        # accumulators updated elsewhere (e.g. during the forward pass)
        self.log_prior = 0
        self.log_variational_posterior = 0