def __init__(self, K, L, n, N, D, device):
    """
    Args:
        K: number of clusters
        L: number of factors
        n: number of cells per individual
        N: number of individuals
        D: number of genes

    Shapes:
        W:       N x Ncells x D
        q_A:     2 x D x L
        q_gamma: 2 x N x L
        q_alpha: 2 x N x Ncells (roughly)
        q_beta:  2 x N x Ncells (roughly)  <-- inverse gamma
        q_mu:    2 x K x D
        q_sigma: 2 x K x D  <-- inverse gamma
        q_pi:    N x K
        q_z:     N x Ncells x K  <-- phi_ijk
    """
    super(MFMM, self).__init__()
    self.name = "mfmm"
    self.K = K  # number of clusters
    self.L = L  # number of factors
    self.N = N  # number of individuals
    self.n = n  # number of cells per individual
    self.N_tot = n * N
    self.D = D  # number of genes
    self.epsilon = .001
    self.device = device

    # Alternative deterministic initialization (unused):
    # q_A = torch.zeros(2, self.D, self.L)
    # q_gamma = torch.zeros(2, self.N, self.L)
    # q_alpha = torch.zeros(2, self.N, self.n)
    # q_beta = torch.ones(2, self.N, self.n)
    # q_mu = torch.zeros(2, self.K, self.D)
    # q_sigma = torch.ones(2, self.K, self.D)
    # q_pi = torch.ones(self.N, self.K)
    # phi = torch.ones(self.N, self.n, self.K)

    # Random initialization of the variational parameters.
    q_A = ds.Normal(0., 1.).sample([2, self.D, self.L])
    q_gamma = ds.Normal(0., 1.).sample([2, self.N, self.L])
    q_alpha = ds.Normal(0., 1.).sample([2, self.N, self.n])
    q_beta = ds.Exponential(4.).sample([2, self.N, self.n])
    q_mu = ds.Normal(0., 1.).sample([2, self.K, self.D])
    q_sigma = ds.Exponential(4.).sample([2, self.K, self.D])
    q_pi = ds.Normal(0., 1.).sample([self.N, self.K])
    phi = ds.Normal(0., 1.).sample([self.N, self.n, self.K])

    self.q_A = nn.Parameter(q_A)
    self.q_gamma = nn.Parameter(q_gamma)
    self.q_alpha = nn.Parameter(q_alpha)
    self.q_beta = nn.Parameter(q_beta)
    self.q_mu = nn.Parameter(q_mu)
    self.q_sigma = nn.Parameter(q_sigma)
    self.q_pi = nn.Parameter(q_pi)
    self.phi = nn.Parameter(phi)
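# Usage sketch (assumptions: this __init__ belongs to an MFMM(nn.Module) class and the
# module imports torch.nn as nn and torch.distributions as ds); the argument values
# below are illustrative only.
model = MFMM(K=3, L=2, n=100, N=5, D=50, device="cpu")
print(model.q_A.shape)  # torch.Size([2, 50, 2])  -> 2 x D x L
print(model.phi.shape)  # torch.Size([5, 100, 3]) -> N x n x K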
def additive_exponential(alpha1, alpha2, h):
    """
    Samples from an additive exponential process.

    h is the number of layers; alpha1 and alpha2 are the rates of the exponential
    distributions for the first gap and for the remaining h - 1 gaps.
    """
    g1 = dist.Exponential(torch.tensor([alpha1])).sample([1]).squeeze(0)
    # squeeze only the trailing dim so that h == 2 still yields a 1-D tensor
    g2 = dist.Exponential(torch.tensor([alpha2])).sample([h - 1]).squeeze(-1)
    taus = torch.cat((g1, g1 + torch.cumsum(g2, dim=0)), 0)
    return taus
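# Usage sketch (assumes torch and torch.distributions as dist are imported): the first
# boundary is an Exp(alpha1) draw and each later boundary adds an independent
# Exp(alpha2) gap, so the returned taus are strictly increasing.
taus = additive_exponential(alpha1=2.0, alpha2=0.5, h=4)
print(taus.shape)  # torch.Size([4])
print(taus)        # four strictly increasing values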
def __init__(self, K, V, M, M_val, p, alpha_fixed, device):
    """
    Args:
        K: number of topics
        V: vocabulary size
        M: number of documents
        M_val: number of validation documents
        p: switch prior
        alpha_fixed: if True, fix alpha
        device: specify cpu or gpu
    """
    super(pfsLDA, self).__init__()
    self.name = "pfslda"
    self.K = K
    self.V = V
    self.M = M
    self.M_val = M_val
    self.epsilon = 0.0000001
    self.alpha_fixed = alpha_fixed
    self.device = device

    # model parameters
    alpha = (torch.ones(self.K).to(device)
             if self.alpha_fixed else ds.Exponential(1).sample([self.K]))

    # beta stored pre-softmax (over V)
    beta = ds.Exponential(1).sample([self.K, self.V])
    beta = beta / beta.sum(dim=1, keepdim=True)

    # pi stored pre-softmax (over V)
    pi = ds.Exponential(1).sample([self.V])
    pi = pi / pi.sum()

    eta = ds.Normal(0, 1).sample([self.K])
    # delta stored pre-exponentiated
    delta = ds.Normal(0, 1).sample().abs()

    # variational parameters
    gamma = torch.ones(self.M, self.K)
    gamma_val = torch.ones(self.M_val, self.K)
    # phi stored pre-softmax (over K)
    phi = torch.ones(self.M, self.K, self.V)
    phi_val = torch.ones(self.M_val, self.K, self.V)
    # varphi stored pre-sigmoid
    varphi = torch.ones(self.V) * p

    self.alpha = alpha if self.alpha_fixed else nn.Parameter(alpha)
    self.beta = nn.Parameter(beta)
    self.gamma = nn.Parameter(gamma)
    self.phi = nn.Parameter(phi)
    self.eta = nn.Parameter(eta)
    self.delta = nn.Parameter(delta)
    self.pi = nn.Parameter(pi)
    self.varphi = nn.Parameter(varphi)
    self.phi_val = nn.Parameter(phi_val)
    self.gamma_val = nn.Parameter(gamma_val)
    self.p = p
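# Usage sketch (assumptions: this __init__ belongs to a pfsLDA(nn.Module) class with
# torch.distributions imported as ds); the sizes below are illustrative only.
model = pfsLDA(K=10, V=5000, M=200, M_val=20, p=0.5, alpha_fixed=True, device="cpu")
print(model.beta.shape)    # torch.Size([10, 5000]), each row normalized over V
print(model.varphi.shape)  # torch.Size([5000]), pre-sigmoid switch values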
def loss_normal2d_exponential(model_output, device, beta):
    # unpack the required quantities
    x_true = model_output["x_input"].permute(1, 0, 2)  # (seq_len, batch, dim)
    params = model_output["params"]
    rate = torch.exp(params)  # positive rate for the Exponential likelihood
    z_mu = model_output["z_mu"]
    z_log_var = model_output["z_log_var"]
    seq_length = rate.shape[0]

    # first time step: likelihood and KL terms
    t = 0
    p = distributions.Exponential(rate[t, :, :])
    log_prob = torch.sum(p.log_prob(x_true[t + 1, :, :]), dim=-1)

    # dimensions [batch_size, dimension]
    ones_vector = torch.ones((z_mu.shape[1], z_mu.shape[2])).to(device)

    # KL-divergence against the standard normal prior
    negative_kl = 0.5 * torch.sum(
        ones_vector + z_log_var[t, :, :] - z_mu[t, :, :]**2 -
        torch.exp(z_log_var[t, :, :]), dim=-1)
    # KL divergence through time
    kl_tt = -negative_kl

    # iterate over the remaining time steps in the sequence
    for t in range(1, seq_length - 1):
        p = distributions.Exponential(rate[t, :, :])
        log_prob += torch.sum(p.log_prob(x_true[t + 1, :, :]), dim=-1)

        # KL-divergence
        negative_kl += 0.5 * torch.sum(
            ones_vector + z_log_var[t, :, :] - z_mu[t, :, :]**2 -
            torch.exp(z_log_var[t, :, :]), dim=-1)
        kl_tt -= negative_kl

    NLL = -torch.mean(log_prob, dim=-1) / (seq_length - 1)
    KL = torch.mean(kl_tt, dim=0) / (seq_length - 1)
    ELBO = NLL + beta * KL
    return {"loss": ELBO, "ELBO": ELBO, "NLL": NLL, "KL": KL}
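# Smoke-test sketch. The dict keys match the function above; the shape convention
# assumed here (x_input: (batch, seq_len, dim); params, z_mu, z_log_var:
# (seq_len, batch, dim)) and the `distributions` alias for torch.distributions are
# assumptions about the surrounding module, not part of the original code.
batch, seq_len, dim = 8, 5, 3
dummy_output = {
    "x_input": torch.rand(batch, seq_len, dim) + 0.1,  # Exponential support is positive
    "params": torch.randn(seq_len, batch, dim),
    "z_mu": torch.randn(seq_len, batch, dim),
    "z_log_var": torch.randn(seq_len, batch, dim),
}
terms = loss_normal2d_exponential(dummy_output, device="cpu", beta=1.0)
print(terms["NLL"].item(), terms["KL"].item(), terms["loss"].item())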
def __init__(self, dataset_size=50000, pX=None):
    self.input_size = 2
    self.label_size = 1
    self.dataset_size = dataset_size
    self.base_dist = GaussianMixDistribution(pX=pX)
    self.X = self.base_dist.rsample(torch.Size([self.dataset_size]))
    self.weights_dist = D.Exponential(torch.tensor([1.0]))
def get_robust_regression(device: torch.device) -> GetterReturnType:
    N = 10
    K = 10

    # X.shape: (N, K + 1), Y.shape: (N, 1)
    X = torch.rand(N, K + 1, device=device)
    Y = torch.rand(N, 1, device=device)

    # Predefined nu_alpha and nu_beta, nu_alpha.shape: (1, 1), nu_beta.shape: (1, 1)
    nu_alpha = torch.rand(1, 1, device=device)
    nu_beta = torch.rand(1, 1, device=device)
    nu = dist.Gamma(nu_alpha, nu_beta)

    # Predefined sigma_rate: sigma_rate.shape: (N, 1)
    sigma_rate = torch.rand(N, 1, device=device)
    sigma = dist.Exponential(sigma_rate)

    # Predefined beta_mean and beta_sigma: beta_mean.shape: (K + 1, 1), beta_sigma.shape: (K + 1, 1)
    beta_mean = torch.rand(K + 1, 1, device=device)
    beta_sigma = torch.rand(K + 1, 1, device=device)
    beta = dist.Normal(beta_mean, beta_sigma)

    nu_value = nu.sample()
    nu_value.requires_grad_(True)

    sigma_value = sigma.sample()
    sigma_unconstrained_value = sigma_value.log()
    sigma_unconstrained_value.requires_grad_(True)

    beta_value = beta.sample()
    beta_value.requires_grad_(True)

    def forward(nu_value: Tensor, sigma_unconstrained_value: Tensor, beta_value: Tensor) -> Tensor:
        sigma_constrained_value = sigma_unconstrained_value.exp()
        mu = X.mm(beta_value)

        # For this model, we need to compute the following three scores:
        # We need to compute the first and second gradient of this score with respect
        # to nu_value.
        nu_score = dist.StudentT(nu_value, mu, sigma_constrained_value).log_prob(Y).sum() \
            + nu.log_prob(nu_value)

        # We need to compute the first and second gradient of this score with respect
        # to sigma_unconstrained_value.
        sigma_score = dist.StudentT(nu_value, mu, sigma_constrained_value).log_prob(Y).sum() \
            + sigma.log_prob(sigma_constrained_value) \
            + sigma_unconstrained_value

        # We need to compute the first and second gradient of this score with respect
        # to beta_value.
        beta_score = dist.StudentT(nu_value, mu, sigma_constrained_value).log_prob(Y).sum() \
            + beta.log_prob(beta_value)

        return nu_score.sum() + sigma_score.sum() + beta_score.sum()

    return forward, (nu_value.to(device), sigma_unconstrained_value.to(device), beta_value.to(device))
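# Illustrative driver (not part of the original getter): evaluate the joint score and
# take first-order gradients with respect to the three unconstrained values.
forward, (nu_value, sigma_unconstrained_value, beta_value) = get_robust_regression(torch.device("cpu"))
score = forward(nu_value, sigma_unconstrained_value, beta_value)
grads = torch.autograd.grad(score, (nu_value, sigma_unconstrained_value, beta_value))
print(score.item(), [g.shape for g in grads])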
def __init__(self, dataset_size=50000, is_test=False):
    self.input_size = 2
    self.label_size = 1
    self.dataset_size = dataset_size
    self.base_dist = SynthDistribution(is_test=is_test)
    self.X, self.Y = self.base_dist.rsample(torch.Size([self.dataset_size]))
    self.X = self.X.view(self.X.shape[0], -1)
    self.weights_dist = D.Exponential(torch.tensor([1.0]))
def __init__(self, ndim, input_feat_dim, hidden_dim1, hidden_dim2, dropout,
             encsto='semi', gdc='ip', ndist='Bernoulli', copyK=1, copyJ=1,
             device='cuda'):
    super(GCNModelSIGVAE, self).__init__()
    self.gce = GraphConvolution(ndim, hidden_dim1, dropout, act=F.relu)
    # self.gc0 = GraphConvolution(input_feat_dim, hidden_dim1, dropout, act=F.relu)
    self.gc1 = GraphConvolution(input_feat_dim, hidden_dim1, dropout, act=F.relu)
    self.gc2 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x)
    self.gc3 = GraphConvolution(hidden_dim1, hidden_dim2, dropout, act=lambda x: x)
    self.encsto = encsto
    self.dc = GraphDecoder(hidden_dim2, dropout, gdc=gdc)
    self.device = device

    if ndist == 'Bernoulli':
        self.ndist = tdist.Bernoulli(torch.tensor([.5], device=self.device))
    elif ndist == 'Normal':
        # fixed: this line used a comparison (==) instead of an assignment (=)
        self.ndist = tdist.Normal(torch.tensor([0.], device=self.device),
                                  torch.tensor([1.], device=self.device))
    elif ndist == 'Exponential':
        self.ndist = tdist.Exponential(torch.tensor([1.], device=self.device))

    # K and J are defined in http://proceedings.mlr.press/v80/yin18b/yin18b-supp.pdf,
    # Algorithm 1.
    self.K = copyK
    self.J = copyJ
    self.ndim = ndim

    # Parameters in networks gc1 and gce are NOT identically distributed, so we
    # reweight the output of gce() so that the effect of hiddenx + hiddene is
    # equivalent to gc(x || e).
    self.reweight = ((self.ndim + hidden_dim1) /
                     (input_feat_dim + hidden_dim1))**(.5)
def sigma(self) -> dist.Distribution:
    # rate = 1 / mean, so the returned Exponential has mean sigma_mean
    return dist.Exponential(1 / self.sigma_mean)
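# Standalone sanity-check sketch (assumed value for sigma_mean): parameterizing the
# rate as 1 / sigma_mean makes the Exponential's mean equal to sigma_mean.
import torch
import torch.distributions as dist

sigma_mean = torch.tensor(2.0)
print(dist.Exponential(1 / sigma_mean).mean)  # tensor(2.)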
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.distributions as dist
import matplotlib.pyplot as plt

from src.params_to_flat import params_to_flat

# problem dimensions
p = 2
n = 10
od = 1

# priors: a Laplace over the weights and a unit-rate Exponential
dist_w = dist.Laplace(0, 0.1)
dist_o = dist.Exponential(1)

# number of samples
N = 10**3

# create parameters
xi = nn.Parameter(torch.zeros(n, p), requires_grad=True)
old_ig = None

# network: two 64-unit hidden layers producing a mean of size od and the
# od * (od + 1) / 2 entries of a triangular scale factor
vmarg_l_1 = nn.Linear(p * n, 64)
vmarg_l_l2 = nn.Linear(64, 64)
vmarg_l_mu = nn.Linear(64, od)
vmarg_l_s = nn.Linear(64, int(od * (od + 1) / 2))
vmarg_nn_params = (list(vmarg_l_1.parameters()) + list(vmarg_l_l2.parameters()) +
                   list(vmarg_l_mu.parameters()) + list(vmarg_l_s.parameters()))
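# Forward-pass sketch (illustrative only): flatten xi, push it through the network, and
# reshape the od * (od + 1) / 2 outputs into a lower-triangular scale factor. The ReLU
# activations and the use of torch.tril_indices are assumptions, not part of the
# original script.
h = torch.relu(vmarg_l_l2(torch.relu(vmarg_l_1(xi.reshape(1, -1)))))
mu = vmarg_l_mu(h)        # shape (1, od)
s_entries = vmarg_l_s(h)  # shape (1, od * (od + 1) / 2)
L_scale = torch.zeros(od, od)
rows, cols = torch.tril_indices(od, od)
L_scale[rows, cols] = s_entries.squeeze(0)
print(mu.shape, L_scale.shape)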
def __init__(self, a):
    dist = dists.Exponential(a[0])
    # an Exponential with rate a[0] has mean 1 / a[0]
    super().__init__(dist, "exponential", 2, torch.tensor(0.), 1 / a[0])