def test_bernoulli_underflow_gradient(init_tensor_type):
    p = Variable(init_tensor_type([0]), requires_grad=True)
    bernoulli = Bernoulli(sigmoid(p) * 0.0)
    log_pdf = bernoulli.batch_log_pdf(Variable(init_tensor_type([0])))
    log_pdf.sum().backward()
    assert_equal(log_pdf.data[0], 0)
    assert_equal(p.grad.data[0], 0)

def test_bernoulli_with_logits_overflow_gradient(init_tensor_type):
    p = Variable(init_tensor_type([1e40]), requires_grad=True)
    bernoulli = Bernoulli(logits=p)
    log_pdf = bernoulli.batch_log_pdf(Variable(init_tensor_type([1])))
    log_pdf.sum().backward()
    assert_equal(log_pdf.data[0], 0)
    assert_equal(p.grad.data[0], 0)

class MixtureDistribution(Distribution):
    def __init__(self, mix1, mix2, p=None):
        self.mix1 = mix1
        self.mix2 = mix2
        self.p = Bernoulli(p)

    def log_prob(self, x):
        lp1 = self.mix1.log_prob(x)
        lp2 = self.mix2.log_prob(x)
        p1 = self.p.mean * torch.exp(lp1)
        p2 = (1 - self.p.mean) * torch.exp(lp2)
        p = torch.log(p1 + p2)
        # numerically stable log-domain fallback for entries where the
        # direct computation underflows to a non-finite value
        pj = torch.logaddexp(torch.log(self.p.mean) + lp1,
                             torch.log(1 - self.p.mean) + lp2)
        mask = torch.isfinite(p)
        p[~mask] = pj[~mask]
        return p

    def sample(self, n_samples=None):
        if n_samples is None:
            p = self.p.sample()
            return p * self.mix1.sample() + (1 - p) * self.mix2.sample()
        else:
            p = self.p.sample(n_samples)
            return p * self.mix1.sample(n_samples) + (1 - p) * self.mix2.sample(n_samples)

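# A minimal usage sketch for the MixtureDistribution above, assuming the two
# components are torch.distributions objects and `p` is the mixing weight of
# mix1; the component parameters here are illustrative, not from the source.
two_modes = MixtureDistribution(Normal(torch.tensor(-2.0), torch.tensor(0.5)),
                                Normal(torch.tensor(3.0), torch.tensor(1.0)),
                                p=torch.tensor(0.7))
draws = two_modes.sample(torch.Size([1000]))      # 0.7/0.3 mix of the two modes
logp = two_modes.log_prob(torch.tensor([-40.0]))  # exercises the stable log-domain fallback
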
def model(): p_latent = pyro.param("p1", torch.tensor([[0.7], [0.3]])) p_obs = pyro.param("p2", torch.tensor([[0.9], [0.1]])) latents = [torch.ones(1, 1)] observes = [] for t in range(self.model_steps): latents.append( pyro.sample( "latent_{}".format(str(t)), Bernoulli( torch.index_select(p_latent, 0, latents[-1].view(-1).long()) ), ) ) observes.append( pyro.sample( "observe_{}".format(str(t)), Bernoulli( torch.index_select(p_obs, 0, latents[-1].view(-1).long()) ), obs=self.data[t], ) ) return torch.sum(torch.cat(latents))
def model(): pyro.sample("x", Bernoulli(0.5)) for i in range(depth): pyro.sample("a_{}".format(i), Bernoulli(0.5), infer={"enumerate": "parallel"}) pyro.sample("y", y_dist, infer={"enumerate": "parallel"}) for i in range(depth): pyro.sample("b_{}".format(i), Bernoulli(0.5), infer={"enumerate": "parallel"})
def __init__(self, src, trg, pad_index=0, word_drop=0.0, unk_indx=0, use_cuda=False):
    src, src_lengths = src
    self.src = src
    self.src_lengths = src_lengths
    self.src_mask = (src != pad_index).unsqueeze(-2)
    self.nseqs = src.size(0)
    self.trg = None
    self.trg_y = None
    self.trg_mask = None
    self.trg_lengths = None
    self.ntokens = None

    if trg is not None:
        trg, trg_lengths = trg
        self.trg = trg[:, :-1]
        # word dropout approach proposed in Bowman et al. (2016)
        mask = trg.new_zeros(self.trg.size(0), self.trg.size(1)).float().fill_(word_drop)
        mask = Bernoulli(mask).sample().byte()
        try:
            mask = mask.bool()
        except AttributeError:
            # older PyTorch versions have no .bool(); .byte() already suffices there
            pass
        self.trg.masked_fill_(mask, unk_indx)
        self.trg_lengths = trg_lengths
        self.trg_y = trg[:, 1:]
        self.trg_mask = (self.trg_y != pad_index)
        self.ntokens = (self.trg_y != pad_index).data.sum().item()

    if use_cuda:
        self.src = self.src.cuda()
        self.src_mask = self.src_mask.cuda()
        if trg is not None:
            self.trg = self.trg.cuda()
            self.trg_y = self.trg_y.cuda()
            self.trg_mask = self.trg_mask.cuda()
    else:
        self.src = self.src.cpu()
        self.src_mask = self.src_mask.cpu()
        self.src_lengths = self.src_lengths.cpu()
        if trg is not None:
            self.trg = self.trg.cpu()
            self.trg_y = self.trg_y.cpu()
            self.trg_mask = self.trg_mask.cpu()
            self.trg_lengths = self.trg_lengths.cpu()

def model(fc_network: BNN, x_data, y_data):
    # create a prior for each layer's weight and bias, p(w)
    priors = {}
    for i, layer in enumerate(fc_network.fc):
        if not hasattr(layer, 'weight'):
            continue
        priors["model.{}.weight".format(i)] = \
            Normal(Variable(torch.zeros_like(layer.weight)),
                   Variable(torch.ones_like(layer.weight)))
        priors["model.{}.bias".format(i)] = \
            Normal(Variable(torch.zeros_like(layer.bias)),
                   Variable(torch.ones_like(layer.bias)))
    # lift module parameters to random variables sampled from the priors,
    # i.e. sample a network from the prior
    lifted_module = pyro.random_module("module", fc_network, priors)
    # sample a regressor (which also samples w and b)
    lifted_reg_model = lifted_module()
    with pyro.plate("map", len(x_data)):
        # run the regressor forward conditioned on inputs
        prediction_mean = lifted_reg_model(x_data).squeeze()
        pyro.sample("obs", Bernoulli(prediction_mean), obs=y_data.squeeze())

def location(preference):
    """
    Flips a weighted coin to decide between two locations to meet. In this
    example, we assume that Alice and Bob share a prior preference for one
    location over another, reflected in the value of ``preference`` below.
    """
    return pyro.sample("loc", Bernoulli(preference))

def inspect_posterior_samples(i):
    cll = local_guide(i, None)
    mean_param = Variable(torch.zeros(1, 784), requires_grad=True)
    # do MLE for class means
    mu = pyro.param("mean_of_class_" + str(cll[0]), mean_param)
    dat = pyro.sample("obs_" + str(i), Bernoulli(mu))
    return dat

def model():
    p_latent = pyro.sample("p_latent", Beta(self.alpha0, self.beta0))
    x_dist = Bernoulli(p_latent)
    pyro.map_data(self.data, lambda i, x: pyro.observe("obs", x_dist, x), batch_size=2)
    return p_latent

def model_sample(cll=None):
    # sample z from the prior
    z_mu, z_sigma = Variable(torch.zeros([1, 20])), Variable(torch.ones([1, 20]))
    z = pyro.sample("latent", DiagNormal(z_mu, z_sigma))
    alpha = Variable(torch.ones([1, 10]) / 10.)
    if cll is None:
        # no class label supplied: sample one from a uniform categorical
        cll = pyro.sample('class', Categorical(alpha))
        print('sampling class')
    # decode z and the class label into image means
    img_mu = pt_decode.forward(z, cll)
    # score against actual images
    img = pyro.sample("sample", Bernoulli(img_mu))
    return img, img_mu

def f_x(z, params):
    """
    Samples from P(X|Z).

    P(X|Z) is a Bernoulli with E(X|Z) = logistic(Z * W), where W is a
    parameter (matrix). In training, the hyperparameters of the distribution
    over W are estimated such that in P(X|Z) the elements of the vector X
    are conditionally independent of one another given Z.
    """
    def sample_W():
        """
        Sample the W matrix.

        W is a parameter of P(X|Z) that is sampled from a Normal with
        location and scale hyperparameters w_mean0 and w_std0.
        """
        w_mean0 = params['w_mean0']
        w_std0 = params['w_std0']
        W = pyro.sample("W", Normal(loc=w_mean0, scale=w_std0))
        return W

    W = sample_W()
    linear_exp = torch.matmul(z, W)
    # sample x using the Bernoulli likelihood parameterized by logits
    x = pyro.sample("x", Bernoulli(logits=linear_exp))
    x = torch.mul(x, params['mask'])
    return x

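# A minimal sketch of calling f_x above outside of inference; the shapes and
# the layout of the params dict are assumptions for illustration only.
params = {
    'w_mean0': torch.zeros(20, 784),  # prior location of W
    'w_std0': torch.ones(20, 784),    # prior scale of W
    'mask': torch.ones(784),          # keep every output dimension
}
z = torch.randn(1, 20)                # one latent code
x = f_x(z, params)                    # one Bernoulli draw of shape (1, 784)
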
def model_given_c(data, cll):
    decoder_c = pyro.module("decoder_c", pt_decode_c)
    decoder_z = pyro.module("decoder_z", pt_decode_z)
    z_mu, z_sigma = decoder_c.forward(cll)
    z = pyro.sample("latent_z", DiagNormal(z_mu, z_sigma))
    img_mu = decoder_z.forward(z)
    pyro.observe("obs", Bernoulli(img_mu), data.view(-1, 784))

def model(corpus):
    global counter
    dhWeights = pyro.sample("dhWeights", dhWeights_Prior)
    distanceWeights = pyro.sample("distanceWeights", distanceWeights_Prior)
    for q in pyro.irange("data_loop", corpus.length(), subsample_size=5, use_cuda=False):
        point = corpus.getSentence(q)
        current = [point]
        counter += 1
        printHere = (counter % 100 == 0)
        batchOrderedLogits = list(zip(*[
            orderSentence(x, dhLogits, y % batchSize == 0 and printHere,
                          dhWeights, distanceWeights)
            for y, x in enumerate(current)
        ]))
        batchOrdered = batchOrderedLogits[0]
        lengths = list(map(len, current))
        maxLength = lengths[int(0.8 * batchSize)]
        assert batchSize == 1
        if printHere:
            print("BACKWARD 3 " + __file__ + " " + language + " " + str(myID) + " " + str(counter))
        logitCorr = batchOrdered[0][-1]["relevant_logprob_sum"]
        pyro.sample("result_Correct_{}".format(q),
                    Bernoulli(logits=logitCorr),
                    obs=Variable(torch.FloatTensor([1.0])))

def model(self, data):
    decoder = pyro.module('decoder', self.vae_decoder)
    z_mean, z_std = ng_zeros([data.size(0), 20]), ng_ones([data.size(0), 20])
    z = pyro.sample('latent', Normal(z_mean, z_std))
    img = decoder.forward(z)
    pyro.sample('obs', Bernoulli(img), obs=data.view(-1, 784))

def model():
    prior = MultivariateNormal(torch.zeros(m), torch.Tensor(K))
    fs = pyro.sample("fs", prior)
    # hard link: success probability is 1 where f > 0 and 0 elsewhere
    likelihood = Bernoulli(probs=(fs > 0).float())
    # soft alternative: likelihood = Bernoulli(probs=torch.sigmoid(fs))
    ys = pyro.sample("ys", likelihood)
    return ys

def model_xz(data, foo):
    decoder_xz = pyro.module("decoder_xz", pt_decode_xz)
    z_mu, z_sigma = Variable(torch.zeros([data.size(0), 20])), \
        Variable(torch.ones([data.size(0), 20]))
    z = pyro.sample("latent", DiagNormal(z_mu, z_sigma))
    img_mu = decoder_xz.forward(z)
    pyro.observe("obs", Bernoulli(img_mu), data.view(-1, 784))
    return z

def local_model(i, datum):
    beta = Variable(torch.ones(1, 10)) * 0.1
    cll = pyro.sample("class_of_datum_" + str(i), Categorical(beta))
    mean_param = Variable(torch.zeros(1, 784), requires_grad=True)
    # do MLE for class means
    mu = pyro.param("mean_of_class_" + str(cll[0]), mean_param)
    pyro.observe("obs_" + str(i), Bernoulli(mu), datum)
    return cll

def model(self, x):
    pyro.module("decoder", self.decoder)
    with pyro.plate("data", x.shape[0]):
        z_loc = x.new_zeros(torch.Size((x.shape[0], self.params.z_dim)))
        z_scale = x.new_ones(torch.Size((x.shape[0], self.params.z_dim)))
        z = pyro.sample("latent", Normal(z_loc, z_scale).to_event(1))
        res = self.decoder(z)
        pyro.sample("obs", Bernoulli(res).to_event(1), obs=x)

def sub_model(datum):
    mu_latent = Variable(torch.ones(nr_samples, dim_z)) * 0.5
    sigma_latent = Variable(torch.ones(mu_latent.size()))
    z = pyro.sample("embedding_of_datum_" + str(i), DiagNormal(mu_latent, sigma_latent))
    mean_beta = z.mm(weight)
    beta = sigmoid(mean_beta)
    pyro.observe("obs_" + str(i), Bernoulli(beta), datum)

def model():
    p = torch.tensor([0.5])
    loc = torch.zeros(1)
    scale = torch.ones(1)
    x = pyro.sample("x", Normal(loc, scale))
    y = pyro.sample("y", Bernoulli(p))
    z = pyro.sample("z", Normal(loc, scale))
    return dict(x=x, y=y, z=z)

def model():
    p = torch.tensor([0.5])
    loc = torch.zeros(1)
    scale = torch.ones(1)
    x = pyro.sample("x", Normal(loc, scale))  # Before the discrete variable.
    y = pyro.sample("y", Bernoulli(p))
    z = pyro.sample("z", Normal(loc, scale))  # After the discrete variable.
    return dict(x=x, y=y, z=z)

def classify(data):
    z = guide_latent(data, None)
    img_mu = pt_decode_xz.forward(z)
    alpha_mu = pt_decode_c.forward(z)
    img = pyro.sample("sample_img", Bernoulli(img_mu))
    cll = pyro.sample("sample_cll", Categorical(alpha_mu))
    return img, img_mu, cll

def model():
    p = Variable(torch.Tensor([0.5]))
    mu = Variable(torch.zeros(1))
    sigma = Variable(torch.ones(1))
    x = pyro.sample("x", Normal(mu, sigma))  # Before the discrete variable.
    y = pyro.sample("y", Bernoulli(p))
    z = pyro.sample("z", Normal(mu, sigma))  # After the discrete variable.
    return dict(x=x, y=y, z=z)

def model(self, data):
    decoder = pyro.module('decoder', self.vae_decoder)
    z_mean, z_std = torch.zeros([data.size(0), 20]), torch.ones([data.size(0), 20])
    with pyro.plate('data', data.size(0)):
        z = pyro.sample('latent', Normal(z_mean, z_std).to_event(1))
        img = decoder.forward(z)
        pyro.sample('obs', Bernoulli(img).to_event(1), obs=data.reshape(-1, 784))

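# A minimal sketch of training the model above with SVI; `vae` (the enclosing
# object, which must also define a matching `guide`) and `data_loader` are
# assumptions standing in for the surrounding training code.
optimizer = pyro.optim.Adam({'lr': 1e-3})
svi = pyro.infer.SVI(vae.model, vae.guide, optimizer, loss=pyro.infer.Trace_ELBO())
for batch in data_loader:
    loss = svi.step(batch)  # one gradient step on the negative ELBO
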
def model_latent(data):
    decoder_c = pyro.module("decoder_c", pt_decode_c)
    decoder_z = pyro.module("decoder_z", pt_decode_z)
    alpha = Variable(torch.ones([data.size(0), 10])) / 10.
    cll = pyro.sample('latent_class', Categorical(alpha))
    z_mu, z_sigma = decoder_c.forward(cll)
    z = pyro.sample("latent_z", DiagNormal(z_mu, z_sigma))
    img_mu = decoder_z.forward(z)
    pyro.observe("obs", Bernoulli(img_mu), data.view(-1, 784))

def local_model(i, datum):
    beta = Variable(torch.ones(1)) * 0.5
    c = pyro.sample("class_of_datum_" + str(i), Bernoulli(beta))
    mean_param = Variable(torch.zeros(784), requires_grad=True)
    # do MLE for class means
    m = pyro.param("mean_of_class_" + str(c[0]), mean_param)
    sigma = Variable(torch.ones(m.size()))
    pyro.observe("obs_" + str(i), DiagNormal(m, sigma), datum)
    return c

def getWordEmbeddingsWithWordDropout(self, embeddings, indexes, pad_mask):
    # word dropout approach proposed in Bowman et al. (2016)
    if len(pad_mask.size()) > 2:
        pad_mask = pad_mask.squeeze()
    # clone the tensor so the caller's indexes are not mutated
    indexes = indexes.clone()
    mask = torch.zeros_like(indexes).float().fill_(self.word_drop)
    mask = Bernoulli(mask).sample().byte()
    # don't drop tokens past the sentence length
    mask = mask * pad_mask.byte()
    mask[0, :] = 0  # do not mask out the sos token
    try:
        mask = mask.bool()
    except AttributeError:
        # older PyTorch versions have no .bool(); .byte() already suffices there
        pass
    indexes.masked_fill_(mask, self.unk_tok_indx)
    return embeddings(indexes)

def model(self, img, label):
    pyro.module("decoder", self.decoder)
    options = {'device': img.device, 'dtype': img.dtype}
    with pyro.plate("data", img.shape[0]):
        z_mean = torch.zeros(img.shape[0], self.latent_dim, **options)
        z_variance = torch.ones(img.shape[0], self.latent_dim, **options)
        z_sample = pyro.sample("latent", Normal(z_mean, z_variance).to_event(1))
        image = self.decoder.forward(z_sample, self.label_variable(label))
        pyro.sample("obs", Bernoulli(image).to_event(1), obs=img)

def survives(self, t, λ, μ, ρ):
    # time at which this lineage dies (time runs backward from the present at 0)
    t_end = t - Exponential(μ).sample()
    if t_end <= 0:
        # lineage reaches the present; it is sampled with probability ρ
        if Bernoulli(ρ).sample():
            return True
        t_end = 0
    # each birth event along the lineage may found a surviving subtree
    for i in range(int(Poisson(λ * (t - t_end)).sample())):
        τ = Uniform(t_end, t).sample()
        if self.survives(τ, λ, μ, ρ):
            return True
    return False

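# A minimal Monte Carlo sketch using survives above; `process` (the enclosing
# object) and the parameter values are assumptions for illustration. It
# estimates the probability that a lineage starting at time t=10 before the
# present leaves at least one sampled descendant.
trials = [bool(process.survives(10.0, 1.0, 0.5, 0.1)) for _ in range(1000)]
p_survive = sum(trials) / len(trials)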