def sample(self, X, compute_logprob=False):
    dist = Normal(F.linear(X, self.W),
                  self.noise * torch.eye(self.K),
                  learnable=False)
    z = dist.sample(1).squeeze(0)
    if compute_logprob:
        return z, dist.log_prob(z)
    return z
class ProbabilisticPCA(Distribution):
    has_latents = True

    def __init__(self, D, K=2, noise=1., tau=None):
        super().__init__()
        self.D = D
        self.K = K
        self.W = Parameter(torch.Tensor(D, K).float())
        self.noise = torch.tensor(noise)
        self.latent = Normal(torch.zeros(K), torch.ones(K), learnable=False)
        self.tau = tau
        self.prior = None
        if tau:
            self.prior = Normal(torch.zeros(K), torch.full((K, ), tau),
                                learnable=False)
        self.reset_parameters()

    def reset_parameters(self):
        init.kaiming_uniform_(self.W, a=math.sqrt(5))

    def prior_probability(self, z):
        if self.prior is None:
            return 0.
        return self.prior.log_prob(z)

    def log_prob(self, X, z):
        dist = Normal(F.linear(z, self.W),
                      torch.full((z.size(0), self.D), self.noise),
                      learnable=False)
        return dist.log_prob(X) + self.prior_probability(z)

    def sample(self, z=None, batch_size=1):
        if z is None:
            if self.prior is None:
                raise ValueError(
                    'PPCA has no prior distribution to sample latents from, '
                    'please set tau in init')
            z = self.prior.sample(batch_size)
        dist = Normal(F.linear(z, self.W),
                      torch.full((z.size(0), self.D), self.noise),
                      learnable=False)
        return dist.sample(1).squeeze(0)

    def fit(self, X, variational_dist=None, elbo_kwargs={}, **kwargs):
        if variational_dist is None:
            variational_dist = PPCA_Variational_V2(self)
        data = Data(X)
        stats = train(data, self, ELBO(variational_dist, **elbo_kwargs), **kwargs)
        return stats

    def transform(self, X):
        return X.mm(self.W)
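# Usage sketch (added for illustration, not part of the library): fit
# ProbabilisticPCA to toy data and project it into the latent space. The
# epochs/lr keywords are assumed to be forwarded through fit() to train(),
# as in the other models in this file; the synthetic data is arbitrary.
X_toy = torch.randn(500, 10)
ppca = ProbabilisticPCA(D=10, K=2, noise=1., tau=1.)
ppca.fit(X_toy, epochs=100, lr=1e-2)   # ELBO training with the default variational model
Z_toy = ppca.transform(X_toy)          # (500, 2) latent projection X W
X_gen = ppca.sample(batch_size=5)      # (5, 10) draws decoded from the latent prior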
def test_gmm_clustering():
    model = MixtureModel([
        Normal([3.3, 3.3], [2.3, 0.1, 0.1, 7.]),
        Normal([-5.3, -6.3], [7, 4.2, 3.1, 3])
    ], [0.75, 0.25])
    X = model.sample(100).detach()
    m = GaussianMixtureModel(n_dims=2)
    m.fit(X, epochs=100, track_parameters=False)
    assert m.sample(5).shape == (5, 2)
    assert m.log_prob(m.sample(5)).shape == (5, )
    assert m.predict(X).shape == (100, )
    model.num_parameters
def test_gans(model):
    X = MixtureModel(
        [Normal(-4., 2.3, learnable=False),
         Normal(4., 2.3, learnable=False)],
        [0.5, 0.5]).sample(10000)
    X = X.numpy()
    stats = model.fit(X, epochs=5, lr=1e-4)
    preds = model.sample(10000)
    model.predict(model.sample(100))
    model.num_parameters
    try:
        model.log_prob(model.sample(100))
    except NotImplementedError:
        pass
def test_emd_distribution():
    p_model = Normal(0., 1.)
    q_model = Normal(-4., 3.)
    emd_primal, gamma_primal = emd(p_model, q_model, batch_size=1024, n_bins=20)
    emd_dual, (f, g) = emd(p_model, q_model, dual=True, batch_size=1024, n_bins=20)
    # the primal transport plan should sum to (approximately) one
    assert (gamma_primal.sum() - 1.).abs() < 1e-2
def test_forward(gan):
    if gan.n_dims == 1:
        q_model = MixtureModel([Normal([-0.5], [[1.0]]),
                                Normal([0.5], [[1.0]])], [0.5, 0.5])
        p_model = MixtureModel([Normal([2.3], [[2.2]]),
                                Normal([-2.3], [[2.2]])], [0.5, 0.5])
    else:
        q_model = MixtureModel([Normal([0., 0.], [1., 0., 0., 1.0]),
                                Normal([0., 0.], [1., 0., 0., 1.0])], [0.25, 0.75])
        p_model = MixtureModel([Normal([0., 0.], [1., 0., 0., 1.0]),
                                Normal([0., 0.], [1., 0., 0., 1.0])], [0.25, 0.75])
    gan(p_model, q_model)
def __init__(self, n_components=2, n_dims=1):
    super().__init__()
    self.n_components = n_components
    self.n_dims = n_dims
    self.model = MixtureModel([
        Normal(torch.randn(n_dims), torch.eye(n_dims))
        for _ in range(n_components)
    ], [1.0 / n_components for _ in range(n_components)])
def __init__(self, input_dim=1, output_shape=1, tau=1.):
    super().__init__(input_dim,
                     output_shape,
                     distribution=partial(Normal,
                                          scale=torch.ones(1, output_shape),
                                          learnable=False),
                     prior=Normal(0., tau, learnable=False))
class PMF(Distribution):

    def __init__(self, N, M, D=5, tau=None):
        super().__init__()
        self.N = N
        self.M = M
        self.D = D  # latent
        self.U = Parameter(torch.Tensor(D, N).float())
        self.V = Parameter(torch.Tensor(D, M).float())
        if tau is None:
            self.prior = None
        else:
            self.prior = Normal(0., tau, learnable=False)
        self.reset_parameters()

    def reset_parameters(self):
        init.kaiming_uniform_(self.U, a=math.sqrt(5))
        init.kaiming_uniform_(self.V, a=math.sqrt(5))

    def prior_penalty(self):
        if not self.prior:
            return 0.
        return self.prior.log_prob(
            torch.cat([p.view(-1) for p in self.parameters()]).view(-1, 1)).sum()

    def reconstruct(self):
        return self.U.t().mm(self.V)

    def log_prob(self, R):
        if not isinstance(R, torch.Tensor):
            R = torch.tensor(R)
        R = R.view(-1, self.N * self.M).float()
        mean = self.reconstruct().view(-1)
        return Normal(mean, torch.ones_like(mean),
                      learnable=False).log_prob(R) + self.prior_penalty()

    def sample(self, batch_size, noise_std=1.0):
        return self.reconstruct().expand(
            (batch_size, self.N, self.M)) + noise_std * torch.randn(
                (batch_size, self.N, self.M))

    def fit(self, R, **kwargs):
        data = Data(R.view(-1, self.N * self.M))
        stats = train(data, self, cross_entropy, **kwargs)
        return stats

    def mse(self, R):
        if not isinstance(R, torch.Tensor):
            R = torch.tensor(R)
        return (self.reconstruct() - R.float()).pow(2).mean()

    def mae(self, R):
        if not isinstance(R, torch.Tensor):
            R = torch.tensor(R)
        return (self.reconstruct() - R.float()).abs().mean()
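# Usage sketch (added for illustration, not part of the library): factorize a
# small random rating matrix with PMF and inspect reconstruction error. The
# epochs/lr keywords are assumed to be forwarded through fit() to train().
R_toy = torch.randn(20, 30)               # toy N x M rating matrix
pmf = PMF(N=20, M=30, D=5, tau=1.)
pmf.fit(R_toy, epochs=100, lr=1e-2)
print(pmf.mse(R_toy), pmf.mae(R_toy))     # training reconstruction error
R_hat = pmf.reconstruct()                 # (20, 30) low-rank reconstruction U^T V
R_samples = pmf.sample(4, noise_std=0.5)  # (4, 20, 30) noisy reconstructions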
def test_gan_train(gan):
    if gan.n_dims == 1:
        q_model = MixtureModel([Normal([-0.5], [[1.0]]),
                                Normal([0.5], [[1.0]])], [0.5, 0.5])
        p_model = MixtureModel([Normal([2.3], [[2.2]]),
                                Normal([-2.3], [[2.2]])], [0.5, 0.5])
    else:
        q_model = MixtureModel([Normal([0., 0.], [1., 0., 0., 1.0]),
                                Normal([0., 0.], [1., 0., 0., 1.0])], [0.25, 0.75])
        p_model = MixtureModel([Normal([0., 0.], [1., 0., 0., 1.0]),
                                Normal([0., 0.], [1., 0., 0., 1.0])], [0.25, 0.75])
    train(p_model, q_model, gan, optimizer="RMSprop",
          epochs=3, lr=1e-3, batch_size=512)
    X = p_model.sample(100)
    gan.classify(X)
def test_normal_lognormal():
    model = LogNormal(0.0, 1.0)
    transform = TransformDistribution(Normal(0.0, 1.0), [Exp()])
    x = model.sample(4)
    assert torch.all((transform.log_prob(x) - model.log_prob(x)).abs() < 1e-5)
    x = transform.sample(4)
    assert torch.all((transform.log_prob(x) - model.log_prob(x)).abs() < 1e-5)
    transform.get_parameters()
def test_normal_affine():
    model = Normal(1.0, 4.0)
    transform = TransformDistribution(Normal(0.0, 1.0), [Affine(1.0, 2.0)])
    x = model.sample(4)
    assert torch.all((transform.log_prob(x) - model.log_prob(x)).abs() < 1e-5)
    x = transform.sample(4)
    assert torch.all((transform.log_prob(x) - model.log_prob(x)).abs() < 1e-5)
    transform.get_parameters()
def plot_loss_function(loss, q_ref=Normal, p_model=Normal(), n_plot=100, batch_size=64):
    xlist = np.linspace(-15., 15.0, n_plot)
    ylist = np.linspace(1e-1, 50.0, n_plot)
    X, Y = np.meshgrid(xlist, ylist)
    Z = np.array(
        [[loss(p_model, q_ref(x, y), batch_size).item() for x in xlist]
         for y in ylist])
    Z = np.log1p(Z - Z.min())
    cp = plt.contourf(X, Y, Z,
                      levels=np.linspace(Z.min(), Z.max(), 100),
                      cmap='RdGy')
    plt.title('Log Loss')
    plt.colorbar()
    plt.xlabel(r'$\mu$')
    plt.ylabel(r'$\sigma$')
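# Usage sketch (added for illustration): plot a divergence surface over a grid
# of Normal (mu, sigma) parameters. Assumes forward_kl from dpm.criterion
# (imported in the test modules below) is callable as loss(p, q, batch_size)
# and returns a scalar tensor, matching how `loss` is used above.
plot_loss_function(forward_kl, q_ref=Normal, p_model=Normal(0., 1.),
                   n_plot=50, batch_size=64)
plt.show()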
def __init__(self, n_components=2, n_dims=1, variational_kwargs={}, elbo_kwargs={}):
    super().__init__()
    self.n_components = n_components
    self.n_dims = n_dims
    self.normals = ModuleList([
        Normal(torch.randn(n_dims), torch.eye(n_dims))
        for _ in range(n_components)
    ])
    variational_kwargs.update({
        'input_dim': n_dims,
        'output_shapes': [n_components]
    })
    self.variational_kwargs = variational_kwargs
    self.elbo_kwargs = elbo_kwargs
    self.categorical = VariationalCategorical(variational_kwargs)
    self.criterion = ELBO(self.categorical, **elbo_kwargs)
    self.prior = Categorical(
        [1.0 / n_components for _ in range(n_components)], learnable=False)
from dpm.distributions import (
    Arcsine, AsymmetricLaplace, Bernoulli, Beta, Categorical, Cauchy,
    ChiSquare, Convolution, Data, DiracDelta, Dirichlet, Exponential,
    FisherSnedecor, Gamma, Generator, GumbelSoftmax, Gumbel, HalfCauchy,
    HalfNormal, HyperbolicSecant, Langevin, Laplace, LogLaplace, LogCauchy,
    LogNormal, Logistic, LogitNormal, Normal, Rayleigh, RelaxedBernoulli,
    StudentT, Uniform, Distribution)
from dpm.distributions import (MixtureModel, GumbelMixtureModel)
import torch.distributions as dist
import numpy as np
import torch
import pytest

test_normal_dists = [
    (Normal(0., 1.), 1),
    (Normal([0.], [1.]), 1),
    (Normal([0.], [[1.]]), 1),
    (Normal([0., 0.], [1., 1.]), 2),
    (Normal([0., 0.], [[1., 0.], [0., 1.]]), 2),
    (Normal([0., 0.], [1., 0., 0., 1.]), 2),
]


@pytest.mark.parametrize("dist,n_dims", test_normal_dists)
def test_normal_dist(dist, n_dims):
    assert dist.sample(1).shape == (1, n_dims)
    assert dist.log_prob(dist.sample(1)).shape == (1, )
    samples = dist.sample(64)
    assert samples.shape == (64, n_dims)
def test_normal_errors():
    model = Normal()
    assert model._diag_type == 'diag'
    model._diag_type = 'FAKE'
    try:
        model.log_prob(None)
    except NotImplementedError:
        pass
    try:
        model.sample(4)
    except NotImplementedError:
        pass
    try:
        model.entropy()
    except NotImplementedError:
        pass
    try:
        model.scale
    except NotImplementedError:
        pass
    model = Normal([0., 0.], [3., 1.0, 1., 3.])
    assert model._diag_type == 'cholesky'
    try:
        model.cdf(5.)
    except NotImplementedError:
        pass
    try:
        model.icdf(5.)
    except NotImplementedError:
        pass
from dpm.distributions import (Normal, Exponential, GumbelSoftmax, Cauchy,
                               Beta, LogNormal, Gamma, RelaxedBernoulli,
                               Uniform, StudentT, Dirichlet, FisherSnedecor,
                               HalfCauchy, HalfNormal, Laplace, Logistic,
                               ChiSquare)
from dpm.distributions import MixtureModel, GumbelMixtureModel
from dpm.monte_carlo import metropolis
from dpm.train import train
from dpm.criterion import (forward_kl, reverse_kl, js_divergence, cross_entropy)
import numpy as np
import matplotlib.pyplot as plt
import pytest

models = [
    (Normal(0., 1.), Normal(0., 1.)),
    (Exponential(0.5), Exponential(0.5)),
    (Cauchy(0., 1.), Cauchy(0., 1.)),
    (Beta(0.5, 1.), Beta(0.5, 1.)),
    (LogNormal(0., 1.), LogNormal(0., 1.)),
    (Gamma(0.5, 1.), Gamma(0.5, 1.)),
    (Uniform(-1.0, 3.0), Uniform(-1.0, 3.0)),
    (StudentT(30.0, 1.0, 3.0), StudentT(30.0, 1.0, 3.0)),
    (FisherSnedecor(10.0, 10.0), FisherSnedecor(10.0, 10.0)),
    (HalfCauchy(1.0), HalfCauchy(1.0)),
    (HalfNormal(1.0), HalfNormal(1.0)),
    (Laplace(0., 1.), Laplace(0., 1.)),
    (MixtureModel([Normal(0., 1.), Normal(1., 3.)], [0.25, 0.75]),
     MixtureModel([Normal(0., 1.), Normal(1., 3.)], [0.25, 0.75])),
    (GumbelMixtureModel([Normal(0., 1.), Normal(1., 3.)], [0.25, 0.75]),
     GumbelMixtureModel([Normal(0., 1.), Normal(1., 3.)], [0.25, 0.75])),
]
def __init__(self, n_classes=2, n_features=10):
    super().__init__(
        Categorical(probs=[1.0 / n_classes for _ in range(n_classes)]),
        [Normal(loc=torch.randn(n_features), scale=torch.ones(n_features))
         for _ in range(n_classes)])
    self.n_classes = n_classes
    self.n_features = n_features
def __init__(self, input_dim=1, output_shape=1, tau=1.):
    super().__init__(input_dim=input_dim,
                     output_shapes=output_shape,
                     output_activations=Sigmoid(),
                     distribution=partial(Bernoulli, learnable=False),
                     prior=Normal(0., tau, learnable=False))
def create_dist(self, class_num):
    return Normal(self.x_means[class_num], self.covariance, learnable=False)
def test_mcmc_2d():
    true = MixtureModel([
        Normal([5.2, 5.2], [[3.0, 0.0], [0.0, 3.0]]),
        Normal([0.0, 0.0], [[2.0, 0.0], [0.0, 2.0]]),
        Normal([-5.2, -5.2], [[1.5, 0.0], [0.0, 1.5]])
    ], [0.25, 0.5, 0.25])
    samples = metropolis(true, epochs=100, burn_in=10)
    samples = metropolis(true, epochs=100, burn_in=10, keep_every=5)
    samples = metropolis(true, epochs=10, burn_in=1, keep_every=5, init=None)


test_dists = [
    (Normal(0., 1.), 1),
    (Exponential(0.5), 1),
    (Cauchy(0., 1.), 1),
    (Beta(0.5, 1.), 1),
    (LogNormal(0., 1.), 1),
    (Gamma(0.5, 1.), 1),
    (RelaxedBernoulli([0.5]), 1),
    (Uniform(-1., 3.), 1),
    (StudentT(30., 1., 3.), 1),
    (Dirichlet(0.5), 1),
    (FisherSnedecor(10., 10.), 1),
    (HalfCauchy(1.), 1),
    (HalfNormal(1.), 1),
    (Laplace(0., 1.), 1),
    (MixtureModel([Normal(0., 1.), Normal(1., 3.)], [0.25, 0.75]), 1),
    (GumbelMixtureModel([Normal(0., 1.), Normal(1., 3.)], [0.25, 0.75]), 1),
]
class VAE(Distribution):
    has_latents = True

    def __init__(self, encoder_args={}, decoder_args={}, prior=None, elbo_kwargs={}):
        super().__init__()
        preset_encoder_args = {
            'input_dim': 1,
            'hidden_sizes': [24, 24],
            'activation': 'ReLU',
            'output_shapes': [1, 1],
            'output_activations': [None, 'Softplus'],
            'distribution': partial(Normal, learnable=False)
        }
        preset_decoder_args = {
            'input_dim': 1,
            'hidden_sizes': [24, 24],
            'activation': 'ReLU',
            'output_shapes': [1],
            'output_activations': [Sigmoid()],
            'distribution': partial(Bernoulli, learnable=False)
        }
        preset_encoder_args.update(encoder_args)
        preset_decoder_args.update(decoder_args)
        self.encoder = ConditionalModel(**preset_encoder_args)
        self.decoder = ConditionalModel(**preset_decoder_args)
        self.criterion = ELBO(self.encoder, **elbo_kwargs)
        self.prior = prior
        if prior is None:
            latent_dim = preset_decoder_args['input_dim']
            self.prior = Normal(torch.zeros(latent_dim),
                                torch.ones(latent_dim),
                                learnable=False)

    def log_prob(self, X, Z=None):
        # latent given
        if Z is not None:
            return self.decoder.log_prob(X, Z) + self.prior.log_prob(Z)
        Z, encoder_probs = self.encoder.sample(X, compute_logprob=True)
        prior_probs = self.prior.log_prob(Z)
        decoder_log_probs = self.decoder.log_prob(X, Z)
        return decoder_log_probs + prior_probs - encoder_probs

    def sample(self, batch_size, compute_logprob=False):
        Z = self.prior.sample(batch_size)
        return self.decoder.sample(Z, compute_logprob)

    def fit(self, x, use_elbo=True, **kwargs):
        data = Data(x)
        if use_elbo:
            return train(data, self, self.criterion, **kwargs)
        return train(data, self, cross_entropy, **kwargs)

    def parameters(self):
        for name, param in self.named_parameters(recurse=True):
            if 'encoder' in name:
                continue
            yield param
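# Usage sketch (added for illustration, not part of the library): train the
# default 1-D Bernoulli-decoder VAE on toy binary data. The epochs/lr/batch_size
# keywords are assumed to be forwarded through fit() to train().
x_toy = (torch.rand(1000, 1) > 0.5).float()   # toy binary observations
vae = VAE()                                   # default encoder/decoder presets
vae.fit(x_toy, use_elbo=True, epochs=50, lr=1e-3, batch_size=128)
x_gen = vae.sample(batch_size=10)             # decode draws from the standard Normal prior
log_px = vae.log_prob(x_toy[:5])              # single-sample estimate of log p(x) via the ELBO terms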
from dpm.distributions import LogNormal, Normal, TransformDistribution, Logistic
import dpm.distributions as dpmdist
from dpm.transforms import *
import dpm
import torch.autograd as autograd
import torch
from torch.distributions import Uniform
from torch.distributions.transforms import SigmoidTransform, AffineTransform
from torch.distributions.transformed_distribution import TransformedDistribution
from dpm.utils import integrate
import pytest

transforms_dist_list = [
    (TransformDistribution(Normal(0., 1.), [Exp()]), 1),
    (TransformDistribution(Normal([0., 0.], [1., 1.]), [Exp()]), 2),
    (TransformDistribution(Normal(0.0, 1.0), [Affine(1.0, 2.0)]), 1),
]


@pytest.mark.parametrize("transform,n_dims", transforms_dist_list)
def test_transform_dist(transform, n_dims):
    assert transform.sample(1).shape == (1, n_dims)
    assert transform.log_prob(transform.sample(1)).shape == (1, )
    samples = transform.sample(64)
    assert samples.shape == (64, n_dims)
    log_probs = transform.log_prob(samples)
    assert log_probs.shape == (64, )