def _setup_prototype(self, *args, **kwargs):
    super()._setup_prototype(*args, **kwargs)

    self._event_dims = {}
    self.locs = PyroModule()
    self.scales = PyroModule()

    # Initialize guide params
    for name, site in self.prototype_trace.iter_stochastic_nodes():
        # Collect unconstrained event_dims, which may differ from constrained event_dims.
        with helpful_support_errors(site):
            init_loc = (
                biject_to(site["fn"].support).inv(site["value"].detach()).detach()
            )
        event_dim = site["fn"].event_dim + init_loc.dim() - site["value"].dim()
        self._event_dims[name] = event_dim

        # If subsampling, repeat init_value to full size.
        for frame in site["cond_indep_stack"]:
            full_size = getattr(frame, "full_size", frame.size)
            if full_size != frame.size:
                dim = frame.dim - event_dim
                init_loc = periodic_repeat(init_loc, full_size, dim).contiguous()
        init_scale = torch.full_like(init_loc, self._init_scale)

        deep_setattr(self.locs, name, PyroParam(init_loc, constraints.real, event_dim))
        deep_setattr(
            self.scales,
            name,
            PyroParam(init_scale, self.scale_constraint, event_dim),
        )
def __init__(self, in_features, out_features):
    super().__init__(in_features, out_features)
    self.loc = PyroParam(torch.zeros_like(self.weight))
    self.scale = PyroParam(torch.ones_like(self.weight),
                           constraint=constraints.positive)
    self.weight = PyroSample(
        lambda self: dist.Normal(self.loc, self.scale).to_event(2))
def __init__(self, X, y, kernel, Xu, noise=None, mean_function=None,
             approx=None, jitter=1e-6):
    super(SparseGPRegression, self).__init__(X, y, kernel, mean_function, jitter)

    self.Xu = Parameter(Xu)

    noise = self.X.new_tensor(1.) if noise is None else noise
    self.noise = PyroParam(noise, constraints.positive)

    if approx is None:
        self.approx = "VFE"
    elif approx in ["DTC", "FITC", "VFE"]:
        self.approx = approx
    else:
        raise ValueError(
            "The sparse approximation method should be one of "
            "'DTC', 'FITC', 'VFE'.")
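A hedged usage sketch of the constructor above; the synthetic data, kernel choice, and inducing-point selection are illustrative assumptions, not taken from the source.

import torch
import pyro.contrib.gp as gp

X = torch.randn(50, 2)
y = torch.randn(50)
kernel = gp.kernels.RBF(input_dim=2)
Xu = X[::5].clone()  # 10 inducing inputs picked from the training data
sgpr = gp.models.SparseGPRegression(X, y, kernel, Xu=Xu, approx="VFE")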
def autoguide(self, name, dist_constructor):
    """
    Sets an autoguide for an existing parameter with name ``name`` (mimicking
    the behavior of module :mod:`pyro.infer.autoguide`).

    .. note:: `dist_constructor` should be one of
        :class:`~pyro.distributions.Delta`,
        :class:`~pyro.distributions.Normal`, and
        :class:`~pyro.distributions.MultivariateNormal`. More distribution
        constructors will be supported in the future if needed.

    :param str name: Name of the parameter.
    :param dist_constructor: A
        :class:`~pyro.distributions.distribution.Distribution` constructor.
    """
    if name not in self._priors:
        raise ValueError("There is no prior for parameter: {}".format(name))

    if dist_constructor not in [dist.Delta, dist.Normal, dist.MultivariateNormal]:
        raise NotImplementedError(
            "Unsupported distribution type: {}".format(dist_constructor))

    # delete old guide
    if name in self._guides:
        dist_args = self._guides[name][1]
        for arg in dist_args:
            delattr(self, "{}_{}".format(name, arg))

    p = self._priors[name]()  # init_to_sample strategy
    if dist_constructor is dist.Delta:
        support = self._priors[name].support
        if _is_real_support(support):
            p_map = Parameter(p.detach())
        else:
            p_map = PyroParam(p.detach(), support)
        setattr(self, "{}_map".format(name), p_map)
        dist_args = ("map",)
    elif dist_constructor is dist.Normal:
        loc = Parameter(biject_to(self._priors[name].support).inv(p).detach())
        scale = PyroParam(loc.new_ones(loc.shape), constraints.positive)
        setattr(self, "{}_loc".format(name), loc)
        setattr(self, "{}_scale".format(name), scale)
        dist_args = ("loc", "scale")
    elif dist_constructor is dist.MultivariateNormal:
        loc = Parameter(biject_to(self._priors[name].support).inv(p).detach())
        identity = eye_like(loc, loc.size(-1))
        scale_tril = PyroParam(identity.repeat(loc.shape[:-1] + (1, 1)),
                               constraints.lower_cholesky)
        setattr(self, "{}_loc".format(name), loc)
        setattr(self, "{}_scale_tril".format(name), scale_tril)
        dist_args = ("loc", "scale_tril")
    else:
        raise NotImplementedError

    self._guides[name] = (dist_constructor, dist_args)
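A hedged sketch of how ``autoguide`` is typically used on a ``Parameterized`` GP module; the ``Linear`` class and the site name below are illustrative assumptions following the pattern in Pyro's documentation.

import torch
import pyro.distributions as dist
from pyro.contrib.gp import Parameterized
from pyro.nn import PyroSample
from torch.nn import Parameter

class Linear(Parameterized):  # illustrative Parameterized subclass
    def __init__(self, a, b):
        super().__init__()
        self.a = Parameter(a)
        self.b = Parameter(b)

    def forward(self, x):
        return self.a * x + self.b

linear = Linear(torch.tensor(1.0), torch.tensor(0.0))
linear.b = PyroSample(dist.Normal(0.0, 1.0))  # assigning a PyroSample registers a prior for "b"
linear.autoguide("b", dist.Normal)            # creates learnable b_loc / b_scale guide parameters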
def _get_params(self, name: str, prior: Distribution):
    try:
        loc = deep_getattr(self.locs, name)
        scale = deep_getattr(self.scales, name)
        return loc, scale
    except AttributeError:
        pass

    # Initialize.
    with torch.no_grad():
        transform = biject_to(prior.support)
        event_dim = transform.domain.event_dim
        constrained = self.init_loc_fn({"name": name, "fn": prior}).detach()
        unconstrained = transform.inv(constrained)
        # Initialize the distribution to be an affine combination:
        #   init_scale * prior + (1 - init_scale) * init_loc
        init_loc = self._adjust_plates(unconstrained, event_dim)
        init_loc = init_loc * (1 - self._init_scale)
        init_scale = torch.full_like(init_loc, self._init_scale)

    deep_setattr(self, "locs." + name, PyroParam(init_loc, event_dim=event_dim))
    deep_setattr(
        self,
        "scales." + name,
        PyroParam(init_scale, constraint=constraints.positive, event_dim=event_dim),
    )
    return self._get_params(name, prior)
def _setup_prototype(self, *args, **kwargs):
    super()._setup_prototype(*args, **kwargs)
    # Initialize guide params
    self.loc = nn.Parameter(self._init_loc())
    self.scale = PyroParam(torch.full_like(self.loc, self._init_scale),
                           self.scale_constraint)
    self.scale_tril = PyroParam(eye_like(self.loc, self.latent_dim),
                                self.scale_tril_constraint)
def __init__(self, size):
    super().__init__()
    self.x = PyroParam(torch.zeros(size))
    self.y = PyroParam(lambda: torch.randn(size))
    self.z = PyroParam(torch.ones(size),
                       constraint=constraints.positive,
                       event_dim=1)
    self.s = PyroSample(dist.Normal(0, 1))
    self.t = PyroSample(lambda self: dist.Normal(self.s, self.z))
def __init__(self):
    super().__init__()
    self.x = nn.Parameter(torch.tensor(0.))
    self.y = PyroParam(torch.tensor(1.), constraint=constraints.positive)
    self.m = nn.Module()
    self.m.u = nn.Parameter(torch.tensor(2.0))
    self.p = PyroModule()
    self.p.v = nn.Parameter(torch.tensor(3.))
    self.p.w = PyroParam(torch.tensor(4.), constraint=constraints.positive)
def __init__(self, X, y, kernel, noise=None, mean_function=None, jitter=1e-6):
    super(GPRegression, self).__init__(X, y, kernel, mean_function, jitter)

    noise = self.X.new_tensor(1.) if noise is None else noise
    self.noise = PyroParam(noise, constraints.positive)
def __init__(self, input_dim, variance=None, lengthscale=None, active_dims=None):
    super(Isotropy, self).__init__(input_dim, active_dims)

    variance = torch.tensor(1.) if variance is None else variance
    self.variance = PyroParam(variance, constraints.positive)

    lengthscale = torch.tensor(1.) if lengthscale is None else lengthscale
    self.lengthscale = PyroParam(lengthscale, constraints.positive)
def __init__(self):
    super().__init__()
    self.loc = nn.Parameter(torch.zeros(2))
    self.scale = PyroParam(torch.ones(2), constraint=constraints.positive)
    self.z = PyroSample(
        lambda self: dist.Normal(self.loc, self.scale).to_event(1))
def __init__(self, input_dim, variance=None, lengthscale=None, period=None,
             active_dims=None):
    super(Periodic, self).__init__(input_dim, active_dims)

    variance = torch.tensor(1.) if variance is None else variance
    self.variance = PyroParam(variance, constraints.positive)

    lengthscale = torch.tensor(1.) if lengthscale is None else lengthscale
    self.lengthscale = PyroParam(lengthscale, constraints.positive)

    period = torch.tensor(1.) if period is None else period
    self.period = PyroParam(period, constraints.positive)
def __init__(self, X, y, kernel, likelihood, mean_function=None,
             latent_shape=None, whiten=False, jitter=1e-6, use_cuda=False):
    super().__init__(X, y, kernel, mean_function, jitter)

    self.likelihood = likelihood

    y_batch_shape = self.y.shape[:-1] if self.y is not None else torch.Size([])
    self.latent_shape = latent_shape if latent_shape is not None else y_batch_shape

    N = self.X.size(0)
    f_loc = self.X.new_zeros(self.latent_shape + (N,))
    self.f_loc = Parameter(f_loc)

    identity = eye_like(self.X, N)
    f_scale_tril = identity.repeat(self.latent_shape + (1, 1))
    self.f_scale_tril = PyroParam(f_scale_tril, constraints.lower_cholesky)

    self.whiten = whiten
    self._sample_latent = True

    if use_cuda:
        self.cuda()
def __init__(self, X, y, kernel, Xu, likelihood, mean_function=None,
             latent_shape=None, num_data=None, whiten=False, jitter=1e-6):
    super(VariationalSparseGP, self).__init__(X, y, kernel, mean_function, jitter)

    self.likelihood = likelihood
    self.Xu = Parameter(Xu)

    y_batch_shape = self.y.shape[:-1] if self.y is not None else torch.Size([])
    self.latent_shape = latent_shape if latent_shape is not None else y_batch_shape

    M = self.Xu.size(0)
    u_loc = self.Xu.new_zeros(self.latent_shape + (M,))
    self.u_loc = Parameter(u_loc)

    identity = eye_like(self.Xu, M)
    u_scale_tril = identity.repeat(self.latent_shape + (1, 1))
    self.u_scale_tril = PyroParam(u_scale_tril, constraints.lower_cholesky)

    self.num_data = num_data if num_data is not None else self.X.size(0)
    self.whiten = whiten
    self._sample_latent = True
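A hedged usage sketch of the constructor above; the data, kernel, and choice of inducing points are illustrative assumptions.

import torch
import pyro.contrib.gp as gp

X = torch.randn(200, 1)
y = torch.sin(3 * X).squeeze(-1)
kernel = gp.kernels.Matern52(input_dim=1)
Xu = X[::20].clone()                    # 10 inducing inputs
likelihood = gp.likelihoods.Gaussian()
vsgp = gp.models.VariationalSparseGP(X, y, kernel, Xu=Xu,
                                     likelihood=likelihood, whiten=True)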
def __init__(self, input_dim, rank=None, components=None, diagonal=None,
             active_dims=None):
    super().__init__(input_dim, active_dims)

    # Add a low-rank kernel with expected value torch.eye(input_dim, input_dim) / 2.
    if components is None:
        rank = input_dim if rank is None else rank
        components = torch.randn(input_dim, rank) * (0.5 / rank) ** 0.5
    else:
        rank = components.size(-1)
    if components.shape != (input_dim, rank):
        raise ValueError(
            "Expected components.shape == ({},rank), actual {}".format(
                input_dim, components.shape))
    self.components = Parameter(components)

    # Add a diagonal component initialized to torch.eye(input_dim, input_dim) / 2,
    # such that the total kernel has expected value the identity matrix.
    diagonal = (components.new_ones(input_dim) * 0.5
                if diagonal is None else diagonal)
    if diagonal.shape != (input_dim,):
        raise ValueError(
            "Expected diagonal.shape == ({},), actual {}".format(
                input_dim, diagonal.shape))
    self.diagonal = PyroParam(diagonal, constraints.positive)
def __init__(self, input_dim, variance=None, active_dims=None): if input_dim != 1: raise ValueError("Input dimensional for Brownian kernel must be 1.") super().__init__(input_dim, active_dims) variance = torch.tensor(1.) if variance is None else variance self.variance = PyroParam(variance, constraints.positive)
def map_estimate(self, name):
    """
    Construct a maximum a posteriori (MAP) guide using Delta distributions.

    :param str name: The name of a model sample site.
    :return: A sampled value.
    :rtype: torch.Tensor
    """
    site = self.prototype_trace.nodes[name]
    fn = site["fn"]
    event_dim = fn.event_dim
    init_needed = not hasattr(self, name)
    if init_needed:
        init_value = site["value"].detach()
    with ExitStack() as stack:
        for frame in site["cond_indep_stack"]:
            plate = self.plate(frame.name)
            if plate not in runtime._PYRO_STACK:
                stack.enter_context(plate)
            elif init_needed and plate.subsample_size < plate.size:
                # Repeat the init_value to full size.
                dim = plate.dim - event_dim
                assert init_value.size(dim) == plate.subsample_size
                ind = torch.arange(plate.size, device=init_value.device)
                ind = ind % plate.subsample_size
                init_value = init_value.index_select(dim, ind)
        if init_needed:
            setattr(self, name, PyroParam(init_value, fn.support, event_dim))
        value = getattr(self, name)
        return pyro.sample(name, dist.Delta(value, event_dim=event_dim))
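A hedged sketch of how ``map_estimate`` is typically called from inside an easy guide; the toy model and the site name "tau" are assumptions for illustration.

import torch
import pyro
import pyro.distributions as dist
from pyro.contrib.easyguide import easy_guide

def model(data):
    tau = pyro.sample("tau", dist.LogNormal(0.0, 1.0))
    with pyro.plate("data", len(data)):
        pyro.sample("obs", dist.Normal(0.0, tau), obs=data)

@easy_guide(model)
def guide(self, data):
    # Fits a point (Delta) estimate for "tau", constrained to the prior's support.
    self.map_estimate("tau")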
class AutoMultivariateNormal(AutoContinuous):
    """
    This implementation of :class:`AutoContinuous` uses a Cholesky
    factorization of a Multivariate Normal distribution to construct a guide
    over the entire latent space. The guide does not depend on the model's
    ``*args, **kwargs``.

    Usage::

        guide = AutoMultivariateNormal(model)
        svi = SVI(model, guide, ...)

    By default the mean vector is initialized by ``init_loc_fn()`` and the
    Cholesky factor is initialized to the identity times a small factor.

    :param callable model: A generative model.
    :param callable init_loc_fn: A per-site initialization function.
        See :ref:`autoguide-initialization` section for available functions.
    :param float init_scale: Initial scale for the standard deviation of each
        (unconstrained transformed) latent variable.
    """

    scale_constraint = constraints.softplus_positive
    scale_tril_constraint = constraints.unit_lower_cholesky

    def __init__(self, model, init_loc_fn=init_to_median, init_scale=0.1):
        if not isinstance(init_scale, float) or not (init_scale > 0):
            raise ValueError(
                "Expected init_scale > 0. but got {}".format(init_scale))
        self._init_scale = init_scale
        super().__init__(model, init_loc_fn=init_loc_fn)

    def _setup_prototype(self, *args, **kwargs):
        super()._setup_prototype(*args, **kwargs)
        # Initialize guide params
        self.loc = nn.Parameter(self._init_loc())
        self.scale = PyroParam(torch.full_like(self.loc, self._init_scale),
                               self.scale_constraint)
        self.scale_tril = PyroParam(eye_like(self.loc, self.latent_dim),
                                    self.scale_tril_constraint)

    def get_base_dist(self):
        return dist.Normal(torch.zeros_like(self.loc),
                           torch.ones_like(self.loc)).to_event(1)

    def get_transform(self, *args, **kwargs):
        scale_tril = self.scale[..., None] * self.scale_tril
        return dist.transforms.LowerCholeskyAffine(self.loc, scale_tril=scale_tril)

    def get_posterior(self, *args, **kwargs):
        """
        Returns a MultivariateNormal posterior distribution.
        """
        scale_tril = self.scale[..., None] * self.scale_tril
        return dist.MultivariateNormal(self.loc, scale_tril=scale_tril)

    def _loc_scale(self, *args, **kwargs):
        return self.loc, self.scale * self.scale_tril.diag()
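A minimal end-to-end training sketch for the guide above; the toy model, data, and optimizer settings are illustrative assumptions, following the Usage snippet in the docstring.

import torch
import pyro
import pyro.distributions as dist
from pyro.infer import SVI, Trace_ELBO
from pyro.infer.autoguide import AutoMultivariateNormal
from pyro.optim import Adam

def model(data):
    loc = pyro.sample("loc", dist.Normal(0.0, 10.0))
    scale = pyro.sample("scale", dist.LogNormal(0.0, 1.0))
    with pyro.plate("data", len(data)):
        pyro.sample("obs", dist.Normal(loc, scale), obs=data)

data = torch.randn(100) + 3.0
guide = AutoMultivariateNormal(model, init_scale=0.1)
svi = SVI(model, guide, Adam({"lr": 0.01}), Trace_ELBO())
for step in range(1000):
    svi.step(data)  # the MVN guide learns correlations between "loc" and "scale"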
def _setup_prototype(self, *args, **kwargs):
    super()._setup_prototype(*args, **kwargs)
    # Initialize guide params
    self.loc = nn.Parameter(self._init_loc())
    self.scale = PyroParam(
        self.loc.new_full((self.latent_dim,), self._init_scale),
        self.scale_constraint,
    )
def __init__(self, X, y, kernel, noise=None, mean_function=None, jitter=1e-6):
    assert isinstance(
        X, torch.Tensor
    ), "X needs to be a torch Tensor instead of a {}".format(type(X))
    if y is not None:
        assert isinstance(
            y, torch.Tensor
        ), "y needs to be a torch Tensor instead of a {}".format(type(y))
    super().__init__(X, y, kernel, mean_function, jitter)

    noise = self.X.new_tensor(1.0) if noise is None else noise
    self.noise = PyroParam(noise, constraints.positive)
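A hedged usage sketch of the constructor above; the synthetic data and kernel are illustrative.

import torch
import pyro.contrib.gp as gp

X = torch.linspace(0.0, 5.0, 20)
y = torch.sin(X) + 0.1 * torch.randn(20)
kernel = gp.kernels.RBF(input_dim=1)
gpr = gp.models.GPRegression(X, y, kernel, noise=torch.tensor(0.1))
mean, var = gpr(torch.linspace(0.0, 5.0, 50))  # posterior predictive at new inputs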
def __init__(self, input_dim, variance=None, bias=None, degree=1, active_dims=None):
    super().__init__(input_dim, variance, active_dims)

    bias = torch.tensor(1.) if bias is None else bias
    self.bias = PyroParam(bias, constraints.positive)

    if not isinstance(degree, int) or degree < 1:
        raise ValueError(
            "Degree for Polynomial kernel should be a positive integer.")
    self.degree = degree
class Constant(Kernel):
    r"""
    Implementation of Constant kernel:

        :math:`k(x, z) = \sigma^2.`
    """

    def __init__(self, input_dim, variance=None, active_dims=None):
        super(Constant, self).__init__(input_dim, active_dims)

        variance = torch.tensor(1.) if variance is None else variance
        self.variance = PyroParam(variance, constraints.positive)

    def forward(self, X, Z=None, diag=False):
        if diag:
            return self.variance.expand(X.size(0))

        if Z is None:
            Z = X
        return self.variance.expand(X.size(0), Z.size(0))
def apply_(self, net):
    """
    Replaces all nn.Parameter attributes on a given PyroModule ``net``
    according to the hide/expose logic and the class's ``prior_dist`` method.
    """
    for module_name, module in net.named_modules():
        for param_name, param in list(module.named_parameters(recurse=False)):
            full_name = module_name + "." + param_name
            if self.expose_fn(module, full_name):
                prior_dist = (self.prior_dist(full_name, module, param)
                              .expand(param.shape)
                              .to_event(param.dim()))
                setattr(module, param_name, PyroSample(prior_dist))
            else:
                setattr(module, param_name, PyroParam(param.data.detach()))
def __init__(self):
    super(Linear, self).__init__()
    self._pyro_name = "Linear"
    self.a = PyroParam(torch.tensor(1.), constraints.positive)
    self.b = PyroSample(dist.Normal(0, 1))
    self.c = PyroSample(dist.Normal(0, 1))
    self.d = PyroSample(dist.Normal(0, 4).expand([1]).to_event())
    self.e = PyroSample(dist.LogNormal(0, 1))
    self.f = PyroSample(
        dist.MultivariateNormal(torch.zeros(2), torch.eye(2)))
    self.g = PyroSample(dist.Exponential(1))
class Isotropy(Kernel):
    """
    Base class for a family of isotropic covariance kernels which are functions of the
    distance :math:`|x-z|/l`, where :math:`l` is the length-scale parameter.

    By default, the parameter ``lengthscale`` has size 1. To use the anisotropic
    version (a different lengthscale for each dimension), make sure that
    ``lengthscale`` has size equal to ``input_dim``.

    :param torch.Tensor lengthscale: Length-scale parameter of this kernel.
    """

    def __init__(self, input_dim, variance=None, lengthscale=None, active_dims=None):
        super(Isotropy, self).__init__(input_dim, active_dims)

        variance = torch.tensor(1.) if variance is None else variance
        self.variance = PyroParam(variance, constraints.positive)

        lengthscale = torch.tensor(1.) if lengthscale is None else lengthscale
        self.lengthscale = PyroParam(lengthscale, constraints.positive)

    def _square_scaled_dist(self, X, Z=None):
        r"""
        Returns :math:`\|\frac{X-Z}{l}\|^2`.
        """
        if Z is None:
            Z = X
        X = self._slice_input(X)
        Z = self._slice_input(Z)
        if X.size(1) != Z.size(1):
            raise ValueError("Inputs must have the same number of features.")

        scaled_X = X / self.lengthscale
        scaled_Z = Z / self.lengthscale
        X2 = (scaled_X ** 2).sum(1, keepdim=True)
        Z2 = (scaled_Z ** 2).sum(1, keepdim=True)
        XZ = scaled_X.matmul(scaled_Z.t())
        r2 = X2 - 2 * XZ + Z2.t()
        return r2.clamp(min=0)

    def _scaled_dist(self, X, Z=None):
        r"""
        Returns :math:`\|\frac{X-Z}{l}\|`.
        """
        return _torch_sqrt(self._square_scaled_dist(X, Z))

    def _diag(self, X):
        """
        Calculates the diagonal part of covariance matrix on active features.
        """
        return self.variance.expand(X.size(0))
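A sketch of how a concrete subclass would use the helpers above, following the squared-exponential form k(x, z) = variance * exp(-0.5 * r^2); the class name is an assumption for illustration.

import torch

class RBFLike(Isotropy):  # illustrative subclass name
    def forward(self, X, Z=None, diag=False):
        if diag:
            return self._diag(X)
        # Squared-exponential kernel built from the scaled distance helper.
        r2 = self._square_scaled_dist(X, Z)
        return self.variance * torch.exp(-0.5 * r2)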
def __init__(self):
    super().__init__()
    self.a = torch.nn.Parameter(torch.zeros(2))
    self.register_buffer("b", torch.zeros(3))
    self.c = torch.randn(4)  # this wouldn't work with torch.nn.Module.to()
    self.d = dist.Normal(0, 1)
    self.e = PyroParam(
        torch.randn(()),
        constraint=constraints.greater_than(torch.tensor(0.5)),
    )
    self.f = PyroSample(dist.Normal(0, 1))
    self.g = PyroSample(lambda self: dist.Normal(self.f, 1))
def __init__(self, input_dim, variance=None, lengthscale=None,
             scale_mixture=None, active_dims=None):
    super(RationalQuadratic, self).__init__(input_dim, variance, lengthscale,
                                            active_dims)

    if scale_mixture is None:
        scale_mixture = torch.tensor(1.)
    self.scale_mixture = PyroParam(scale_mixture, constraints.positive)
class WhiteNoise(Kernel):
    r"""
    Implementation of WhiteNoise kernel:

        :math:`k(x, z) = \sigma^2 \delta(x, z),`

    where :math:`\delta` is a Dirac delta function.
    """

    def __init__(self, input_dim, variance=None, active_dims=None):
        super(WhiteNoise, self).__init__(input_dim, active_dims)

        variance = torch.tensor(1.) if variance is None else variance
        self.variance = PyroParam(variance, constraints.positive)

    def forward(self, X, Z=None, diag=False):
        if diag:
            return self.variance.expand(X.size(0))

        if Z is None:
            return self.variance.expand(X.size(0)).diag()
        else:
            return X.data.new_zeros(X.size(0), Z.size(0))
def _setup_prototype(self, *args, **kwargs):
    super()._setup_prototype(*args, **kwargs)
    # Initialize guide params
    self.loc = nn.Parameter(self._init_loc())
    if self.rank is None:
        self.rank = int(round(self.latent_dim ** 0.5))
    self.scale = PyroParam(
        self.loc.new_full((self.latent_dim,), 0.5 ** 0.5 * self._init_scale),
        constraint=self.scale_constraint,
    )
    self.cov_factor = nn.Parameter(
        self.loc.new_empty(self.latent_dim, self.rank).normal_(
            0, 1 / self.rank ** 0.5))
def test_cache():
    class MyModule(PyroModule):
        def forward(self):
            return [self.gather(), self.gather()]

        def gather(self):
            return {
                "a": self.a,
                "b": self.b,
                "c": self.c,
                "p.d": self.p.d,
                "p.e": self.p.e,
                "p.f": self.p.f,
            }

    module = MyModule()
    module.a = nn.Parameter(torch.tensor(0.))
    module.b = PyroParam(torch.tensor(1.), constraint=constraints.positive)
    module.c = PyroSample(dist.Normal(0, 1))
    module.p = PyroModule()
    module.p.d = nn.Parameter(torch.tensor(3.))
    module.p.e = PyroParam(torch.tensor(4.), constraint=constraints.positive)
    module.p.f = PyroSample(dist.Normal(0, 1))
    assert module._pyro_context is module.p._pyro_context

    # Check that results are cached within a single invocation of .__call__().
    result1 = module()
    actual, expected = result1
    for key in ["a", "c", "p.d", "p.f"]:
        assert actual[key] is expected[key], key

    # Check that results are not cached across invocations of .__call__().
    result2 = module()
    for key in ["b", "c", "p.e", "p.f"]:
        assert result1[0][key] is not result2[0][key], key