def test_HSStep_NegativeBinomial_sparse():
    np.random.seed(2032)
    M = 5
    N = 50
    X = np.random.normal(size=N * M).reshape((N, M))
    beta_true = np.array([1, 1, 2, 2, 0])
    y_nb = pm.NegativeBinomial.dist(np.exp(X.dot(beta_true)), 1).random()

    X = sp.sparse.csr_matrix(X)

    N_draws = 500
    with pm.Model():
        beta = HorseShoe("beta", tau=1, shape=M)
        pm.NegativeBinomial(
            "y",
            mu=at.exp(sp_dot(X, at.shape_padright(beta))),
            alpha=1,
            observed=y_nb,
        )
        hsstep = HSStep([beta])
        trace = pm.sample(
            draws=N_draws,
            step=hsstep,
            chains=1,
            return_inferencedata=True,
            compute_convergence_checks=False,
        )

    beta_samples = trace.posterior["beta"][0].values

    assert beta_samples.shape == (N_draws, M)
    np.testing.assert_allclose(beta_samples.mean(0), beta_true, atol=0.5)

def test_HSStep_sparse():
    np.random.seed(2032)
    M = 5
    N = 50
    X = np.random.normal(size=N * M).reshape((N, M))
    beta_true = np.random.normal(10, size=M)
    y = np.random.normal(X.dot(beta_true), 1)

    X = sp.sparse.csr_matrix(X)
    M = X.shape[1]

    with pm.Model():
        beta = HorseShoe("beta", tau=1, shape=M)
        pm.Normal("y", mu=sp_dot(X, at.shape_padright(beta)), sigma=1, observed=y)
        hsstep = HSStep([beta])
        trace = pm.sample(
            draws=50,
            tune=0,
            step=hsstep,
            chains=1,
            return_inferencedata=True,
            compute_convergence_checks=False,
        )

    beta_samples = trace.posterior["beta"][0].values

    assert beta_samples.shape == (50, M)
    np.testing.assert_allclose(beta_samples.mean(0), beta_true, atol=0.3)

def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor.

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    This reduces or expands the number of dimensions of the input to
    exactly `leftdims`, by adding extra dimensions on the left or by
    combining some existing dimensions on the left of the input.

    Use `unpad_dims` to reshape back to the original dimensions.

    Examples
    --------
    Given input of shape (3, 5, 7), ``pad_dims(input, 2, 2)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7).
    Given that output from pad_dims, ``unpad_dims(output, input, 2, 2)``
    reshapes back to (3, 5, 7).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 2)``
    does not reshape and returns output with shape (3, 5, 7, 9).

    Given input of shape (3, 5, 7, 9, 11), ``pad_dims(input, 2, 2)``
    combines the first two dimensions and reshapes to (15, 7, 9, 11).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 3)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7, 9).
    """
    assert input.ndim >= rightdims

    if input.ndim == (leftdims + rightdims):
        return input

    # extract image dimensions
    img_shape = input.shape[-rightdims:]

    non_pool_ndim = input.ndim - rightdims
    if non_pool_ndim < leftdims:
        # too few dimensions, pad on the left
        dummy_dims = tensor.as_tensor([1] * (leftdims - non_pool_ndim))
        new_shape = tensor.join(0, dummy_dims, input.shape[:non_pool_ndim], img_shape)
    else:
        # too many dimensions, combine the leading dimensions
        batched_ndim = non_pool_ndim - leftdims + 1
        batch_size = tensor.prod(input.shape[:batched_ndim])
        # convert to a vector for tensor.join
        batch_size = tensor.shape_padright(batch_size, 1)
        new_shape = tensor.join(
            0, batch_size, input.shape[batched_ndim:non_pool_ndim], img_shape
        )

    # store in the required shape
    new_shape = tensor.cast(new_shape, "int64")
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND

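# Hypothetical NumPy sketch (not part of the module, names are illustrative only)
# showing the same shape arithmetic that `pad_dims` performs symbolically: prepend
# singleton dimensions when there are too few leading dimensions, or collapse the
# extra leading dimensions into a single batch dimension when there are too many.
import numpy as np


def _pad_dims_numpy_sketch(arr, leftdims, rightdims):
    assert arr.ndim >= rightdims
    if arr.ndim == leftdims + rightdims:
        return arr
    img_shape = arr.shape[-rightdims:]
    non_pool_ndim = arr.ndim - rightdims
    if non_pool_ndim < leftdims:
        # too few leading dimensions: prepend ones
        new_shape = (1,) * (leftdims - non_pool_ndim) + arr.shape[:non_pool_ndim] + img_shape
    else:
        # too many leading dimensions: merge them into one batch dimension
        batched_ndim = non_pool_ndim - leftdims + 1
        batch_size = int(np.prod(arr.shape[:batched_ndim]))
        new_shape = (batch_size,) + arr.shape[batched_ndim:non_pool_ndim] + img_shape
    return arr.reshape(new_shape)


# e.g. _pad_dims_numpy_sketch(np.zeros((3, 5, 7, 9, 11)), 2, 2).shape == (15, 7, 9, 11)
# and  _pad_dims_numpy_sketch(np.zeros((3, 5, 7)), 2, 2).shape == (1, 3, 5, 7)
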
def logp(self, states):
    r"""Create a Theano graph that computes the log-likelihood for a discrete Markov chain.

    This is the log-likelihood for the joint distribution of states, :math:`S_t`, conditional
    on state samples, :math:`s_t`, given by the following:

    .. math::

        \int_{S_0} P(S_1 = s_1 \mid S_0) dP(S_0) \prod^{T}_{t=2} P(S_t = s_t \mid S_{t-1} = s_{t-1})

    The first term (i.e. the integral) simply computes the marginal :math:`P(S_1 = s_1)`, so
    another way to express this result is as follows:

    .. math::

        P(S_1 = s_1) \prod^{T}_{t=2} P(S_t = s_t \mid S_{t-1} = s_{t-1})

    """  # noqa: E501
    states_tt = at.as_tensor(states)

    if states.ndim > 1 or self.Gammas.ndim > 3 or self.gamma_0.ndim > 1:
        raise NotImplementedError("Broadcasting not supported.")

    Gammas_tt = at_broadcast_to(self.Gammas, (states.shape[0],) + tuple(self.Gammas.shape)[-2:])
    gamma_0_tt = self.gamma_0

    Gamma_1_tt = Gammas_tt[0]
    P_S_1_tt = at.dot(gamma_0_tt, Gamma_1_tt)[states_tt[0]]

    # def S_logp_fn(S_tm1, S_t, Gamma):
    #     return at.log(Gamma[..., S_tm1, S_t])
    #
    # P_S_2T_tt, _ = aesara.scan(
    #     S_logp_fn,
    #     sequences=[
    #         {
    #             "input": states_tt,
    #             "taps": [-1, 0],
    #         },
    #         Gammas_tt,
    #     ],
    # )
    P_S_2T_tt = Gammas_tt[at.arange(1, states.shape[0]), states[:-1], states[1:]]

    log_P_S_1T_tt = at.concatenate([at.shape_padright(at.log(P_S_1_tt)), at.log(P_S_2T_tt)])

    res = log_P_S_1T_tt.sum()
    res.name = "states_logp"

    return res

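# Illustrative NumPy sketch (an assumption-laden helper, not part of the class):
# the advanced-indexing line above replaces the commented-out scan by picking
# P(S_t = s_t | S_{t-1} = s_{t-1}) out of each time-indexed transition matrix,
# mirroring the product formula in the docstring.
import numpy as np


def _dmc_logp_numpy_sketch(Gammas, gamma_0, states):
    # Gammas: (T, K, K) transition matrices, gamma_0: (K,) initial distribution,
    # states: (T,) integer state sequence.
    T = states.shape[0]
    # marginal P(S_1 = s_1) = sum_{s_0} gamma_0[s_0] * Gamma_1[s_0, s_1]
    log_p = np.log(gamma_0 @ Gammas[0])[states[0]]
    # sum of log P(S_t = s_t | S_{t-1} = s_{t-1}) for t = 2..T
    log_p += np.log(Gammas[np.arange(1, T), states[:-1], states[1:]]).sum()
    return log_p
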
def dist(
    cls,
    rho,
    sigma=None,
    tau=None,
    *,
    init_dist=None,
    steps=None,
    constant=False,
    ar_order=None,
    **kwargs,
):
    _, sigma = get_tau_sigma(tau=tau, sigma=sigma)
    sigma = at.as_tensor_variable(floatX(sigma))
    rhos = at.atleast_1d(at.as_tensor_variable(floatX(rho)))

    if "init" in kwargs:
        warnings.warn(
            "init parameter is now called init_dist. Using init will raise an error in a future release.",
            FutureWarning,
        )
        init_dist = kwargs["init"]

    ar_order = cls._get_ar_order(rhos=rhos, constant=constant, ar_order=ar_order)
    steps = get_steps(steps=steps, shape=kwargs.get("shape", None), step_shape_offset=ar_order)
    if steps is None:
        raise ValueError("Must specify steps or shape parameter")
    steps = at.as_tensor_variable(intX(steps), ndim=0)

    if init_dist is not None:
        if not isinstance(init_dist, TensorVariable) or not isinstance(
            init_dist.owner.op, RandomVariable
        ):
            raise ValueError(
                "Init dist must be a distribution created via the `.dist()` API, "
                f"got {type(init_dist)}"
            )
        check_dist_not_registered(init_dist)
        if init_dist.owner.op.ndim_supp > 1:
            raise ValueError(
                "Init distribution must have a scalar or vector support dimension, "
                f"got ndim_supp={init_dist.owner.op.ndim_supp}."
            )
    else:
        # Sigma must broadcast with ar_order
        init_dist = Normal.dist(sigma=at.shape_padright(sigma), size=(*sigma.shape, ar_order))

    # Tell Aeppl to ignore init_dist, as it will be accounted for in the logp term
    init_dist = ignore_logprob(init_dist)

    return super().dist([rhos, sigma, init_dist, steps, ar_order, constant], **kwargs)

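# Hedged usage sketch (assumes this `dist` classmethod belongs to an AR
# distribution with the signature shown above; the parameter values are
# illustrative only):
#
#     init = Normal.dist(0.0, 1.0)
#     ar = AR.dist(rho=[0.5, 0.25], sigma=1.0, init_dist=init, steps=100)
#
# When no explicit `init_dist` is given, the first `ar_order` points default to
# Normal draws with `sigma` padded on the right, as in the `else` branch above.
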
def logp(self, value):
    """
    Calculate log-probability of defined ``MixtureSameFamily`` distribution
    at specified value.

    Parameters
    ----------
    value : numeric
        Value(s) for which log-probability is calculated. If the log
        probabilities for multiple values are desired the values must be
        provided in a numpy array or Aesara tensor.

    Returns
    -------
    TensorVariable
    """
    comp_dists = self.comp_dists
    w = self.w
    mixture_axis = self.mixture_axis

    event_shape = comp_dists.shape[mixture_axis + 1 :]

    # To be able to broadcast the comp_dists.logp with w and value
    # we first have to pad the shape of w to the right with ones
    # so that it can broadcast with the event_shape.
    w = at.shape_padright(w, len(event_shape))

    # Second, we have to add the mixture_axis to the value tensor.
    # To insert the mixture axis at the correct location, we use the
    # negative number index. This way, we can also handle situations
    # in which value is an observed value with more batch dimensions
    # than the ones present in the comp_dists.
    comp_dists_ndim = len(comp_dists.shape)

    value = at.shape_padaxis(value, axis=mixture_axis - comp_dists_ndim)

    comp_logp = comp_dists.logp(value)
    return bound(
        logsumexp(at.log(w) + comp_logp, axis=mixture_axis, keepdims=False),
        w >= 0,
        w <= 1,
        at.allclose(w.sum(axis=mixture_axis - comp_dists_ndim), 1),
        broadcast_conditions=False,
    )

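# A minimal NumPy/SciPy sketch (assumed 1-D Normal components, not part of the
# class) of the "log-sum-exp over the mixture axis" computation done above:
# logp(x) = logsumexp(log(w) + comp_logp(x), axis=mixture_axis).
import numpy as np
from scipy.special import logsumexp as np_logsumexp
from scipy.stats import norm


def _mixture_logp_numpy_sketch(value, w, mus, sigmas):
    # w, mus, sigmas: (K,) mixture weights and per-component Normal parameters
    comp_logp = norm.logpdf(value, loc=mus, scale=sigmas)  # (K,)
    return np_logsumexp(np.log(w) + comp_logp, axis=-1)
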
def marginal_mixture_moment(op, rv, rng, weights, *components):
    ndim_supp = components[0].owner.op.ndim_supp
    weights = at.shape_padright(weights, ndim_supp)
    mix_axis = -ndim_supp - 1

    if len(components) == 1:
        moment_components = moment(components[0])
    else:
        moment_components = at.stack(
            [moment(component) for component in components],
            axis=mix_axis,
        )

    mix_moment = at.sum(weights * moment_components, axis=mix_axis)
    if components[0].dtype in discrete_types:
        mix_moment = at.round(mix_moment)
    return mix_moment

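# Worked NumPy example (a sketch with assumed values) of the rule implemented
# above: the mixture moment is the weights-weighted sum of the component moments
# along the mixture axis, rounded when the components are discrete.
import numpy as np

weights_example = np.array([0.2, 0.5, 0.3])
component_means_example = np.array([-1.0, 0.0, 2.0])
mixture_mean_example = np.sum(weights_example * component_means_example)  # -0.2 + 0.0 + 0.6 = 0.4
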
def __init__(self, w, comp_dists, mixture_axis=-1, *args, **kwargs):
    self.w = at.as_tensor_variable(w)
    if not isinstance(comp_dists, Distribution):
        raise TypeError(
            "The MixtureSameFamily distribution only accepts Distribution "
            f"instances as its components. Got {type(comp_dists)} instead."
        )
    self.comp_dists = comp_dists
    if mixture_axis < 0:
        mixture_axis = len(comp_dists.shape) + mixture_axis
        if mixture_axis < 0:
            raise ValueError(
                "`mixture_axis` is supposed to be in shape of components' distribution. "
                f"Got {mixture_axis + len(comp_dists.shape)} axis instead out of the bounds."
            )
    comp_shape = to_tuple(comp_dists.shape)
    self.shape = comp_shape[:mixture_axis] + comp_shape[mixture_axis + 1 :]
    self.mixture_axis = mixture_axis
    kwargs.setdefault("dtype", self.comp_dists.dtype)

    # Compute the mode so we don't always have to pass an initval
    defaults = kwargs.pop("defaults", [])
    event_shape = self.comp_dists.shape[mixture_axis + 1 :]
    _w = at.shape_padleft(
        at.shape_padright(w, len(event_shape)),
        len(self.comp_dists.shape) - w.ndim - len(event_shape),
    )
    mode = take_along_axis(
        self.comp_dists.mode,
        at.argmax(_w, keepdims=True),
        axis=mixture_axis,
    )
    self.mode = mode[(..., 0) + (slice(None),) * len(event_shape)]

    if not all_discrete(comp_dists):
        mean = at.as_tensor_variable(self.comp_dists.mean)
        self.mean = (_w * mean).sum(axis=mixture_axis)
        if "mean" not in defaults:
            defaults.append("mean")
    defaults.append("mode")

    super().__init__(defaults=defaults, *args, **kwargs)

def rv_op(cls, weights, *components, size=None):
    # Create new rng for the mix_indexes internal RV
    mix_indexes_rng = aesara.shared(np.random.default_rng())

    single_component = len(components) == 1
    ndim_supp = components[0].owner.op.ndim_supp

    if size is not None:
        components = cls._resize_components(size, *components)
    elif not single_component:
        # We might need to broadcast components when size is not specified
        shape = tuple(at.broadcast_shape(*components))
        size = shape[: len(shape) - ndim_supp]
        components = cls._resize_components(size, *components)

    # Extract replication ndims from components and weights
    ndim_batch = components[0].ndim - ndim_supp
    if single_component:
        # One dimension is taken by the mixture axis in the single component case
        ndim_batch -= 1

    # The weights may imply extra batch dimensions that go beyond what is already
    # implied by the component dimensions (ndim_batch)
    weights_ndim_batch = max(0, weights.ndim - ndim_batch - 1)

    # If weights are large enough that they would broadcast the component distributions
    # we try to resize them. This is necessary to avoid duplicated values in the
    # random method and for equivalency with the logp method
    if weights_ndim_batch:
        new_size = at.concatenate(
            [
                weights.shape[:weights_ndim_batch],
                components[0].shape[:ndim_batch],
            ]
        )
        components = cls._resize_components(new_size, *components)

        # Extract support and batch ndims from components and weights
        ndim_batch = components[0].ndim - ndim_supp
        if single_component:
            ndim_batch -= 1
        weights_ndim_batch = max(0, weights.ndim - ndim_batch - 1)

    assert weights_ndim_batch == 0

    # Component RVs terms are accounted for by the Mixture logprob, so they can be
    # safely ignored by Aeppl
    components = [ignore_logprob(component) for component in components]

    # Create an OpFromGraph that encapsulates the random generating process
    # Create dummy input variables with the same type as the ones provided
    weights_ = weights.type()
    components_ = [component.type() for component in components]
    mix_indexes_rng_ = mix_indexes_rng.type()

    mix_axis = -ndim_supp - 1

    # Stack components across mixture axis
    if single_component:
        # If single component, we consider it as being already "stacked"
        stacked_components_ = components_[0]
    else:
        stacked_components_ = at.stack(components_, axis=mix_axis)

    # Broadcast weights to (*batched dimensions, stack dimension), ignoring support dimensions
    weights_broadcast_shape_ = stacked_components_.shape[: ndim_batch + 1]
    weights_broadcasted_ = at.broadcast_to(weights_, weights_broadcast_shape_)

    # Draw mixture indexes and append (stack + ndim_supp) broadcastable dimensions to the right
    mix_indexes_ = at.random.categorical(weights_broadcasted_, rng=mix_indexes_rng_)
    mix_indexes_padded_ = at.shape_padright(mix_indexes_, ndim_supp + 1)

    # Index components and squeeze mixture dimension
    mix_out_ = at.take_along_axis(stacked_components_, mix_indexes_padded_, axis=mix_axis)
    mix_out_ = at.squeeze(mix_out_, axis=mix_axis)

    # Output mix_indexes rng update so that it can be updated in place
    mix_indexes_rng_next_ = mix_indexes_.owner.outputs[0]

    mix_op = MarginalMixtureRV(
        inputs=[mix_indexes_rng_, weights_, *components_],
        outputs=[mix_indexes_rng_next_, mix_out_],
    )

    # Create the actual MarginalMixture variable
    mix_out = mix_op(mix_indexes_rng, weights, *components)

    # Reference nodes to facilitate identification in other classmethods
    mix_out.tag.weights = weights
    mix_out.tag.components = components
    mix_out.tag.choices_rng = mix_indexes_rng

    return mix_out

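# NumPy sketch (assumed shapes, hypothetical helper) of the sampling graph built
# above for ndim_supp == 0: draw a categorical index per batch element from the
# weights, pad it with a trailing mixture axis, and use take_along_axis to pick
# the matching component draw.
import numpy as np


def _mixture_draw_numpy_sketch(rng, weights, stacked_components):
    # weights: (..., K); stacked_components: (..., K) draws, one per component,
    # stacked along the last (mixture) axis.
    cumulative = np.cumsum(weights, axis=-1)
    u = rng.random(weights.shape[:-1] + (1,))
    mix_indexes = (u > cumulative).sum(axis=-1)       # categorical draw per batch element
    mix_indexes_padded = mix_indexes[..., None]       # append the mixture axis back
    out = np.take_along_axis(stacked_components, mix_indexes_padded, axis=-1)
    return np.squeeze(out, axis=-1)                   # drop the mixture axis


# e.g. _mixture_draw_numpy_sketch(np.random.default_rng(0),
#                                 np.array([0.2, 0.8]),
#                                 np.array([5.0, -5.0]))
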
def _comp_logp(self, value):
    comp_dists = self.comp_dists

    if self.comp_is_distribution:
        # Value can be many things. It can be the self tensor, the mode
        # test point or it can be observed data. The latter case requires
        # careful handling of shape, as the observed's shape could look
        # like (repetitions,) + dist_shape, which does not include the last
        # mixture axis. For this reason, we try to eval the value.shape,
        # compare it with self.shape and shape_padright if we infer that
        # the value holds observed data
        try:
            val_shape = tuple(value.shape.eval())
        except AttributeError:
            val_shape = value.shape
        except aesara.graph.fg.MissingInputError:
            val_shape = None
        try:
            self_shape = tuple(self.shape)
        except AttributeError:
            # Happens in __init__ when computing self.logp(comp_modes)
            self_shape = None
        comp_shape = tuple(comp_dists.shape)
        ndim = value.ndim
        if val_shape is not None and not (
            (self_shape is not None and val_shape == self_shape) or val_shape == comp_shape
        ):
            # value is neither the test point nor the self tensor, it
            # is likely to hold observed values, so we must compute the
            # ndim discarding the dimensions that don't match
            # self_shape
            if self_shape and val_shape[-len(self_shape) :] == self_shape:
                # value has observed values for the Mixture
                ndim = len(self_shape)
            elif comp_shape and val_shape[-len(comp_shape) :] == comp_shape:
                # value has observed values for the Mixture components
                ndim = len(comp_shape)
            else:
                # We cannot infer what was passed, we handle this
                # as was done in earlier versions of Mixture. We pad
                # always if ndim is lower or equal to 1 (default
                # legacy implementation)
                if ndim <= 1:
                    ndim = len(comp_dists.shape) - 1
        else:
            # We reach this point if value does not hold observed data, so
            # we can use its ndim safely to determine shape padding, or it
            # holds something that we cannot infer, so we revert to using
            # the value's ndim for shape padding.
            # We will always pad a single dimension if ndim is lower or
            # equal to 1 (default legacy implementation)
            if ndim <= 1:
                ndim = len(comp_dists.shape) - 1
        if ndim < len(comp_dists.shape):
            value_ = at.shape_padright(value, len(comp_dists.shape) - ndim)
        else:
            value_ = value
        return comp_dists.logp(value_)
    else:
        return at.squeeze(
            at.stack([comp_dist.logp(value) for comp_dist in comp_dists], axis=-1)
        )