def rv_op(cls, rhos, sigma, init_dist, steps, ar_order, constant_term, size=None):
    # Init dist should have shape (*size, ar_order)
    if size is not None:
        batch_size = size
    else:
        # In this case the size of the init_dist depends on the parameters shape
        # The last dimension of rho and init_dist does not matter
        batch_size = at.broadcast_shape(sigma, rhos[..., 0], init_dist[..., 0])
    if init_dist.owner.op.ndim_supp == 0:
        init_dist_size = (*batch_size, ar_order)
    else:
        # In this case the support dimension must cover for ar_order
        init_dist_size = batch_size
    init_dist = change_rv_size(init_dist, init_dist_size)

    # Create OpFromGraph representing random draws from AR process
    # Variables with underscore suffix are dummy inputs into the OpFromGraph
    init_ = init_dist.type()
    rhos_ = rhos.type()
    sigma_ = sigma.type()
    steps_ = steps.type()

    rhos_bcast_shape_ = init_.shape
    if constant_term:
        # In this case init shape is one unit smaller than rhos in the last dimension
        rhos_bcast_shape_ = (*rhos_bcast_shape_[:-1], rhos_bcast_shape_[-1] + 1)
    rhos_bcast_ = at.broadcast_to(rhos_, rhos_bcast_shape_)

    noise_rng = aesara.shared(np.random.default_rng())

    def step(*args):
        *prev_xs, reversed_rhos, sigma, rng = args
        if constant_term:
            mu = reversed_rhos[-1] + at.sum(prev_xs * reversed_rhos[:-1], axis=0)
        else:
            mu = at.sum(prev_xs * reversed_rhos, axis=0)
        next_rng, new_x = Normal.dist(mu=mu, sigma=sigma, rng=rng).owner.outputs
        return new_x, {rng: next_rng}

    # We transpose inputs as scan iterates over first dimension
    innov_, innov_updates_ = aesara.scan(
        fn=step,
        outputs_info=[{"initial": init_.T, "taps": range(-ar_order, 0)}],
        non_sequences=[rhos_bcast_.T[::-1], sigma_.T, noise_rng],
        n_steps=steps_,
        strict=True,
    )
    (noise_next_rng,) = tuple(innov_updates_.values())
    ar_ = at.concatenate([init_, innov_.T], axis=-1)

    ar_op = AutoRegressiveRV(
        inputs=[rhos_, sigma_, init_, steps_],
        outputs=[noise_next_rng, ar_],
        ar_order=ar_order,
        constant_term=constant_term,
        inline=True,
    )

    ar = ar_op(rhos, sigma, init_dist, steps)
    return ar
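# A minimal usage sketch of the AR graph builder above. Assumptions: the enclosing
# class is PyMC's `AR`, `Normal` is PyMC's Normal distribution, and helpers referenced
# in the method (`change_rv_size`, `AutoRegressiveRV`) are in scope as in the original
# module; exact signatures may differ between versions, so treat this as illustrative only.
import aesara.tensor as at
import numpy as np

rhos = at.as_tensor_variable(np.array([0.5, -0.2]))  # AR(2) coefficients
sigma = at.as_tensor_variable(1.0)                   # innovation scale
init_dist = Normal.dist(0.0, 1.0, size=(2,))         # one initial value per lag
steps = at.constant(100, dtype="int64")              # number of innovations to scan

ar = AR.rv_op(rhos, sigma, init_dist, steps, ar_order=2, constant_term=False)
# The resulting variable has length ar_order + steps along its last axis:
# the ar_order initial values followed by the scanned innovations.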
def rv_op(cls, dist, lower=None, upper=None, size=None, rngs=None):
    lower = at.constant(-np.inf) if lower is None else at.as_tensor_variable(lower)
    upper = at.constant(np.inf) if upper is None else at.as_tensor_variable(upper)

    # When size is not specified, dist may have to be broadcasted according to lower/upper
    dist_shape = size if size is not None else at.broadcast_shape(dist, lower, upper)
    dist = change_rv_size(dist, dist_shape)

    # Censoring is achieved by clipping the base distribution between lower and upper
    rv_out = at.clip(dist, lower, upper)

    # Reference nodes to facilitate identification in other classmethods, without
    # worrying about possible dimshuffles
    rv_out.tag.dist = dist
    rv_out.tag.lower = lower
    rv_out.tag.upper = upper

    if rngs is not None:
        rv_out = cls._change_rngs(rv_out, rngs)

    return rv_out
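# Minimal usage sketch for the censoring builder above. Assumptions: the enclosing
# class is PyMC's `Censored` and `Normal` / `change_rv_size` are in scope as in the
# original module; normally one would go through the public `Censored.dist(...)` or
# the model constructor rather than calling `rv_op` directly.
base = Normal.dist(mu=0.0, sigma=1.0)
censored = Censored.rv_op(base, lower=-1.0, upper=1.0)
# Draws outside [-1, 1] are clipped to the bounds, which produces the point
# masses at the limits that characterize a censored distribution.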
def make_node(self, rng, size, dtype, mu, sigma, init, steps):
    steps = at.as_tensor_variable(steps)
    if not steps.ndim == 0 or not steps.dtype.startswith("int"):
        raise ValueError("steps must be an integer scalar (ndim=0).")

    mu = at.as_tensor_variable(mu)
    sigma = at.as_tensor_variable(sigma)
    init = at.as_tensor_variable(init)

    # Resize init distribution
    size = normalize_size_param(size)
    # If not explicit, size is determined by the shapes of mu, sigma, and init
    init_size = size if not rv_size_is_none(size) else at.broadcast_shape(mu, sigma, init)
    init = change_rv_size(init, init_size)

    return super().make_node(rng, size, dtype, mu, sigma, init, steps)
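# Minimal usage sketch for the make_node above. Assumptions: the method belongs to a
# GaussianRandomWalkRV RandomVariable Op (as in PyMC) whose name/ndim_supp attributes
# are defined on the class, so it can be instantiated without arguments; the Op's
# __call__ fills in rng/size/dtype and routes the remaining parameters through make_node.
grw_op = GaussianRandomWalkRV()
init = Normal.dist(0.0, 100.0)    # distribution of the starting value
grw = grw_op(0.0, 1.0, init, 50)  # mu, sigma, init, steps
# `steps` must be an integer scalar, otherwise make_node raises the ValueError above.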
def rv_op(cls, weights, *components, size=None):
    # Create new rng for the mix_indexes internal RV
    mix_indexes_rng = aesara.shared(np.random.default_rng())

    single_component = len(components) == 1
    ndim_supp = components[0].owner.op.ndim_supp

    if size is not None:
        components = cls._resize_components(size, *components)
    elif not single_component:
        # We might need to broadcast components when size is not specified
        shape = tuple(at.broadcast_shape(*components))
        size = shape[: len(shape) - ndim_supp]
        components = cls._resize_components(size, *components)

    # Extract replication ndims from components and weights
    ndim_batch = components[0].ndim - ndim_supp
    if single_component:
        # One dimension is taken by the mixture axis in the single component case
        ndim_batch -= 1

    # The weights may imply extra batch dimensions that go beyond what is already
    # implied by the component dimensions (ndim_batch)
    weights_ndim_batch = max(0, weights.ndim - ndim_batch - 1)

    # If weights are large enough that they would broadcast the component distributions
    # we try to resize them. This is necessary to avoid duplicated values in the
    # random method and for equivalency with the logp method
    if weights_ndim_batch:
        new_size = at.concatenate(
            [
                weights.shape[:weights_ndim_batch],
                components[0].shape[:ndim_batch],
            ]
        )
        components = cls._resize_components(new_size, *components)

        # Extract support and batch ndims from components and weights
        ndim_batch = components[0].ndim - ndim_supp
        if single_component:
            ndim_batch -= 1
        weights_ndim_batch = max(0, weights.ndim - ndim_batch - 1)

    assert weights_ndim_batch == 0

    # Component RV terms are accounted for by the Mixture logprob, so they can be
    # safely ignored by Aeppl
    components = [ignore_logprob(component) for component in components]

    # Create an OpFromGraph that encapsulates the random generating process
    # Create dummy input variables with the same type as the ones provided
    weights_ = weights.type()
    components_ = [component.type() for component in components]
    mix_indexes_rng_ = mix_indexes_rng.type()

    mix_axis = -ndim_supp - 1

    # Stack components across mixture axis
    if single_component:
        # If single component, we consider it as being already "stacked"
        stacked_components_ = components_[0]
    else:
        stacked_components_ = at.stack(components_, axis=mix_axis)

    # Broadcast weights to (*batched dimensions, stack dimension), ignoring support dimensions
    weights_broadcast_shape_ = stacked_components_.shape[: ndim_batch + 1]
    weights_broadcasted_ = at.broadcast_to(weights_, weights_broadcast_shape_)

    # Draw mixture indexes and append (stack + ndim_supp) broadcastable dimensions to the right
    mix_indexes_ = at.random.categorical(weights_broadcasted_, rng=mix_indexes_rng_)
    mix_indexes_padded_ = at.shape_padright(mix_indexes_, ndim_supp + 1)

    # Index components and squeeze mixture dimension
    mix_out_ = at.take_along_axis(stacked_components_, mix_indexes_padded_, axis=mix_axis)
    mix_out_ = at.squeeze(mix_out_, axis=mix_axis)

    # Output mix_indexes rng update so that it can be updated in place
    mix_indexes_rng_next_ = mix_indexes_.owner.outputs[0]

    mix_op = MarginalMixtureRV(
        inputs=[mix_indexes_rng_, weights_, *components_],
        outputs=[mix_indexes_rng_next_, mix_out_],
    )

    # Create the actual MarginalMixture variable
    mix_out = mix_op(mix_indexes_rng, weights, *components)

    # Reference nodes to facilitate identification in other classmethods
    mix_out.tag.weights = weights
    mix_out.tag.components = components
    mix_out.tag.choices_rng = mix_indexes_rng

    return mix_out
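# Minimal usage sketch for the mixture builder above. Assumptions: the enclosing class
# is PyMC's `Mixture`, `Normal` is PyMC's Normal distribution, and helpers such as
# `_resize_components` / `ignore_logprob` are in scope as in the original module; the
# public entry point is normally `Mixture.dist(...)` rather than `rv_op` directly.
w = at.as_tensor_variable([0.3, 0.7])     # mixture weights, summing to 1
comp_a = Normal.dist(mu=-2.0, sigma=1.0)
comp_b = Normal.dist(mu=2.0, sigma=1.0)
mix = Mixture.rv_op(w, comp_a, comp_b, size=(10,))
# Each of the 10 draws selects comp_a with probability 0.3 and comp_b with 0.7,
# by indexing the stacked components with the categorical mix_indexes draw.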