def func(chol_vec, delta):
    chol = at.stack([
        at.stack([at.exp(0.1 * chol_vec[0]), 0]),
        at.stack([chol_vec[1], 2 * at.exp(chol_vec[2])]),
    ])
    cov = at.dot(chol, chol.T)
    return MvNormalLogp()(cov, delta)
def max_pool(images, imgshp, maxpoolshp):
    """Implements a max pooling layer

    Takes as input a 2D tensor of shape batch_size x img_size and
    performs max pooling. Max pooling downsamples by taking the max
    value in a given area, here defined by maxpoolshp. Outputs a 2D
    tensor of shape batch_size x output_size.

    :param images: 2D tensor containing images on which to apply max pooling.
                   Assumed to be of shape batch_size x img_size
    :param imgshp: tuple containing image dimensions
    :param maxpoolshp: tuple containing shape of area to max pool over

    :return: out1, symbolic result (2D tensor)
    :return: out2, logical shape of the output
    """
    poolsize = np.int64(np.prod(maxpoolshp))

    # imgshp contains either 2 entries (height,width) or 3 (nfeatures,h,w)
    # in the first case, default nfeatures to 1
    if np.size(imgshp) == 2:
        imgshp = (1,) + imgshp

    # construct indices and index pointers for sparse matrix, which,
    # when multiplied with input images will generate a stack of image
    # patches
    indices, indptr, spmat_shape, sptype, outshp = convolution_indices.conv_eval(
        imgshp, maxpoolshp, maxpoolshp, mode="valid")

    # print 'XXXXXXXXXXXXXXXX MAX POOLING LAYER XXXXXXXXXXXXXXXXXXXX'
    # print 'imgshp = ', imgshp
    # print 'maxpoolshp = ', maxpoolshp
    # print 'outshp = ', outshp

    # build sparse matrix, then generate stack of image patches
    csc = aesara.sparse.CSM(sptype)(np.ones(indices.size), indices, indptr,
                                    spmat_shape)
    patches = sparse.structured_dot(csc, images.T).T

    pshape = aet.stack([
        images.shape[0] * aet.as_tensor(np.prod(outshp)),
        aet.as_tensor(imgshp[0]),
        aet.as_tensor(poolsize),
    ])
    patch_stack = reshape(patches, pshape, ndim=3)

    out1 = tt_max(patch_stack, axis=2)

    pshape = aet.stack([
        images.shape[0],
        aet.as_tensor(np.prod(outshp)),
        aet.as_tensor(imgshp[0]),
    ])
    out2 = reshape(out1, pshape, ndim=3)

    out3 = DimShuffle(out2.broadcastable, (0, 2, 1))(out2)

    return aet.flatten(out3, 2), outshp
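# Hedged usage sketch for `max_pool` above (illustrative, not from the original
# source): pool flattened 28x28 images in non-overlapping 2x2 windows. Assumes
# the module imports that `max_pool` itself relies on are in scope.
images = aet.matrix("images")  # batch_size x (28 * 28), images in raster order
pooled, pooled_shp = max_pool(images, imgshp=(28, 28), maxpoolshp=(2, 2))
pool_fn = aesara.function([images], pooled)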
def test_hessian(self):
    chol_vec = at.vector("chol_vec")
    chol_vec.tag.test_value = np.array([0.1, 2, 3])
    chol = at.stack([
        at.stack([at.exp(0.1 * chol_vec[0]), 0]),
        at.stack([chol_vec[1], 2 * at.exp(chol_vec[2])]),
    ])
    cov = at.dot(chol, chol.T)
    delta = at.matrix("delta")
    delta.tag.test_value = np.ones((5, 2))
    logp = MvNormalLogp()(cov, delta)
    g_cov, g_delta = at.grad(logp, [cov, delta])
    at.grad(g_delta.sum() + g_cov.sum(), [delta, cov])
def test_hessian(self):
    chol_vec = at.vector("chol_vec")
    chol_vec.tag.test_value = floatX(np.array([0.1, 2, 3]))
    chol = at.stack([
        at.stack([at.exp(0.1 * chol_vec[0]), 0]),
        at.stack([chol_vec[1], 2 * at.exp(chol_vec[2])]),
    ])
    cov = at.dot(chol, chol.T)
    delta = at.matrix("delta")
    delta.tag.test_value = floatX(np.ones((5, 2)))
    logp = MvNormalLogp()(cov, delta)
    g_cov, g_delta = at.grad(logp, [cov, delta])
    # TODO: What's the test?  Something needs to be asserted.
    at.grad(g_delta.sum() + g_cov.sum(), [delta, cov])
def marginal_mixture_logcdf(op, value, rng, weights, *components, **kwargs):
    # single component
    if len(components) == 1:
        # Need to broadcast value across mixture axis
        mix_axis = -components[0].owner.op.ndim_supp - 1
        components_logcdf = logcdf(components[0], at.expand_dims(value, mix_axis))
    else:
        components_logcdf = at.stack(
            [logcdf(component, value) for component in components],
            axis=-1,
        )

    mix_logcdf = at.logsumexp(at.log(weights) + components_logcdf, axis=-1)

    mix_logcdf = check_parameters(
        mix_logcdf,
        0 <= weights,
        weights <= 1,
        at.isclose(at.sum(weights, axis=-1), 1),
        msg="0 <= weights <= 1, sum(weights) == 1",
    )

    return mix_logcdf
def marginal_mixture_logcdf(op, value, rng, weights, *components, **kwargs):
    # single component
    if len(components) == 1:
        # Need to broadcast value across mixture axis
        mix_axis = -components[0].owner.op.ndim_supp - 1
        components_logcdf = logcdf(components[0], at.expand_dims(value, mix_axis))
    else:
        components_logcdf = at.stack(
            [logcdf(component, value) for component in components],
            axis=-1,
        )

    mix_logcdf = at.logsumexp(at.log(weights) + components_logcdf, axis=-1)

    # Squeeze stack dimension
    # There is an Aesara bug in squeeze with negative axis
    # https://github.com/aesara-devs/aesara/issues/830
    # mix_logp = at.squeeze(mix_logp, axis=-1)
    mix_logcdf = at.squeeze(mix_logcdf, axis=mix_logcdf.ndim - 1)

    mix_logcdf = check_parameters(
        mix_logcdf,
        0 <= weights,
        weights <= 1,
        at.isclose(at.sum(weights, axis=-1), 1),
        msg="0 <= weights <= 1, sum(weights) == 1",
    )

    return mix_logcdf
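# Illustrative check (not from the original source) of the identity behind
# `marginal_mixture_logcdf`: the mixture CDF is the weights-weighted sum of
# component CDFs, evaluated in log space with logsumexp for numerical stability.
import numpy as np
from scipy.special import logsumexp
from scipy.stats import norm

weights = np.array([0.4, 0.6])
component_logcdfs = np.array([norm(0, 1).logcdf(0.5), norm(2, 1).logcdf(0.5)])
mix_logcdf = logsumexp(np.log(weights) + component_logcdfs, axis=-1)
assert np.isclose(np.exp(mix_logcdf),
                  0.4 * norm(0, 1).cdf(0.5) + 0.6 * norm(2, 1).cdf(0.5))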
def _comp_modes(self):
    try:
        return at.as_tensor_variable(self.comp_dists.mode)
    except AttributeError:
        return at.squeeze(
            at.stack([comp_dist.mode for comp_dist in self.comp_dists],
                     axis=-1))
def simulate_poiszero_hmm(N, mu=10.0, pi_0_a=np.r_[1, 1], p_0_a=np.r_[5, 1],
                          p_1_a=np.r_[1, 1]):

    with pm.Model() as test_model:
        p_0_rv = pm.Dirichlet("p_0", p_0_a, shape=np.shape(pi_0_a))
        p_1_rv = pm.Dirichlet("p_1", p_1_a, shape=np.shape(pi_0_a))

        P_tt = at.stack([p_0_rv, p_1_rv])
        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt))

        pi_0_tt = pm.Dirichlet("pi_0", pi_0_a, shape=np.shape(pi_0_a))

        S_rv = DiscreteMarkovChain("S_t", P_rv, pi_0_tt, shape=N)

        PoissonZeroProcess("Y_t", mu, S_rv, observed=np.zeros(N))

        sample_point = pm.sample_prior_predictive(samples=1)

        # Remove the extra "sampling" dimension from the sample results
        sample_point = {k: v.squeeze(0) for k, v in sample_point.items()}

        # Remove the extra dimension added due to `pm.sample_prior_predictive`
        # forcing `size=1` in its call to `test_model.Y_t.random`.
        sample_point["Y_t"] = sample_point["Y_t"].squeeze(0)

    return sample_point, test_model
def test_FFBSStep_extreme():
    """Test a long series with extremely large mixture separation (and, thus, very small likelihoods)."""  # noqa: E501

    np.random.seed(2032)

    mu_true = 5000

    poiszero_sim, _ = simulate_poiszero_hmm(9000, mu_true)

    y_test = poiszero_sim["Y_t"]

    with pm.Model() as test_model:
        p_0_rv = poiszero_sim["p_0"]
        p_1_rv = poiszero_sim["p_1"]

        P_tt = at.stack([p_0_rv, p_1_rv])
        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt))

        pi_0_tt = poiszero_sim["pi_0"]

        S_rv = DiscreteMarkovChain("S_t", P_rv, pi_0_tt, shape=y_test.shape[0])
        S_rv.tag.test_value = (y_test > 0).astype(int)

        # This prior is very far from the true value...
        E_mu, Var_mu = 100.0, 10000.0
        mu_rv = pm.Gamma("mu", E_mu**2 / Var_mu, E_mu / Var_mu)

        PoissonZeroProcess("Y_t", mu_rv, S_rv, observed=y_test)

    with test_model:
        ffbs = FFBSStep([S_rv])

    test_point = test_model.test_point.copy()
    test_point["p_0_stickbreaking__"] = poiszero_sim["p_0_stickbreaking__"]
    test_point["p_1_stickbreaking__"] = poiszero_sim["p_1_stickbreaking__"]

    with np.errstate(over="ignore", under="ignore"):
        res = ffbs.step(test_point)

    assert np.array_equal(res["S_t"], poiszero_sim["S_t"])

    with test_model, np.errstate(
            over="ignore", under="ignore"), warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=UserWarning)
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        warnings.filterwarnings("ignore", category=FutureWarning)

        mu_step = pm.NUTS([mu_rv])
        ffbs = FFBSStep([S_rv])
        steps = [ffbs, mu_step]
        trace = pm.sample(
            20,
            step=steps,
            cores=1,
            chains=1,
            tune=100,
            n_init=100,
            progressbar=False,
        )

        assert not trace.get_sampler_stats("diverging").all()
        assert trace["mu"].mean() > 1000.0
def test_FFBSStep():

    with pm.Model(), pytest.raises(ValueError):
        P_rv = np.eye(2)[None, ...]
        S_rv = DiscreteMarkovChain("S_t", P_rv, np.r_[1.0, 0.0], shape=10)
        S_2_rv = DiscreteMarkovChain("S_2_t", P_rv, np.r_[0.0, 1.0], shape=10)
        PoissonZeroProcess("Y_t",
                           9.0,
                           S_rv + S_2_rv,
                           observed=np.random.poisson(9.0, size=10))
        # Only one variable can be sampled by this step method
        ffbs = FFBSStep([S_rv, S_2_rv])

    with pm.Model(), pytest.raises(TypeError):
        S_rv = pm.Categorical("S_t", np.r_[1.0, 0.0], shape=10)
        PoissonZeroProcess("Y_t",
                           9.0,
                           S_rv,
                           observed=np.random.poisson(9.0, size=10))
        # Only `DiscreteMarkovChains` can be sampled with this step method
        ffbs = FFBSStep([S_rv])

    with pm.Model(), pytest.raises(TypeError):
        P_rv = np.eye(2)[None, ...]
        S_rv = DiscreteMarkovChain("S_t", P_rv, np.r_[1.0, 0.0], shape=10)
        pm.Poisson("Y_t", S_rv, observed=np.random.poisson(9.0, size=10))
        # Only `SwitchingProcess`es can be used as dependent variables
        ffbs = FFBSStep([S_rv])

    np.random.seed(2032)

    poiszero_sim, _ = simulate_poiszero_hmm(30, 150)
    y_test = poiszero_sim["Y_t"]

    with pm.Model() as test_model:
        p_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
        p_1_rv = pm.Dirichlet("p_1", np.r_[1, 1], shape=2)

        P_tt = at.stack([p_0_rv, p_1_rv])
        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt))

        pi_0_tt = compute_steady_state(P_rv)

        S_rv = DiscreteMarkovChain("S_t", P_rv, pi_0_tt, shape=y_test.shape[0])

        PoissonZeroProcess("Y_t", 9.0, S_rv, observed=y_test)

    with test_model:
        ffbs = FFBSStep([S_rv])

    test_point = test_model.test_point.copy()
    test_point["p_0_stickbreaking__"] = poiszero_sim["p_0_stickbreaking__"]
    test_point["p_1_stickbreaking__"] = poiszero_sim["p_1_stickbreaking__"]

    res = ffbs.step(test_point)

    assert np.array_equal(res["S_t"], poiszero_sim["S_t"])
def grad(self, inputs, gout):
    shapes = at.stack([i.shape for i in inputs])
    index_end = shapes.cumsum(0)
    index_begin = index_end - shapes
    slices = [
        ix_(
            at.arange(index_begin[i, 0], index_end[i, 0]),
            at.arange(index_begin[i, 1], index_end[i, 1]),
        ) for i in range(len(inputs))
    ]
    return [gout[0][slc] for slc in slices]
def __init__(self, w, comp_dists, *args, **kwargs):
    # comp_dists type checking
    if not (
        isinstance(comp_dists, Distribution)
        or (
            isinstance(comp_dists, Iterable)
            and all(isinstance(c, Distribution) for c in comp_dists)
        )
    ):
        raise TypeError(
            "Supplied Mixture comp_dists must be a "
            "Distribution or an iterable of "
            "Distributions. Got {} instead.".format(
                type(comp_dists)
                if not isinstance(comp_dists, Iterable)
                else [type(c) for c in comp_dists]
            )
        )
    shape = kwargs.pop("shape", ())

    self.w = w = at.as_tensor_variable(w)
    self.comp_dists = comp_dists

    defaults = kwargs.pop("defaults", [])

    if all_discrete(comp_dists):
        default_dtype = _conversion_map[aesara.config.floatX]
    else:
        default_dtype = aesara.config.floatX

        try:
            self.mean = (w * self._comp_means()).sum(axis=-1)

            if "mean" not in defaults:
                defaults.append("mean")
        except AttributeError:
            pass
    dtype = kwargs.pop("dtype", default_dtype)

    try:
        if isinstance(comp_dists, Distribution):
            comp_mode_logps = comp_dists.logp(comp_dists.mode)
        else:
            comp_mode_logps = at.stack([cd.logp(cd.mode) for cd in comp_dists])

        mode_idx = at.argmax(at.log(w) + comp_mode_logps, axis=-1)
        self.mode = self._comp_modes()[mode_idx]

        if "mode" not in defaults:
            defaults.append("mode")
    except (AttributeError, ValueError, IndexError):
        pass

    super().__init__(shape, dtype, defaults=defaults, *args, **kwargs)
def _check_size(size):
    """
    Canonicalise inputs to get valid output sizes for Aesara tensors.

    Parameters
    ----------
    size : int_vector_like
        Some variable that could serve as the shape for an Aesara tensor.
        This can be an int, a tuple of ints, a list of ints
        or an Aesara Variable with similar properties.

    Returns
    -------
    size_var : int_vector
        A one-dimensional Aesara variable encapsulating the given size.

    Raises
    ------
    ValueError
        If this method can not build a valid size from the input.
    """
    # non-tuple checks and scalar-to-tuple transform
    if isinstance(size, Variable):
        if size.ndim == 1:
            return size
        elif size.ndim == 0:
            return at.stack([size], ndim=1)
        else:
            raise ValueError(
                "Aesara variable must have 1 dimension to be a valid size.", size)
    elif isinstance(size, (np.integer, int)):
        return at.constant([size], ndim=1)
    elif not isinstance(size, (tuple, list)):
        raise ValueError("Size must be an int, tuple, list or Aesara variable.", size)

    # check entries of list or tuple
    for i in size:
        if isinstance(i, Variable):
            if i.ndim != 0:
                raise ValueError("Non-scalar Aesara variable in size", size, i)
        elif isinstance(i, (np.integer, int)):
            if i <= 0:
                raise ValueError(
                    "Non-positive dimensions not allowed in size.", size, i)
        else:
            raise ValueError(
                "Only Aesara variables and integers are allowed in a size-tuple.",
                size,
                i,
            )

    return at.as_tensor_variable(size, ndim=1)
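# Illustrative usage sketch for `_check_size` (not from the original source):
# plain ints and tuples are canonicalised to a one-dimensional size vector,
# and a 1-d Aesara variable is passed through unchanged.
assert tuple(_check_size(5).eval()) == (5,)
assert tuple(_check_size((3, 4)).eval()) == (3, 4)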
def grad(self, inputs, output_grads):
    _log.debug(f"grad w.r.t. inputs {hash(tuple(inputs))}")

    # fetch symbolic sensitivity output node from cache
    ihash = hash(tuple(inputs))
    if ihash in self._output_sensitivities:
        sens = self._output_sensitivities[ihash]
    else:
        _log.debug("No cached sensitivities found!")
        _, sens = self.__call__(*inputs, return_sens=True)
    ograds = output_grads[0]

    # for each parameter, multiply sensitivities with the output gradient
    # and sum the result
    # sens is (n_times, n_states, n_p)
    # ograds is (n_times, n_states)
    grads = [at.sum(sens[:, :, p] * ograds) for p in range(self.n_p)]

    # return separate gradient tensors for y0 and theta inputs
    result = at.stack(grads[:self.n_states]), at.stack(grads[self.n_states:])
    return result
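# Illustrative NumPy sketch (not from the original source) of the reduction in
# `grad` above: for each parameter p, multiply the sensitivity slice with the
# output gradient elementwise and sum over the time and state dimensions.
import numpy as np
n_times, n_states, n_p = 4, 2, 3
sens = np.random.rand(n_times, n_states, n_p)   # (n_times, n_states, n_p)
ograds = np.random.rand(n_times, n_states)      # (n_times, n_states)
grads = [np.sum(sens[:, :, p] * ograds) for p in range(n_p)]
assert np.allclose(grads, np.einsum("tsp,ts->p", sens, ograds))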
def __init__(
    self,
    x,
    y,
    intercept=True,
    labels=None,
    priors=None,
    vars=None,
    name="",
    model=None,
    offset=0.0,
):
    super().__init__(name, model)

    if len(y.shape) > 1:
        err_msg = ("Only one-dimensional observed variable objects (i.e."
                   " of shape `(n, )`) are supported")
        raise TypeError(err_msg)

    if priors is None:
        priors = {}
    if vars is None:
        vars = {}

    x, labels = any_to_tensor_and_labels(x, labels)

    # now we have x, shape and labels
    if intercept:
        x = at.concatenate([at.ones((x.shape[0], 1), x.dtype), x], axis=1)
        labels = ["Intercept"] + labels

    coeffs = list()
    for name in labels:
        if name == "Intercept":
            if name in vars:
                v = Deterministic(name, vars[name])
            else:
                v = self.Var(name=name,
                             dist=priors.get(name, self.default_intercept_prior))
            coeffs.append(v)
        else:
            if name in vars:
                v = Deterministic(name, vars[name])
            else:
                v = self.Var(
                    name=name,
                    dist=priors.get(
                        name, priors.get("Regressor", self.default_regressor_prior)),
                )
            coeffs.append(v)

    self.coeffs = at.stack(coeffs, axis=0)
    self.y_est = x.dot(self.coeffs) + offset
def __init__(self, vars, values=None, model=None):

    if len(vars) > 1:
        raise ValueError("This sampler only takes one variable.")

    (var,) = pm.inputvars(vars)

    if not isinstance(var.distribution, DiscreteMarkovChain):
        raise TypeError("This sampler only samples `DiscreteMarkovChain`s.")

    model = pm.modelcontext(model)

    self.vars = [var]

    self.dependent_rvs = [
        v for v in model.basic_RVs
        if v is not var and var in graph_inputs([v.logpt])
    ]

    dep_comps_logp_stacked = []
    for i, dependent_rv in enumerate(self.dependent_rvs):
        if isinstance(dependent_rv.distribution, SwitchingProcess):
            comp_logps = []

            # Get the log-likelihood sequences for each state in this
            # `SwitchingProcess` observations distribution
            for comp_dist in dependent_rv.distribution.comp_dists:
                comp_logps.append(comp_dist.logp(dependent_rv))

            comp_logp_stacked = at.stack(comp_logps)
        else:
            raise TypeError(
                "This sampler only supports `SwitchingProcess` observations")

        dep_comps_logp_stacked.append(comp_logp_stacked)

    comp_logp_stacked = at.sum(dep_comps_logp_stacked, axis=0)

    (M,) = draw_values([var.distribution.gamma_0.shape[-1]],
                       point=model.test_point)
    N = model.test_point[var.name].shape[-1]
    self.alphas = np.empty((M, N), dtype=float)

    self.log_lik_states = model.fn(comp_logp_stacked)
    self.gamma_0_fn = model.fn(var.distribution.gamma_0)
    self.Gammas_fn = model.fn(var.distribution.Gammas)
def test_TransMatConjugateStep():

    with pm.Model() as test_model, pytest.raises(ValueError):
        p_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
        transmat = TransMatConjugateStep(p_0_rv)

    np.random.seed(2032)

    poiszero_sim, _ = simulate_poiszero_hmm(30, 150)
    y_test = poiszero_sim["Y_t"]

    with pm.Model() as test_model:
        p_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
        p_1_rv = pm.Dirichlet("p_1", np.r_[1, 1], shape=2)

        P_tt = at.stack([p_0_rv, p_1_rv])
        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt))

        pi_0_tt = compute_steady_state(P_rv)

        S_rv = DiscreteMarkovChain("S_t", P_rv, pi_0_tt, shape=y_test.shape[0])

        PoissonZeroProcess("Y_t", 9.0, S_rv, observed=y_test)

    with test_model:
        transmat = TransMatConjugateStep(P_rv)

    test_point = test_model.test_point.copy()
    test_point["S_t"] = (y_test > 0).astype(int)

    res = transmat.step(test_point)

    p_0_smpl = get_test_value(
        p_0_rv.distribution.transform.backward(res[p_0_rv.transformed.name]))
    p_1_smpl = get_test_value(
        p_1_rv.distribution.transform.backward(res[p_1_rv.transformed.name]))

    sampled_trans_mat = np.stack([p_0_smpl, p_1_smpl])

    true_trans_mat = (
        compute_trans_freqs(poiszero_sim["S_t"], 2, counts_only=True) +
        np.c_[[1, 1], [1, 1]])
    true_trans_mat = true_trans_mat / true_trans_mat.sum(0)[..., None]

    assert np.allclose(sampled_trans_mat, true_trans_mat, atol=0.3)
def infer_shape(self, node, in_shapes):
    shape_a = in_shapes[0]
    n = node.inputs[1]
    axis = node.inputs[2]
    if len(shape_a) == 1:
        return [(n,)]
    elif isinstance(axis, tensor.TensorConstant):
        out_shape = (list(shape_a[0:axis.data.item()]) + [n] +
                     list(shape_a[axis.data + 1:]))
    else:
        l = len(shape_a)
        shape_a = tensor.stack(shape_a)
        out_shape = tensor.concatenate((shape_a[0:axis], [n], shape_a[axis + 1:]))
        n_splits = [1] * l
        out_shape = tensor.split(out_shape, n_splits, l)
        out_shape = [a[0] for a in out_shape]
    return [out_shape]
def marginal_mixture_moment(op, rv, rng, weights, *components):
    ndim_supp = components[0].owner.op.ndim_supp
    weights = at.shape_padright(weights, ndim_supp)
    mix_axis = -ndim_supp - 1

    if len(components) == 1:
        moment_components = moment(components[0])
    else:
        moment_components = at.stack(
            [moment(component) for component in components],
            axis=mix_axis,
        )

    mix_moment = at.sum(weights * moment_components, axis=mix_axis)
    if components[0].dtype in discrete_types:
        mix_moment = at.round(mix_moment)
    return mix_moment
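# Illustrative NumPy sketch (not from the original source) of the moment rule
# implemented above: the mixture moment is the weights-weighted sum of the
# component moments along the mixture axis.
import numpy as np
weights = np.array([0.3, 0.7])
component_moments = np.array([1.0, 5.0])
mix_moment = np.sum(weights * component_moments, axis=-1)
assert np.isclose(mix_moment, 0.3 * 1.0 + 0.7 * 5.0)  # == 3.8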
def test_only_positive_state():
    number_of_draws = 50
    S = 2
    mu = 10
    y_t = np.repeat(0, 100)

    with pm.Model():
        p_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
        p_1_rv = pm.Dirichlet("p_1", np.r_[1, 1], shape=2)

        P_tt = at.stack([p_0_rv, p_1_rv])
        Gammas_tt = pm.Deterministic("P_tt", at.shape_padleft(P_tt))

        gamma_0_rv = pm.Dirichlet("gamma_0", np.ones((S,)), shape=S)

        V_rv = DiscreteMarkovChain("V_t", Gammas_tt, gamma_0_rv,
                                   shape=y_t.shape[0])
        V_rv.tag.test_value = (y_t > 0) * 1

        _ = SwitchingProcess(
            "Y_t",
            [Constant.dist(np.array(0, dtype=np.int64)), Constant.dist(mu)],
            V_rv,
            observed=y_t,
        )

        posterior_trace = pm.sample(
            chains=1,
            draws=number_of_draws,
            return_inferencedata=True,
            step=FFBSStep([V_rv]),
        )

        posterior_pred_trace = pm.sample_posterior_predictive(
            posterior_trace.posterior, var_names=["Y_t"])

        assert np.all(posterior_pred_trace["Y_t"] == 0)
def create_dirac_zero_hmm(X, mu, xis, observed):
    S = 2
    z_tt = at.stack([at.dot(X, xis[..., s, :]) for s in range(S)], axis=1)
    Gammas_tt = pm.Deterministic("Gamma", multilogit_inv(z_tt))
    gamma_0_rv = pm.Dirichlet("gamma_0", np.ones((S,)), shape=S)

    if type(observed) == np.ndarray:
        T = X.shape[0]
    else:
        T = X.get_value().shape[0]

    V_rv = DiscreteMarkovChain("V_t", Gammas_tt, gamma_0_rv, shape=T)
    if type(observed) == np.ndarray:
        V_rv.tag.test_value = (observed > 0) * 1
    else:
        V_rv.tag.test_value = (observed.get_value() > 0) * 1

    Y_rv = SwitchingProcess(
        "Y_t",
        [pm.Constant.dist(0), pm.Constant.dist(mu)],
        V_rv,
        observed=observed,
    )
    return Y_rv
def neibs2images(neibs, neib_shape, original_shape, mode="valid"):
    """
    Function :func:`neibs2images <aesara.sandbox.neighbours.neibs2images>`
    performs the inverse operation of
    :func:`images2neibs <aesara.sandbox.neighbours.images2neibs>`. It inputs
    the output of :func:`images2neibs <aesara.sandbox.neighbours.images2neibs>`
    and reconstructs its input.

    Parameters
    ----------
    neibs : 2d tensor
        Like the one obtained by
        :func:`images2neibs <aesara.sandbox.neighbours.images2neibs>`.
    neib_shape
        `neib_shape` that was used in
        :func:`images2neibs <aesara.sandbox.neighbours.images2neibs>`.
    original_shape
        Original shape of the 4d tensor given to
        :func:`images2neibs <aesara.sandbox.neighbours.images2neibs>`.

    Returns
    -------
    object
        Reconstructs the input of
        :func:`images2neibs <aesara.sandbox.neighbours.images2neibs>`,
        a 4d tensor of shape `original_shape`.

    Notes
    -----
    Currently, the function doesn't support tensors created with
    `neib_step` different from default value. This means that it may be
    impossible to compute the gradient of a variable gained by
    :func:`images2neibs <aesara.sandbox.neighbours.images2neibs>` w.r.t.
    its inputs in this case, because it uses
    :func:`images2neibs <aesara.sandbox.neighbours.images2neibs>` for
    gradient computation.

    Examples
    --------
    Example, which uses a tensor gained in example for
    :func:`images2neibs <aesara.sandbox.neighbours.images2neibs>`:

    .. code-block:: python

        im_new = neibs2images(neibs, (5, 5), im_val.shape)
        # Aesara function definition
        inv_window = aesara.function([neibs], im_new)
        # Function application
        im_new_val = inv_window(neibs_val)

    .. note:: The code will output the initial image array.

    """
    neibs = tt.as_tensor_variable(neibs)
    neib_shape = tt.as_tensor_variable(neib_shape)
    original_shape = tt.as_tensor_variable(original_shape)

    new_neib_shape = tt.stack([original_shape[-1] // neib_shape[1],
                               neib_shape[1]])
    output_2d = images2neibs(neibs.dimshuffle("x", "x", 0, 1),
                             new_neib_shape, mode=mode)

    if mode == "ignore_borders":
        # We use set_subtensor to accept an `original_shape` whose shape we
        # can't infer, and still raise an error when it doesn't have the
        # right shape.
        valid_shape = original_shape
        valid_shape = tt.set_subtensor(
            valid_shape[2], (valid_shape[2] // neib_shape[0]) * neib_shape[0])
        valid_shape = tt.set_subtensor(
            valid_shape[3], (valid_shape[3] // neib_shape[1]) * neib_shape[1])
        output_4d = output_2d.reshape(valid_shape, ndim=4)
        # padding the borders with zeros
        for d in [2, 3]:
            pad_shape = list(output_4d.shape)
            pad_shape[d] = original_shape[d] - valid_shape[d]
            output_4d = tt.concatenate([output_4d, tt.zeros(pad_shape)], axis=d)
    elif mode == "valid":
        # TODO: we do not implement all modes with this code.
        # Add a check for the good cases.
        output_4d = output_2d.reshape(original_shape, ndim=4)
    else:
        raise NotImplementedError("neibs2images does not support mode=%s" % mode)

    return output_4d
def test_ScanArgs_basics_mit_sot():

    srng = at.random.RandomStream()

    N_tt = at.iscalar("N")
    N_tt.tag.test_value = 10
    M_tt = at.iscalar("M")
    M_tt.tag.test_value = 2

    mus_tt = at.matrix("mus")
    mus_tt.tag.test_value = np.stack(
        [np.arange(0.0, 10), np.arange(0.0, -10, -1)],
        axis=-1).astype(aesara.config.floatX)

    sigmas_tt = at.ones((N_tt,))
    sigmas_tt.name = "sigmas"

    pi_0_rv = srng.dirichlet(at.ones((M_tt,)), name="pi_0")
    Gamma_rv = srng.dirichlet(at.ones((M_tt, M_tt)), name="Gamma")

    S_0_rv = srng.categorical(pi_0_rv, name="S_0")

    def scan_fn(mus_t, sigma_t, S_tm2, S_tm1, Gamma_t):
        S_t = srng.categorical(Gamma_t[S_tm2], name="S_t")
        Y_t = srng.normal(mus_t[S_tm1], sigma_t, name="Y_t")
        return S_t, Y_t

    (S_rv, Y_rv), scan_updates = aesara.scan(
        fn=scan_fn,
        sequences=[mus_tt, sigmas_tt],
        non_sequences=[Gamma_rv],
        outputs_info=[{
            "initial": at.stack([S_0_rv, S_0_rv]),
            "taps": [-2, -1]
        }, {}],
        strict=True,
        name="scan_rv",
    )
    # Adding names should make output easier to read
    Y_rv.name = "Y_rv"
    # This `S_rv` outer-output is actually a `Subtensor` of the "real" output
    S_rv = S_rv.owner.inputs[0]
    S_rv.name = "S_rv"
    mus_in = Y_rv.owner.inputs[1]
    mus_in.name = "mus_in"
    sigmas_in = Y_rv.owner.inputs[2]
    sigmas_in.name = "sigmas_in"

    scan_args = ScanArgs.from_node(Y_rv.owner)

    test_v = scan_args.inner_in_mit_sot[0][1]
    field_info = scan_args.find_among_fields(test_v)

    assert field_info.name == "inner_in_mit_sot"
    assert field_info.index == 0
    assert field_info.inner_index == 1
    assert field_info.agg_index == 3

    rm_info = scan_args._remove_from_fields(at.ones(2))
    assert rm_info is None

    rm_info = scan_args._remove_from_fields(test_v)

    assert rm_info.name == "inner_in_mit_sot"
    assert rm_info.index == 0
    assert rm_info.inner_index == 1
    assert rm_info.agg_index == 3
def rv_op(cls, weights, *components, size=None):
    # Create new rng for the mix_indexes internal RV
    mix_indexes_rng = aesara.shared(np.random.default_rng())

    single_component = len(components) == 1
    ndim_supp = components[0].owner.op.ndim_supp

    if size is not None:
        components = cls._resize_components(size, *components)
    elif not single_component:
        # We might need to broadcast components when size is not specified
        shape = tuple(at.broadcast_shape(*components))
        size = shape[:len(shape) - ndim_supp]
        components = cls._resize_components(size, *components)

    # Extract replication ndims from components and weights
    ndim_batch = components[0].ndim - ndim_supp
    if single_component:
        # One dimension is taken by the mixture axis in the single component case
        ndim_batch -= 1

    # The weights may imply extra batch dimensions that go beyond what is already
    # implied by the component dimensions (ndim_batch)
    weights_ndim_batch = max(0, weights.ndim - ndim_batch - 1)

    # If weights are large enough that they would broadcast the component
    # distributions, we try to resize them. This is necessary to avoid duplicated
    # values in the random method and for equivalency with the logp method
    if weights_ndim_batch:
        new_size = at.concatenate([
            weights.shape[:weights_ndim_batch],
            components[0].shape[:ndim_batch],
        ])
        components = cls._resize_components(new_size, *components)

        # Extract support and batch ndims from components and weights
        ndim_batch = components[0].ndim - ndim_supp
        if single_component:
            ndim_batch -= 1
        weights_ndim_batch = max(0, weights.ndim - ndim_batch - 1)

    assert weights_ndim_batch == 0

    # Component RVs terms are accounted by the Mixture logprob, so they can be
    # safely ignored by Aeppl
    components = [ignore_logprob(component) for component in components]

    # Create an OpFromGraph that encapsulates the random generating process
    # Create dummy input variables with the same type as the ones provided
    weights_ = weights.type()
    components_ = [component.type() for component in components]
    mix_indexes_rng_ = mix_indexes_rng.type()

    mix_axis = -ndim_supp - 1

    # Stack components across mixture axis
    if single_component:
        # If single component, we consider it as being already "stacked"
        stacked_components_ = components_[0]
    else:
        stacked_components_ = at.stack(components_, axis=mix_axis)

    # Broadcast weights to (*batched dimensions, stack dimension),
    # ignoring support dimensions
    weights_broadcast_shape_ = stacked_components_.shape[:ndim_batch + 1]
    weights_broadcasted_ = at.broadcast_to(weights_, weights_broadcast_shape_)

    # Draw mixture indexes and append (stack + ndim_supp) broadcastable
    # dimensions to the right
    mix_indexes_ = at.random.categorical(weights_broadcasted_, rng=mix_indexes_rng_)
    mix_indexes_padded_ = at.shape_padright(mix_indexes_, ndim_supp + 1)

    # Index components and squeeze mixture dimension
    mix_out_ = at.take_along_axis(stacked_components_, mix_indexes_padded_,
                                  axis=mix_axis)
    mix_out_ = at.squeeze(mix_out_, axis=mix_axis)

    # Output mix_indexes rng update so that it can be updated in place
    mix_indexes_rng_next_ = mix_indexes_.owner.outputs[0]

    mix_op = MarginalMixtureRV(
        inputs=[mix_indexes_rng_, weights_, *components_],
        outputs=[mix_indexes_rng_next_, mix_out_],
    )

    # Create the actual MarginalMixture variable
    mix_out = mix_op(mix_indexes_rng, weights, *components)

    # Reference nodes to facilitate identification in other classmethods
    mix_out.tag.weights = weights
    mix_out.tag.components = components
    mix_out.tag.choices_rng = mix_indexes_rng

    return mix_out
def any_to_tensor_and_labels(x, labels=None):
    """Util for converting input `x` to a tensor, trying to create labels
    for its columns if they are not provided.

    Default names for columns are ['x0', 'x1', ...]; for mappable arrays
    (e.g. pd.DataFrame) their names are treated as labels.
    You can override them with the `labels` argument.

    If you have tensor input you should provide labels, as we cannot
    get their shape directly.

    If you pass dict input we cannot rely on label order, thus dict
    keys are treated as labels anyway.

    Parameters
    ----------
    x: np.ndarray | pd.DataFrame | Variable | dict | list
    labels: list - names for columns of output tensor

    Returns
    -------
    (x, labels) - tensor and labels for its columns
    """
    if isinstance(labels, str):
        labels = [labels]
    # pandas.DataFrame
    # labels can come from here
    # we can override them
    if isinstance(x, pd.DataFrame):
        if not labels:
            labels = x.columns
        x = x.to_numpy()

    # pandas.Series
    # there can still be a label
    # we can override labels
    elif isinstance(x, pd.Series):
        if not labels:
            labels = [x.name]
        x = x.to_numpy()[:, None]

    # dict
    # labels are keys,
    # cannot override them
    elif isinstance(x, dict):
        # try to do it via pandas
        try:
            x = pd.DataFrame.from_dict(x)
            labels = x.columns
            x = x.to_numpy()
        # some types fail there
        # another approach is to construct
        # variable by hand
        except (ValueError, TypeError):
            res = []
            labels = []
            for k, v in x.items():
                res.append(v)
                labels.append(k)
            x = aet.stack(res, axis=1)
            if x.ndim == 1:
                x = x[:, None]
    # case when it can appear to be some
    # array like value like lists of lists
    # numpy deals with it
    elif not isinstance(x, Variable):
        x = np.asarray(x)
        if x.ndim == 0:
            raise ValueError("Cannot use scalars")
        elif x.ndim == 1:
            x = x[:, None]
    # something really strange goes here,
    # but user passes labels trusting seems
    # to be a good option
    elif labels is not None:
        x = aet.as_tensor_variable(x)
        if x.ndim == 0:
            raise ValueError("Cannot use scalars")
        elif x.ndim == 1:
            x = x[:, None]
    else:
        # trust input
        pass

    # we should check that we can extract labels
    if labels is None and not isinstance(x, Variable):
        labels = ["x%d" % i for i in range(x.shape[1])]
    # for aesara variables we should have labels from user
    elif labels is None:
        raise ValueError("Please provide labels as "
                         "we cannot infer shape of input")
    else:
        # trust labels, the user knows what they are doing
        pass

    # it's time to check shapes if we can
    if not isinstance(x, Variable):
        if not len(labels) == x.shape[1]:
            raise ValueError(
                "Please provide full list "
                "of labels for coefficients, "
                "got len(labels)=%d instead of %d" % (len(labels), x.shape[1])
            )
    else:
        # trust labels, as we raised an
        # error in bad case, we have labels
        pass

    # convert labels to list
    if isinstance(labels, pd.RangeIndex):
        labels = ["x%d" % i for i in labels]
    # maybe it was a tuple or whatever
    elif not isinstance(labels, list):
        labels = list(labels)

    # as output we need tensor
    if not isinstance(x, Variable):
        x = aet.as_tensor_variable(x)
        # finally check dimensions
        if x.ndim == 0:
            raise ValueError("Cannot use scalars")
        elif x.ndim == 1:
            x = x[:, None]
    return x, labels
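# Illustrative usage sketch for `any_to_tensor_and_labels` (not from the
# original source): DataFrame columns become labels; plain arrays get the
# default 'x0', 'x1', ... labels.
import numpy as np
import pandas as pd

x_df, labels_df = any_to_tensor_and_labels(
    pd.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]}))
assert labels_df == ["a", "b"]

x_arr, labels_arr = any_to_tensor_and_labels(np.ones((5, 2)))
assert labels_arr == ["x0", "x1"]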
def augment_system(ode_func, n_states, n_theta):
    """
    Function to create augmented system.

    Take a function which specifies a set of differential equations and return
    a compiled function which allows for computation of gradients of the
    differential equation's solution with respect to the parameters.

    Uses float64 even if floatX=float32, because the scipy integrator
    always uses float64.

    Parameters
    ----------
    ode_func: function
        Differential equation. Returns array-like.
    n_states: int
        Number of rows of the sensitivity matrix. (n_states)
    n_theta: int
        Number of ODE parameters

    Returns
    -------
    system: function
        Augmented system of differential equations.
    """

    # Present state of the system
    t_y = aet.vector("y", dtype="float64")
    t_y.tag.test_value = np.ones((n_states,), dtype="float64")
    # Parameter(s). Should be a vector to allow for generalization to
    # multiparameter systems of ODEs. Is m dimensional because it includes
    # all initial conditions as well as ode parameters
    t_p = aet.vector("p", dtype="float64")
    t_p.tag.test_value = np.ones((n_states + n_theta,), dtype="float64")
    # Time. Allow for non-autonomous systems of ODEs to be analyzed
    t_t = aet.scalar("t", dtype="float64")
    t_t.tag.test_value = 2.459

    # Present state of the gradients:
    # Will always be 0 unless the parameter is the initial condition
    # Entry i,j is partial of y[i] wrt to p[j]
    dydp_vec = aet.vector("dydp", dtype="float64")
    dydp_vec.tag.test_value = make_sens_ic(n_states, n_theta, "float64")
    dydp = dydp_vec.reshape((n_states, n_states + n_theta))

    # Get symbolic representation of the ODEs by passing tensors for y, t and theta
    yhat = ode_func(t_y, t_t, t_p[n_states:])
    # Stack the results of the ode_func into a single tensor variable
    if not isinstance(yhat, (list, tuple)):
        yhat = (yhat,)
    t_yhat = aet.stack(yhat, axis=0)

    # Now compute gradients
    J = aet.jacobian(t_yhat, t_y)

    Jdfdy = aet.dot(J, dydp)

    grad_f = aet.jacobian(t_yhat, t_p)

    # This is the time derivative of dydp
    ddt_dydp = (Jdfdy + grad_f).flatten()

    system = aesara.function(inputs=[t_y, t_t, t_p, dydp_vec],
                             outputs=[t_yhat, ddt_dydp],
                             on_unused_input="ignore")

    return system
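# Hedged usage sketch for `augment_system` (the decay ODE below is illustrative,
# not from the original source): build the augmented right-hand side for a
# one-state, one-parameter system dy/dt = -theta * y.
def _decay_rhs(y, t, p):
    return -p[0] * y[0]

decay_system = augment_system(_decay_rhs, n_states=1, n_theta=1)
# `decay_system(y, t, p, dydp)` returns the state derivative and the flattened
# time derivative of the sensitivities, as consumed by the scipy ODE integrator.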
def test_TransMatConjugateStep_subtensors():

    # Confirm that Dirichlet/non-Dirichlet mixed rows can be parsed
    with pm.Model():
        d_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
        d_1_rv = pm.Dirichlet("p_1", np.r_[1, 1], shape=2)

        p_0_rv = at.as_tensor([0, 0, 1])
        p_1_rv = at.zeros(3)
        p_1_rv = at.set_subtensor(p_0_rv[[0, 2]], d_0_rv)
        p_2_rv = at.zeros(3)
        p_2_rv = at.set_subtensor(p_1_rv[[1, 2]], d_1_rv)

        P_tt = at.stack([p_0_rv, p_1_rv, p_2_rv])
        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt))
        DiscreteMarkovChain("S_t", P_rv, np.r_[1, 0, 0], shape=(10,))

        transmat = TransMatConjugateStep(P_rv)

    assert transmat.row_remaps == {0: 1, 1: 2}
    exp_slices = {0: np.r_[0, 2], 1: np.r_[1, 2]}
    assert exp_slices.keys() == transmat.row_slices.keys()
    assert all(
        np.array_equal(transmat.row_slices[i], exp_slices[i])
        for i in exp_slices.keys())

    # Same thing, just with some manipulations of the transition matrix
    with pm.Model():
        d_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
        d_1_rv = pm.Dirichlet("p_1", np.r_[1, 1], shape=2)

        p_0_rv = at.as_tensor([0, 0, 1])
        p_1_rv = at.zeros(3)
        p_1_rv = at.set_subtensor(p_0_rv[[0, 2]], d_0_rv)
        p_2_rv = at.zeros(3)
        p_2_rv = at.set_subtensor(p_1_rv[[1, 2]], d_1_rv)

        P_tt = at.horizontal_stack(p_0_rv[..., None], p_1_rv[..., None],
                                   p_2_rv[..., None])
        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt.T))
        DiscreteMarkovChain("S_t", P_rv, np.r_[1, 0, 0], shape=(10,))

        transmat = TransMatConjugateStep(P_rv)

    assert transmat.row_remaps == {0: 1, 1: 2}
    exp_slices = {0: np.r_[0, 2], 1: np.r_[1, 2]}
    assert exp_slices.keys() == transmat.row_slices.keys()
    assert all(
        np.array_equal(transmat.row_slices[i], exp_slices[i])
        for i in exp_slices.keys())

    # Use an observed `DiscreteMarkovChain` and check the conjugate results
    with pm.Model():
        d_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
        d_1_rv = pm.Dirichlet("p_1", np.r_[1, 1], shape=2)

        p_0_rv = at.as_tensor([0, 0, 1])
        p_1_rv = at.zeros(3)
        p_1_rv = at.set_subtensor(p_0_rv[[0, 2]], d_0_rv)
        p_2_rv = at.zeros(3)
        p_2_rv = at.set_subtensor(p_1_rv[[1, 2]], d_1_rv)

        P_tt = at.horizontal_stack(p_0_rv[..., None], p_1_rv[..., None],
                                   p_2_rv[..., None])
        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt.T))
        DiscreteMarkovChain("S_t",
                            P_rv,
                            np.r_[1, 0, 0],
                            shape=(4,),
                            observed=np.r_[0, 1, 0, 2])

        transmat = TransMatConjugateStep(P_rv)
def conv2d(
    input,
    filters,
    image_shape=None,
    filter_shape=None,
    border_mode="valid",
    subsample=(1, 1),
    **kargs,
):
    """
    signal.conv.conv2d performs a basic 2D convolution of the input with the
    given filters. The input parameter can be a single 2D image or a 3D tensor,
    containing a set of images. Similarly, filters can be a single 2D filter or
    a 3D tensor, corresponding to a set of 2D filters.

    Shape parameters are optional and will result in faster execution.

    Parameters
    ----------
    input : Symbolic aesara tensor for images to be filtered.
        Dimensions: ([num_images], image height, image width)
    filters : Symbolic aesara tensor for convolution filter(s).
        Dimensions: ([num_filters], filter height, filter width)
    border_mode: {'valid', 'full'}
        See scipy.signal.convolve2d.
    subsample
        Factor by which to subsample output.
    image_shape : tuple of length 2 or 3
        ([num_images,] image height, image width).
    filter_shape : tuple of length 2 or 3
        ([num_filters,] filter height, filter width).
    kwargs
        See aesara.tensor.nnet.conv.conv2d.

    Returns
    -------
    symbolic 2D, 3D or 4D tensor
        Tensor of filtered images, with shape
        ([number images,] [number filters,] image height, image width).
    """
    assert input.ndim in (2, 3)
    assert filters.ndim in (2, 3)

    # use shape information if it is given to us ###
    if filter_shape and image_shape:
        if input.ndim == 3:
            bsize = image_shape[0]
        else:
            bsize = 1
        imshp = (1,) + tuple(image_shape[-2:])

        if filters.ndim == 3:
            nkern = filter_shape[0]
        else:
            nkern = 1
        kshp = filter_shape[-2:]
    else:
        nkern, kshp = None, None
        bsize, imshp = None, None

    # reshape tensors to 4D, for compatibility with ConvOp ###
    if input.ndim == 3:
        sym_bsize = input.shape[0]
    else:
        sym_bsize = 1

    if filters.ndim == 3:
        sym_nkern = filters.shape[0]
    else:
        sym_nkern = 1

    new_input_shape = aet.join(0, aet.stack([sym_bsize, 1]), input.shape[-2:])
    input4D = reshape(input, new_input_shape, ndim=4)

    new_filter_shape = aet.join(0, aet.stack([sym_nkern, 1]), filters.shape[-2:])
    filters4D = reshape(filters, new_filter_shape, ndim=4)

    # perform actual convolution ###
    op = conv.ConvOp(
        output_mode=border_mode,
        dx=subsample[0],
        dy=subsample[1],
        imshp=imshp,
        kshp=kshp,
        nkern=nkern,
        bsize=bsize,
        **kargs,
    )

    output = op(input4D, filters4D)

    # flatten to 3D tensor if convolving with single filter or single image
    if input.ndim == 2 and filters.ndim == 2:
        if config.warn__signal_conv2d_interface:
            warnings.warn(
                "aesara.tensor.signal.conv2d() now outputs a 2d tensor when both"
                " inputs are 2d. To disable this warning, set the Aesara flag"
                " warn__signal_conv2d_interface to False",
                stacklevel=3,
            )

        output = aet.flatten(output.T, ndim=2).T
    elif input.ndim == 2 or filters.ndim == 2:
        output = aet.flatten(output.T, ndim=3).T

    return output
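# Hedged usage sketch for the signal-style `conv2d` above (shapes illustrative,
# not from the original source): convolve a stack of images with a stack of 2D
# filters; passing the shape tuples enables the faster code path.
imgs = aet.tensor3("imgs")    # (num_images, image height, image width)
kerns = aet.tensor3("kerns")  # (num_filters, filter height, filter width)
filtered = conv2d(imgs, kerns, image_shape=(8, 28, 28), filter_shape=(4, 5, 5))
conv_fn = aesara.function([imgs, kerns], filtered)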
def convolve(
    kerns,
    kshp,
    nkern,
    images,
    imgshp,
    step=(1, 1),
    bias=None,
    mode="valid",
    flatten=True,
):
    """Convolution implementation by sparse matrix multiplication.

    :note: For best speed, put the matrix which you expect to be
           smaller as the 'kernel' argument

    "images" is assumed to be a matrix of shape batch_size x img_size,
    where the second dimension represents each image in raster order

    If flatten is "False", the output feature map will have shape:

    .. code-block:: python

        batch_size x number of kernels x output_size

    If flatten is "True", the output feature map will have shape:

    .. code-block:: python

        batch_size x number of kernels * output_size

    .. note::

        IMPORTANT: note that this means that each feature map (image
        generated by each kernel) is contiguous in memory. The memory
        layout will therefore be:
        [ <feature_map_0> <feature_map_1> ... <feature_map_n>],
        where <feature_map> represents a "feature map" in raster order

    kerns is a 2D tensor of shape nkern x N.prod(kshp)

    :param kerns: 2D tensor containing kernels which are applied at every pixel
    :param kshp: tuple containing actual dimensions of kernel (not symbolic)
    :param nkern: number of kernels/filters to apply.
                  nkern=1 will apply one common filter to all input pixels
    :param images: tensor containing images on which to apply convolution
    :param imgshp: tuple containing image dimensions
    :param step: determines number of pixels between adjacent receptive fields
                 (tuple containing dx,dy values)
    :param mode: 'full', 'valid' see CSM.evaluate function for details
    :param sumdims: dimensions over which to sum for the tensordot operation.
                    By default ((2,),(1,)) assumes kerns is a nkern x kernsize
                    matrix and images is a batchsize x imgsize matrix
                    containing flattened images in raster order
    :param flatten: flatten the last 2 dimensions of the output. By default,
                    instead of generating a batchsize x outsize x nkern tensor,
                    will flatten to batchsize x outsize*nkern

    :return: out1, symbolic result
    :return: out2, logical shape of the output img (nkern,height,width)

    :TODO: test for 1D and think of how to do n-d convolutions
    """
    # start by computing output dimensions, size, etc
    kern_size = np.int64(np.prod(kshp))

    # inshp contains either 2 entries (height,width) or 3 (nfeatures,h,w)
    # in the first case, default nfeatures to 1
    if np.size(imgshp) == 2:
        imgshp = (1,) + imgshp

    # construct indices and index pointers for sparse matrix, which,
    # when multiplied with input images will generate a stack of image
    # patches
    indices, indptr, spmat_shape, sptype, outshp = convolution_indices.conv_eval(
        imgshp, kshp, step, mode)

    # build sparse matrix, then generate stack of image patches
    csc = aesara.sparse.CSM(sptype)(np.ones(indices.size), indices, indptr,
                                    spmat_shape)
    patches = (sparse.structured_dot(csc, images.T)).T

    # compute output of linear classifier
    pshape = aet.stack([
        images.shape[0] * aet.as_tensor(np.prod(outshp)),
        aet.as_tensor(imgshp[0] * kern_size),
    ])
    patch_stack = reshape(patches, pshape, ndim=2)

    # kern is of shape: nkern x ksize*number_of_input_features
    # output is thus of shape: bsize*outshp x nkern
    output = dot(patch_stack, kerns.T)

    # add bias across each feature map (more efficient to do it now)
    if bias is not None:
        output += bias

    # now to have feature maps in raster order ...
    # go from bsize*outshp x nkern to bsize x nkern*outshp
    newshp = aet.stack([
        images.shape[0],
        aet.as_tensor(np.prod(outshp)),
        aet.as_tensor(nkern),
    ])
    tensout = reshape(output, newshp, ndim=3)
    output = DimShuffle((False,) * tensout.ndim, (0, 2, 1))(tensout)
    if flatten:
        output = aet.flatten(output, 2)

    return output, np.hstack((nkern, outshp))
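# Hedged usage sketch for `convolve` (shapes illustrative, not from the original
# source): apply 4 flattened 5x5 kernels to flattened 28x28 images in raster
# order, as described in the docstring above.
kerns = aet.matrix("kerns")    # nkern x prod(kshp), here 4 x 25
images = aet.matrix("images")  # batch_size x prod(imgshp), here batch x 784
conv_out, conv_out_shp = convolve(kerns, kshp=(5, 5), nkern=4,
                                  images=images, imgshp=(28, 28))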
def _comp_logp(self, value):
    comp_dists = self.comp_dists

    if self.comp_is_distribution:
        # Value can be many things. It can be the self tensor, the mode
        # test point or it can be observed data. The latter case requires
        # careful handling of shape, as the observed's shape could look
        # like (repetitions,) + dist_shape, which does not include the last
        # mixture axis. For this reason, we try to eval the value.shape,
        # compare it with self.shape and shape_padright if we infer that
        # the value holds observed data
        try:
            val_shape = tuple(value.shape.eval())
        except AttributeError:
            val_shape = value.shape
        except aesara.graph.fg.MissingInputError:
            val_shape = None
        try:
            self_shape = tuple(self.shape)
        except AttributeError:
            # Happens in __init__ when computing self.logp(comp_modes)
            self_shape = None
        comp_shape = tuple(comp_dists.shape)
        ndim = value.ndim
        if val_shape is not None and not (
            (self_shape is not None and val_shape == self_shape)
            or val_shape == comp_shape
        ):
            # value is neither the test point nor the self tensor, it
            # is likely to hold observed values, so we must compute the
            # ndim discarding the dimensions that don't match
            # self_shape
            if self_shape and val_shape[-len(self_shape):] == self_shape:
                # value has observed values for the Mixture
                ndim = len(self_shape)
            elif comp_shape and val_shape[-len(comp_shape):] == comp_shape:
                # value has observed for the Mixture components
                ndim = len(comp_shape)
            else:
                # We cannot infer what was passed, we handle this
                # as was done in earlier versions of Mixture. We pad
                # always if ndim is lower or equal to 1 (default
                # legacy implementation)
                if ndim <= 1:
                    ndim = len(comp_dists.shape) - 1
        else:
            # We reach this point if value does not hold observed data, so
            # we can use its ndim safely to determine shape padding, or it
            # holds something that we cannot infer, so we revert to using
            # the value's ndim for shape padding.
            # We will always pad a single dimension if ndim is lower or
            # equal to 1 (default legacy implementation)
            if ndim <= 1:
                ndim = len(comp_dists.shape) - 1
        if ndim < len(comp_dists.shape):
            value_ = at.shape_padright(value, len(comp_dists.shape) - ndim)
        else:
            value_ = value
        return comp_dists.logp(value_)
    else:
        return at.squeeze(
            at.stack([comp_dist.logp(value) for comp_dist in comp_dists],
                     axis=-1))