Example #1
 def func(chol_vec, delta):
     chol = at.stack([
         at.stack([at.exp(0.1 * chol_vec[0]), 0]),
         at.stack([chol_vec[1], 2 * at.exp(chol_vec[2])]),
     ])
     cov = at.dot(chol, chol.T)
     return MvNormalLogp()(cov, delta)
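A plain NumPy sketch of the covariance this graph builds for a concrete `chol_vec` (illustrative values only; the symbolic version above is what actually gets evaluated):

import numpy as np

chol_vec = np.array([0.1, 2.0, 3.0])
L = np.array([
    [np.exp(0.1 * chol_vec[0]), 0.0],
    [chol_vec[1], 2.0 * np.exp(chol_vec[2])],
])
cov = L @ L.T  # symmetric positive semi-definite by construction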
Example #2
def max_pool(images, imgshp, maxpoolshp):
    """Implements a max pooling layer

    Takes as input a 2D tensor of shape batch_size x img_size and
    performs max pooling.  Max pooling downsamples by taking the max
    value in a given area, here defined by maxpoolshp. Outputs a 2D
    tensor of shape batch_size x output_size.

    :param images: 2D tensor containing images on which to apply max pooling.
                   Assumed to be of shape batch_size x img_size
    :param imgshp: tuple containing image dimensions
    :param maxpoolshp: tuple containing shape of area to max pool over

    :return: out1, symbolic result (2D tensor)
    :return: out2, logical shape of the output
    """
    poolsize = np.int64(np.prod(maxpoolshp))

    # imgshp contains either 2 entries (height,width) or 3 (nfeatures,h,w)
    # in the first case, default nfeatures to 1
    if np.size(imgshp) == 2:
        imgshp = (1, ) + imgshp

    # construct indices and index pointers for sparse matrix, which,
    # when multiplied with input images will generate a stack of image
    # patches
    indices, indptr, spmat_shape, sptype, outshp = convolution_indices.conv_eval(
        imgshp, maxpoolshp, maxpoolshp, mode="valid")

    #    print 'XXXXXXXXXXXXXXXX MAX POOLING LAYER XXXXXXXXXXXXXXXXXXXX'
    #    print 'imgshp = ', imgshp
    #    print 'maxpoolshp = ', maxpoolshp
    #    print 'outshp = ', outshp

    # build sparse matrix, then generate stack of image patches
    csc = aesara.sparse.CSM(sptype)(np.ones(indices.size), indices, indptr,
                                    spmat_shape)
    patches = sparse.structured_dot(csc, images.T).T

    pshape = aet.stack([
        images.shape[0] * aet.as_tensor(np.prod(outshp)),
        aet.as_tensor(imgshp[0]),
        aet.as_tensor(poolsize),
    ])
    patch_stack = reshape(patches, pshape, ndim=3)

    out1 = tt_max(patch_stack, axis=2)

    pshape = aet.stack([
        images.shape[0],
        aet.as_tensor(np.prod(outshp)),
        aet.as_tensor(imgshp[0]),
    ])
    out2 = reshape(out1, pshape, ndim=3)

    out3 = DimShuffle(out2.broadcastable, (0, 2, 1))(out2)

    return aet.flatten(out3, 2), outshp
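A minimal NumPy sketch of the max-pooling idea described in the docstring, using a 4x4 image and a 2x2 pooling window (illustrative only; the function above does the same thing via sparse-matrix patch extraction):

import numpy as np

img = np.arange(16.0).reshape(4, 4)
# split into non-overlapping 2x2 blocks and keep the max of each block
pooled = img.reshape(2, 2, 2, 2).max(axis=(1, 3))  # shape (2, 2)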
Example #3
 def test_hessian(self):
     chol_vec = at.vector("chol_vec")
     chol_vec.tag.test_value = np.array([0.1, 2, 3])
     chol = at.stack([
         at.stack([at.exp(0.1 * chol_vec[0]), 0]),
         at.stack([chol_vec[1], 2 * at.exp(chol_vec[2])]),
     ])
     cov = at.dot(chol, chol.T)
     delta = at.matrix("delta")
     delta.tag.test_value = np.ones((5, 2))
     logp = MvNormalLogp()(cov, delta)
     g_cov, g_delta = at.grad(logp, [cov, delta])
     at.grad(g_delta.sum() + g_cov.sum(), [delta, cov])
Example #4
 def test_hessian(self):
     chol_vec = at.vector("chol_vec")
     chol_vec.tag.test_value = floatX(np.array([0.1, 2, 3]))
     chol = at.stack([
         at.stack([at.exp(0.1 * chol_vec[0]), 0]),
         at.stack([chol_vec[1], 2 * at.exp(chol_vec[2])]),
     ])
     cov = at.dot(chol, chol.T)
     delta = at.matrix("delta")
     delta.tag.test_value = floatX(np.ones((5, 2)))
     logp = MvNormalLogp()(cov, delta)
     g_cov, g_delta = at.grad(logp, [cov, delta])
     # TODO: What's the test?  Something needs to be asserted.
     at.grad(g_delta.sum() + g_cov.sum(), [delta, cov])
Example #5
def marginal_mixture_logcdf(op, value, rng, weights, *components, **kwargs):

    # single component
    if len(components) == 1:
        # Need to broadcast value across mixture axis
        mix_axis = -components[0].owner.op.ndim_supp - 1
        components_logcdf = logcdf(components[0],
                                   at.expand_dims(value, mix_axis))
    else:
        components_logcdf = at.stack(
            [logcdf(component, value) for component in components],
            axis=-1,
        )

    mix_logcdf = at.logsumexp(at.log(weights) + components_logcdf, axis=-1)

    mix_logcdf = check_parameters(
        mix_logcdf,
        0 <= weights,
        weights <= 1,
        at.isclose(at.sum(weights, axis=-1), 1),
        msg="0 <= weights <= 1, sum(weights) == 1",
    )

    return mix_logcdf
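The identity used here is logcdf_mix = logsumexp(log(w_i) + logcdf_i); a small NumPy/SciPy check with made-up weights and component CDF values:

import numpy as np
from scipy.special import logsumexp

w = np.array([0.3, 0.7])
comp_cdf = np.array([0.5, 0.2])                       # per-component CDF at some value
mix_logcdf = logsumexp(np.log(w) + np.log(comp_cdf))  # log(0.3*0.5 + 0.7*0.2)
assert np.isclose(np.exp(mix_logcdf), 0.3 * 0.5 + 0.7 * 0.2)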
Example #6
def marginal_mixture_logcdf(op, value, rng, weights, *components, **kwargs):

    # single component
    if len(components) == 1:
        # Need to broadcast value across mixture axis
        mix_axis = -components[0].owner.op.ndim_supp - 1
        components_logcdf = logcdf(components[0],
                                   at.expand_dims(value, mix_axis))
    else:
        components_logcdf = at.stack(
            [logcdf(component, value) for component in components],
            axis=-1,
        )

    mix_logcdf = at.logsumexp(at.log(weights) + components_logcdf, axis=-1)

    # Squeeze stack dimension
    # There is an Aesara bug in squeeze with negative axis
    # https://github.com/aesara-devs/aesara/issues/830
    # mix_logp = at.squeeze(mix_logp, axis=-1)
    mix_logcdf = at.squeeze(mix_logcdf, axis=mix_logcdf.ndim - 1)

    mix_logcdf = check_parameters(
        mix_logcdf,
        0 <= weights,
        weights <= 1,
        at.isclose(at.sum(weights, axis=-1), 1),
        msg="0 <= weights <= 1, sum(weights) == 1",
    )

    return mix_logcdf
Example #7
 def _comp_modes(self):
     try:
         return at.as_tensor_variable(self.comp_dists.mode)
     except AttributeError:
         return at.squeeze(
             at.stack([comp_dist.mode for comp_dist in self.comp_dists],
                      axis=-1))
Example #8
def simulate_poiszero_hmm(N,
                          mu=10.0,
                          pi_0_a=np.r_[1, 1],
                          p_0_a=np.r_[5, 1],
                          p_1_a=np.r_[1, 1]):

    with pm.Model() as test_model:
        p_0_rv = pm.Dirichlet("p_0", p_0_a, shape=np.shape(pi_0_a))
        p_1_rv = pm.Dirichlet("p_1", p_1_a, shape=np.shape(pi_0_a))

        P_tt = at.stack([p_0_rv, p_1_rv])
        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt))

        pi_0_tt = pm.Dirichlet("pi_0", pi_0_a, shape=np.shape(pi_0_a))

        S_rv = DiscreteMarkovChain("S_t", P_rv, pi_0_tt, shape=N)

        PoissonZeroProcess("Y_t", mu, S_rv, observed=np.zeros(N))

        sample_point = pm.sample_prior_predictive(samples=1)

        # Remove the extra "sampling" dimension from the sample results
        sample_point = {k: v.squeeze(0) for k, v in sample_point.items()}
        # Remove the extra dimension added due to `pm.sample_prior_predictive`
        # forcing `size=1` in its call to `test_model.Y_t.random`.
        sample_point["Y_t"] = sample_point["Y_t"].squeeze(0)

    return sample_point, test_model
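A hedged usage sketch (parameter values are illustrative; assumes the imports used by the function above are in scope):

sample_point, model = simulate_poiszero_hmm(N=100, mu=20.0)
y_obs = sample_point["Y_t"]   # one simulated observation series of length 100
s_true = sample_point["S_t"]  # the corresponding hidden-state sequence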
Example #9
def test_FFBSStep_extreme():
    """Test a long series with extremely large mixture separation (and, thus, very small likelihoods)."""  # noqa: E501

    np.random.seed(2032)

    mu_true = 5000
    poiszero_sim, _ = simulate_poiszero_hmm(9000, mu_true)
    y_test = poiszero_sim["Y_t"]

    with pm.Model() as test_model:
        p_0_rv = poiszero_sim["p_0"]
        p_1_rv = poiszero_sim["p_1"]

        P_tt = at.stack([p_0_rv, p_1_rv])
        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt))

        pi_0_tt = poiszero_sim["pi_0"]

        S_rv = DiscreteMarkovChain("S_t", P_rv, pi_0_tt, shape=y_test.shape[0])
        S_rv.tag.test_value = (y_test > 0).astype(int)

        # This prior is very far from the true value...
        E_mu, Var_mu = 100.0, 10000.0
        mu_rv = pm.Gamma("mu", E_mu**2 / Var_mu, E_mu / Var_mu)

        PoissonZeroProcess("Y_t", mu_rv, S_rv, observed=y_test)

    with test_model:
        ffbs = FFBSStep([S_rv])

    test_point = test_model.test_point.copy()
    test_point["p_0_stickbreaking__"] = poiszero_sim["p_0_stickbreaking__"]
    test_point["p_1_stickbreaking__"] = poiszero_sim["p_1_stickbreaking__"]

    with np.errstate(over="ignore", under="ignore"):
        res = ffbs.step(test_point)

    assert np.array_equal(res["S_t"], poiszero_sim["S_t"])

    with test_model, np.errstate(over="ignore",
                                 under="ignore"), warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=UserWarning)
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        warnings.filterwarnings("ignore", category=FutureWarning)
        mu_step = pm.NUTS([mu_rv])
        ffbs = FFBSStep([S_rv])
        steps = [ffbs, mu_step]
        trace = pm.sample(
            20,
            step=steps,
            cores=1,
            chains=1,
            tune=100,
            n_init=100,
            progressbar=False,
        )

        assert not trace.get_sampler_stats("diverging").all()
        assert trace["mu"].mean() > 1000.0
Example #10
def test_FFBSStep():

    with pm.Model(), pytest.raises(ValueError):
        P_rv = np.eye(2)[None, ...]
        S_rv = DiscreteMarkovChain("S_t", P_rv, np.r_[1.0, 0.0], shape=10)
        S_2_rv = DiscreteMarkovChain("S_2_t", P_rv, np.r_[0.0, 1.0], shape=10)
        PoissonZeroProcess("Y_t",
                           9.0,
                           S_rv + S_2_rv,
                           observed=np.random.poisson(9.0, size=10))
        # Only one variable can be sampled by this step method
        ffbs = FFBSStep([S_rv, S_2_rv])

    with pm.Model(), pytest.raises(TypeError):
        S_rv = pm.Categorical("S_t", np.r_[1.0, 0.0], shape=10)
        PoissonZeroProcess("Y_t",
                           9.0,
                           S_rv,
                           observed=np.random.poisson(9.0, size=10))
        # Only `DiscreteMarkovChains` can be sampled with this step method
        ffbs = FFBSStep([S_rv])

    with pm.Model(), pytest.raises(TypeError):
        P_rv = np.eye(2)[None, ...]
        S_rv = DiscreteMarkovChain("S_t", P_rv, np.r_[1.0, 0.0], shape=10)
        pm.Poisson("Y_t", S_rv, observed=np.random.poisson(9.0, size=10))
        # Only `SwitchingProcess`es can be used as dependent variables
        ffbs = FFBSStep([S_rv])

    np.random.seed(2032)

    poiszero_sim, _ = simulate_poiszero_hmm(30, 150)
    y_test = poiszero_sim["Y_t"]

    with pm.Model() as test_model:
        p_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
        p_1_rv = pm.Dirichlet("p_1", np.r_[1, 1], shape=2)

        P_tt = at.stack([p_0_rv, p_1_rv])
        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt))

        pi_0_tt = compute_steady_state(P_rv)

        S_rv = DiscreteMarkovChain("S_t", P_rv, pi_0_tt, shape=y_test.shape[0])

        PoissonZeroProcess("Y_t", 9.0, S_rv, observed=y_test)

    with test_model:
        ffbs = FFBSStep([S_rv])

    test_point = test_model.test_point.copy()
    test_point["p_0_stickbreaking__"] = poiszero_sim["p_0_stickbreaking__"]
    test_point["p_1_stickbreaking__"] = poiszero_sim["p_1_stickbreaking__"]

    res = ffbs.step(test_point)

    assert np.array_equal(res["S_t"], poiszero_sim["S_t"])
Example #11
 def grad(self, inputs, gout):
     shapes = at.stack([i.shape for i in inputs])
     index_end = shapes.cumsum(0)
     index_begin = index_end - shapes
     slices = [
         ix_(
             at.arange(index_begin[i, 0], index_end[i, 0]),
             at.arange(index_begin[i, 1], index_end[i, 1]),
         ) for i in range(len(inputs))
     ]
     return [gout[0][slc] for slc in slices]
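What `ix_` produces here is an open-mesh index pair that selects one rectangular block of the output gradient per input; the NumPy analogue with a made-up 6x6 gradient and a 2x3 block:

import numpy as np

gout = np.arange(36.0).reshape(6, 6)
block = gout[np.ix_(np.arange(0, 2), np.arange(0, 3))]  # top-left 2x3 block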
Example #12
    def __init__(self, w, comp_dists, *args, **kwargs):
        # comp_dists type checking
        if not (
            isinstance(comp_dists, Distribution)
            or (
                isinstance(comp_dists, Iterable)
                and all(isinstance(c, Distribution) for c in comp_dists)
            )
        ):
            raise TypeError(
                "Supplied Mixture comp_dists must be a "
                "Distribution or an iterable of "
                "Distributions. Got {} instead.".format(
                    type(comp_dists)
                    if not isinstance(comp_dists, Iterable)
                    else [type(c) for c in comp_dists]
                )
            )
        shape = kwargs.pop("shape", ())

        self.w = w = at.as_tensor_variable(w)
        self.comp_dists = comp_dists

        defaults = kwargs.pop("defaults", [])

        if all_discrete(comp_dists):
            default_dtype = _conversion_map[aesara.config.floatX]
        else:
            default_dtype = aesara.config.floatX

            try:
                self.mean = (w * self._comp_means()).sum(axis=-1)

                if "mean" not in defaults:
                    defaults.append("mean")
            except AttributeError:
                pass
        dtype = kwargs.pop("dtype", default_dtype)

        try:
            if isinstance(comp_dists, Distribution):
                comp_mode_logps = comp_dists.logp(comp_dists.mode)
            else:
                comp_mode_logps = at.stack([cd.logp(cd.mode) for cd in comp_dists])

            mode_idx = at.argmax(at.log(w) + comp_mode_logps, axis=-1)
            self.mode = self._comp_modes()[mode_idx]

            if "mode" not in defaults:
                defaults.append("mode")
        except (AttributeError, ValueError, IndexError):
            pass

        super().__init__(shape, dtype, defaults=defaults, *args, **kwargs)
Example #13
def _check_size(size):
    """
    Canonicalise inputs to get valid output sizes for Aesara tensors.

    Parameters
    ----------
    size : int_vector_like
        Some variable that could serve as the shape for an Aesara tensor.
        This can be an int, a tuple of ints, a list of ints
        or an Aesara Variable with similar properties.

    Returns
    -------
    size_var : int_vector
        A one-dimensional Aesara variable encapsulating the given size.

    Raises
    ------
    ValueError
        If this method cannot build a valid size from the input.
    """
    # non-tuple checks and scalar-to-tuple transform
    if isinstance(size, Variable):
        if size.ndim == 1:
            return size
        elif size.ndim == 0:
            return at.stack([size], ndim=1)
        else:
            raise ValueError(
                "Aesara variable must have 1 dimension to be a valid size.",
                size)
    elif isinstance(size, (np.integer, int)):
        return at.constant([size], ndim=1)
    elif not isinstance(size, (tuple, list)):
        raise ValueError("Size must be a int, tuple, list or Aesara variable.",
                         size)

    # check entries of list or tuple
    for i in size:
        if isinstance(i, Variable):
            if i.ndim != 0:
                raise ValueError("Non-scalar Aesara variable in size", size, i)
        elif isinstance(i, (np.integer, int)):
            if i <= 0:
                raise ValueError(
                    "Non-positive dimensions not allowed in size.", size, i)
        else:
            raise ValueError(
                "Only Aesara variables and integers are allowed in a size-tuple.",
                size,
                i,
            )

    return at.as_tensor_variable(size, ndim=1)
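A hedged usage sketch of the canonicalization (assuming `at` is aesara.tensor, as in the function above):

import aesara.tensor as at

size = _check_size(5)                # plain int is wrapped into a 1-d constant [5]
size = _check_size((2, 3))           # tuple of ints becomes a length-2 vector
size = _check_size(at.iscalar("n"))  # 0-d variable is stacked into a 1-d vector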
Example #14
    def grad(self, inputs, output_grads):
        _log.debug(f"grad w.r.t. inputs {hash(tuple(inputs))}")

        # fetch symbolic sensitivity output node from cache
        ihash = hash(tuple(inputs))
        if ihash in self._output_sensitivities:
            sens = self._output_sensitivities[ihash]
        else:
            _log.debug("No cached sensitivities found!")
            _, sens = self.__call__(*inputs, return_sens=True)
        ograds = output_grads[0]

        # for each parameter, multiply sensitivities with the output gradient and sum the result
        # sens is (n_times, n_states, n_p)
        # ograds is (n_times, n_states)
        grads = [at.sum(sens[:, :, p] * ograds) for p in range(self.n_p)]

        # return separate gradient tensors for y0 and theta inputs
        result = at.stack(grads[:self.n_states]), at.stack(
            grads[self.n_states:])
        return result
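The loop is a contraction of the sensitivities with the output gradient; a NumPy sketch with made-up shapes (n_times=4, n_states=2, n_p=3):

import numpy as np

sens = np.random.rand(4, 2, 3)   # (n_times, n_states, n_p)
ograds = np.random.rand(4, 2)    # (n_times, n_states)
grads = [np.sum(sens[:, :, p] * ograds) for p in range(3)]
# the same contraction in a single call:
assert np.allclose(grads, np.einsum("tsp,ts->p", sens, ograds))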
Example #15
 def __init__(
     self,
     x,
     y,
     intercept=True,
     labels=None,
     priors=None,
     vars=None,
     name="",
     model=None,
     offset=0.0,
 ):
     super().__init__(name, model)
     if len(y.shape) > 1:
         err_msg = ("Only one-dimensional observed variable objects (i.e."
                    " of shape `(n, )`) are supported")
         raise TypeError(err_msg)
     if priors is None:
         priors = {}
     if vars is None:
         vars = {}
     x, labels = any_to_tensor_and_labels(x, labels)
     # now we have x, shape and labels
     if intercept:
         x = at.concatenate([at.ones((x.shape[0], 1), x.dtype), x], axis=1)
         labels = ["Intercept"] + labels
     coeffs = list()
     for name in labels:
         if name == "Intercept":
             if name in vars:
                 v = Deterministic(name, vars[name])
             else:
                 v = self.Var(name=name,
                              dist=priors.get(name,
                                              self.default_intercept_prior))
             coeffs.append(v)
         else:
             if name in vars:
                 v = Deterministic(name, vars[name])
             else:
                 v = self.Var(
                     name=name,
                     dist=priors.get(
                         name,
                         priors.get("Regressor",
                                    self.default_regressor_prior)),
                 )
             coeffs.append(v)
     self.coeffs = at.stack(coeffs, axis=0)
     self.y_est = x.dot(self.coeffs) + offset
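The intercept handling simply prepends a column of ones to the design matrix; a NumPy sketch with a toy 2x1 predictor:

import numpy as np

x = np.array([[2.0], [3.0]])
x_with_intercept = np.concatenate([np.ones((x.shape[0], 1)), x], axis=1)
# x_with_intercept == [[1., 2.], [1., 3.]]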
Example #16
    def __init__(self, vars, values=None, model=None):

        if len(vars) > 1:
            raise ValueError("This sampler only takes one variable.")

        (var, ) = pm.inputvars(vars)

        if not isinstance(var.distribution, DiscreteMarkovChain):
            raise TypeError(
                "This sampler only samples `DiscreteMarkovChain`s.")

        model = pm.modelcontext(model)

        self.vars = [var]

        self.dependent_rvs = [
            v for v in model.basic_RVs
            if v is not var and var in graph_inputs([v.logpt])
        ]

        dep_comps_logp_stacked = []
        for i, dependent_rv in enumerate(self.dependent_rvs):
            if isinstance(dependent_rv.distribution, SwitchingProcess):
                comp_logps = []

                # Get the log-likelihood sequences for each state in this
                # `SwitchingProcess` observation distribution
                for comp_dist in dependent_rv.distribution.comp_dists:
                    comp_logps.append(comp_dist.logp(dependent_rv))

                comp_logp_stacked = at.stack(comp_logps)
            else:
                raise TypeError(
                    "This sampler only supports `SwitchingProcess` observations"
                )

            dep_comps_logp_stacked.append(comp_logp_stacked)

        comp_logp_stacked = at.sum(dep_comps_logp_stacked, axis=0)

        (M, ) = draw_values([var.distribution.gamma_0.shape[-1]],
                            point=model.test_point)
        N = model.test_point[var.name].shape[-1]
        self.alphas = np.empty((M, N), dtype=float)

        self.log_lik_states = model.fn(comp_logp_stacked)
        self.gamma_0_fn = model.fn(var.distribution.gamma_0)
        self.Gammas_fn = model.fn(var.distribution.Gammas)
Example #17
def test_TransMatConjugateStep():

    with pm.Model() as test_model, pytest.raises(ValueError):
        p_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
        transmat = TransMatConjugateStep(p_0_rv)

    np.random.seed(2032)

    poiszero_sim, _ = simulate_poiszero_hmm(30, 150)
    y_test = poiszero_sim["Y_t"]

    with pm.Model() as test_model:
        p_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
        p_1_rv = pm.Dirichlet("p_1", np.r_[1, 1], shape=2)

        P_tt = at.stack([p_0_rv, p_1_rv])
        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt))

        pi_0_tt = compute_steady_state(P_rv)

        S_rv = DiscreteMarkovChain("S_t", P_rv, pi_0_tt, shape=y_test.shape[0])

        PoissonZeroProcess("Y_t", 9.0, S_rv, observed=y_test)

    with test_model:
        transmat = TransMatConjugateStep(P_rv)

    test_point = test_model.test_point.copy()
    test_point["S_t"] = (y_test > 0).astype(int)

    res = transmat.step(test_point)

    p_0_smpl = get_test_value(
        p_0_rv.distribution.transform.backward(res[p_0_rv.transformed.name]))
    p_1_smpl = get_test_value(
        p_1_rv.distribution.transform.backward(res[p_1_rv.transformed.name]))

    sampled_trans_mat = np.stack([p_0_smpl, p_1_smpl])

    true_trans_mat = (
        compute_trans_freqs(poiszero_sim["S_t"], 2, counts_only=True) +
        np.c_[[1, 1], [1, 1]])
    true_trans_mat = true_trans_mat / true_trans_mat.sum(0)[..., None]

    assert np.allclose(sampled_trans_mat, true_trans_mat, atol=0.3)
Example #18
 def infer_shape(self, node, in_shapes):
     shape_a = in_shapes[0]
     n = node.inputs[1]
     axis = node.inputs[2]
     if len(shape_a) == 1:
         return [(n, )]
     elif isinstance(axis, tensor.TensorConstant):
         out_shape = (list(shape_a[0:axis.data.item()]) + [n] +
                      list(shape_a[axis.data + 1:]))
     else:
         l = len(shape_a)
         shape_a = tensor.stack(shape_a)
         out_shape = tensor.concatenate(
             (shape_a[0:axis], [n], shape_a[axis + 1:]))
         n_splits = [1] * l
         out_shape = tensor.split(out_shape, n_splits, l)
         out_shape = [a[0] for a in out_shape]
     return [out_shape]
Example #19
def marginal_mixture_moment(op, rv, rng, weights, *components):
    ndim_supp = components[0].owner.op.ndim_supp
    weights = at.shape_padright(weights, ndim_supp)
    mix_axis = -ndim_supp - 1

    if len(components) == 1:
        moment_components = moment(components[0])

    else:
        moment_components = at.stack(
            [moment(component) for component in components],
            axis=mix_axis,
        )

    mix_moment = at.sum(weights * moment_components, axis=mix_axis)
    if components[0].dtype in discrete_types:
        mix_moment = at.round(mix_moment)
    return mix_moment
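The moment being computed is the weight-averaged component moment; a toy NumPy check:

import numpy as np

w = np.array([0.25, 0.75])
component_moments = np.array([0.0, 4.0])
mix_moment = np.sum(w * component_moments)  # 0.25*0 + 0.75*4 = 3.0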
Example #20
def test_only_positive_state():
    number_of_draws = 50
    S = 2
    mu = 10
    y_t = np.repeat(0, 100)

    with pm.Model():
        p_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
        p_1_rv = pm.Dirichlet("p_1", np.r_[1, 1], shape=2)

        P_tt = at.stack([p_0_rv, p_1_rv])
        Gammas_tt = pm.Deterministic("P_tt", at.shape_padleft(P_tt))

        gamma_0_rv = pm.Dirichlet("gamma_0", np.ones((S, )), shape=S)

        V_rv = DiscreteMarkovChain("V_t",
                                   Gammas_tt,
                                   gamma_0_rv,
                                   shape=y_t.shape[0])
        V_rv.tag.test_value = (y_t > 0) * 1

        _ = SwitchingProcess(
            "Y_t",
            [Constant.dist(np.array(0, dtype=np.int64)),
             Constant.dist(mu)],
            V_rv,
            observed=y_t,
        )

        posterior_trace = pm.sample(
            chains=1,
            draws=number_of_draws,
            return_inferencedata=True,
            step=FFBSStep([V_rv]),
        )

        posterior_pred_trace = pm.sample_posterior_predictive(
            posterior_trace.posterior, var_names=["Y_t"])
        assert np.all(posterior_pred_trace["Y_t"] == 0)
Example #21
def create_dirac_zero_hmm(X, mu, xis, observed):
    S = 2
    z_tt = at.stack([at.dot(X, xis[..., s, :]) for s in range(S)], axis=1)
    Gammas_tt = pm.Deterministic("Gamma", multilogit_inv(z_tt))
    gamma_0_rv = pm.Dirichlet("gamma_0", np.ones((S,)), shape=S)

    if type(observed) == np.ndarray:
        T = X.shape[0]
    else:
        T = X.get_value().shape[0]

    V_rv = DiscreteMarkovChain("V_t", Gammas_tt, gamma_0_rv, shape=T)
    if type(observed) == np.ndarray:
        V_rv.tag.test_value = (observed > 0) * 1
    else:
        V_rv.tag.test_value = (observed.get_value() > 0) * 1
    Y_rv = SwitchingProcess(
        "Y_t",
        [pm.Constant.dist(0), pm.Constant.dist(mu)],
        V_rv,
        observed=observed,
    )
    return Y_rv
Example #22
def neibs2images(neibs, neib_shape, original_shape, mode="valid"):
    """
    Function :func:`neibs2images <aesara.sandbox.neighbours.neibs2images>`
    performs the inverse operation of
    :func:`images2neibs <aesara.sandbox.neigbours.neibs2images>`. It inputs
    the output of :func:`images2neibs <aesara.sandbox.neigbours.neibs2images>`
    and reconstructs its input.

    Parameters
    ----------
    neibs : 2d tensor
        Like the one obtained by
        :func:`images2neibs <aesara.sandbox.neigbours.neibs2images>`.
    neib_shape
        `neib_shape` that was used in
        :func:`images2neibs <aesara.sandbox.neigbours.neibs2images>`.
    original_shape
        Original shape of the 4d tensor given to
        :func:`images2neibs <aesara.sandbox.neigbours.neibs2images>`

    Returns
    -------
    object
        Reconstructs the input of
        :func:`images2neibs <aesara.sandbox.neigbours.neibs2images>`,
        a 4d tensor of shape `original_shape`.

    Notes
    -----
    Currently, the function doesn't support tensors created with
    `neib_step` different from default value. This means that it may be
    impossible to compute the gradient of a variable gained by
    :func:`images2neibs <aesara.sandbox.neigbours.neibs2images>` w.r.t.
    its inputs in this case, because it uses
    :func:`images2neibs <aesara.sandbox.neigbours.neibs2images>` for
    gradient computation.

    Examples
    --------
    Example, which uses a tensor gained in example for
    :func:`images2neibs <aesara.sandbox.neigbours.neibs2images>`:

    .. code-block:: python

        im_new = neibs2images(neibs, (5, 5), im_val.shape)
        # Aesara function definition
        inv_window = aesara.function([neibs], im_new)
        # Function application
        im_new_val = inv_window(neibs_val)

    .. note:: The code will output the initial image array.

    """
    neibs = tt.as_tensor_variable(neibs)
    neib_shape = tt.as_tensor_variable(neib_shape)
    original_shape = tt.as_tensor_variable(original_shape)

    new_neib_shape = tt.stack(
        [original_shape[-1] // neib_shape[1], neib_shape[1]])
    output_2d = images2neibs(neibs.dimshuffle("x", "x", 0, 1),
                             new_neib_shape,
                             mode=mode)

    if mode == "ignore_borders":
        # We use set_subtensor so we can accept an original_shape whose value
        # we can't infer, while still raising an error when it doesn't have
        # the right shape.
        valid_shape = original_shape
        valid_shape = tt.set_subtensor(
            valid_shape[2], (valid_shape[2] // neib_shape[0]) * neib_shape[0])
        valid_shape = tt.set_subtensor(
            valid_shape[3], (valid_shape[3] // neib_shape[1]) * neib_shape[1])
        output_4d = output_2d.reshape(valid_shape, ndim=4)
        # padding the borders with zeros
        for d in [2, 3]:
            pad_shape = list(output_4d.shape)
            pad_shape[d] = original_shape[d] - valid_shape[d]
            output_4d = tt.concatenate(
                [output_4d, tt.zeros(pad_shape)], axis=d)
    elif mode == "valid":
        # TODO: we do not implement all modes with this code.
        # Add a check for the supported cases.
        output_4d = output_2d.reshape(original_shape, ndim=4)
    else:
        raise NotImplementedError("neibs2images do not support mode=%s" % mode)

    return output_4d
Example #23
def test_ScanArgs_basics_mit_sot():

    srng = at.random.RandomStream()

    N_tt = at.iscalar("N")
    N_tt.tag.test_value = 10
    M_tt = at.iscalar("M")
    M_tt.tag.test_value = 2

    mus_tt = at.matrix("mus")
    mus_tt.tag.test_value = np.stack(
        [np.arange(0.0, 10), np.arange(0.0, -10, -1)],
        axis=-1).astype(aesara.config.floatX)

    sigmas_tt = at.ones((N_tt, ))
    sigmas_tt.name = "sigmas"

    pi_0_rv = srng.dirichlet(at.ones((M_tt, )), name="pi_0")
    Gamma_rv = srng.dirichlet(at.ones((M_tt, M_tt)), name="Gamma")

    S_0_rv = srng.categorical(pi_0_rv, name="S_0")

    def scan_fn(mus_t, sigma_t, S_tm2, S_tm1, Gamma_t):
        S_t = srng.categorical(Gamma_t[S_tm2], name="S_t")
        Y_t = srng.normal(mus_t[S_tm1], sigma_t, name="Y_t")
        return S_t, Y_t

    (S_rv, Y_rv), scan_updates = aesara.scan(
        fn=scan_fn,
        sequences=[mus_tt, sigmas_tt],
        non_sequences=[Gamma_rv],
        outputs_info=[{
            "initial": at.stack([S_0_rv, S_0_rv]),
            "taps": [-2, -1]
        }, {}],
        strict=True,
        name="scan_rv",
    )
    # Adding names should make output easier to read
    Y_rv.name = "Y_rv"
    # This `S_rv` outer-output is actually a `Subtensor` of the "real" output
    S_rv = S_rv.owner.inputs[0]
    S_rv.name = "S_rv"
    mus_in = Y_rv.owner.inputs[1]
    mus_in.name = "mus_in"
    sigmas_in = Y_rv.owner.inputs[2]
    sigmas_in.name = "sigmas_in"

    scan_args = ScanArgs.from_node(Y_rv.owner)

    test_v = scan_args.inner_in_mit_sot[0][1]
    field_info = scan_args.find_among_fields(test_v)

    assert field_info.name == "inner_in_mit_sot"
    assert field_info.index == 0
    assert field_info.inner_index == 1
    assert field_info.agg_index == 3

    rm_info = scan_args._remove_from_fields(at.ones(2))
    assert rm_info is None

    rm_info = scan_args._remove_from_fields(test_v)

    assert rm_info.name == "inner_in_mit_sot"
    assert rm_info.index == 0
    assert rm_info.inner_index == 1
    assert rm_info.agg_index == 3
Example #24
    def rv_op(cls, weights, *components, size=None):
        # Create new rng for the mix_indexes internal RV
        mix_indexes_rng = aesara.shared(np.random.default_rng())

        single_component = len(components) == 1
        ndim_supp = components[0].owner.op.ndim_supp

        if size is not None:
            components = cls._resize_components(size, *components)
        elif not single_component:
            # We might need to broadcast components when size is not specified
            shape = tuple(at.broadcast_shape(*components))
            size = shape[:len(shape) - ndim_supp]
            components = cls._resize_components(size, *components)

        # Extract replication ndims from components and weights
        ndim_batch = components[0].ndim - ndim_supp
        if single_component:
            # One dimension is taken by the mixture axis in the single component case
            ndim_batch -= 1

        # The weights may imply extra batch dimensions that go beyond what is already
        # implied by the component dimensions (ndim_batch)
        weights_ndim_batch = max(0, weights.ndim - ndim_batch - 1)

        # If the weights are large enough that they would broadcast the component
        # distributions, we try to resize them. This is necessary to avoid duplicated
        # values in the random method and for equivalence with the logp method
        if weights_ndim_batch:
            new_size = at.concatenate([
                weights.shape[:weights_ndim_batch],
                components[0].shape[:ndim_batch],
            ])
            components = cls._resize_components(new_size, *components)

            # Extract support and batch ndims from components and weights
            ndim_batch = components[0].ndim - ndim_supp
            if single_component:
                ndim_batch -= 1
            weights_ndim_batch = max(0, weights.ndim - ndim_batch - 1)

        assert weights_ndim_batch == 0

        # Component RVs terms are accounted by the Mixture logprob, so they can be
        # safely ignored by Aeppl
        components = [ignore_logprob(component) for component in components]

        # Create an OpFromGraph that encapsulates the random generating process
        # Create dummy input variables with the same type as the ones provided
        weights_ = weights.type()
        components_ = [component.type() for component in components]
        mix_indexes_rng_ = mix_indexes_rng.type()

        mix_axis = -ndim_supp - 1

        # Stack components across mixture axis
        if single_component:
            # If single component, we consider it as being already "stacked"
            stacked_components_ = components_[0]
        else:
            stacked_components_ = at.stack(components_, axis=mix_axis)

        # Broadcast weights to (*batched dimensions, stack dimension), ignoring support dimensions
        weights_broadcast_shape_ = stacked_components_.shape[:ndim_batch + 1]
        weights_broadcasted_ = at.broadcast_to(weights_,
                                               weights_broadcast_shape_)

        # Draw mixture indexes and append (stack + ndim_supp) broadcastable dimensions to the right
        mix_indexes_ = at.random.categorical(weights_broadcasted_,
                                             rng=mix_indexes_rng_)
        mix_indexes_padded_ = at.shape_padright(mix_indexes_, ndim_supp + 1)

        # Index components and squeeze mixture dimension
        mix_out_ = at.take_along_axis(stacked_components_,
                                      mix_indexes_padded_,
                                      axis=mix_axis)
        mix_out_ = at.squeeze(mix_out_, axis=mix_axis)

        # Output mix_indexes rng update so that it can be updated in place
        mix_indexes_rng_next_ = mix_indexes_.owner.outputs[0]

        mix_op = MarginalMixtureRV(
            inputs=[mix_indexes_rng_, weights_, *components_],
            outputs=[mix_indexes_rng_next_, mix_out_],
        )

        # Create the actual MarginalMixture variable
        mix_out = mix_op(mix_indexes_rng, weights, *components)

        # Reference nodes to facilitate identification in other classmethods
        mix_out.tag.weights = weights
        mix_out.tag.components = components
        mix_out.tag.choices_rng = mix_indexes_rng

        return mix_out
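The indexing step near the end relies on `take_along_axis` followed by a squeeze of the mixture axis; a NumPy sketch of that mechanic with three batch elements drawn from two stacked components:

import numpy as np

stacked = np.array([[0.0, 10.0],
                    [1.0, 11.0],
                    [2.0, 12.0]])   # (batch, component)
idx = np.array([0, 1, 0])[:, None]  # component chosen for each batch element
mixed = np.take_along_axis(stacked, idx, axis=-1).squeeze(-1)  # [0., 11., 2.]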
Example #25
def any_to_tensor_and_labels(x, labels=None):
    """Util for converting input x to tensor trying to
    create labels for columns if they are not provided.

    Default names for columns are ['x0', 'x1', ...], for mappable
    arrays (e.g. pd.DataFrame) their names are treated as labels.
    You can override them with `labels` argument.

    If you have tensor input you should provide labels as we
    cannot get their shape directly

    If you pass dict input we cannot rely on labels order thus dict
    keys are treated as labels anyway

    Parameters
    ----------
    x: np.ndarray | pd.DataFrame | Variable | dict | list
    labels: list - names for columns of output tensor

    Returns
    -------
    (x, labels) - tensor and labels for its columns
    """
    if isinstance(labels, str):
        labels = [labels]
    # pandas.DataFrame
    # labels can come from here
    # we can override them
    if isinstance(x, pd.DataFrame):
        if not labels:
            labels = x.columns
        x = x.to_numpy()

    # pandas.Series
    # there can still be a label
    # we can override labels
    elif isinstance(x, pd.Series):
        if not labels:
            labels = [x.name]
        x = x.to_numpy()[:, None]

    # dict
    # labels are keys,
    # cannot override them
    elif isinstance(x, dict):
        # try to do it via pandas
        try:
            x = pd.DataFrame.from_dict(x)
            labels = x.columns
            x = x.to_numpy()
        # some types fail there
        # another approach is to construct
        # variable by hand
        except (ValueError, TypeError):
            res = []
            labels = []
            for k, v in x.items():
                res.append(v)
                labels.append(k)
            x = aet.stack(res, axis=1)
            if x.ndim == 1:
                x = x[:, None]
    # case when the input appears to be some array-like value,
    # e.g. lists of lists; numpy deals with it
    elif not isinstance(x, Variable):
        x = np.asarray(x)
        if x.ndim == 0:
            raise ValueError("Cannot use scalars")
        elif x.ndim == 1:
            x = x[:, None]
    # something really strange arrives here, but the user passed
    # labels, so trusting them seems to be a good option
    elif labels is not None:
        x = aet.as_tensor_variable(x)
        if x.ndim == 0:
            raise ValueError("Cannot use scalars")
        elif x.ndim == 1:
            x = x[:, None]
    else:  # trust input
        pass
    # we should check that we can extract labels
    if labels is None and not isinstance(x, Variable):
        labels = ["x%d" % i for i in range(x.shape[1])]
    # for aesara variables we should have labels from user
    elif labels is None:
        raise ValueError("Please provide labels as " "we cannot infer shape of input")
    else:  # trust labels, user knows what he is doing
        pass
    # it's time to check shapes if we can
    if not isinstance(x, Variable):
        if not len(labels) == x.shape[1]:
            raise ValueError(
                "Please provide full list "
                "of labels for coefficients, "
                "got len(labels)=%d instead of %d" % (len(labels), x.shape[1])
            )
    else:
        # trust labels: we already raised an error in the bad case
        # above, so at this point we do have labels
        pass
    # convert labels to list
    if isinstance(labels, pd.RangeIndex):
        labels = ["x%d" % i for i in labels]
    # maybe it was a tuple or something else
    elif not isinstance(labels, list):
        labels = list(labels)
    # as output we need tensor
    if not isinstance(x, Variable):
        x = aet.as_tensor_variable(x)
        # finally check dimensions
        if x.ndim == 0:
            raise ValueError("Cannot use scalars")
        elif x.ndim == 1:
            x = x[:, None]
    return x, labels
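A hedged usage sketch with a small DataFrame (column names become the labels):

import pandas as pd

X = pd.DataFrame({"age": [1.0, 2.0], "height": [3.0, 4.0]})
x_tensor, labels = any_to_tensor_and_labels(X)
# labels == ["age", "height"]; x_tensor is a 2-d Aesara tensor constant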
Example #26
def augment_system(ode_func, n_states, n_theta):
    """
    Function to create augmented system.

    Take a function which specifies a set of differential equations and return
    a compiled function which allows for computation of gradients of the
    differential equation's solution with respect to the parameters.

    Uses float64 even if floatX=float32, because the scipy integrator always uses float64.

    Parameters
    ----------
    ode_func: function
        Differential equation.  Returns array-like.
    n_states: int
        Number of rows of the sensitivity matrix. (n_states)
    n_theta: int
        Number of ODE parameters

    Returns
    -------
    system: function
        Augmented system of differential equations.
    """

    # Present state of the system
    t_y = aet.vector("y", dtype="float64")
    t_y.tag.test_value = np.ones((n_states, ), dtype="float64")
    # Parameter(s).  Should be a vector to allow for generalization to multiparameter
    # systems of ODEs.  Is m-dimensional because it includes all initial conditions as well as ODE parameters
    t_p = aet.vector("p", dtype="float64")
    t_p.tag.test_value = np.ones((n_states + n_theta, ), dtype="float64")
    # Time.  Allow for non-autonomous systems of ODEs to be analyzed
    t_t = aet.scalar("t", dtype="float64")
    t_t.tag.test_value = 2.459

    # Present state of the gradients:
    # Will always be 0 unless the parameter is the initial condition
    # Entry i,j is the partial of y[i] with respect to p[j]
    dydp_vec = aet.vector("dydp", dtype="float64")
    dydp_vec.tag.test_value = make_sens_ic(n_states, n_theta, "float64")

    dydp = dydp_vec.reshape((n_states, n_states + n_theta))

    # Get symbolic representation of the ODEs by passing tensors for y, t and theta
    yhat = ode_func(t_y, t_t, t_p[n_states:])
    # Stack the results of the ode_func into a single tensor variable
    if not isinstance(yhat, (list, tuple)):
        yhat = (yhat, )
    t_yhat = aet.stack(yhat, axis=0)

    # Now compute gradients
    J = aet.jacobian(t_yhat, t_y)

    Jdfdy = aet.dot(J, dydp)

    grad_f = aet.jacobian(t_yhat, t_p)

    # This is the time derivative of dydp
    ddt_dydp = (Jdfdy + grad_f).flatten()

    system = aesara.function(inputs=[t_y, t_t, t_p, dydp_vec],
                             outputs=[t_yhat, ddt_dydp],
                             on_unused_input="ignore")

    return system
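A hedged, scalar version of the augmented system for dy/dt = -p*y (one state, one parameter), written with SciPy for clarity; the forward-sensitivity equation is d/dt(dy/dp) = (df/dy)*(dy/dp) + df/dp:

import numpy as np
from scipy.integrate import odeint

def augmented_rhs(state, t, p):
    y, dydp = state
    dy_dt = -p * y                  # the original ODE
    ddydp_dt = (-p) * dydp + (-y)   # sensitivity: (df/dy)*dydp + df/dp
    return [dy_dt, ddydp_dt]

times = np.linspace(0.0, 1.0, 5)
sol = odeint(augmented_rhs, [1.0, 0.0], times, args=(0.5,))  # columns: y, dy/dp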
Example #27
def test_TransMatConjugateStep_subtensors():

    # Confirm that Dirichlet/non-Dirichlet mixed rows can be
    # parsed
    with pm.Model():
        d_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
        d_1_rv = pm.Dirichlet("p_1", np.r_[1, 1], shape=2)

        p_0_rv = at.as_tensor([0, 0, 1])
        p_1_rv = at.zeros(3)
        p_1_rv = at.set_subtensor(p_0_rv[[0, 2]], d_0_rv)
        p_2_rv = at.zeros(3)
        p_2_rv = at.set_subtensor(p_1_rv[[1, 2]], d_1_rv)

        P_tt = at.stack([p_0_rv, p_1_rv, p_2_rv])
        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt))
        DiscreteMarkovChain("S_t", P_rv, np.r_[1, 0, 0], shape=(10, ))

        transmat = TransMatConjugateStep(P_rv)

    assert transmat.row_remaps == {0: 1, 1: 2}
    exp_slices = {0: np.r_[0, 2], 1: np.r_[1, 2]}
    assert exp_slices.keys() == transmat.row_slices.keys()
    assert all(
        np.array_equal(transmat.row_slices[i], exp_slices[i])
        for i in exp_slices.keys())

    # Same thing, just with some manipulations of the transition matrix
    with pm.Model():
        d_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
        d_1_rv = pm.Dirichlet("p_1", np.r_[1, 1], shape=2)

        p_0_rv = at.as_tensor([0, 0, 1])
        p_1_rv = at.zeros(3)
        p_1_rv = at.set_subtensor(p_0_rv[[0, 2]], d_0_rv)
        p_2_rv = at.zeros(3)
        p_2_rv = at.set_subtensor(p_1_rv[[1, 2]], d_1_rv)

        P_tt = at.horizontal_stack(p_0_rv[..., None], p_1_rv[..., None],
                                   p_2_rv[..., None])
        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt.T))
        DiscreteMarkovChain("S_t", P_rv, np.r_[1, 0, 0], shape=(10, ))

        transmat = TransMatConjugateStep(P_rv)

    assert transmat.row_remaps == {0: 1, 1: 2}
    exp_slices = {0: np.r_[0, 2], 1: np.r_[1, 2]}
    assert exp_slices.keys() == transmat.row_slices.keys()
    assert all(
        np.array_equal(transmat.row_slices[i], exp_slices[i])
        for i in exp_slices.keys())

    # Use an observed `DiscreteMarkovChain` and check the conjugate results
    with pm.Model():
        d_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
        d_1_rv = pm.Dirichlet("p_1", np.r_[1, 1], shape=2)

        p_0_rv = at.as_tensor([0, 0, 1])
        p_1_rv = at.zeros(3)
        p_1_rv = at.set_subtensor(p_0_rv[[0, 2]], d_0_rv)
        p_2_rv = at.zeros(3)
        p_2_rv = at.set_subtensor(p_1_rv[[1, 2]], d_1_rv)

        P_tt = at.horizontal_stack(p_0_rv[..., None], p_1_rv[..., None],
                                   p_2_rv[..., None])
        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt.T))
        DiscreteMarkovChain("S_t",
                            P_rv,
                            np.r_[1, 0, 0],
                            shape=(4, ),
                            observed=np.r_[0, 1, 0, 2])

        transmat = TransMatConjugateStep(P_rv)
Example #28
def conv2d(
        input,
        filters,
        image_shape=None,
        filter_shape=None,
        border_mode="valid",
        subsample=(1, 1),
        **kargs,
):
    """
    signal.conv.conv2d performs a basic 2D convolution of the input with the
    given filters. The input parameter can be a single 2D image or a 3D tensor,
    containing a set of images. Similarly, filters can be a single 2D filter or
    a 3D tensor, corresponding to a set of 2D filters.

    Shape parameters are optional and will result in faster execution.

    Parameters
    ----------
    input   : Symbolic aesara tensor for images to be filtered.
              Dimensions: ([num_images], image height, image width)
    filters : Symbolic aesara tensor for convolution filter(s).
              Dimensions: ([num_filters], filter height, filter width)
    border_mode: {'valid', 'full'}
        See scipy.signal.convolve2d.
    subsample
        Factor by which to subsample output.
    image_shape : tuple of length 2 or 3
        ([num_images,] image height, image width).
    filter_shape : tuple of length 2 or 3
        ([num_filters,] filter height, filter width).
    kwargs
        See aesara.tensor.nnet.conv.conv2d.

    Returns
    -------
    symbolic 2D,3D or 4D tensor
        Tensor of filtered images, with shape
        ([number images,] [number filters,] image height, image width).

    """
    assert input.ndim in (2, 3)
    assert filters.ndim in (2, 3)

    # use shape information if it is given to us ###
    if filter_shape and image_shape:
        if input.ndim == 3:
            bsize = image_shape[0]
        else:
            bsize = 1
        imshp = (1, ) + tuple(image_shape[-2:])

        if filters.ndim == 3:
            nkern = filter_shape[0]
        else:
            nkern = 1
        kshp = filter_shape[-2:]
    else:
        nkern, kshp = None, None
        bsize, imshp = None, None

    # reshape tensors to 4D, for compatibility with ConvOp ###
    if input.ndim == 3:
        sym_bsize = input.shape[0]
    else:
        sym_bsize = 1

    if filters.ndim == 3:
        sym_nkern = filters.shape[0]
    else:
        sym_nkern = 1

    new_input_shape = aet.join(0, aet.stack([sym_bsize, 1]), input.shape[-2:])
    input4D = reshape(input, new_input_shape, ndim=4)

    new_filter_shape = aet.join(0, aet.stack([sym_nkern, 1]),
                                filters.shape[-2:])
    filters4D = reshape(filters, new_filter_shape, ndim=4)

    # perform actual convolution ###
    op = conv.ConvOp(
        output_mode=border_mode,
        dx=subsample[0],
        dy=subsample[1],
        imshp=imshp,
        kshp=kshp,
        nkern=nkern,
        bsize=bsize,
        **kargs,
    )

    output = op(input4D, filters4D)

    # flatten to 3D tensor if convolving with single filter or single image
    if input.ndim == 2 and filters.ndim == 2:
        if config.warn__signal_conv2d_interface:
            warnings.warn(
                "aesara.tensor.signal.conv2d() now outputs a 2d tensor when both"
                " inputs are 2d. To disable this warning, set the Aesara flag"
                " warn__signal_conv2d_interface to False",
                stacklevel=3,
            )

        output = aet.flatten(output.T, ndim=2).T
    elif input.ndim == 2 or filters.ndim == 2:
        output = aet.flatten(output.T, ndim=3).T

    return output
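The border modes documented above follow scipy.signal.convolve2d; a tiny SciPy check of the 'valid' and 'full' output shapes:

import numpy as np
from scipy.signal import convolve2d

img = np.random.rand(5, 5)
kern = np.ones((3, 3)) / 9.0
out_valid = convolve2d(img, kern, mode="valid")  # shape (3, 3)
out_full = convolve2d(img, kern, mode="full")    # shape (7, 7)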
Example #29
def convolve(
        kerns,
        kshp,
        nkern,
        images,
        imgshp,
        step=(1, 1),
        bias=None,
        mode="valid",
        flatten=True,
):
    """Convolution implementation by sparse matrix multiplication.

    :note: For best speed, put the matrix which you expect to be
           smaller as the 'kernel' argument

    "images" is assumed to be a matrix of shape batch_size x img_size,
    where the second dimension represents each image in raster order

    If flatten is "False", the output feature map will have shape:

    .. code-block:: python

        batch_size x number of kernels x output_size

    If flatten is "True", the output feature map will have shape:

    .. code-block:: python

        batch_size x number of kernels * output_size

    .. note::

        IMPORTANT: note that this means that each feature map (image
        generated by each kernel) is contiguous in memory. The memory
        layout will therefore be: [ <feature_map_0> <feature_map_1>
        ... <feature_map_n>], where <feature_map> represents a
        "feature map" in raster order

    kerns is a 2D tensor of shape nkern x N.prod(kshp)

    :param kerns: 2D tensor containing kernels which are applied at every pixel
    :param kshp: tuple containing actual dimensions of kernel (not symbolic)
    :param nkern: number of kernels/filters to apply.
                  nkern=1 will apply one common filter to all input pixels
    :param images: tensor containing images on which to apply convolution
    :param imgshp: tuple containing image dimensions
    :param step: determines number of pixels between adjacent receptive fields
                 (tuple containing dx,dy values)
    :param mode: 'full', 'valid' see CSM.evaluate function for details
    :param sumdims: dimensions over which to sum for the tensordot operation.
                    By default ((2,),(1,)) assumes kerns is a nkern x kernsize
                    matrix and images is a batchsize x imgsize matrix
                    containing flattened images in raster order
    :param flatten: flatten the last 2 dimensions of the output. By default,
                    instead of generating a batchsize x outsize x nkern tensor,
                    will flatten to batchsize x outsize*nkern

    :return: out1, symbolic result
    :return: out2, logical shape of the output img (nkern, height, width)

    :TODO: test for 1D and think of how to do n-d convolutions
    """
    # start by computing output dimensions, size, etc
    kern_size = np.int64(np.prod(kshp))

    # inshp contains either 2 entries (height,width) or 3 (nfeatures,h,w)
    # in the first case, default nfeatures to 1
    if np.size(imgshp) == 2:
        imgshp = (1, ) + imgshp

    # construct indices and index pointers for sparse matrix, which,
    # when multiplied with input images will generate a stack of image
    # patches
    indices, indptr, spmat_shape, sptype, outshp = convolution_indices.conv_eval(
        imgshp, kshp, step, mode)

    # build sparse matrix, then generate stack of image patches
    csc = aesara.sparse.CSM(sptype)(np.ones(indices.size), indices, indptr,
                                    spmat_shape)
    patches = (sparse.structured_dot(csc, images.T)).T

    # compute output of linear classifier
    pshape = aet.stack([
        images.shape[0] * aet.as_tensor(np.prod(outshp)),
        aet.as_tensor(imgshp[0] * kern_size),
    ])
    patch_stack = reshape(patches, pshape, ndim=2)

    # kern is of shape: nkern x ksize*number_of_input_features
    # output is thus of shape: bsize*outshp x nkern
    output = dot(patch_stack, kerns.T)

    # add bias across each feature map (more efficient to do it now)
    if bias is not None:
        output += bias

    # now to have feature maps in raster order ...
    # go from bsize*outshp x nkern to bsize x nkern*outshp
    newshp = aet.stack([
        images.shape[0],
        aet.as_tensor(np.prod(outshp)),
        aet.as_tensor(nkern)
    ])
    tensout = reshape(output, newshp, ndim=3)
    output = DimShuffle((False, ) * tensout.ndim, (0, 2, 1))(tensout)
    if flatten:
        output = aet.flatten(output, 2)

    return output, np.hstack((nkern, outshp))
Example #30
    def _comp_logp(self, value):
        comp_dists = self.comp_dists

        if self.comp_is_distribution:
            # Value can be many things. It can be the self tensor, the mode
            # test point or it can be observed data. The latter case requires
            # careful handling of shape, as the observed's shape could look
            # like (repetitions,) + dist_shape, which does not include the last
            # mixture axis. For this reason, we try to eval the value.shape,
            # compare it with self.shape and shape_padright if we infer that
            # the value holds observed data
            try:
                val_shape = tuple(value.shape.eval())
            except AttributeError:
                val_shape = value.shape
            except aesara.graph.fg.MissingInputError:
                val_shape = None
            try:
                self_shape = tuple(self.shape)
            except AttributeError:
                # Happens in __init__ when computing self.logp(comp_modes)
                self_shape = None
            comp_shape = tuple(comp_dists.shape)
            ndim = value.ndim
            if val_shape is not None and not (
                (self_shape is not None and val_shape == self_shape)
                    or val_shape == comp_shape):
                # value is neither the test point nor the self tensor, it
                # is likely to hold observed values, so we must compute the
                # ndim discarding the dimensions that don't match
                # self_shape
                if self_shape and val_shape[-len(self_shape):] == self_shape:
                    # value has observed values for the Mixture
                    ndim = len(self_shape)
                elif comp_shape and val_shape[-len(comp_shape):] == comp_shape:
                    # value has observed for the Mixture components
                    ndim = len(comp_shape)
                else:
                    # We cannot infer what was passed, we handle this
                    # as was done in earlier versions of Mixture. We pad
                    # always if ndim is lower or equal to 1  (default
                    # legacy implementation)
                    if ndim <= 1:
                        ndim = len(comp_dists.shape) - 1
            else:
                # We reach this point if value does not hold observed data, so
                # we can use its ndim safely to determine shape padding, or it
                # holds something that we cannot infer, so we revert to using
                # the value's ndim for shape padding.
                # We will always pad a single dimension if ndim is less than
                # or equal to 1 (default legacy implementation)
                if ndim <= 1:
                    ndim = len(comp_dists.shape) - 1
            if ndim < len(comp_dists.shape):
                value_ = at.shape_padright(value, len(comp_dists.shape) - ndim)
            else:
                value_ = value
            return comp_dists.logp(value_)
        else:
            return at.squeeze(
                at.stack([comp_dist.logp(value) for comp_dist in comp_dists],
                         axis=-1))