Example #1
    def propagate(self,
                  state,
                  controller,
                  t0,
                  tf,
                  atol=1e-8,
                  rtol=1e-8,
                  method='DOP853'):

        # integrate dynamics
        sol = solve_ivp(
            jit(lambda t, x: self.state_dynamics(x, controller(x),
                                                 *self.params.values())),
            (t0, tf),
            state,
            method=method,
            rtol=rtol,
            atol=atol,
            # jac=jit(lambda t, x: self.state_dynamics_jac_state(x, controller(x), *self.params.values()))
        )

        # return times, states, and controls
        times, states = sol.t, sol.y.T
        controls = np.apply_along_axis(controller, 1, states)
        return times, states, controls
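The same pattern can be tried standalone: a jit-compiled right-hand side handed to SciPy's solve_ivp. A minimal sketch, using an illustrative harmonic oscillator rather than the system above:

# Sketch only: jit-compiled RHS passed to solve_ivp, with made-up dynamics.
import numpy as np
from jax import jit
import jax.numpy as jnp
from scipy.integrate import solve_ivp

rhs = jit(lambda t, x: jnp.array([x[1], -x[0]]))  # simple harmonic oscillator
sol = solve_ivp(rhs, (0.0, 10.0), np.array([1.0, 0.0]),
                method='DOP853', rtol=1e-8, atol=1e-8)
times, states = sol.t, sol.y.T  # states has shape (len(times), 2)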
Example #2
def _log_compare(mat, cats, significance_test=scipy.stats.ttest_ind):
    """Calculates pairwise log ratios between all features and performs a
    significance test (i.e., a t-test) to determine if there is a significant
    difference in feature ratios with respect to the variable of interest.

    Parameters
    ----------
    mat: np.array
       rows correspond to samples and columns correspond to
       features (i.e. OTUs)
    cats: np.array, float
       Vector of categories
    significance_test: function
        statistical test to run

    Returns
    -------
    log_ratio : np.array
        log ratio pvalue matrix
    """
    r, c = mat.shape
    log_ratio = np.zeros((c, c))
    log_mat = np.log(mat)
    cs = np.unique(cats)

    def func(x):
        return significance_test(*[x[cats == k] for k in cs])

    for i in range(c - 1):
        ratio = (log_mat[:, i].T - log_mat[:, i + 1:].T).T
        m, p = np.apply_along_axis(func, axis=0, arr=ratio)
        log_ratio[i, i + 1:] = np.squeeze(np.array(p.T))
    return log_ratio
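A hedged usage sketch for _log_compare on invented counts (a pseudocount keeps the logarithm finite); entry [i, j] with j > i holds the p-value for the log ratio of features i and j:

# Toy data only; requires numpy and scipy.stats as imported by the module.
import numpy as np
import scipy.stats

mat = np.array([[10, 20, 5],
                [12, 18, 6],
                [40, 21, 7],
                [35, 19, 8]]) + 1.0   # pseudocount avoids log(0)
cats = np.array([0, 0, 1, 1])         # two groups of two samples
pvals = _log_compare(mat, cats, significance_test=scipy.stats.ttest_ind)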
Example #3
def undo_lowfreq_transform(x):
    def undo_dct(row):
        return jidct2(row.reshape(28, 28)).reshape(784)

    gauss_mask = 1 - jnp.array(get_gauss_mask((28, 28)))
    large = 256 * 100
    scale = 1 / jnp.maximum(gauss_mask, 1 / large).reshape(784)
    scale = jnp.expand_dims(scale, axis=0)
    return jnp.apply_along_axis(undo_dct, 1, x / scale)
Example #4
def do_lowfreq_transform(x):
    def do_dct(row):
        return jdct2(row.reshape(28, 28)).reshape(784)

    # rescale so high-frequencies are easier to change
    gauss_mask = jnp.array(get_gauss_mask((28, 28)))
    large = 256 * 100
    scale = 1 / jnp.maximum(1 - gauss_mask, 1 / large).reshape(784)
    scale = jnp.expand_dims(scale, axis=0)
    return jnp.apply_along_axis(do_dct, 1, x) * scale
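Examples #3 and #4 are intended to invert each other, so a round trip should approximately recover the input. A hedged sanity-check sketch, assuming jdct2/jidct2 and get_gauss_mask from the surrounding module are in scope and mutually inverse:

# Round-trip sketch: undo_lowfreq_transform should approximately invert
# do_lowfreq_transform (up to floating-point error).
import jax.numpy as jnp

x = jnp.ones((4, 784))  # a batch of flattened 28x28 images
x_back = undo_lowfreq_transform(do_lowfreq_transform(x))
print(jnp.max(jnp.abs(x_back - x)))  # should be close to zero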
Example #5
def _categorical(logits, num_samples, dtype=None, seed=None, name=None):  # pylint: disable=unused-argument
    rng = np.random if seed is None else np.random.RandomState(seed
                                                               & 0xffffffff)
    dtype = utils.numpy_dtype(dtype or np.int64)
    if not hasattr(logits, 'shape'):
        logits = np.array(logits, np.float32)
    probs = _softmax(logits)
    n = logits.shape[-1]
    return np.apply_along_axis(lambda p: rng.choice(n, p=p, size=num_samples),
                               1, probs)
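A hedged usage sketch for _categorical (it assumes the module's _softmax and utils helpers are in scope); the logits are arbitrary:

# Draw 5 category indices per row of a 2-D logits array (toy values).
import numpy as np

logits = np.log([[0.7, 0.2, 0.1],
                 [0.1, 0.1, 0.8]])
samples = _categorical(logits, num_samples=5, seed=0)
# samples has shape (2, 5) with entries in {0, 1, 2}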
Example #6
    def sample(self, key, x0, T, nsamples, dt=0.01, noisy=False):
        """
        Sample from the underlying continuous-time state-space model. First,
        we integrate the dynamics up to time T, then we keep nsamples
        equally-spaced points. Finally, we map the latent states through the
        observation model to obtain the observations.

        Parameters
        ----------
        key: jax.random.PRNGKey
            Initial seed
        x0: array(state_size)
            Initial state of simulation
        T: float
            Final time of integration
        nsamples: int
            Number of observations to take from the total integration
        dt: float
            integration step size
        noisy: bool
            Whether to (naively) add noise to the state space

        Returns
        -------
        * array(nsamples, state_size)
            State-space values
        * array(nsamples, obs_size)
            Observed-space values
        * int
            Number of observations skipped between one
            datapoint and the next
        """
        nsteps = ceil(T / dt)
        jump_size = ceil(nsteps / nsamples)
        correction = nsamples - ceil(nsteps / jump_size)
        nsteps += correction * jump_size

        key_state, key_obs = random.split(key)
        state_noise = random.multivariate_normal(key_state,
                                                 jnp.zeros(self.state_size),
                                                 self.Q, (nsteps, ))
        obs_noise = random.multivariate_normal(key_obs,
                                               jnp.zeros(self.obs_size),
                                               self.R, (nsteps, ))
        simulation = self._rk2(x0, self.fz, nsteps, dt)

        if noisy:
            simulation = simulation + jnp.sqrt(dt) * state_noise

        sample_state = simulation[::jump_size]
        sample_obs = jnp.apply_along_axis(
            self.fx, 1, sample_state) + obs_noise[:len(sample_state)]

        return sample_state, sample_obs, jump_size
Example #7
def median_heuristic(data, distance, per_dimension=True):
    if isinstance(distance, str):
        dist_fn = lambda x: pdist(x, distance)
    else:
        dist_fn = distance
    if per_dimension is False:
        return np.median(dist_fn(data))
    else:

        def single_dim_heuristic(data_dim):
            return median_heuristic(data_dim[:, None],
                                    dist_fn,
                                    per_dimension=False)

        return np.apply_along_axis(single_dim_heuristic, 0, data)
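A hedged usage sketch for median_heuristic on toy Gaussian data, using a SciPy metric name (the module is assumed to import pdist from scipy.spatial.distance):

# Per-dimension and pooled median-heuristic bandwidths on random data.
import numpy as np

rng = np.random.default_rng(0)
data = rng.normal(size=(100, 3))
per_dim = median_heuristic(data, "sqeuclidean")                      # one value per column
pooled = median_heuristic(data, "sqeuclidean", per_dimension=False)  # single scalar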
Example #8
    def get_posterior(self):
        # each stimulus response gets its own posterior distribution across categories
        # R    [Nlvl, Ni, Nf]
        # r    [Nlvl, Ni, Nf]
        #
        # single LPstm  [Nlvl]
        # PPstm         [Nlvl, Ni, NLvl]

        self.pStd = 1 / np.prod(np.sqrt(self.var), 2)
        if self.bMean == 1:
            R = self.r
        else:
            R = self.R

        #for k in range(self.Nlvl):
        #    for l in range(self.Ni):
        #        self.Y[k,l,:]=get_Y_dist(R[k,l,:],self.r,self.var,self.pStd)

        self.Y = np.apply_along_axis(lambda x: self.my_get_Y_dist(x), 2, R)

        Z = np.tile(np.sum(self.Y, 2)[:, :, np.newaxis], [1, 1, self.Nlvl])
        self.PPstm = self.Y / Z
Example #9
def apply_along_axis(func1d, axis: int, arr, *args, **kwargs):
  arr = _remove_jaxarray(arr)
  return jnp.apply_along_axis(func1d, axis, arr, *args, **kwargs)
Example #10
def ancom(
    table,
    grouping,
    alpha=0.05,
    tau=0.02,
    theta=0.1,
    multiple_comparisons_correction=None,
    significance_test=None,
):
    r"""Performs a differential abundance test using ANCOM.

    This is done by calculating pairwise log ratios between all features
    and performing a significance test to determine if there is a significant
    difference in feature ratios with respect to the variable of interest.

    In an experiment with only two treatments, this tests the following
    hypothesis for feature :math:`i`

    .. math::

        H_{0i}: \mathbb{E}[\ln(u_i^{(1)})] = \mathbb{E}[\ln(u_i^{(2)})]

    where :math:`u_i^{(1)}` is the mean abundance for feature :math:`i` in the
    first group and :math:`u_i^{(2)}` is the mean abundance for feature
    :math:`i` in the second group.

    Parameters
    ----------
    table : pd.DataFrame
        A 2D matrix of strictly positive values (i.e. counts or proportions)
        where the rows correspond to samples and the columns correspond to
        features.
    grouping : pd.Series
        Vector indicating the assignment of samples to groups.  For example,
        these could be strings or integers denoting which group a sample
        belongs to.  It must be the same length as the samples in `table`.
        The index must be the same on `table` and `grouping` but need not be
        in the same order.
    alpha : float, optional
        Significance level for each of the statistical tests.
        This can be anywhere between 0 and 1, exclusive.
    tau : float, optional
        A constant used to determine an appropriate cutoff.
        A value close to zero indicates a conservative cutoff.
        This can be anywhere between 0 and 1, exclusive.
    theta : float, optional
        Lower bound for the proportion for the W-statistic.
        If all W-statistics are lower than theta, then no features
        will be detected to be differentially significant.
        This can be anywhere between 0 and 1, exclusive.
    multiple_comparisons_correction : {None, 'holm-bonferroni'}, optional
        The multiple comparison correction procedure to run.  If None,
        then no multiple comparison correction procedure will be run.
        If 'holm-bonferroni' is specified, then the Holm-Bonferroni
        procedure [1]_ will be run.
    significance_test : function, optional
        A statistical significance function to test for significance between
        classes.  This function must be able to accept at least two 1D
        array_like arguments of floats and returns a test statistic and a
        p-value. By default ``scipy.stats.f_oneway`` is used.

    Returns
    -------
    pd.DataFrame
        A table of features, their W-statistics and whether the null hypothesis
        is rejected.

        `"W"` is the W-statistic, or number of features that a single feature
        is tested to be significantly different against.

        `"reject"` indicates whether the feature is significantly different or not.

    See Also
    --------
    multiplicative_replacement
    scipy.stats.ttest_ind
    scipy.stats.f_oneway
    scipy.stats.wilcoxon
    scipy.stats.kruskal

    Notes
    -----
    The developers of this method recommend the following significance tests
    ([2]_, Supplementary File 1, top of page 11): the standard parametric
    t-test (``scipy.stats.ttest_ind``) for two groups, or one-way ANOVA
    (``scipy.stats.f_oneway``) if the number of groups is greater than 2;
    alternatively, non-parametric variants such as the Wilcoxon rank sum test
    (``scipy.stats.wilcoxon``) for two groups, or Kruskal-Wallis
    (``scipy.stats.kruskal``) if the number of groups is greater than 2.
    Because one-way ANOVA is
    equivalent to the standard t-test when the number of groups is two,
    we default to ``scipy.stats.f_oneway`` here, which can be used when
    there are two or more groups.  Users should refer to the documentation
    of these tests in SciPy to understand the assumptions made by each test.

    This method cannot handle any zero counts as input, since the logarithm
    of zero cannot be computed.  While this is an unsolved problem, many
    studies have shown promising results by replacing the zeros with pseudo
    counts. This can also be done via the ``multiplicative_replacement``
    method.

    References
    ----------
    .. [1] Holm, S. "A simple sequentially rejective multiple test procedure".
       Scandinavian Journal of Statistics (1979), 6.
    .. [2] Mandal et al. "Analysis of composition of microbiomes: a novel
       method for studying microbial composition", Microbial Ecology in Health
       & Disease, (2015), 26.

    Examples
    --------
    First import all of the necessary modules:

    >>> import mushi.composition as cmp
    >>> import pandas as pd

    Now let's load in a pd.DataFrame with 6 samples and 7 unknown bacteria:

    >>> table = pd.DataFrame([[12, 11, 10, 10, 10, 10, 10],
    ...                       [9,  11, 12, 10, 10, 10, 10],
    ...                       [1,  11, 10, 11, 10, 5,  9],
    ...                       [22, 21, 9,  10, 10, 10, 10],
    ...                       [20, 22, 10, 10, 13, 10, 10],
    ...                       [23, 21, 14, 10, 10, 10, 10]],
    ...                      index=['s1','s2','s3','s4','s5','s6'],
    ...                      columns=['b1','b2','b3','b4','b5','b6','b7'])

    Then create a grouping vector.  In this scenario, there
    are only two classes, and suppose these classes correspond to the
    treatment due to a drug and a control.  The first three samples
    are controls and the last three samples are treatments.

    >>> grouping = pd.Series([0, 0, 0, 1, 1, 1],
    ...                      index=['s1','s2','s3','s4','s5','s6'])

    Now run ``ancom`` and see if there are any features that have any
    significant differences between the treatment and the control.

    >>> results = cmp.ancom(table, grouping) # doctest: +SKIP
    >>> results['W'] # doctest: +SKIP
    b1    0
    b2    4
    b3    1
    b4    1
    b5    1
    b6    0
    b7    1
    Name: W, dtype: int64

    The W-statistic is the number of features that a single feature is tested
    to be significantly different against.  In this scenario, `b2` was detected
    to have significantly different abundances compared to four of the other
    species. To summarize the results from the W-statistic, let's take a look
    at the results from the hypothesis test:

    >>> results['reject'] # doctest: +SKIP
    b1    False
    b2     True
    b3    False
    b4    False
    b5    False
    b6    False
    b7    False
    Name: reject, dtype: bool

    From this we can conclude that only `b2` was significantly
    different between the treatment and the control.

    """

    if not isinstance(table, pd.DataFrame):
        raise TypeError("`table` must be a `pd.DataFrame`, "
                        "not %r." % type(table).__name__)
    if not isinstance(grouping, pd.Series):
        raise TypeError("`grouping` must be a `pd.Series`,"
                        " not %r." % type(grouping).__name__)

    if np.any(table <= 0):
        raise ValueError(
            "Cannot handle zeros or negative values in `table`. "
            "Use pseudo counts or ``multiplicative_replacement``.")

    if not 0 < alpha < 1:
        raise ValueError("`alpha`=%f is not within 0 and 1." % alpha)

    if not 0 < tau < 1:
        raise ValueError("`tau`=%f is not within 0 and 1." % tau)

    if not 0 < theta < 1:
        raise ValueError("`theta`=%f is not within 0 and 1." % theta)

    if multiple_comparisons_correction is not None:
        if multiple_comparisons_correction != "holm-bonferroni":
            raise ValueError("%r is not an available option for "
                             "`multiple_comparisons_correction`." %
                             multiple_comparisons_correction)

    if (grouping.isnull()).any():
        raise ValueError("Cannot handle missing values in `grouping`.")

    if (table.isnull()).any().any():
        raise ValueError("Cannot handle missing values in `table`.")

    groups, _grouping = onp.unique(grouping, return_inverse=True)
    grouping = pd.Series(_grouping, index=grouping.index)
    num_groups = len(groups)

    if num_groups == len(grouping):
        raise ValueError(
            "All values in `grouping` are unique. This method cannot "
            "operate on a grouping vector with only unique values (e.g., "
            "there is no 'within' variance because each group of samples "
            "contains only a single sample).")

    if num_groups == 1:
        raise ValueError(
            "All values in `grouping` are the same. This method cannot "
            "operate on a grouping vector with only a single group of "
            "samples (e.g., there is no 'between' variance because there "
            "is only a single group).")

    if significance_test is None:
        significance_test = scipy.stats.f_oneway

    table_index_len = len(table.index)
    grouping_index_len = len(grouping.index)
    mat, cats = table.align(grouping, axis=0, join="inner")
    if len(mat) != table_index_len or len(cats) != grouping_index_len:
        raise ValueError("`table` index and `grouping` "
                         "index must be consistent.")

    n_feat = mat.shape[1]

    _logratio_mat = _log_compare(mat.values, cats.values, significance_test)
    logratio_mat = _logratio_mat + _logratio_mat.T

    # Multiple comparisons
    if multiple_comparisons_correction == "holm-bonferroni":
        logratio_mat = np.apply_along_axis(_holm_bonferroni, 1, logratio_mat)
    np.fill_diagonal(logratio_mat, 1)
    W = (logratio_mat < alpha).sum(axis=1)
    c_start = W.max() / n_feat
    if c_start < theta:
        reject = np.zeros_like(W, dtype=bool)
    else:
        # Select appropriate cutoff
        cutoff = c_start - np.linspace(0.05, 0.25, 5)
        prop_cut = np.array([(W > n_feat * cut).mean() for cut in cutoff])
        dels = np.abs(prop_cut - np.roll(prop_cut, -1))
        dels[-1] = 0

        if (dels[0] < tau) and (dels[1] < tau) and (dels[2] < tau):
            nu = cutoff[1]
        elif (dels[0] >= tau) and (dels[1] < tau) and (dels[2] < tau):
            nu = cutoff[2]
        elif (dels[1] >= tau) and (dels[2] < tau) and (dels[3] < tau):
            nu = cutoff[3]
        else:
            nu = cutoff[4]
        reject = W >= nu * n_feat
    labs = mat.columns
    return pd.DataFrame({
        "W": pd.Series(W, index=labs),
        "reject": pd.Series(reject, index=labs)
    })
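As a follow-up to the docstring example, a hedged sketch that enables the Holm-Bonferroni correction (it assumes mushi.composition exposes ancom as documented above):

# Same toy table as in the docstring example, with multiple-comparison
# correction switched on; the corrected p-values are more conservative.
import pandas as pd
import mushi.composition as cmp

table = pd.DataFrame([[12, 11, 10, 10, 10, 10, 10],
                      [9,  11, 12, 10, 10, 10, 10],
                      [1,  11, 10, 11, 10, 5,  9],
                      [22, 21, 9,  10, 10, 10, 10],
                      [20, 22, 10, 10, 13, 10, 10],
                      [23, 21, 14, 10, 10, 10, 10]],
                     index=['s1', 's2', 's3', 's4', 's5', 's6'],
                     columns=['b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7'])
grouping = pd.Series([0, 0, 0, 1, 1, 1],
                     index=['s1', 's2', 's3', 's4', 's5', 's6'])
results = cmp.ancom(table, grouping,
                    multiple_comparisons_correction='holm-bonferroni')
print(results)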
Example #11
nsamples = 70
x0 = jnp.array([0.5, -0.75])

# State noise
Qt = jnp.eye(2) * 0.001
# Observed noise
Rt = jnp.eye(2) * 0.01

key = random.PRNGKey(314)
ekf = ds.ContinuousExtendedKalmanFilter(fz, fx, Qt, Rt)
sample_state, sample_obs, jump = ekf.sample(key, x0, T, nsamples)
mu_hist, V_hist = ekf.estimate(sample_state, sample_obs, jump, dt)

vmin, vmax, step = -1.5, 1.5 + 0.5, 0.5
X = np.mgrid[-1:1.5:step, vmin:vmax:step][::-1]
X_dot = jnp.apply_along_axis(fz, 0, X)

fig, ax = plt.subplots()
ax.plot(*sample_state.T, label="state space")
ax.scatter(*sample_obs.T,
           marker="+",
           c="tab:green",
           s=60,
           label="observations")
field = ax.streamplot(*X, *X_dot, density=1.1, color="#ccccccaa")
ax.legend()
plt.axis("equal")
ax.set_title("State Space")
pml.savefig("ekf-state-space.pdf")

fig, ax = plt.subplots()
Example #12
def mode(arr, axis=0, max_value=250):
    return jnp.apply_along_axis(
        lambda x: jnp.bincount(x, length=max_value).argmax(),
        axis=axis,
        arr=arr.astype(jnp.int32))
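A hedged usage sketch for mode: the column-wise mode of a small integer array (all values must stay below max_value so jnp.bincount's fixed length covers them):

# Toy labels; expected output is [1, 3, 7].
import jax.numpy as jnp

labels = jnp.array([[1, 3, 2],
                    [1, 3, 7],
                    [5, 3, 7],
                    [1, 2, 7]])
print(mode(labels, axis=0))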