Beispiel #1
0
    def test_clr(self):
        """Check ``clr`` on closed fixtures, on invalid input and for purity.

        The expected value of each composition is the log of its parts
        divided by their geometric mean.
        """
        def expected(parts):
            # exp(mean(log(x))) is the geometric mean of x
            return np.log(parts / np.exp(np.log(parts).mean()))

        comp_a = np.array([.2, .2, .6])
        comp_b = np.array([.4, .4, .2])

        npt.assert_allclose(clr(closure(self.data1)),
                            [expected(comp_a), expected(comp_b)])
        npt.assert_allclose(clr(closure(self.data2)), expected(comp_a))
        npt.assert_allclose(clr(closure(self.data5)),
                            [expected(comp_a), expected(comp_b)])

        for invalid in (self.bad1, self.bad2):
            with self.assertRaises(ValueError):
                clr(invalid)

        # clr must leave its argument untouched
        clr(self.data2)
        npt.assert_allclose(self.data2, np.array([2, 2, 6]))
Beispiel #2
0
    def test_multiplicative_replacement(self):
        """Check zero replacement on closed fixtures, bad input and purity."""
        exp_row = np.array([0.087273, 0.174545, 0.261818, 0.04, 0.436364])
        exp_table = np.array([
            [0.087273, 0.174545, 0.261818, 0.04, 0.436364],
            [0.092, 0.04, 0.04, 0.368, 0.46],
            [0.066667, 0.133333, 0.2, 0.266667, 0.333333]])
        tolerance = dict(rtol=1e-5, atol=1e-5)

        cases = ((self.data3, exp_table),
                 (self.data4, exp_row),
                 (self.data6, exp_table))
        for raw, wanted in cases:
            replaced = multiplicative_replacement(closure(raw))
            npt.assert_allclose(replaced, wanted, **tolerance)

        for invalid in (self.bad1, self.bad2):
            with self.assertRaises(ValueError):
                multiplicative_replacement(invalid)

        # the input array must not be modified by the call
        multiplicative_replacement(self.data4)
        npt.assert_allclose(self.data4, np.array([1, 2, 3, 0, 5]))
    def test_multiplicative_replacement(self):
        """Verify replaced compositions, error handling and non-mutation."""
        def check(raw, wanted):
            # compare the replaced, closed fixture against its reference
            npt.assert_allclose(multiplicative_replacement(closure(raw)),
                                wanted, rtol=1e-5, atol=1e-5)

        table_ref = np.array(
            [[0.087273, 0.174545, 0.261818, 0.04, 0.436364],
             [0.092, 0.04, 0.04, 0.368, 0.46],
             [0.066667, 0.133333, 0.2, 0.266667, 0.333333]])
        vector_ref = np.array([0.087273, 0.174545, 0.261818, 0.04, 0.436364])

        check(self.cdata3, table_ref)
        check(self.cdata4, vector_ref)
        check(self.cdata6, table_ref)

        with self.assertRaises(ValueError):
            multiplicative_replacement(self.bad1)
        with self.assertRaises(ValueError):
            multiplicative_replacement(self.bad2)

        # calling the function must not alter the fixture in place
        multiplicative_replacement(self.cdata4)
        npt.assert_allclose(self.cdata4, np.array([1, 2, 3, 0, 5]))
    def test_clr(self):
        """Compare ``clr`` with a hand-computed centred log ratio."""
        A = np.array([.2, .2, .6])
        B = np.array([.4, .4, .2])
        # log of each part over the geometric mean of its composition
        clr_a = np.log(A / np.exp(np.log(A).mean()))
        clr_b = np.log(B / np.exp(np.log(B).mean()))

        two_rows = clr(closure(self.cdata1))
        npt.assert_allclose(two_rows, [clr_a, clr_b])

        one_row = clr(closure(self.cdata2))
        npt.assert_allclose(one_row, clr_a)

        two_rows = clr(closure(self.cdata5))
        npt.assert_allclose(two_rows, [clr_a, clr_b])

        for invalid in (self.bad1, self.bad2):
            with self.assertRaises(ValueError):
                clr(invalid)

        # the fixture must keep its original counts after the call
        clr(self.cdata2)
        npt.assert_allclose(self.cdata2, np.array([2, 2, 6]))
Beispiel #5
0
    def test_closure_warning(self):
        """``closure`` is expected to raise ValueError for all-zero rows."""
        zero_row_inputs = (
            [0., 0., 0.],
            [[0., 0., 0.],
             [0., 5., 5.]],
        )
        for composition in zero_row_inputs:
            with self.assertRaises(ValueError):
                closure(composition)
    def test_closure_warning(self):
        """A vector of zeros, or a table with a zero row, must be rejected."""
        only_zeros = [0., 0., 0.]
        with_zero_row = [only_zeros, [0., 5., 5.]]

        with self.assertRaises(ValueError):
            closure(only_zeros)

        with self.assertRaises(ValueError):
            closure(with_zero_row)
Beispiel #7
0
def generate_band_table(mu, sigma, gradient, n_species,
                        lam, n_contaminants, library_size=10000):
    """ Generates a band table with normal variables.

    Each species follows a normal density along the gradient, centered on
    its optimum.  A fixed contaminant profile (exponential density sampled
    on ``[0, 1]``) is appended to every sample and the rows are closed.

    Parameters
    ----------
    mu : pd.Series or array_like
        Vector of species optimal positions along gradient.  Values are
        read positionally, so the index of a Series is ignored.
    sigma : float
        Scale of the species normal distribution (passed as ``scale`` to
        ``norm.pdf``, i.e. a standard deviation, not a variance).
    gradient : array
        Vector of gradient values.
    n_species : int
        Number of species to simulate.  Used to build the species column
        labels, so it is expected to equal ``len(mu)``.
    lam : float
       Decay constant for contaminant urn (assumes that the contaminant urn
       follows an exponential distribution).
    n_contaminants : int
       Number of contaminant species.
    library_size : int, optional
       Recorded in the metadata only; no counts are sampled here.

    Returns
    -------
    pd.DataFrame
       Ground truth table of proportions (samples by species and
       contaminants).
    pd.DataFrame
       Metadata with the gradient value and simulation settings of each
       sample, used for benchmarking.
    list of str
       Column names of the species (non-contaminant) features.
    """
    # Iterate over the raw values so that a Series with a non-default
    # index is not subject to label-based lookup.
    xs = [norm.pdf(gradient, loc=optimum, scale=sigma)
          for optimum in np.asarray(mu)]

    table = closure(np.vstack(xs).T)
    x = np.linspace(0, 1, n_contaminants)
    contaminant_urn = closure(expon.pdf(x, scale=lam))
    # every sample receives the same contaminant profile
    contaminant_urns = np.repeat(np.expand_dims(contaminant_urn, axis=0),
                                 table.shape[0], axis=0)
    table = closure(np.hstack((table, contaminant_urns)))
    s_ids = ['F%d' % i for i in range(n_species)]
    c_ids = ['X%d' % i for i in range(n_contaminants)]

    metadata = pd.DataFrame({'gradient': gradient})
    metadata['n_diff'] = len(mu)
    metadata['n_contaminants'] = n_contaminants
    metadata['library_size'] = library_size
    # back calculate the beta
    metadata['effect_size'] = np.max(mu) / np.max(gradient)
    metadata.index = ['S%d' % i for i in range(len(metadata.index))]
    table = pd.DataFrame(table)
    table.index = ['S%d' % i for i in range(len(table.index))]
    table.columns = s_ids + c_ids
    ground_truth = list(table.columns)[:n_species]
    return table, metadata, ground_truth
Beispiel #8
0
    def setUp(self):
        """Build the probability vectors used by the tests.

        Loads the BIOM table below (path relative to the working
        directory) and stores three vectors, each passed through
        ``closure``: the positive entries of one row of the transposed
        table, a constant vector, and an exponentially increasing vector
        of the same length.
        """
        data_dir = "../../data/tick/meshnick_tech_reps"
        biom_file = "%s/373_otu_table.biom" % data_dir

        table = load_table(biom_file)
        # index of the row taken from the transposed dense matrix
        Z = 1
        mat = np.array(table._get_sparse_data().todense()).T
        x = np.ravel(mat[Z, :])
        self.tick_pvals = closure(np.array(x[x > 0]))
        n_pvals = len(self.tick_pvals)
        self.uniform_pvals = closure(np.array([10000] * n_pvals))
        self.exponential_pvals = closure(
            np.exp(np.linspace(0, 4, n_pvals)))
    def setUp(self):
        """Prepare the closed probability vectors shared by the tests.

        One row of the transposed BIOM matrix is reduced to its positive
        entries and closed; a constant and an exponentially increasing
        vector of the same length are closed alongside it.  The table
        path is relative to the working directory.
        """
        data_dir = "../../data/tick/meshnick_tech_reps"
        biom_file = "%s/373_otu_table.biom" % data_dir

        table = load_table(biom_file)
        # row of the transposed dense matrix used as the reference profile
        Z = 1
        mat = np.array(table._get_sparse_data().todense()).T
        x = np.ravel(mat[Z, :])
        self.tick_pvals = closure(np.array(x[x > 0]))
        size = len(self.tick_pvals)
        self.uniform_pvals = closure(np.array([10000] * size))
        self.exponential_pvals = closure(np.exp(np.linspace(0, 4, size)))
Beispiel #10
0
 def test_exponential_uniform(self):
     """Bound ``brive`` estimates for a draw from the exponential urn."""
     depth = 500
     counts = np.random.multinomial(n=depth, pvals=self.exponential_pvals)
     estimates = brive(counts, replace_zeros=False)
     observed = closure(counts)
     total = estimates.sum()
     # estimates may undershoot the observed proportions only slightly
     npt.assert_array_less(observed - estimates, 1.1 / depth)
     self.assertLess(total, 1 - robbins(counts))
Beispiel #11
0
    def _fit(self):
        """Validate ``self.X_`` and compute its robust clr transform.

        The table is copied, cast to float and checked.  The logs of its
        closed values are then centred on the mean of the finite logs
        along the last axis.  Entries whose log is not finite (zeros in
        the input) are set to NaN.  The result is stored in ``self.X_sp``.

        Raises
        ------
        ValueError
            If the table contains negative, infinite or NaN values.

        Warns
        -----
        RuntimeWarning
            If the table contains no zero entries.
        """

        X_ = self.X_.copy().astype(float)

        if (X_ < 0).any():
            raise ValueError('Array Contains Negative Values')

        if np.count_nonzero(np.isinf(X_)) != 0:
            raise ValueError('Data-table contains either np.inf or -np.inf')

        if np.count_nonzero(np.isnan(X_)) != 0:
            raise ValueError('Data-table contains nans')

        # Warn when no entry equals zero, as the message states.  The
        # previous check (count_nonzero(X_) == 0) tested for an all-zero
        # table instead.
        if np.count_nonzero(X_ == 0) == 0:
            warnings.warn("Data-table contains no zeros.", RuntimeWarning)

        X_log = np.log(closure(np.array(X_)))
        # log(0) is -inf; mask those entries so they do not enter the mean
        nonfinite = ~np.isfinite(X_log)
        masked = np.ma.array(X_log, mask=nonfinite)
        # mean of the finite log values along the last axis
        gm = masked.mean(axis=-1, keepdims=True)
        centered = (masked - gm).squeeze().data
        # squeeze the mask too so it matches the squeezed result's shape
        centered[nonfinite.squeeze()] = np.nan
        self.X_sp = centered
Beispiel #12
0
    def test_permutative_f_scaled(self):
        """Run ``ancom`` with the permutative ANOVA test on closed counts."""
        sample_ids = ['s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8']
        feature_ids = ['b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7']
        counts = [[12, 11, 10, 10, 10, 10, 10],
                  [9, 11, 12, 10, 10, 10, 10],
                  [1, 11, 10, 11, 10, 5, 9],
                  [2, 11, 10, 11, 10, 5, 9],
                  [221, 210, 9, 10, 10, 10, 10],
                  [220, 210, 9, 10, 10, 10, 10],
                  [200, 220, 10, 10, 13, 10, 10],
                  [230, 210, 14, 10, 10, 10, 10]]

        test_table = pd.DataFrame(closure(counts),
                                  index=sample_ids,
                                  columns=feature_ids)
        test_cats = pd.Series([0, 0, 0, 0, 1, 1, 1, 1], index=sample_ids)

        # fix the seed: the permutation test draws random numbers
        np.random.seed(0)
        original_table = copy.deepcopy(test_table)
        original_cats = copy.deepcopy(test_cats)
        result = ancom(test_table, test_cats,
                       significance_test='permutative-anova')

        # neither the table nor the grouping may be altered by ancom
        assert_data_frame_almost_equal(original_table, test_table)
        pdt.assert_series_equal(original_cats, test_cats)

        rejected = np.array([True, True, False, False,
                             False, False, False], dtype=bool)
        exp = pd.DataFrame({'W': np.array([5, 5, 2, 2, 2, 2, 2]),
                            'reject': rejected},
                           index=feature_ids)
        assert_data_frame_almost_equal(result, exp)
Beispiel #13
0
def normal_noise(nf, ns, hodepth, kappa):
    """Draw Poisson-lognormal counts around normally distributed noise.

    An ``(nf, ns)`` matrix of absolute ``normal(1, 0.2)`` draws is closed
    per column, scaled by ``hodepth`` and used as the mean of a
    lognormal (log-scale sigma ``kappa``) that feeds a Poisson draw.
    """
    base = abs(normal(1, 0.2, (nf, ns)))
    expected = hodepth * closure(base.T).T
    # one Poisson-lognormal draw per column of the (nf, ns) matrix
    per_column = [poisson(lognormal(np.log(column), kappa))
                  for column in expected.T]
    return np.vstack(per_column).T
def resample_counts(X, depth, kappa=1):
    """Resample every row of ``X`` from a Poisson-lognormal model.

    Rows of ``X`` are closed and scaled by ``depth`` to give the expected
    counts; ``kappa`` is the log-scale sigma of the lognormal draw.
    """
    expected = depth * closure(X)

    def draw(row):
        # lognormal rate around the expected counts, then Poisson counts
        return poisson(lognormal(np.log(expected[row, :]), kappa))

    return np.vstack([draw(row) for row in range(len(X))])
Beispiel #15
0
def train_count_parameters(data):
    """
    Estimate count noise parameters from a noisy count matrix.

    A single underlying urn is assumed, so the multinomial probabilities
    are taken from the counts aggregated over all samples.

    Parameters
    ----------
    data : array_like
       Count matrix with `n` rows (samples) and `m` columns (species).

    Returns
    -------
    lam: float
       Mean sequencing depth, usable as a Poisson parameter for depths.
    p: np.array
       Vector of multinomial probabilities.
    """
    # mean of the per-sample totals
    lam = data.sum(axis=1).mean()
    # per-species totals, closed into proportions
    species_totals = data.sum(axis=0)
    return lam, closure(species_totals)
Beispiel #16
0
 def test_exponential_uniform(self):
     """Check ``brive`` against observed proportions and ``robbins``."""
     n_draws = 500
     sample = np.random.multinomial(n=n_draws,
                                    pvals=self.exponential_pvals)
     corrected = brive(sample, replace_zeros=False)
     proportions = closure(sample)
     mass = corrected.sum()
     # the corrected values may fall below the proportions only slightly
     npt.assert_array_less(proportions - corrected, 1.1 / n_draws)
     self.assertLess(mass, 1 - robbins(sample))
Beispiel #17
0
    def test_inverse_rclr(self):
        """Round trip: inverting the rclr of cdata1 recovers its closure.

        The recovered values are rounded to one decimal before comparing.
        """
        transformed = self._rclr.fit_transform(self.cdata1)
        expected = closure(self.cdata1)
        recovered = np.around(self._inv.fit_transform(transformed), 1)
        npt.assert_allclose(expected, recovered)
        # inverse can not take zero, nan, or inf values (value error)
Beispiel #18
0
def Subsample(X_noise, spar, num_samples):
    """ yij ~ PLN( lambda_{ij}, /phi )

    Columns of ``X_noise`` are closed and scaled by ``spar`` to give the
    expected counts; each of the first ``num_samples`` columns is then
    redrawn from a Poisson-lognormal with log-scale sigma 1.
    """
    # subsample
    rates = spar * closure(X_noise.T).T
    draws = [poisson(lognormal(np.log(rates[:, col]), 1))
             for col in range(num_samples)]
    # no additional sparsity is introduced here
    return np.vstack(draws).T
Beispiel #19
0
    def test_composition_variable_features(self):
        """Check the second simulation yielded by the generator.

        Compares the table, metadata and ground truth of the second item
        of ``compositional_variable_features_generator`` with hand-built
        expectations.  Uses only pandas/numpy calls that exist in both
        old and current releases (``reindex_axis`` and
        ``check_less_precise`` were removed from pandas).
        """
        gen = compositional_variable_features_generator(
            max_changing=2, fold_change=2, reps=5,
            intervals=2, n_species=5,
            fold_balance=False,
            n_contaminants=2, lam=0.1)

        # Skip the first simulation; the expectations below describe the
        # second one.
        next(gen)
        table, metadata, truth = next(gen)

        sample_ids = ['S0', 'S1', 'S2', 'S3', 'S4',
                      'S5', 'S6', 'S7', 'S8', 'S9']
        contaminants = [0.499977, 0.00002269]
        group0_row = np.array([0.142857]*2 + [0.071429]*3 + contaminants)
        group1_row = np.array([0.071429]*3 + [0.142857]*2 + contaminants)
        exp_table = pd.DataFrame(
            closure(np.vstack([group0_row] * 5 + [group1_row] * 5)),
            index=sample_ids,
            columns=['F0', 'F1', 'F2', 'F3', 'F4', 'X0', 'X1']
        )

        # Labels must match exactly; values are compared with the
        # tolerance that check_less_precise=True used to stand for.
        self.assertListEqual(list(table.index), list(exp_table.index))
        self.assertListEqual(list(table.columns), list(exp_table.columns))
        np.testing.assert_allclose(table.values, exp_table.values,
                                   rtol=5e-4, atol=5e-4)

        exp_metadata = pd.DataFrame(
            {'group': [0] * 5 + [1] * 5,
             'n_diff': [4] * 10,
             'effect_size': [2] * 10,
             'library_size': [10000] * 10
            },
            index=sample_ids,
        )

        # compare with the columns of both frames in sorted order
        metadata = metadata.reindex(columns=sorted(metadata.columns))
        exp_metadata = exp_metadata.reindex(
            columns=sorted(exp_metadata.columns))
        pdt.assert_frame_equal(metadata, exp_metadata)

        exp_truth = ['F0', 'F1', 'F3', 'F4']
        self.assertListEqual(truth, exp_truth)
    def test_centralize(self):
        """Check ``centralize`` output, input validation and purity."""
        centered = np.array([[0.22474487, 0.22474487, 0.55051026],
                             [0.41523958, 0.41523958, 0.16952085]])

        for fixture in (self.data1, self.data5):
            npt.assert_allclose(centralize(closure(fixture)), centered)

        for invalid in (self.bad1, self.bad2):
            with self.assertRaises(ValueError):
                centralize(invalid)

        # the fixture must still hold its raw counts afterwards
        centralize(self.data1)
        npt.assert_allclose(self.data1, np.array([[2, 2, 6], [4, 4, 2]]))
Beispiel #21
0
def gradient(nf, ns, kappa=0.1, depth=100, sigma=2.0, g_min=0, gmax=10):
    """ poisson-lognormal simulation

    ``ns`` gradient positions are spaced evenly on ``[g_min, gmax]`` and
    ``nf`` optima on the fixed range ``[0, 10]``; both go to ``chain``
    together with a per-feature ``sigma``.  Returns the ``chain`` output
    and counts drawn around its column-closed, ``depth``-scaled values.
    """
    positions = np.linspace(g_min, gmax, ns)
    optima = np.linspace(0, 10, nf)
    x = chain(positions, mu=optima, sigma=[sigma] * nf)
    rates = depth * closure(x.T).T
    # one Poisson-lognormal draw per sample column
    draws = [poisson(lognormal(np.log(rates[:, col]), kappa))
             for col in range(ns)]
    return x, np.vstack(draws).T
    def test_closure(self):
        """Check ``closure`` proportions, input validation and purity."""
        closed_rows = np.array([[.2, .2, .6], [.4, .4, .2]])
        closed_vector = np.array([.2, .2, .6])

        npt.assert_allclose(closure(self.cdata1), closed_rows)
        npt.assert_allclose(closure(self.cdata2), closed_vector)
        npt.assert_allclose(closure(self.cdata5), closed_rows)

        for invalid in (self.bad1, self.bad2):
            with self.assertRaises(ValueError):
                closure(invalid)

        # closing must not overwrite the raw counts
        closure(self.cdata2)
        npt.assert_allclose(self.cdata2, np.array([2, 2, 6]))
    def test_centralize(self):
        """Compare ``centralize`` with reference values for two fixtures."""
        reference = np.array([[0.22474487, 0.22474487, 0.55051026],
                              [0.41523958, 0.41523958, 0.16952085]])

        first = centralize(closure(self.cdata1))
        npt.assert_allclose(first, reference)
        second = centralize(closure(self.cdata5))
        npt.assert_allclose(second, reference)

        for invalid in (self.bad1, self.bad2):
            with self.assertRaises(ValueError):
                centralize(invalid)

        # the raw counts must survive the call unchanged
        centralize(self.cdata1)
        raw_counts = np.array([[2, 2, 6], [4, 4, 2]])
        npt.assert_allclose(self.cdata1, raw_counts)
    def test_power(self):
        """Check ``power`` with exponent 2, bad input and purity."""
        squared_rows = np.array([[.04/.44, .04/.44, .36/.44],
                                 [.16/.36, .16/.36, .04/.36]])
        squared_vector = np.array([.04, .04, .36])/.44

        npt.assert_allclose(power(closure(self.data1), 2), squared_rows)
        npt.assert_allclose(power(closure(self.data2), 2), squared_vector)
        npt.assert_allclose(power(closure(self.data5), 2), squared_rows)

        with self.assertRaises(ValueError):
            power(self.bad1, 2)

        # the fixture must not be modified in place
        power(self.data2, 4)
        npt.assert_allclose(self.data2, np.array([2, 2, 6]))
    def test_centralize(self):
        """``centralize`` must match the reference and keep its input."""
        def check(fixture):
            # both fixtures are expected to centre to the same values
            npt.assert_allclose(
                centralize(closure(fixture)),
                np.array([[0.22474487, 0.22474487, 0.55051026],
                          [0.41523958, 0.41523958, 0.16952085]]))

        check(self.cdata1)
        check(self.cdata5)

        with self.assertRaises(ValueError):
            centralize(self.bad1)
        with self.assertRaises(ValueError):
            centralize(self.bad2)

        # no in-place modification of the argument
        centralize(self.cdata1)
        npt.assert_allclose(self.cdata1, np.array([[2, 2, 6], [4, 4, 2]]))
Beispiel #26
0
def random_noise(nf, ns, hedepth, kappa):
    """ random uniform-lognormal-poisson normally dist. noise

    Starts from absolute ``normal(1, 0.2)`` draws of shape ``(nf, ns)``,
    perturbs them with normal noise whose scale is 1 except at 5000
    randomly drawn cells (with repeats possible) where it is
    ``hedepth``, then draws Poisson-lognormal counts around the
    column-closed result scaled by ``hedepth``.
    """
    base = abs(normal(1, 0.2, (nf, ns)))
    scale = np.ones_like(base)
    # inflate the noise scale at randomly chosen (row, column) cells
    rows = randint(0, scale.shape[0], 5000)
    cols = randint(0, scale.shape[1], 5000)
    scale[rows, cols] = hedepth
    perturbed = abs(normal(base, scale))
    expected = hedepth * closure(perturbed.T).T
    per_column = [poisson(lognormal(np.log(column), kappa))
                  for column in expected.T]
    return np.vstack(per_column).T
Beispiel #27
0
    def test_closure(self):
        """``closure`` must give the expected proportions and be pure."""
        cases = (
            (self.data1, np.array([[.2, .2, .6], [.4, .4, .2]])),
            (self.data2, np.array([.2, .2, .6])),
            (self.data5, np.array([[.2, .2, .6], [.4, .4, .2]])),
        )
        for raw, proportions in cases:
            npt.assert_allclose(closure(raw), proportions)

        with self.assertRaises(ValueError):
            closure(self.bad1)

        with self.assertRaises(ValueError):
            closure(self.bad2)

        # the argument must keep its raw counts
        closure(self.data2)
        npt.assert_allclose(self.data2, np.array([2, 2, 6]))
Beispiel #28
0
 def test_ilr_inv_basis(self):
     """``ilr_inv`` with an explicit basis recovers two-part compositions."""
     parts = [[1., 10.],
              [1.14141414, 9.90909091],
              [1.28282828, 9.81818182],
              [1.42424242, 9.72727273],
              [1.56565657, 9.63636364]]
     exp = closure(np.array(parts))
     basis = np.array([[0.80442968, 0.19557032]])
     # single balance per sample: scaled log ratio of the two parts
     balances = [np.log(first / second)*np.sqrt(1/2)
                 for first, second in parts]
     table = np.array([balances]).T
     res = ilr_inv(table, basis=basis)
     npt.assert_allclose(res, exp)
Beispiel #29
0
def aitchison_transform_part(df, use_multiplicative_replacement=True):
    """
    Aitchison transformation on df with all columns belonging to same batch.

    df should consist of all samples tagged together in one channel (i.e. A549_S_rep1 etc.)

    Parameters
    ----------
    df : pd.DataFrame
        Table to transform; its index and columns are kept on the result.
    use_multiplicative_replacement : bool, optional
        If truthy, apply ``multiplicative_replacement`` to ``df``;
        otherwise apply ``closure``.

    Returns
    -------
    pd.DataFrame
        Transformed values with the index and columns of ``df``.
    """
    # plain truthiness test instead of comparing against True
    if use_multiplicative_replacement:
        transform = multiplicative_replacement
    else:
        transform = closure
    return pd.DataFrame(transform(df), index=df.index, columns=df.columns)
Beispiel #30
0
 def test_ilr_inv_basis(self):
     """Invert one balance per sample with a user-supplied basis."""
     numerators = [1., 1.14141414, 1.28282828, 1.42424242, 1.56565657]
     denominators = [10., 9.90909091, 9.81818182, 9.72727273, 9.63636364]
     pairs = list(zip(numerators, denominators))

     exp = closure(np.array([[num, den] for num, den in pairs]))
     basis = np.array([[0.80442968, 0.19557032]])
     # column vector holding the scaled log ratio of each pair
     table = np.array([[np.log(num / den)*np.sqrt(1/2)
                        for num, den in pairs]]).T
     res = ilr_inv(table, basis=basis)
     npt.assert_allclose(res, exp)
Beispiel #31
0
def partition_metabolites(uU, sigmaU, uV, sigmaV, num_metabolites, latent_dim,
                          microbe_partition, metabolite_in, state):
    """ Split up a single chemical abundances into multiple subspecies.

    Parameters
    ----------
    uU, sigmaU, uV, sigmaV : float, float, float, float
        Means and scales of the normal draws for the latent matrices
        ``U`` and ``V`` of the conditional probability matrix.
    num_metabolites : int
        Number of chemicals to be represented
    latent_dim : int
        Number of latent dimensions in conditional probability
        matrix.
    microbe_partition : np.array
        The input microbial abundances for multiple strains.  The number
        of strains is taken from its second dimension.
    metabolite_in : np.array
        The input intensities for a single chemicals
    state : numpy random state
        Random number generator

    Returns
    -------
    U: np.array
        Microbial latent variables.
    V: np.array
        Metabolomic latent variables.
    metabolites_out: np.array
        Multiple chemical abundances.
    """
    num_microbes = microbe_partition.shape[1]

    U = state.normal(uU, sigmaU, size=(num_microbes, latent_dim))
    V = state.normal(uV, sigmaV, size=(latent_dim, num_metabolites))

    # Randomly generate conditional probability matrices
    # Question : how to incorporate the existing abundances?
    probs = softmax(U @ V)

    # for each submicrobe strain, generate metabolite distribution
    metabolite_partition = closure(microbe_partition @ probs)

    # Scale every sample's partition by that sample's input intensity
    # (reshape makes the intensities a column vector for broadcasting).
    metabolites_out = np.multiply(metabolite_partition,
                                  metabolite_in.reshape(-1, 1))

    return U, V, metabolites_out
    def test_ilr_inv(self):
        """Check the ilr round trip, the default basis and bad bases."""
        composition = closure(self.cdata7)
        round_trip = ilr_inv(ilr(composition))
        npt.assert_array_almost_equal(round_trip, composition)

        tolerance = dict(rtol=1e-04, atol=1e-06)
        npt.assert_allclose(ilr_inv(np.identity(3)), self.ortho1,
                            **tolerance)

        with self.assertRaises(ValueError):
            ilr_inv(self.cdata1, basis=self.cdata1)

        # the argument must not be modified in place
        ilr_inv(self.cdata1)
        raw_counts = np.array([[2, 2, 6], [4, 4, 2]])
        npt.assert_allclose(self.cdata1, raw_counts)
    def test_ilr_inv(self):
        """``ilr_inv`` must undo ``ilr`` and reject an invalid basis."""
        closed = closure(self.cdata7)
        npt.assert_array_almost_equal(ilr_inv(ilr(closed)), closed)

        # inverting the identity is expected to return ortho1
        inverted_identity = ilr_inv(np.identity(3))
        npt.assert_allclose(inverted_identity, self.ortho1,
                            rtol=1e-04, atol=1e-06)

        with self.assertRaises(ValueError):
            ilr_inv(self.cdata1, basis=self.cdata1)

        # the input must be left untouched
        ilr_inv(self.cdata1)
        npt.assert_allclose(self.cdata1, np.array([[2, 2, 6], [4, 4, 2]]))
    def test_ilr(self):
        """Check ``ilr`` values, the default basis and bad bases."""
        closed = closure(self.cdata7)
        expected_coords = np.array([0.70710678, 0.40824829])
        npt.assert_array_almost_equal(ilr(closed), expected_coords)

        # Should give same result as inner
        tolerance = dict(rtol=1e-04, atol=1e-06)
        npt.assert_allclose(ilr(self.ortho1), np.identity(3), **tolerance)

        with self.assertRaises(ValueError):
            ilr(self.cdata1, basis=self.cdata1)

        # the argument must not be modified in place
        ilr(self.cdata1)
        raw_counts = np.array([[2, 2, 6], [4, 4, 2]])
        npt.assert_allclose(self.cdata1, raw_counts)
    def test_ilr(self):
        """``ilr`` must match reference coordinates and keep its input."""
        coords = ilr(closure(self.cdata7))
        npt.assert_array_almost_equal(coords,
                                      np.array([0.70710678, 0.40824829]))

        # Should give same result as inner
        basis_coords = ilr(self.ortho1)
        npt.assert_allclose(basis_coords, np.identity(3),
                            rtol=1e-04, atol=1e-06)

        with self.assertRaises(ValueError):
            ilr(self.cdata1, basis=self.cdata1)

        # the input must be left untouched
        ilr(self.cdata1)
        npt.assert_allclose(self.cdata1,
                            np.array([[2, 2, 6],
                                      [4, 4, 2]]))
Beispiel #36
0
def multinomial_bioms(k, D, N, M, min_sv=0.11, max_sv=5.0, sigma_sq=0.1):
    """ Simulates biom tables from multinomial.

    Parameters
    ----------
    k : int
       Number of latent dimensions.
    D : int
       Number of microbes.
    N : int
       Number of samples.
    M : int
       Average sequencing depth.
    min_sv : float, optional
       Smallest of the ``k`` evenly spaced eigenvalues.
    max_sv : float, optional
       Largest of the ``k`` evenly spaced eigenvalues.
    sigma_sq : float, optional
       Noise variance.  It is subtracted from the eigenvalues before
       their square root is taken, and its square root is the scale of
       the normal noise on ``eta``.

    Returns
    -------
    dict of np.array
       Ground truth parameters.
    """
    # k eigenvalues spaced evenly between min_sv and max_sv
    eigs = min_sv + (max_sv - min_sv) * np.linspace(0, 1, k)
    # first k columns of a random orthogonal matrix of size D - 1
    eigvectors = ortho_group.rvs(D - 1)[:, :k]
    W = np.matmul(eigvectors, np.diag(np.sqrt(eigs - sigma_sq)))
    sigma = np.sqrt(sigma_sq)
    z = np.random.normal(size=(N, k))
    eta = np.random.normal(np.matmul(z, W.T), sigma).astype(np.float32)
    tree = random_linkage(D)
    Psi = _balance_basis(tree)[0]
    # map the latent coordinates through the basis and close to proportions
    prob = closure(np.exp(eta @ Psi))
    depths = np.random.poisson(M, size=N)
    Y = np.vstack([np.random.multinomial(depth, proportions)
                   for depth, proportions in zip(depths, prob)])
    return {
        'sigma': sigma,
        'W': W,
        'Psi': Psi,
        'tree': tree,
        'eta': eta,
        'z': z,
        'Y': Y,
        'depths': depths,
        'eigs': eigs,
        'eigvectors': eigvectors,
    }
Beispiel #37
0
 def test_ancom_basic_proportions(self):
     """Run ``ancom`` without correction on a table of proportions."""
     # Converts from counts to proportions
     test_table = pd.DataFrame(closure(self.table1))
     test_cats = pd.Series(self.cats1)
     original_table = copy.deepcopy(test_table)
     original_cats = copy.deepcopy(test_cats)

     result = ancom(test_table,
                    test_cats,
                    multiple_comparisons_correction=None)

     # neither the table nor the grouping may be altered by ancom
     assert_data_frame_almost_equal(original_table, test_table)
     pdt.assert_series_equal(original_cats, test_cats)

     rejected = np.array([True, True, False, False,
                          False, False, False], dtype=bool)
     exp = pd.DataFrame({'W': np.array([5, 5, 2, 2, 2, 2, 2]),
                         'reject': rejected})
     assert_data_frame_almost_equal(result, exp)
Beispiel #38
0
 def test_ancom_basic_proportions(self):
     """``ancom`` on closed counts: expected W/reject and untouched input."""
     # Converts from counts to proportions
     proportions = closure(self.table1)
     test_table = pd.DataFrame(proportions)
     original_table = copy.deepcopy(test_table)
     test_cats = pd.Series(self.cats1)
     original_cats = copy.deepcopy(test_cats)

     result = ancom(test_table, test_cats,
                    multiple_comparisons_correction=None)

     # the input table must be unchanged
     assert_data_frame_almost_equal(original_table, test_table)
     # the input grouping must be unchanged
     pdt.assert_series_equal(original_cats, test_cats)

     expected_columns = {
         'W': np.array([5, 5, 2, 2, 2, 2, 2]),
         'reject': np.array([True, True, False, False,
                             False, False, False], dtype=bool),
     }
     exp = pd.DataFrame(expected_columns)
     assert_data_frame_almost_equal(result, exp)
Beispiel #39
0
def split_balance(balance, tree):
    """ Splits a balance into its log ratio components.

    Parameters
    ----------
    balance : pd.Series
        A vector corresponding to a single balance.  Its values are
        split into numerator and denominator components.
    tree : tree object
        Tree that is searched with ``tree.find(balance.name)`` for the
        node carrying this balance (presumably a ``skbio.TreeNode`` --
        confirm against callers).

    Returns
    -------
    pd.DataFrame
        Dataframe where the first column contains the numerator and the
        second column contains the denominator of the balance.  The
        columns are named after the two children of the node and the
        index is ``balance.index``.

    Raises
    ------
    ValueError
        If the node found for ``balance.name`` is a tip.

    Note
    ----
    The balance must have a name associated with it.
    """
    def _subtree_size(child):
        # A tip counts as one; otherwise count the tips underneath it.
        if child.is_tip():
            return 1
        return sum(1 for _ in child.tips())

    node = tree.find(balance.name)
    if node.is_tip():
        raise ValueError("%s is not a balance." % balance.name)

    numerator = node.children[0]
    denominator = node.children[1]
    n_num = _subtree_size(numerator)
    n_den = _subtree_size(denominator)

    # Scale the balance down by a factor derived from the two subtree
    # sizes before exponentiating.
    scale = np.sqrt((n_num * n_den) / (n_num + n_den))
    ratio = np.exp(np.expand_dims(balance.values, axis=1) / scale)

    # Pair each ratio with 1 and let closure() normalize the pair.
    unit = np.ones((len(ratio), 1))
    proportions = closure(np.hstack((ratio, unit)))
    return pd.DataFrame(proportions,
                        columns=[numerator.name, denominator.name],
                        index=balance.index)
Beispiel #40
0
def split_balance(balance, tree):
    """ Splits a balance into its log ratio components.

    Parameters
    ----------
    balance : pd.Series
        Values of a single balance; they are separated into a numerator
        part and a denominator part.
    tree : tree object
        Tree in which the node named ``balance.name`` is looked up via
        ``tree.find`` (presumably a ``skbio.TreeNode`` -- confirm
        against callers).

    Returns
    -------
    pd.DataFrame
        Dataframe where the first column contains the numerator and the
        second column contains the denominator of the balance.  Column
        labels are the names of the node's first and second child; the
        index is taken from `balance`.

    Raises
    ------
    ValueError
        If the node looked up for the balance is a tip.

    Note
    ----
    The balance must have a name associated with it.
    """
    target = tree.find(balance.name)
    if target.is_tip():
        raise ValueError("%s is not a balance." % balance.name)

    top, bottom = target.children[0], target.children[1]
    # Number of tips on each side; a child that is itself a tip counts once.
    n_top = 1 if top.is_tip() else len(list(top.tips()))
    n_bottom = 1 if bottom.is_tip() else len(list(bottom.tips()))

    column = np.expand_dims(balance.values, axis=1)
    # need to scale down by the number of children in subtrees
    norm = np.sqrt((n_top * n_bottom) / (n_top + n_bottom))
    exponentiated = np.exp(column / norm)
    ones = np.ones((len(exponentiated), 1))
    closed = closure(np.hstack((exponentiated, ones)))
    return pd.DataFrame(closed, columns=[top.name, bottom.name],
                        index=balance.index)
Beispiel #41
0
 def test_multinomial_sample(self):
     """A seeded multinomial_sample call reproduces the recorded counts."""
     rng = RandomState(0)
     first_row = [
         8.76415025e-03, 4.97385694e-02, 1.40955938e-01, 1.99471140e-01,
         1.40955938e-01, 4.97385694e-02, 8.76415025e-03, 7.71139498e-04,
         3.38815049e-05, 7.43359757e-07,
     ]
     second_row = [
         7.43359757e-07, 3.38815049e-05, 7.71139498e-04, 8.76415025e-03,
         4.97385694e-02, 1.40955938e-01, 1.99471140e-01, 1.40955938e-01,
         4.97385694e-02, 8.76415025e-03,
     ]
     # Pass both rows through closure() before sampling.
     probs = closure(np.array([first_row, second_row]))
     lam = 5
     observed = multinomial_sample(probs, lam, rng)
     expected = np.array([
         [0, 2, 3, 3, 0, 1, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 3, 1, 1, 0, 0],
     ])
     npt.assert_allclose(observed, expected)
Beispiel #42
0
    def test_ilr_basis_isomorphism(self):
        """ilr and ilr_inv undo each other when a basis is supplied."""
        basis = np.array([[0.80442968, 0.19557032]])

        # Start from coordinates: ilr(ilr_inv(.)) must return them.
        parts = [(1, 10),
                 (1.14141414, 9.90909091),
                 (1.28282828, 9.81818182),
                 (1.42424242, 9.72727273),
                 (1.56565657, 9.63636364)]
        coords = np.array([[np.log(num / den) * np.sqrt(1 / 2)
                            for num, den in parts]]).T
        roundtrip = ilr(ilr_inv(coords, basis=basis), basis=basis)
        npt.assert_allclose(roundtrip, coords.squeeze())

        # Start from compositions: ilr_inv(ilr(.)) must return the
        # closed version of the input.
        comps = np.array([[1., 10.],
                          [1.14141414, 9.90909091],
                          [1.28282828, 9.81818182],
                          [1.42424242, 9.72727273],
                          [1.56565657, 9.63636364]])
        as_column = np.atleast_2d(ilr(comps, basis=basis)).T
        recovered = ilr_inv(as_column, basis=basis)
        npt.assert_allclose(recovered, closure(comps.squeeze()))
    def test_ilr_basis_isomorphism(self):
        """Round-trip ilr / ilr_inv in both directions with a basis."""
        basis = np.array([[0.80442968, 0.19557032]])
        half_root = np.sqrt(1 / 2)

        # Direction 1: coordinates -> composition -> coordinates.
        coordinates = np.array([[
            np.log(1 / 10) * half_root,
            np.log(1.14141414 / 9.90909091) * half_root,
            np.log(1.28282828 / 9.81818182) * half_root,
            np.log(1.42424242 / 9.72727273) * half_root,
            np.log(1.56565657 / 9.63636364) * half_root,
        ]]).T
        composition = ilr_inv(coordinates, basis=basis)
        coordinates_back = ilr(composition, basis=basis)
        npt.assert_allclose(coordinates_back, coordinates.squeeze())

        # Direction 2: composition -> coordinates -> composition.
        raw = np.array([
            [1., 10.],
            [1.14141414, 9.90909091],
            [1.28282828, 9.81818182],
            [1.42424242, 9.72727273],
            [1.56565657, 9.63636364],
        ])
        transformed = np.atleast_2d(ilr(raw, basis=basis)).T
        raw_back = ilr_inv(transformed, basis=basis)
        npt.assert_allclose(raw_back, closure(raw.squeeze()))
Beispiel #44
0
def variation_matrix(X):
    r""" Calculate Aitchison variation matrix.

    This calculates the Aitchison variation matrix.  Given a compositional
    matrix :math:`X`, and columns :math:`i` and :math:`j`, the :math:`ij` entry
    in the variation matrix of :math:`X` is given by

    .. math::
        V_{ij} = \frac{1}{2} var(\ln \frac{x_i}{x_j})

    Parameters
    ----------
    X : pd.DataFrame
        Contingency table where there are n rows corresponding to samples
        and p columns corresponding to features.

    Returns
    -------
    skbio.DistanceMatrix
        Variation matrix of size p x p, with one row and one column per
        column of `X`; the ids are ``X.columns``.

    References
    ----------
    .. [1] V. Pawlowsky-Glahn, J. J. Egozcue, R. Tolosana-Delgado (2015),
       Modeling and Analysis of Compositional Data, Wiley, Chichester, UK

    .. [2] J. J. Egozcue, V. Pawlowsky-Glahn (2004), Groups of Parts and
       Their Balances in Compositional Data Analysis, Mathematical Geology
    """
    n_features = X.shape[1]
    v = np.zeros((n_features, n_features))
    # Take the logarithm of the closed table once instead of once per
    # column pair inside the loop.
    log_x = np.log(closure(X))
    # Only the strict lower triangle is filled; the diagonal stays zero.
    for i in range(n_features):
        for j in range(i):
            v[i, j] = np.var(log_x[:, i] - log_x[:, j])
    # Making matrix symmetric since V(ln (x/y) ) = V(ln (y/x) )
    # Also dividing by 2, to ensure unit norm for balances.
    # See Eqn 4 in [2]
    return DistanceMatrix((v + v.T) / 2, ids=X.columns)
Beispiel #45
0
    def split_balance(self, balance_name):
        """ Splits a balance into its log ratio components.

        Parameters
        ----------
        balance_name : str
            Name of the internal node in ``self.tree`` whose balance,
            read from ``self.balances[balance_name]``, is split.

        Returns
        -------
        pd.DataFrame
            Dataframe where the first column contains the numerator and the
            second column contains the denominator of the balance.  The
            columns are named after the node's two children and the index
            is ``self.balances.index``.

        Raises
        ------
        ValueError
            If the node named `balance_name` is a tip.
        """
        node = self.tree.find(balance_name)
        if node.is_tip():
            raise ValueError("%s is not a balance." % balance_name)

        left = node.children[0]
        right = node.children[1]
        # Tip count of each child; a child that is a tip counts as one.
        n_left, n_right = [
            1 if child.is_tip() else sum(1 for _ in child.tips())
            for child in (left, right)
        ]

        values = np.expand_dims(self.balances[balance_name].values, axis=1)
        # need to scale down by the number of children in subtrees
        scale = np.sqrt((n_left * n_right) / (n_left + n_right))
        ratio = np.exp(values / scale)
        # Pair each ratio with 1 and let closure() normalize the pair.
        paired = np.hstack((ratio, np.ones((len(ratio), 1))))
        return pd.DataFrame(closure(paired),
                            columns=[left.name, right.name],
                            index=self.balances.index)
Beispiel #46
0
    def test_perturb_inv(self):
        """Check perturb_inv against perturb and its input validation."""
        # Inverse-perturbing by [.1, .1, .1] must match perturbing by
        # [10, 10, 10].
        observed = perturb_inv(closure(self.data1), closure([.1, .1, .1]))
        reference = perturb(closure(self.data1), closure([10, 10, 10]))
        npt.assert_allclose(observed, reference)

        # Inverse-perturbing by [1, 1, 1] must give these proportions.
        observed = perturb_inv(closure(self.data1), closure([1, 1, 1]))
        expected = closure([[.2, .2, .6],
                            [.4, .4, .2]])
        npt.assert_allclose(observed, expected)

        # Same check as the first one, but with data5 as the first input.
        observed = perturb_inv(closure(self.data5), closure([.1, .1, .1]))
        reference = perturb(closure(self.data1), closure([10, 10, 10]))
        npt.assert_allclose(observed, reference)

        with self.assertRaises(ValueError):
            perturb_inv(closure(self.data1), self.bad1)

        # make sure that inplace modification is not occurring
        perturb_inv(self.data2, [1, 2, 3])
        untouched = np.array([2, 2, 6])
        npt.assert_allclose(self.data2, untouched)
Beispiel #47
0
    def test_perturb(self):
        """Check perturb on known inputs, bad input, and input mutation.

        The original repeated the ``[10, 10, 20]`` case twice verbatim;
        the redundant copy is dropped here, every distinct case is kept.
        """
        # Perturbing by [1, 1, 1] must leave the closed data unchanged.
        pmat = perturb(closure(self.data1),
                       closure(np.array([1, 1, 1])))
        npt.assert_allclose(pmat,
                            np.array([[.2, .2, .6],
                                      [.4, .4, .2]]))

        # A perturbation with unequal parts.
        pmat = perturb(closure(self.data1),
                       closure(np.array([10, 10, 20])))
        npt.assert_allclose(pmat,
                            np.array([[.125, .125, .75],
                                      [1./3, 1./3, 1./3]]))

        # data2 is compared against a one-dimensional expected result.
        pmat = perturb(closure(self.data2),
                       closure([1, 2, 1]))
        npt.assert_allclose(pmat, np.array([1./6, 2./6, 3./6]))

        # data5 must give the same result as the first data1 case above.
        pmat = perturb(closure(self.data5),
                       closure(np.array([1, 1, 1])))
        npt.assert_allclose(pmat,
                            np.array([[.2, .2, .6],
                                      [.4, .4, .2]]))

        with self.assertRaises(ValueError):
            perturb(closure(self.data5), self.bad1)

        # make sure that inplace modification is not occurring
        perturb(self.data2, [1, 2, 3])
        npt.assert_allclose(self.data2, np.array([2, 2, 6]))