コード例 #1
0
ファイル: test_misc.py プロジェクト: BANSHEE-/scikit-bio
    def test_too_few_permutations(self):
        # Each permutation count below is paired with the exact message that
        # p_value_to_str is expected to return in place of a formatted
        # p-value.
        cases = (
            (9, 'Too few permutations to compute p-value '
                '(permutations = 9)'),
            (1, 'Too few permutations to compute p-value '
                '(permutations = 1)'),
            (0, 'Too few permutations to compute p-value '
                '(permutations = 0)'),
        )

        for num_permutations, expected in cases:
            observed = p_value_to_str(self.p_value, num_permutations)
            self.assertEqual(observed, expected)
コード例 #2
0
ファイル: test_misc.py プロジェクト: BANSHEE-/scikit-bio
    def test_valid_input(self):
        # (p-value, number of permutations, expected formatted string).
        # The expected strings show the formatted precision growing with the
        # number of permutations (two decimals at 100/250, three at 999/1000).
        cases = (
            (self.p_value, 100, '0.12'),
            (self.p_value, 250, '0.12'),
            (self.p_value, 1000, '0.119'),
            (0.0055623489, 999, '0.006'),
        )

        for p_value, num_permutations, expected in cases:
            observed = p_value_to_str(p_value, num_permutations)
            self.assertEqual(observed, expected)
コード例 #3
0
ファイル: _mantel.py プロジェクト: BANSHEE-/scikit-bio
def pwmantel(dms, labels=None, strict=True, lookup=None, method='pearson',
             permutations=999, alternative='two-sided'):
    """Run Mantel tests for every pair of distance matrices.

    Runs a Mantel test for each pair of distance matrices and collates the
    results in a data frame. Distance matrices do not need to be in the same
    ID order (contrary to how the ``mantel`` function behaves). Distance
    matrices will be re-ordered prior to running each pairwise test, and if
    ``strict=False``, IDs that don't match between a pair of distance matrices
    will be dropped prior to running the test (otherwise a ``ValueError`` will
    be raised if there are non-matching IDs between any pair of distance
    matrices).

    Parameters
    ----------
    dms : iterable of DistanceMatrix objects
        DistanceMatrix instances to perform pairwise Mantel tests upon. Any
        iterable is accepted (it is materialized into a list internally).
    labels : iterable of str or int, optional
        Labels for each ``DistanceMatrix`` in `dms`. These are
        used in the results data frame to identify the pair of distance
        matrices used in a pairwise Mantel test. If ``None``, defaults to
        monotonically-increasing integers starting at zero.
    strict : bool, optional
        If ``True``, raises a ``ValueError`` if IDs are found that do not exist
        in both distance matrices for the current pairwise test. If ``False``,
        these "extra" (nonmatching) IDs are discarded before running the
        pairwise Mantel test.
    lookup : dict, optional
        Maps each ID in the distance matrices to a new ID. Used to match up IDs
        across distance matrices prior to running the Mantel test. If the IDs
        already match between the distance matrices in `dms`, this parameter is
        not necessary.
    method : {'pearson', 'spearman'}
        Correlation method. See ``mantel`` function for more details.
    permutations : int, optional
        Number of permutations. See ``mantel`` function for more details.
    alternative : {'two-sided', 'greater', 'less'}
        Alternative hypothesis. See ``mantel`` function for more details.

    Returns
    -------
    pandas.DataFrame
        Data frame containing the results of each pairwise test (one per row),
        indexed by the pair of labels (``dm1``, ``dm2``). Includes the number
        of objects considered in each test as column ``n`` (after applying
        `lookup` and filtering non-matching IDs if ``strict=False``). Column
        ``p-value`` holds the p-values as strings produced by
        ``p_value_to_str``, i.e. formatted with the correct number of decimal
        places, or ``N/A`` if a p-value could not be computed.

    Raises
    ------
    ValueError
        If fewer than two distance matrices are provided, if the number of
        `labels` does not match the number of distance matrices, or if
        `labels` are not unique.
    TypeError
        If any element of `dms` is not a ``DistanceMatrix``.

    See Also
    --------
    mantel

    """
    # Materialize the input so that any iterable (including one-shot
    # generators) can be both measured with len() and iterated afterwards.
    dms = list(dms)
    num_dms = len(dms)

    if num_dms < 2:
        raise ValueError("Must provide at least two distance matrices.")

    for dm in dms:
        if not isinstance(dm, DistanceMatrix):
            raise TypeError("Must provide DistanceMatrix instances as input.")

    if labels is None:
        labels = range(num_dms)
    else:
        # Same reasoning as for `dms`: labels may be any iterable.
        labels = list(labels)
        if num_dms != len(labels):
            raise ValueError("Number of labels must match the number of "
                             "distance matrices.")
        if len(set(labels)) != len(labels):
            raise ValueError("Labels must be unique.")

    # Number of unordered pairs, C(num_dms, 2). Computed in closed form
    # instead of via ``scipy.misc.comb``, which has been removed from SciPy.
    num_combs = num_dms * (num_dms - 1) // 2
    results_dtype = [('dm1', object), ('dm2', object), ('statistic', float),
                     ('p-value', object), ('n', int), ('method', object),
                     ('permutations', int), ('alternative', object)]
    # Preallocate one structured record per pairwise comparison.
    results = np.empty(num_combs, dtype=results_dtype)

    for i, pair in enumerate(combinations(zip(labels, dms), 2)):
        (xlabel, x), (ylabel, y) = pair

        # Align the two matrices on a common ID order before testing.
        x, y = _order_dms(x, y, strict=strict, lookup=lookup)

        stat, p_val = mantel(x, y, method=method, permutations=permutations,
                             alternative=alternative)

        p_val_str = p_value_to_str(p_val, permutations)
        # x.shape[0] is the number of objects left after alignment.
        results[i] = (xlabel, ylabel, stat, p_val_str, x.shape[0], method,
                      permutations, alternative)

    return pd.DataFrame.from_records(results, index=('dm1', 'dm2'))
コード例 #4
0
ファイル: base.py プロジェクト: BANSHEE-/scikit-bio
    def _format_data(self):
        # Build the tuple of display strings for this result: method name,
        # sample size, number of groups, statistic, p-value and permutations.
        formatted_p_value = p_value_to_str(self.p_value, self.permutations)

        method_name = self.short_method_name
        sample_size_str = '%d' % self.sample_size
        num_groups_str = '%d' % len(self.groups)
        statistic_str = str(self.statistic)
        permutations_str = '%d' % self.permutations

        return (method_name, sample_size_str, num_groups_str, statistic_str,
                formatted_p_value, permutations_str)
コード例 #5
0
ファイル: test_misc.py プロジェクト: BANSHEE-/scikit-bio
    def test_missing_or_invalid_p_value(self):
        # Both a missing (None) and a NaN p-value are expected to be
        # reported as 'N/A'.
        for invalid_p_value in (None, np.nan):
            observed = p_value_to_str(invalid_p_value, 0)
            self.assertEqual(observed, 'N/A')