コード例 #1
0
    def test_too_few_permutations(self):
        """Check the message produced for permutation counts 9, 1 and 0."""
        # (permutation count, expected message), checked in this order.
        cases = (
            (9, 'Too few permutations to compute p-value (permutations = 9)'),
            (1, 'Too few permutations to compute p-value (permutations = 1)'),
            (0, 'Too few permutations to compute p-value (permutations = 0)'),
        )
        for num_perms, expected in cases:
            actual = p_value_to_str(self.p_value, num_perms)
            self.assertEqual(actual, expected)
コード例 #2
0
    def test_valid_input(self):
        """Check the formatted p-value for several permutation counts."""
        # (p-value, permutation count, expected string), checked in order.
        cases = [
            (self.p_value, 100, '0.12'),
            (self.p_value, 250, '0.12'),
            (self.p_value, 1000, '0.119'),
            (0.0055623489, 999, '0.006'),
        ]
        for p_value, num_perms, expected in cases:
            actual = p_value_to_str(p_value, num_perms)
            self.assertEqual(actual, expected)
コード例 #3
0
    def test_valid_input(self):
        """Check p_value_to_str output against known formatted values."""
        # Parallel sequences: the i-th call uses p_values[i] and perms[i] and
        # must produce expected[i].
        p_values = (self.p_value, self.p_value, self.p_value, 0.0055623489)
        perms = (100, 250, 1000, 999)
        expected = ('0.12', '0.12', '0.119', '0.006')

        for p_val, n_perms, exp in zip(p_values, perms, expected):
            self.assertEqual(p_value_to_str(p_val, n_perms), exp)
コード例 #4
0
    def test_too_few_permutations(self):
        """Check the 'too few permutations' message for counts 9, 1 and 0."""
        # Maps a permutation count to the message expected for it; insertion
        # order gives the order in which the counts are checked.
        expected_by_count = {
            9: 'Too few permutations to compute p-value (permutations = 9)',
            1: 'Too few permutations to compute p-value (permutations = 1)',
            0: 'Too few permutations to compute p-value (permutations = 0)',
        }
        for count, message in expected_by_count.items():
            result = p_value_to_str(self.p_value, count)
            self.assertEqual(result, message)
コード例 #5
0
def run_mantel_test(method,
                    fps,
                    distmats,
                    num_perms,
                    tail_type,
                    comment,
                    control_dm_fp=None,
                    control_dm=None,
                    sample_id_map=None):
    """Runs a Mantel test on all pairs of distance matrices.

    Returns a string suitable for writing out to a file containing the results
    of the test: the comment, a tab-separated header line, and one
    tab-separated line per pair of distance matrices. Pairs that end up with
    fewer than three samples get a 'Too few samples' line instead of a result.

    WARNING: Only symmetric, hollow distance matrices may be used as input.
    Asymmetric distance matrices, such as those obtained by the UniFrac Gain
    metric (i.e. beta_diversity.py -m unifrac_g), should not be used as input.

    Arguments:
        method - which Mantel test to run (either 'mantel' or 'partial_mantel')
        fps - list of filepaths of the distance matrices
        distmats - list of tuples containing dm labels and dm data (i.e. the
            output of parse_distmat)
        num_perms - the number of permutations to use to calculate the
            p-value(s)
        tail_type - the type of tail test to use when calculating the
            p-value(s). Can be 'two-sided', 'greater', or 'less'. Only applies
            when method is mantel
        comment - comment string to add to the beginning of the results
            string (None is treated as an empty string)
        control_dm_fp - filepath of the control distance matrix. Only applies
            when method is partial_mantel (it is required then)
        control_dm - tuple containing control distance matrix labels and matrix
            data. Only applies when method is partial_mantel (it is required
            then)
        sample_id_map - dict mapping sample IDs (i.e. what is expected by
            make_compatible_distance_matrices)

    Raises:
        ValueError - if fps and distmats differ in length, if method is not
            'mantel' or 'partial_mantel', or if method is 'partial_mantel' and
            the control matrix or its filepath is missing
    """
    if len(fps) != len(distmats):
        raise ValueError("Must provide the same number of filepaths as there "
                         "are distance matrices.")
    if comment is None:
        comment = ''

    if method == 'mantel':
        header = 'DM1\tDM2\tNumber of entries\tMantel r statistic\t' + \
                 'p-value\tNumber of permutations\tTail type\n'
    elif method == 'partial_mantel':
        if not control_dm_fp or not control_dm:
            raise ValueError("You must provide a control matrix filepath and "
                             "control matrix when running the partial Mantel "
                             "test.")
        header = 'DM1\tDM2\tCDM\tNumber of entries\t' + \
            'Mantel r statistic\tp-value\tNumber of permutations\t' +\
            'Tail type\n'
    else:
        raise ValueError("Invalid method '%s'. Must be either 'mantel' or "
                         "'partial_mantel'." % method)

    is_partial = method == 'partial_mantel'
    # Output is collected piecewise and joined once at the end.
    pieces = [comment, header]

    # zip() returns a one-shot iterator on Python 3 and cannot be sliced, so
    # the (filepath, matrix) pairing is materialised once up front.
    labelled_dms = list(zip(fps, distmats))

    # Loop over all pairs of dms.
    for i, (fp1, (first_labels, first_data)) in enumerate(labelled_dms):
        for fp2, (second_labels, second_data) in labelled_dms[i + 1:]:
            # Make the current pair of distance matrices compatible by only
            # keeping samples that match between them, and ordering them by
            # the same sample IDs. The results go into fresh names so that
            # the next pair starts again from the unfiltered first matrix.
            (dm1_labels, dm1_data), (dm2_labels, dm2_data) = \
                make_compatible_distance_matrices(
                    (first_labels, first_data),
                    (second_labels, second_data),
                    lookup=sample_id_map)

            if is_partial:
                # We need to intersect three sets (three matrices): match the
                # first matrix against the control, then re-match the second
                # matrix against the updated first matrix.
                (dm1_labels, dm1_data), (cdm_labels, cdm_data) = \
                    make_compatible_distance_matrices(
                        (dm1_labels, dm1_data), control_dm,
                        lookup=sample_id_map)
                (dm1_labels, dm1_data), (dm2_labels, dm2_data) = \
                    make_compatible_distance_matrices(
                        (dm1_labels, dm1_data), (dm2_labels, dm2_data),
                        lookup=sample_id_map)

            if len(dm1_labels) < 3:
                if is_partial:
                    pieces.append('%s\t%s\t%s\t%d\tToo few samples\n' % (
                        fp1, fp2, control_dm_fp, len(dm1_labels)))
                else:
                    pieces.append('%s\t%s\t%d\tToo few samples\n' % (
                        fp1, fp2, len(dm1_labels)))
                continue

            dm1 = DistanceMatrix(dm1_data, dm1_labels)
            dm2 = DistanceMatrix(dm2_data, dm2_labels)

            if is_partial:
                cdm = DistanceMatrix(cdm_data, cdm_labels)
                results = PartialMantel(dm1, dm2, cdm)(num_perms)
                p_str = p_value_to_str(results['mantel_p'], num_perms)
                # The tail type column is always 'greater' for partial Mantel.
                pieces.append("%s\t%s\t%s\t%d\t%.5f\t%s\t%d\t%s\n" % (
                    fp1, fp2, control_dm_fp, len(dm1_labels),
                    results['mantel_r'], p_str, num_perms, 'greater'))
            else:
                corr_coeff, p_value, n = mantel(dm1,
                                                dm2,
                                                method='pearson',
                                                permutations=num_perms,
                                                alternative=tail_type,
                                                strict=True)
                p_str = p_value_to_str(p_value, num_perms)
                pieces.append("%s\t%s\t%d\t%.5f\t%s\t%d\t%s\n" % (
                    fp1, fp2, n, corr_coeff, p_str, num_perms, tail_type))
    return ''.join(pieces)
コード例 #6
0
def run_mantel_correlogram(fps,
                           distmats,
                           num_perms,
                           comment,
                           alpha,
                           sample_id_map=None,
                           variable_size_distance_classes=False):
    """Runs a Mantel correlogram analysis on all pairs of distance matrices.

    Returns a string suitable for writing out to a file containing the results
    of the test, a list of correlogram filepath names, and a list of the
    'correlogram_plot' results (one per analysed pair).

    The correlogram filepaths can have an extension string appended to the end
    of them and then be used to save each of the correlogram Figures to a file.
    Each correlogram filepath will be a combination of the basenames of the
    two distance matrix filepaths that were used to create it.

    Pairs that end up with fewer than three samples get a 'Too few samples'
    line in the results string and no correlogram.

    WARNING: Only symmetric, hollow distance matrices may be used as input.
    Asymmetric distance matrices, such as those obtained by the UniFrac Gain
    metric (i.e. beta_diversity.py -m unifrac_g), should not be used as input.

    Arguments:
        fps - list of filepaths of the distance matrices
        distmats - list of tuples containing dm labels and dm data (i.e. the
            output of parse_distmat)
        num_perms - the number of permutations to use to calculate the
            p-value(s)
        comment - comment string to add to the beginning of the results
            string (None is treated as an empty string)
        alpha - the alpha value to use to determine significance in the
            correlogram plots
        sample_id_map - dict mapping sample IDs (i.e. what is expected by
            make_compatible_distance_matrices)
        variable_size_distance_classes - create distance classes that vary in
            size (i.e. width) but have the same number of distances in each
            class

    Raises:
        ValueError - if fps and distmats differ in length
    """
    if len(fps) != len(distmats):
        raise ValueError("Must provide the same number of filepaths as there "
                         "are distance matrices.")
    if comment is None:
        comment = ''
    header = 'DM1\tDM2\tNumber of entries\t' + \
             'Number of permutations\tClass index\t' + \
             'Number of distances\tMantel r statistic\t' + \
             'p-value\tp-value (Bonferroni corrected)\tTail type\n'
    # Output is collected piecewise and joined once at the end.
    pieces = [comment + header]
    correlogram_fps = []
    correlograms = []

    # zip() returns a one-shot iterator on Python 3 and cannot be sliced, so
    # the (filepath, matrix) pairing is materialised once up front.
    labelled_dms = list(zip(fps, distmats))

    # Loop over all pairs of dms.
    for i, (fp1, (first_labels, first_data)) in enumerate(labelled_dms):
        for fp2, (second_labels, second_data) in labelled_dms[i + 1:]:
            # Make the current pair of distance matrices compatible by only
            # keeping samples that match between them, and ordering them by
            # the same sample IDs. The results go into fresh names so that
            # the next pair starts again from the unfiltered first matrix.
            (dm1_labels, dm1_data), (dm2_labels, dm2_data) = \
                make_compatible_distance_matrices(
                    (first_labels, first_data),
                    (second_labels, second_data),
                    lookup=sample_id_map)
            if len(dm1_labels) < 3:
                pieces.append('%s\t%s\t%d\tToo few samples\n' % (
                    fp1, fp2, len(dm1_labels)))
                continue

            dm1 = DistanceMatrix(dm1_data, dm1_labels)
            dm2 = DistanceMatrix(dm2_data, dm2_labels)

            # Create an instance of our Mantel correlogram test and run it with
            # the specified number of permutations.
            mc = MantelCorrelogram(
                dm1,
                dm2,
                alpha=alpha,
                variable_size_distance_classes=variable_size_distance_classes)
            results = mc(num_perms)

            # Generate a name for the current correlogram and save it and the
            # correlogram itself. The name ends in '.' so that callers only
            # need to append the extension.
            dm1_name = path.basename(fp1)
            dm2_name = path.basename(fp2)
            correlogram_fps.append('_'.join((dm1_name, 'AND', dm2_name,
                                             'mantel_correlogram')) + '.')
            correlograms.append(results['correlogram_plot'])

            # Iterate over the results and write them to the text file.
            per_class = zip(
                results['class_index'], results['num_dist'],
                results['mantel_r'], results['mantel_p'],
                results['mantel_p_corr'])
            for row_idx, (class_idx, num_dist, r, p, p_corr) in \
                    enumerate(per_class):
                # Format p-values and figure out which tail type we have based
                # on the sign of r. Missing values stay None and are written
                # out as the string 'None'.
                p_str = None
                if p is not None:
                    p_str = p_value_to_str(p, num_perms)
                p_corr_str = None
                if p_corr is not None:
                    p_corr_str = p_value_to_str(p_corr, num_perms)
                if r is None:
                    tail_type = None
                elif r < 0:
                    tail_type = 'less'
                else:
                    tail_type = 'greater'

                if row_idx == 0:
                    # Only the first row of a pair carries the pair-level
                    # columns; the following rows leave them blank.
                    pieces.append(
                        '%s\t%s\t%d\t%d\t%s\t%d\t%s\t%s\t%s\t%s\n' % (
                            fp1, fp2, len(dm1_labels), num_perms, class_idx,
                            num_dist, r, p_str, p_corr_str, tail_type))
                else:
                    pieces.append('\t\t\t\t%s\t%d\t%s\t%s\t%s\t%s\n' % (
                        class_idx, num_dist, r, p_str, p_corr_str, tail_type))
    return ''.join(pieces), correlogram_fps, correlograms
コード例 #7
0
ファイル: _mantel.py プロジェクト: kschwarzberg/scikit-bio
def pwmantel(dms, labels=None, strict=True, lookup=None, method='pearson',
             permutations=999, alternative='two-sided'):
    """Run Mantel tests for every pair of distance matrices.

    Runs a Mantel test for each pair of distance matrices and collates the
    results in a data frame. Distance matrices do not need to be in the same
    ID order (contrary to how the ``mantel`` function behaves). Distance
    matrices will be re-ordered prior to running each pairwise test, and if
    ``strict=False``, IDs that don't match between a pair of distance matrices
    will be dropped prior to running the test (otherwise a ``ValueError`` will
    be raised if there are non-matching IDs between any pair of distance
    matrices).

    Parameters
    ----------
    dms : iterable of DistanceMatrix objects
        DistanceMatrix instances to perform pairwise Mantel tests upon.
    labels : iterable of str or int, optional
        Labels for each ``DistanceMatrix`` in `dms`. These are
        used in the results data frame to identify the pair of distance
        matrices used in a pairwise Mantel test. If ``None``, defaults to
        monotonically-increasing integers starting at zero.
    strict : bool, optional
        If ``True``, raises a ``ValueError`` if IDs are found that do not exist
        in both distance matrices for the current pairwise test. If ``False``,
        these "extra" (nonmatching) IDs are discarded before running the
        pairwise Mantel test.
    lookup : dict, optional
        Maps each ID in the distance matrices to a new ID. Used to match up IDs
        across distance matrices prior to running the Mantel test. If the IDs
        already match between the distance matrices in `dms`, this parameter is
        not necessary.
    method : {'pearson', 'spearman'}
        Correlation method. See ``mantel`` function for more details.
    permutations : int, optional
        Number of permutations. See ``mantel`` function for more details.
    alternative : {'two-sided', 'greater', 'less'}
        Alternative hypothesis. See ``mantel`` function for more details.

    Returns
    -------
    pandas.DataFrame
        Data frame containing the results of each pairwise test (one per row).
        Includes the number of objects considered in each test as column ``n``
        (after applying `lookup` and filtering non-matching IDs if
        ``strict=False``). Column ``p-value`` has the p-values formatted as
        strings with the correct number of decimal places, or ``N/A`` if a
        p-value could not be computed.

    Raises
    ------
    ValueError
        If fewer than two distance matrices are provided, if the number of
        labels differs from the number of distance matrices, or if the labels
        are not unique.
    TypeError
        If any element of `dms` is not a ``DistanceMatrix``.

    See Also
    --------
    mantel

    """
    # `dms` is documented as any iterable; materialise it so that it can be
    # measured and traversed more than once.
    dms = list(dms)
    num_dms = len(dms)

    if num_dms < 2:
        raise ValueError("Must provide at least two distance matrices.")

    for dm in dms:
        if not isinstance(dm, DistanceMatrix):
            raise TypeError("Must provide DistanceMatrix instances as input.")

    if labels is None:
        labels = range(num_dms)
    else:
        labels = list(labels)
        if num_dms != len(labels):
            raise ValueError("Number of labels must match the number of "
                             "distance matrices.")
        if len(set(labels)) != len(labels):
            raise ValueError("Labels must be unique.")

    # Number of unordered pairs (n choose 2), computed directly because
    # scipy.misc.comb is no longer available in current SciPy releases.
    num_combs = num_dms * (num_dms - 1) // 2
    results_dtype = [('dm1', object), ('dm2', object), ('statistic', float),
                     ('p-value', object), ('n', int), ('method', object),
                     ('permutations', int), ('alternative', object)]
    # One structured-array record per pairwise test, filled in below.
    results = np.empty(num_combs, dtype=results_dtype)

    for i, pair in enumerate(combinations(zip(labels, dms), 2)):
        (xlabel, x), (ylabel, y) = pair

        x, y = _order_dms(x, y, strict=strict, lookup=lookup)

        stat, p_val = mantel(x, y, method=method, permutations=permutations,
                             alternative=alternative)

        p_val_str = p_value_to_str(p_val, permutations)
        results[i] = (xlabel, ylabel, stat, p_val_str, x.shape[0], method,
                      permutations, alternative)

    return pd.DataFrame.from_records(results, index=('dm1', 'dm2'))
コード例 #8
0
ファイル: _base.py プロジェクト: creageng/scikit-bio
    def _format_data(self):
        """Return this result's fields as a tuple, in display order.

        The tuple holds the short method name, the sample size, the number of
        groups, the statistic, the formatted p-value and the number of
        permutations. All but the short method name are converted to strings.
        """
        formatted_p = p_value_to_str(self.p_value, self.permutations)

        name = self.short_method_name
        sample_size = '%d' % self.sample_size
        num_groups = '%d' % len(self.groups)
        statistic = str(self.statistic)
        num_perms = '%d' % self.permutations

        return (name, sample_size, num_groups, statistic, formatted_p,
                num_perms)
コード例 #9
0
    def test_missing_or_invalid_p_value(self):
        """Check that None and NaN p-values are both rendered as 'N/A'."""
        for bad_p_value in (None, np.nan):
            formatted = p_value_to_str(bad_p_value, 0)
            self.assertEqual(formatted, 'N/A')
コード例 #10
0
    def _format_data(self):
        """Build the tuple of fields describing this result.

        Order: short method name, sample size, number of groups, statistic,
        formatted p-value, number of permutations.
        """
        p_text = p_value_to_str(self.p_value, self.permutations)

        fields = [self.short_method_name]
        # Integer-valued fields are rendered with '%d'.
        fields.append('%d' % self.sample_size)
        fields.append('%d' % len(self.groups))
        fields.append(str(self.statistic))
        fields.append(p_text)
        fields.append('%d' % self.permutations)
        return tuple(fields)
コード例 #11
0
def run_mantel_test(method, fps, distmats, num_perms, tail_type, comment,
                    control_dm_fp=None, control_dm=None,
                    sample_id_map=None):
    """Runs a Mantel test on all pairs of distance matrices.

    Returns a string suitable for writing out to a file containing the results
    of the test: the comment, a tab-separated header line, and one
    tab-separated line per pair of distance matrices. Pairs that end up with
    fewer than three samples get a 'Too few samples' line instead of a result.

    WARNING: Only symmetric, hollow distance matrices may be used as input.
    Asymmetric distance matrices, such as those obtained by the UniFrac Gain
    metric (i.e. beta_diversity.py -m unifrac_g), should not be used as input.

    Arguments:
        method - which Mantel test to run (either 'mantel' or 'partial_mantel')
        fps - list of filepaths of the distance matrices
        distmats - list of tuples containing dm labels and dm data (i.e. the
            output of parse_distmat)
        num_perms - the number of permutations to use to calculate the
            p-value(s)
        tail_type - the type of tail test to use when calculating the
            p-value(s). Can be 'two-sided', 'greater', or 'less'. Only applies
            when method is mantel
        comment - comment string to add to the beginning of the results
            string (None is treated as an empty string)
        control_dm_fp - filepath of the control distance matrix. Only applies
            when method is partial_mantel (it is required then)
        control_dm - tuple containing control distance matrix labels and matrix
            data. Only applies when method is partial_mantel (it is required
            then)
        sample_id_map - dict mapping sample IDs (i.e. what is expected by
            make_compatible_distance_matrices)

    Raises:
        ValueError - if fps and distmats differ in length, if method is not
            'mantel' or 'partial_mantel', or if method is 'partial_mantel' and
            the control matrix or its filepath is missing
    """
    if len(fps) != len(distmats):
        raise ValueError("Must provide the same number of filepaths as there "
                         "are distance matrices.")
    if comment is None:
        comment = ''
    result = comment

    if method == 'mantel':
        result += 'DM1\tDM2\tNumber of entries\tMantel r statistic\t' + \
                  'p-value\tNumber of permutations\tTail type\n'
    elif method == 'partial_mantel':
        if not control_dm_fp or not control_dm:
            raise ValueError("You must provide a control matrix filepath and "
                             "control matrix when running the partial Mantel "
                             "test.")
        result += 'DM1\tDM2\tCDM\tNumber of entries\t' + \
            'Mantel r statistic\tp-value\tNumber of permutations\t' +\
            'Tail type\n'
    else:
        raise ValueError("Invalid method '%s'. Must be either 'mantel' or "
                         "'partial_mantel'." % method)

    # zip() returns a one-shot iterator on Python 3 and cannot be sliced, so
    # build the list of (filepath, matrix) entries once.
    entries = list(zip(fps, distmats))

    # Loop over all pairs of dms.
    for i, (fp1, (src1_labels, src1_data)) in enumerate(entries):
        for fp2, (src2_labels, src2_data) in entries[i + 1:]:
            # Make the current pair of distance matrices compatible by only
            # keeping samples that match between them, and ordering them by
            # the same sample IDs. The src*_ values are never overwritten, so
            # every pair starts from the unfiltered matrices.
            (dm1_labels, dm1_data), (dm2_labels, dm2_data) = \
                make_compatible_distance_matrices(
                    (src1_labels, src1_data), (src2_labels, src2_data),
                    lookup=sample_id_map)
            if method == 'partial_mantel':
                # We need to intersect three sets (three matrices).
                (dm1_labels, dm1_data), (cdm_labels, cdm_data) = \
                    make_compatible_distance_matrices(
                        (dm1_labels, dm1_data), control_dm,
                        lookup=sample_id_map)
                (dm1_labels, dm1_data), (dm2_labels, dm2_data) = \
                    make_compatible_distance_matrices(
                        (dm1_labels, dm1_data), (dm2_labels, dm2_data),
                        lookup=sample_id_map)
                if len(dm1_labels) < 3:
                    result += '%s\t%s\t%s\t%d\tToo few samples\n' % (
                        fp1, fp2, control_dm_fp, len(dm1_labels))
                    continue
            elif len(dm1_labels) < 3:
                result += '%s\t%s\t%d\tToo few samples\n' % (
                    fp1, fp2, len(dm1_labels))
                continue

            dm1 = DistanceMatrix(dm1_data, dm1_labels)
            dm2 = DistanceMatrix(dm2_data, dm2_labels)

            if method == 'mantel':
                corr_coeff, p_value, n = mantel(
                    dm1, dm2, method='pearson', permutations=num_perms,
                    alternative=tail_type, strict=True)
                p_str = p_value_to_str(p_value, num_perms)
                result += "%s\t%s\t%d\t%.5f\t%s\t%d\t%s\n" % (
                    fp1, fp2, n, corr_coeff, p_str, num_perms, tail_type)
            elif method == 'partial_mantel':
                cdm = DistanceMatrix(cdm_data, cdm_labels)
                results = PartialMantel(dm1, dm2, cdm)(num_perms)
                p_str = p_value_to_str(results['mantel_p'], num_perms)
                # The tail type column is always 'greater' for partial Mantel.
                result += "%s\t%s\t%s\t%d\t%.5f\t%s\t%d\t%s\n" % (
                    fp1, fp2, control_dm_fp, len(dm1_labels),
                    results['mantel_r'], p_str, num_perms, 'greater')
    return result
コード例 #12
0
def run_mantel_correlogram(fps, distmats, num_perms, comment, alpha,
                           sample_id_map=None,
                           variable_size_distance_classes=False):
    """Runs a Mantel correlogram analysis on all pairs of distance matrices.

    Returns a string suitable for writing out to a file containing the results
    of the test, a list of correlogram filepath names, and a list of the
    'correlogram_plot' results (one per analysed pair).

    The correlogram filepaths can have an extension string appended to the end
    of them and then be used to save each of the correlogram Figures to a file.
    Each correlogram filepath will be a combination of the basenames of the
    two distance matrix filepaths that were used to create it.

    Pairs that end up with fewer than three samples get a 'Too few samples'
    line in the results string and no correlogram.

    WARNING: Only symmetric, hollow distance matrices may be used as input.
    Asymmetric distance matrices, such as those obtained by the UniFrac Gain
    metric (i.e. beta_diversity.py -m unifrac_g), should not be used as input.

    Arguments:
        fps - list of filepaths of the distance matrices
        distmats - list of tuples containing dm labels and dm data (i.e. the
            output of parse_distmat)
        num_perms - the number of permutations to use to calculate the
            p-value(s)
        comment - comment string to add to the beginning of the results
            string (None is treated as an empty string)
        alpha - the alpha value to use to determine significance in the
            correlogram plots
        sample_id_map - dict mapping sample IDs (i.e. what is expected by
            make_compatible_distance_matrices)
        variable_size_distance_classes - create distance classes that vary in
            size (i.e. width) but have the same number of distances in each
            class

    Raises:
        ValueError - if fps and distmats differ in length
    """
    if len(fps) != len(distmats):
        raise ValueError("Must provide the same number of filepaths as there "
                         "are distance matrices.")
    if comment is None:
        comment = ''
    result = comment + 'DM1\tDM2\tNumber of entries\t' + \
                       'Number of permutations\tClass index\t' + \
                       'Number of distances\tMantel r statistic\t' + \
                       'p-value\tp-value (Bonferroni corrected)\tTail type\n'
    correlogram_fps = []
    correlograms = []

    # zip() returns a one-shot iterator on Python 3 and cannot be sliced, so
    # build the list of (filepath, matrix) entries once.
    entries = list(zip(fps, distmats))

    # Loop over all pairs of dms.
    for i, (fp1, (src1_labels, src1_data)) in enumerate(entries):
        for fp2, (src2_labels, src2_data) in entries[i + 1:]:
            # Make the current pair of distance matrices compatible by only
            # keeping samples that match between them, and ordering them by
            # the same sample IDs. The src*_ values are never overwritten, so
            # every pair starts from the unfiltered matrices.
            (dm1_labels, dm1_data), (dm2_labels, dm2_data) = \
                make_compatible_distance_matrices(
                    (src1_labels, src1_data), (src2_labels, src2_data),
                    lookup=sample_id_map)
            if len(dm1_labels) < 3:
                result += '%s\t%s\t%d\tToo few samples\n' % (
                    fp1, fp2, len(dm1_labels))
                continue

            dm1 = DistanceMatrix(dm1_data, dm1_labels)
            dm2 = DistanceMatrix(dm2_data, dm2_labels)

            # Create an instance of our Mantel correlogram test and run it with
            # the specified number of permutations.
            mc = MantelCorrelogram(
                dm1, dm2, alpha=alpha,
                variable_size_distance_classes=variable_size_distance_classes)
            results = mc(num_perms)

            # Generate a name for the current correlogram and save it and the
            # correlogram itself. The name ends in '.' so that callers only
            # need to append the extension.
            dm1_name = path.basename(fp1)
            dm2_name = path.basename(fp2)
            correlogram_fps.append('_'.join((dm1_name, 'AND', dm2_name,
                                             'mantel_correlogram')) + '.')
            correlograms.append(results['correlogram_plot'])

            # Iterate over the results and write them to the text file.
            first_time = True
            for class_idx, num_dist, r, p, p_corr in zip(
                    results['class_index'], results['num_dist'],
                    results['mantel_r'], results['mantel_p'],
                    results['mantel_p_corr']):
                # Format p-values and figure out which tail type we have based
                # on the sign of r. Missing values stay None and are written
                # out as the string 'None'.
                p_str = None
                if p is not None:
                    p_str = p_value_to_str(p, num_perms)
                p_corr_str = None
                if p_corr is not None:
                    p_corr_str = p_value_to_str(p_corr, num_perms)
                if r is None:
                    tail_type = None
                elif r < 0:
                    tail_type = 'less'
                else:
                    tail_type = 'greater'

                if first_time:
                    # Only the first row of a pair carries the pair-level
                    # columns; the following rows leave them blank.
                    result += '%s\t%s\t%d\t%d\t%s\t%d\t%s\t%s\t%s\t%s\n' % (
                        fp1, fp2, len(dm1_labels), num_perms, class_idx,
                        num_dist, r, p_str, p_corr_str, tail_type)
                    first_time = False
                else:
                    result += '\t\t\t\t%s\t%d\t%s\t%s\t%s\t%s\n' % (
                        class_idx, num_dist, r, p_str, p_corr_str, tail_type)
    return result, correlogram_fps, correlograms
コード例 #13
0
def pwmantel(dms,
             labels=None,
             strict=True,
             lookup=None,
             method='pearson',
             permutations=999,
             alternative='two-sided'):
    """Run Mantel tests for every pair of distance matrices.

    Runs a Mantel test for each pair of distance matrices and collates the
    results in a data frame. Distance matrices do not need to be in the same
    ID order (contrary to how the ``mantel`` function behaves). Distance
    matrices will be re-ordered prior to running each pairwise test, and if
    ``strict=False``, IDs that don't match between a pair of distance matrices
    will be dropped prior to running the test (otherwise a ``ValueError`` will
    be raised if there are non-matching IDs between any pair of distance
    matrices).

    Parameters
    ----------
    dms : iterable of DistanceMatrix objects
        DistanceMatrix instances to perform pairwise Mantel tests upon.
    labels : iterable of str or int, optional
        Labels for each ``DistanceMatrix`` in `dms`. These are
        used in the results data frame to identify the pair of distance
        matrices used in a pairwise Mantel test. If ``None``, defaults to
        monotonically-increasing integers starting at zero.
    strict : bool, optional
        If ``True``, raises a ``ValueError`` if IDs are found that do not exist
        in both distance matrices for the current pairwise test. If ``False``,
        these "extra" (nonmatching) IDs are discarded before running the
        pairwise Mantel test.
    lookup : dict, optional
        Maps each ID in the distance matrices to a new ID. Used to match up IDs
        across distance matrices prior to running the Mantel test. If the IDs
        already match between the distance matrices in `dms`, this parameter is
        not necessary.
    method : {'pearson', 'spearman'}
        Correlation method. See ``mantel`` function for more details.
    permutations : int, optional
        Number of permutations. See ``mantel`` function for more details.
    alternative : {'two-sided', 'greater', 'less'}
        Alternative hypothesis. See ``mantel`` function for more details.

    Returns
    -------
    pandas.DataFrame
        Data frame containing the results of each pairwise test (one per row).
        Includes the number of objects considered in each test as column ``n``
        (after applying `lookup` and filtering non-matching IDs if
        ``strict=False``). Column ``p-value`` has the p-values formatted as
        strings with the correct number of decimal places, or ``N/A`` if a
        p-value could not be computed.

    Raises
    ------
    ValueError
        If fewer than two distance matrices are provided, if the number of
        labels differs from the number of distance matrices, or if the labels
        are not unique.
    TypeError
        If any element of `dms` is not a ``DistanceMatrix``.

    See Also
    --------
    mantel

    """
    # `dms` is documented as any iterable; materialise it so that it can be
    # measured and traversed more than once.
    dms = list(dms)
    num_dms = len(dms)

    if num_dms < 2:
        raise ValueError("Must provide at least two distance matrices.")

    if not all(isinstance(dm, DistanceMatrix) for dm in dms):
        raise TypeError("Must provide DistanceMatrix instances as input.")

    if labels is None:
        labels = range(num_dms)
    else:
        labels = list(labels)
        if num_dms != len(labels):
            raise ValueError("Number of labels must match the number of "
                             "distance matrices.")
        if len(set(labels)) != len(labels):
            raise ValueError("Labels must be unique.")

    # Number of unordered pairs (n choose 2), computed directly because
    # scipy.misc.comb is no longer available in current SciPy releases.
    num_combs = num_dms * (num_dms - 1) // 2
    results_dtype = [('dm1', object), ('dm2', object), ('statistic', float),
                     ('p-value', object), ('n', int), ('method', object),
                     ('permutations', int), ('alternative', object)]
    # One structured-array record per pairwise test, filled in below.
    results = np.empty(num_combs, dtype=results_dtype)

    for i, pair in enumerate(combinations(zip(labels, dms), 2)):
        (xlabel, x), (ylabel, y) = pair

        x, y = _order_dms(x, y, strict=strict, lookup=lookup)

        stat, p_val = mantel(x,
                             y,
                             method=method,
                             permutations=permutations,
                             alternative=alternative)

        p_val_str = p_value_to_str(p_val, permutations)
        results[i] = (xlabel, ylabel, stat, p_val_str, x.shape[0], method,
                      permutations, alternative)

    return pd.DataFrame.from_records(results, index=('dm1', 'dm2'))
コード例 #14
0
    def test_missing_or_invalid_p_value(self):
        """Check that a missing (None) or NaN p-value formats as 'N/A'."""
        missing = p_value_to_str(None, 0)
        self.assertEqual(missing, 'N/A')

        not_a_number = p_value_to_str(np.nan, 0)
        self.assertEqual(not_a_number, 'N/A')
コード例 #15
0
def pwmantel(dms, labels=None, method='pearson', permutations=999,
             alternative='two-sided', strict=True, lookup=None):
    """Run Mantel tests for every pair of distance matrices.

    Runs a Mantel test for each pair of distance matrices and collates the
    results in a ``DataFrame``. Distance matrices do not need to be in the same
    ID order if they are ``DistanceMatrix`` instances. Distance matrices will
    be re-ordered prior to running each pairwise test, and if ``strict=False``,
    IDs that don't match between a pair of distance matrices will be dropped
    prior to running the test (otherwise a ``ValueError`` will be raised if
    there are nonmatching IDs between any pair of distance matrices).

    Parameters
    ----------
    dms : iterable of DistanceMatrix objects or array_like objects
        Distance matrices to perform pairwise Mantel tests upon. If they are
        ``array_like`` (but not ``DistanceMatrix`` instances), no
        reordering/matching of IDs will be performed. Any iterable is
        accepted (including generators); it is consumed once.
    labels : iterable of str or int, optional
        Labels for each distance matrix in `dms`. These are used in the results
        ``DataFrame`` to identify the pair of distance matrices used in a
        pairwise Mantel test. If ``None``, defaults to monotonically-increasing
        integers starting at zero. Any iterable is accepted; it is consumed
        once.
    method : {'pearson', 'spearman'}
        Correlation method. See ``mantel`` function for more details.
    permutations : int, optional
        Number of permutations. See ``mantel`` function for more details.
    alternative : {'two-sided', 'greater', 'less'}
        Alternative hypothesis. See ``mantel`` function for more details.
    strict : bool, optional
        Handling of nonmatching IDs. See ``mantel`` function for more details.
    lookup : dict, optional
        Map existing IDs to new IDs. See ``mantel`` function for more details.

    Returns
    -------
    pandas.DataFrame
        ``DataFrame`` containing the results of each pairwise test (one per
        row). Includes the number of objects considered in each test as column
        ``n`` (after applying `lookup` and filtering nonmatching IDs if
        ``strict=False``). Column ``p-value`` has the p-values formatted as
        strings with the correct number of decimal places, or ``N/A`` if a
        p-value could not be computed.

    Raises
    ------
    ValueError
        If fewer than two distance matrices are provided, if the number of
        `labels` does not match the number of distance matrices, or if
        `labels` are not unique.

    See Also
    --------
    mantel

    Examples
    --------
    Import the functionality we'll use in the following examples. The call to
    ``pd.set_option`` ensures consistent ``DataFrame`` formatting across
    different versions of pandas. This call is not necessary for normal
    use; it is only included here so that the doctests will pass.

    >>> import pandas as pd
    >>> from skbio import DistanceMatrix
    >>> from skbio.stats.distance import pwmantel
    >>> try:
    ...     # not necessary for normal use
    ...     pd.set_option('show_dimensions', True)
    ... except KeyError:
    ...     pass

    Define three 3x3 distance matrices:

    >>> x = DistanceMatrix([[0, 1, 2],
    ...                     [1, 0, 3],
    ...                     [2, 3, 0]])
    >>> y = DistanceMatrix([[0, 2, 7],
    ...                     [2, 0, 6],
    ...                     [7, 6, 0]])
    >>> z = DistanceMatrix([[0, 5, 6],
    ...                     [5, 0, 1],
    ...                     [6, 1, 0]])

    Run Mantel tests for each pair of distance matrices (there are 3 possible
    pairs):

    >>> pwmantel((x, y, z), labels=('x', 'y', 'z'),
    ...          permutations=0) # doctest: +NORMALIZE_WHITESPACE
                 statistic p-value  n   method  permutations alternative
    dm1 dm2
    x   y     0.755929     N/A  3  pearson             0   two-sided
        z    -0.755929     N/A  3  pearson             0   two-sided
    y   z    -0.142857     N/A  3  pearson             0   two-sided
    <BLANKLINE>
    [3 rows x 6 columns]

    Note that we passed ``permutations=0`` to suppress significance tests; the
    p-values in the output are labelled ``N/A``.

    """
    # Materialize the input so that one-shot iterables (generators, iterators)
    # can be both counted and iterated over below.
    dms = list(dms)
    num_dms = len(dms)

    if num_dms < 2:
        raise ValueError("Must provide at least two distance matrices.")

    if labels is None:
        labels = range(num_dms)
    else:
        # Same reasoning as for ``dms``: labels are counted, checked for
        # uniqueness and then iterated, so they must be re-readable.
        labels = list(labels)
        if num_dms != len(labels):
            raise ValueError("Number of labels must match the number of "
                             "distance matrices.")
        if len(set(labels)) != len(labels):
            raise ValueError("Labels must be unique.")

    # Number of unordered pairs ("n choose 2"). Computed directly with exact
    # integer arithmetic rather than via ``scipy.misc.comb``, which has been
    # removed from SciPy.
    num_combs = num_dms * (num_dms - 1) // 2
    results_dtype = [('dm1', object), ('dm2', object), ('statistic', float),
                     ('p-value', object), ('n', int), ('method', object),
                     ('permutations', int), ('alternative', object)]
    # Preallocate one record per pair; every slot is filled in the loop below.
    results = np.empty(num_combs, dtype=results_dtype)

    for i, pair in enumerate(combinations(zip(labels, dms), 2)):
        (xlabel, x), (ylabel, y) = pair

        stat, p_val, n = mantel(x, y, method=method, permutations=permutations,
                                alternative=alternative, strict=strict,
                                lookup=lookup)

        # Store the p-value as a display string (or a placeholder message)
        # instead of a raw float.
        p_val_str = p_value_to_str(p_val, permutations)
        results[i] = (xlabel, ylabel, stat, p_val_str, n, method, permutations,
                      alternative)

    return pd.DataFrame.from_records(results, index=('dm1', 'dm2'))