Python MetadataCategory.replace Examples

Programming Language: Python

Namespace/Package Name: qiime2

Class/Type: MetadataCategory

Method/Function: replace

Examples at hotexamples.com: 4

Python MetadataCategory.replace - 4 examples found. These are the top rated real world Python examples of qiime2.MetadataCategory.replace extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

to_series(12)

MetadataCategory(10)

replace(2)

groupby(1)

Example #1

Show file

def beta_group_significance(output_dir: str,
                            distance_matrix: skbio.DistanceMatrix,
                            metadata: qiime2.MetadataCategory,
                            method: str='permanova',
                            permutations: int=999) -> None:
    try:
        beta_group_significance_fn = _beta_group_significance_fns[method]
    except KeyError:
        raise ValueError('Unknown group significance method %s. The available '
                         'options are %s.' %
                         (method,
                          ', '.join(_beta_group_significance_fns)))

    # Cast metadata to numeric (if applicable), which gives better sorting
    # in boxplots. Then filter any samples that are not in the distance matrix,
    # and drop samples with have no data for this metadata
    # category, including those with empty strings as values.
    metadata = pd.to_numeric(metadata.to_series(), errors='ignore')
    metadata = metadata.loc[list(distance_matrix.ids)]
    metadata = metadata.replace(r'', numpy.nan).dropna()

    # filter the distance matrix to exclude samples that were dropped from
    # the metadata, and keep track of how many samples survived the filtering
    # so that information can be presented to the user.
    initial_dm_length = distance_matrix.shape[0]
    distance_matrix = distance_matrix.filter(metadata.index)
    filtered_dm_length = distance_matrix.shape[0]

    # Run the significance test
    result = beta_group_significance_fn(distance_matrix, metadata,
                                        permutations=permutations)

    # Generate distance boxplots
    sns.set_style("white")
    # Identify the groups, then compute the within group distances and the
    # between group distances, and generate one boxplot per group.
    # groups will be an OrderedDict mapping group id to the sample ids in that
    # group. The order is used both on the x-axis, and in the layout of the
    # boxplots in the visualization.
    groupings = collections.OrderedDict(
        [(id, list(series.index))
         for id, series in sorted(metadata.groupby(metadata))])

    for group_id in groupings:
        group_distances, x_ticklabels = \
            _get_distance_boxplot_data(distance_matrix, group_id, groupings)

        ax = sns.boxplot(data=group_distances, flierprops={
            'marker': 'o', 'markeredgecolor': 'black', 'markeredgewidth': 0.5,
            'alpha': 0.5})
        ax.set_xticklabels(x_ticklabels, rotation=90)
        ax.set_xlabel('Group')
        ax.set_ylabel('Distance')
        ax.set_title('Distances to %s' % group_id)
        # change the color of the boxes to white
        for box in ax.artists:
            box.set_facecolor('white')
        sns.despine()
        plt.tight_layout()
        fig = ax.get_figure()
        fig.savefig(os.path.join(output_dir, '%s-boxplots.png' %
                                 urllib.parse.quote_plus(str(group_id))))
        fig.savefig(os.path.join(output_dir, '%s-boxplots.pdf' %
                                 urllib.parse.quote_plus(str(group_id))))
        fig.clear()

    result = result.to_frame().to_html(classes="table table-striped "
                                       "table-hover")
    result = result.replace('border="1"', 'border="0"')
    index = os.path.join(
        TEMPLATES, 'beta_group_significance_assets', 'index.html')
    q2templates.render(index, output_dir, context={
        'initial_dm_length': initial_dm_length,
        'filtered_dm_length': filtered_dm_length,
        'method': method,
        'groupings': groupings,
        'result': result
    })

Example #2

Show file

File: _visualizer.py Project: jairideout/diversity

def beta_correlation(output_dir: str,
                     distance_matrix: skbio.DistanceMatrix,
                     metadata: qiime2.MetadataCategory,
                     method: str='spearman',
                     permutations: int=999) -> None:
    test_statistics = {'spearman': 'rho', 'pearson': 'r'}
    alt_hypothesis = 'two-sided'
    try:
        metadata = pd.to_numeric(metadata.to_series(), errors='raise')
    except ValueError as e:
        raise ValueError('Only numeric data can be used with the Mantel test. '
                         'Non-numeric data was encountered in the sample '
                         'metadata. Orignal error message follows:\n%s' %
                         str(e))

    initial_metadata_length = len(metadata)
    metadata = metadata.loc[list(distance_matrix.ids)]
    metadata = metadata.replace(r'', numpy.nan).dropna()
    filtered_metadata_length = len(metadata)

    ids_with_missing_metadata = set(distance_matrix.ids) - set(metadata.index)
    if len(ids_with_missing_metadata) > 0:
        raise ValueError('All samples in distance matrix must be present '
                         'and contain data in the sample metadata. The '
                         'following samples were present in the distance '
                         'matrix, but were missing from the sample metadata '
                         'or had no data: %s' %
                         ', '.join(ids_with_missing_metadata))

    metadata_distances = _metadata_distance(metadata)
    r, p, n = skbio.stats.distance.mantel(
        distance_matrix, metadata_distances, method=method,
        permutations=permutations, alternative=alt_hypothesis, strict=True)

    result = pd.Series([method.title(), n, permutations, alt_hypothesis,
                        metadata.name, r, p],
                       index=['Method', 'Sample size', 'Permutations',
                              'Alternative hypothesis', 'Metadata category',
                              '%s %s' % (method.title(),
                                         test_statistics[method]),
                              'p-value'],
                       name='Mantel test results')
    result_html = result.to_frame().to_html(classes=("table table-striped "
                                                     "table-hover"))
    result_html = result_html.replace('border="1"', 'border="0"')

    scatter_data = []
    for id1, id2 in itertools.combinations(distance_matrix.ids, 2):
        scatter_data.append((distance_matrix[id1, id2],
                             metadata_distances[id1, id2]))
    x = 'Input distance'
    y = 'Euclidean distance of\n%s' % metadata.name
    scatter_data = pd.DataFrame(scatter_data, columns=[x, y])
    fig = sns.regplot(x=x, y=y, data=scatter_data, fit_reg=False).get_figure()
    fig.savefig(os.path.join(output_dir, 'beta-correlation-scatter.png'))
    fig.savefig(os.path.join(output_dir, 'beta-correlation-scatter.pdf'))

    index = os.path.join(
        TEMPLATES, 'beta_correlation_assets', 'index.html')
    q2templates.render(index, output_dir, context={
        'initial_metadata_length': initial_metadata_length,
        'filtered_metadata_length': filtered_metadata_length,
        'result': result_html
    })

Example #3

Show file

def beta_correlation(output_dir: str,
                     distance_matrix: skbio.DistanceMatrix,
                     metadata: qiime2.MetadataCategory,
                     method: str = 'spearman',
                     permutations: int = 999) -> None:
    test_statistics = {'spearman': 'rho', 'pearson': 'r'}
    alt_hypothesis = 'two-sided'
    try:
        metadata = pd.to_numeric(metadata.to_series(), errors='raise')
    except ValueError as e:
        raise ValueError('Only numeric data can be used with the Mantel test. '
                         'Non-numeric data was encountered in the sample '
                         'metadata. Orignal error message follows:\n%s' %
                         str(e))

    initial_metadata_length = len(metadata)
    metadata = metadata.loc[list(distance_matrix.ids)]
    metadata = metadata.replace(r'', numpy.nan).dropna()
    filtered_metadata_length = len(metadata)

    ids_with_missing_metadata = set(distance_matrix.ids) - set(metadata.index)
    if len(ids_with_missing_metadata) > 0:
        raise ValueError('All samples in distance matrix must be present '
                         'and contain data in the sample metadata. The '
                         'following samples were present in the distance '
                         'matrix, but were missing from the sample metadata '
                         'or had no data: %s' %
                         ', '.join(ids_with_missing_metadata))

    metadata_distances = _metadata_distance(metadata)
    r, p, n = skbio.stats.distance.mantel(distance_matrix,
                                          metadata_distances,
                                          method=method,
                                          permutations=permutations,
                                          alternative=alt_hypothesis,
                                          strict=True)

    result = pd.Series(
        [method.title(), n, permutations, alt_hypothesis, metadata.name, r, p],
        index=[
            'Method', 'Sample size', 'Permutations', 'Alternative hypothesis',
            'Metadata category',
            '%s %s' % (method.title(), test_statistics[method]), 'p-value'
        ],
        name='Mantel test results')
    result_html = result.to_frame().to_html(classes=("table table-striped "
                                                     "table-hover"))
    result_html = result_html.replace('border="1"', 'border="0"')

    scatter_data = []
    for id1, id2 in itertools.combinations(distance_matrix.ids, 2):
        scatter_data.append(
            (distance_matrix[id1, id2], metadata_distances[id1, id2]))
    x = 'Input distance'
    y = 'Euclidean distance of\n%s' % metadata.name
    plt.figure()
    scatter_data = pd.DataFrame(scatter_data, columns=[x, y])
    sns.regplot(x=x, y=y, data=scatter_data, fit_reg=False)
    plt.savefig(os.path.join(output_dir, 'beta-correlation-scatter.png'))
    plt.savefig(os.path.join(output_dir, 'beta-correlation-scatter.pdf'))

    index = os.path.join(TEMPLATES, 'beta_correlation_assets', 'index.html')
    q2templates.render(index,
                       output_dir,
                       context={
                           'initial_metadata_length': initial_metadata_length,
                           'filtered_metadata_length':
                           filtered_metadata_length,
                           'result': result_html
                       })

Example #4

Show file

File: _visualizer.py Project: jairideout/diversity

def beta_group_significance(output_dir: str,
                            distance_matrix: skbio.DistanceMatrix,
                            metadata: qiime2.MetadataCategory,
                            method: str='permanova',
                            pairwise: bool=False,
                            permutations: int=999) -> None:
    try:
        beta_group_significance_fn = _beta_group_significance_fns[method]
    except KeyError:
        raise ValueError('Unknown group significance method %s. The available '
                         'options are %s.' %
                         (method,
                          ', '.join(_beta_group_significance_fns)))

    # Cast metadata to numeric (if applicable), which gives better sorting
    # in boxplots. Then filter any samples that are not in the distance matrix,
    # and drop samples with have no data for this metadata
    # category, including those with empty strings as values.
    metadata = pd.to_numeric(metadata.to_series(), errors='ignore')
    metadata = metadata.loc[list(distance_matrix.ids)]
    metadata = metadata.replace(r'', numpy.nan).dropna()

    # filter the distance matrix to exclude samples that were dropped from
    # the metadata, and keep track of how many samples survived the filtering
    # so that information can be presented to the user.
    initial_dm_length = distance_matrix.shape[0]
    distance_matrix = distance_matrix.filter(metadata.index)
    filtered_dm_length = distance_matrix.shape[0]

    # Run the significance test
    result = beta_group_significance_fn(distance_matrix, metadata,
                                        permutations=permutations)

    # Generate distance boxplots
    sns.set_style("white")
    # Identify the groups, then compute the within group distances and the
    # between group distances, and generate one boxplot per group.
    # groups will be an OrderedDict mapping group id to the sample ids in that
    # group. The order is used both on the x-axis, and in the layout of the
    # boxplots in the visualization.
    groupings = collections.OrderedDict(
        [(id, list(series.index))
         for id, series in sorted(metadata.groupby(metadata))])

    for group_id in groupings:
        group_distances, x_ticklabels = \
            _get_distance_boxplot_data(distance_matrix, group_id, groupings)

        ax = sns.boxplot(data=group_distances, flierprops={
            'marker': 'o', 'markeredgecolor': 'black', 'markeredgewidth': 0.5,
            'alpha': 0.5})
        ax.set_xticklabels(x_ticklabels, rotation=90)
        ax.set_xlabel('Group')
        ax.set_ylabel('Distance')
        ax.set_title('Distances to %s' % group_id)
        # change the color of the boxes to white
        for box in ax.artists:
            box.set_facecolor('white')
        sns.despine()
        plt.tight_layout()
        fig = ax.get_figure()
        fig.savefig(os.path.join(output_dir, '%s-boxplots.png' %
                                 urllib.parse.quote_plus(str(group_id))))
        fig.savefig(os.path.join(output_dir, '%s-boxplots.pdf' %
                                 urllib.parse.quote_plus(str(group_id))))
        fig.clear()

    result_html = result.to_frame().to_html(classes=("table table-striped "
                                                     "table-hover"))
    result_html = result_html.replace('border="1"', 'border="0"')

    if pairwise:
        pairwise_results = []
        for group1_id, group2_id in itertools.combinations(groupings, 2):
            pairwise_result = \
                _get_pairwise_group_significance_stats(
                    distance_matrix=distance_matrix,
                    group1_id=group1_id,
                    group2_id=group2_id,
                    groupings=groupings,
                    metadata=metadata,
                    beta_group_significance_fn=beta_group_significance_fn,
                    permutations=permutations)
            pairwise_results.append([group1_id,
                                     group2_id,
                                     pairwise_result['sample size'],
                                     permutations,
                                     pairwise_result['test statistic'],
                                     pairwise_result['p-value']])
        columns = ['Group 1', 'Group 2', 'Sample size', 'Permutations',
                   result['test statistic name'], 'p-value']
        pairwise_results = pd.DataFrame(pairwise_results, columns=columns)
        pairwise_results.set_index(['Group 1', 'Group 2'], inplace=True)
        pairwise_results['q-value'] = multipletests(
            pairwise_results['p-value'], method='fdr_bh')[1]
        pairwise_results.sort_index(inplace=True)
        pairwise_path = os.path.join(
            output_dir, '%s-pairwise.csv' % method)
        pairwise_results.to_csv(pairwise_path)

        pairwise_results_html = pairwise_results.to_html(
            classes=("table table-striped table-hover"))
        pairwise_results_html = pairwise_results_html.replace(
            'border="1"', 'border="0"')
    else:
        pairwise_results_html = None

    index = os.path.join(
        TEMPLATES, 'beta_group_significance_assets', 'index.html')
    q2templates.render(index, output_dir, context={
        'initial_dm_length': initial_dm_length,
        'filtered_dm_length': filtered_dm_length,
        'method': method,
        'groupings': groupings,
        'result': result_html,
        'pairwise_results': pairwise_results_html
    })