Example #1
0
def ancom(output_dir: str,
          table: pd.DataFrame,
          metadata: qiime2.CategoricalMetadataColumn,
          transform_function: str = 'clr',
          difference_function: str = None) -> None:
    """Run ANCOM on ``table`` and write an HTML report into ``output_dir``.

    The metadata column is first restricted to the IDs in ``table.index``.
    The following files are written into ``output_dir``:

    * ``ancom.csv`` -- the full ANCOM result table, sorted by ``W``
      (descending).
    * ``percent-abundances.csv`` -- only when the ANCOM call returned a
      second result frame.
    * ``index.html`` -- the report page linking to the files above and
      embedding the volcano plot when ``_volcanoplot`` produced one.

    Parameters
    ----------
    output_dir : str
        Directory the report files are written to.
    table : pd.DataFrame
        Feature table passed to ANCOM; its index supplies the sample IDs
        used to filter ``metadata``.
    metadata : qiime2.CategoricalMetadataColumn
        Grouping column; must have a value for every sample in ``table``.
    transform_function : str
        Forwarded unchanged to ``_volcanoplot``.
    difference_function : str
        Forwarded unchanged to ``_volcanoplot``.

    Raises
    ------
    ValueError
        If the filtered metadata column has missing values.
    """
    index_fp = os.path.join(output_dir, 'index.html')

    metadata = metadata.filter_ids(table.index)
    if metadata.has_missing_values():
        missing_data_sids = metadata.get_ids(where_values_missing=True)
        missing_data_sids = ', '.join(sorted(missing_data_sids))
        raise ValueError('Metadata column is missing values for the '
                         'following samples. Values need to be added for '
                         'these samples, or the samples need to be removed '
                         'from the table: %s' % missing_data_sids)

    ancom_results = skbio_ancom(table,
                                metadata.to_series(),
                                significance_test=f_oneway)
    # scikit-bio 0.4.2 returns a single tuple from ancom, and scikit-bio 0.5.0
    # returns two tuples. We want to support both scikit-bio versions, so we
    # tuplize ancom_result to support both. Similarly, the "reject" column
    # was renamed in scikit-bio 0.5.0, so we apply a rename here (which does
    # nothing if a column called "reject" isn't found).
    ancom_results = qiime2.core.util.tuplize(ancom_results)
    ancom_df = ancom_results[0]
    # sort_values returns a new frame unless inplace=True; without it the
    # sorted result was silently discarded and the table stayed unsorted.
    ancom_df.sort_values(by='W', ascending=False, inplace=True)
    ancom_df.rename(columns={'reject': 'Reject null hypothesis'},
                    inplace=True)

    ancom_df.to_csv(os.path.join(output_dir, 'ancom.csv'),
                    header=True,
                    index=True)

    html = _volcanoplot(output_dir, table, metadata, ancom_df,
                        transform_function, difference_function)

    # Boolean-mask the rows for which the null hypothesis was rejected.
    significant_features = ancom_df[ancom_df['Reject null hypothesis']]
    significant_features_present = not significant_features.empty
    insignificant_div = ('<div>No significant features identified!</div>')

    with open(index_fp, 'w') as index_f:
        index_f.write('<html>\n')
        if html is not None:
            # The plot resources are only needed when a plot is embedded.
            index_f.write('<head>\n')
            index_f.write(INLINE.render())
            index_f.write('</head>\n')
        index_f.write('<body>\n')
        index_f.write('<h1>ANCOM statistical results</h1>\n')
        index_f.write('<a href="ancom.csv">Download complete table as CSV</a>'
                      '<br>\n')
        if significant_features_present:
            index_f.write(
                q2templates.df_to_html(significant_features['W'].to_frame(),
                                       border=None,
                                       classes=None))
        else:
            index_f.write(insignificant_div)
        if len(ancom_results) == 2:
            # A second result frame is only present for some scikit-bio
            # versions (see the comment above the tuplize call).
            ancom_results[1].to_csv(os.path.join(output_dir,
                                                 'percent-abundances.csv'),
                                    header=True,
                                    index=True)
            index_f.write(('<h1>Percentile abundances of features '
                           'by group</h1>\n'))
            index_f.write(('<a href="percent-abundances.csv">'
                           'Download complete table as CSV</a><br>\n'))
            if significant_features_present:
                index_f.write(
                    q2templates.df_to_html(
                        ancom_results[1].loc[significant_features.index],
                        border=None,
                        classes=None))
            else:
                index_f.write(insignificant_div)
        if html is not None:
            index_f.write(html[1])
            index_f.write(html[0])
        else:
            index_f.write('<p>Unable to generate volcano plot, please check '
                          'the ANCOM statistical results (above).</p>\n')
        index_f.write('</body></html>\n')
Example #2
0
def ancom(output_dir: str,
          table: pd.DataFrame,
          metadata: qiime2.CategoricalMetadataColumn,
          transform_function: str = 'clr',
          difference_function: str = None) -> None:
    """Run ANCOM on ``table`` and render the report into ``output_dir``.

    The metadata column is first restricted to the IDs in ``table.index``.
    Written into ``output_dir``: the contents of the ``ancom`` template
    directory, ``ancom.tsv``, ``percent-abundances.tsv``, the rendered
    ``index.html`` and, when a volcano plot can be built, ``data.tsv``.

    Parameters
    ----------
    output_dir : str
        Directory the report files are written to.
    table : pd.DataFrame
        Feature table passed to ANCOM; its index supplies the sample IDs
        used to filter ``metadata``.
    metadata : qiime2.CategoricalMetadataColumn
        Grouping column; must have a value for every sample in ``table``.
    transform_function : str
        Key into ``_transform_functions``; also used as the x-axis title
        and column name of the volcano plot data.
    difference_function : str
        Key into ``_difference_functions``. When ``None``,
        ``'mean_difference'`` is used for exactly two categories and
        ``'f_statistic'`` otherwise.

    Raises
    ------
    ValueError
        If the filtered metadata column has missing values.
    """
    metadata = metadata.filter_ids(table.index)
    if metadata.has_missing_values():
        missing_data_sids = metadata.get_ids(where_values_missing=True)
        missing_data_sids = ', '.join(sorted(missing_data_sids))
        raise ValueError('Metadata column is missing values for the '
                         'following samples. Values need to be added for '
                         'these samples, or the samples need to be removed '
                         'from the table: %s' % missing_data_sids)
    ancom_results = skbio_ancom(table,
                                metadata.to_series(),
                                significance_test=f_oneway)
    ancom_results[0].sort_values(by='W', ascending=False, inplace=True)
    ancom_results[0].rename(columns={'reject': 'Reject null hypothesis'},
                            inplace=True)
    # Boolean-mask the rows for which the null hypothesis was rejected.
    significant_features = ancom_results[0][ancom_results[0]
                                            ['Reject null hypothesis']]

    context = dict()
    if not significant_features.empty:
        context['significant_features'] = q2templates.df_to_html(
            significant_features['W'].to_frame())
        context['percent_abundances'] = q2templates.df_to_html(
            ancom_results[1].loc[significant_features.index])

    metadata = metadata.to_series()
    # Sort the categories: iterating a bare set gives an arbitrary order,
    # which made the operand order of the difference function (and hence
    # the sign of a two-group difference) vary between runs.
    cats = sorted(set(metadata))
    transform_function_name = transform_function
    transform_function = _transform_functions[transform_function]
    transformed_table = table.apply(transform_function,
                                    axis=1,
                                    result_type='broadcast')

    if difference_function is None:
        if len(cats) == 2:
            difference_function = 'mean_difference'
        else:
            difference_function = 'f_statistic'

    _d_func = _difference_functions[difference_function]

    def diff_func(x):
        # Some difference functions return a tuple; only its first element
        # is used as the plotted value.
        args = _d_func(*[x[metadata == c] for c in cats])
        if isinstance(args, tuple):
            return args[0]
        else:
            return args

    # effectively doing a groupby operation wrt to the metadata
    fold_change = transformed_table.apply(diff_func, axis=0)
    if not pd.isnull(fold_change).all():
        pre_filtered_ids = set(fold_change.index)
        # Drop NaN and +/-inf values. This is done explicitly instead of
        # through the deprecated ``mode.use_inf_as_na`` pandas option.
        fold_change = fold_change.replace(
            [float('inf'), float('-inf')], float('nan')).dropna()
        filtered_ids = pre_filtered_ids - set(fold_change.index)
        filtered_ancom_results = ancom_results[0].drop(
            labels=list(filtered_ids))

        volcano_results = pd.DataFrame({
            transform_function_name: fold_change,
            'W': filtered_ancom_results.W
        })
        # Name the index so reset_index() below yields an 'id' column,
        # which the tooltip signal refers to.
        volcano_results.index.name = 'id'
        volcano_results.to_csv(os.path.join(output_dir, 'data.tsv'),
                               header=True,
                               index=True,
                               sep='\t')
        volcano_results = volcano_results.reset_index(drop=False)

        spec = {
            '$schema': 'https://vega.github.io/schema/vega/v4.json',
            'width': 300,
            'height': 300,
            'data': [{
                'name': 'values',
                'values': volcano_results.to_dict(orient='records')
            }],
            'scales': [{
                'name': 'xScale',
                'domain': {
                    'data': 'values',
                    'field': transform_function_name
                },
                'range': 'width'
            }, {
                'name': 'yScale',
                'domain': {
                    'data': 'values',
                    'field': 'W'
                },
                'range': 'height'
            }],
            'axes': [{
                'scale': 'xScale',
                'orient': 'bottom',
                'title': transform_function_name
            }, {
                'scale': 'yScale',
                'orient': 'left',
                'title': 'W'
            }],
            'marks': [{
                'type': 'symbol',
                'from': {
                    'data': 'values'
                },
                'encode': {
                    'hover': {
                        'fill': {'value': '#FF0000'},
                        'opacity': {'value': 1}
                    },
                    'enter': {
                        'x': {
                            'scale': 'xScale',
                            'field': transform_function_name
                        },
                        'y': {
                            'scale': 'yScale',
                            'field': 'W'
                        }
                    },
                    'update': {
                        'fill': {'value': 'black'},
                        'opacity': {'value': 0.3},
                        'tooltip': {
                            'signal':
                            "{{'title': datum['id'], '{0}': "
                            "datum['{0}'], 'W': datum['W']}}".format(
                                transform_function_name)
                        }
                    }
                }
            }]
        }
        context['vega_spec'] = json.dumps(spec)
        if filtered_ids:
            context['filtered_ids'] = ', '.join(sorted(filtered_ids))

    copy_tree(os.path.join(TEMPLATES, 'ancom'), output_dir)
    ancom_results[0].to_csv(os.path.join(output_dir, 'ancom.tsv'),
                            header=True,
                            index=True,
                            sep='\t')
    ancom_results[1].to_csv(os.path.join(output_dir, 'percent-abundances.tsv'),
                            header=True,
                            index=True,
                            sep='\t')
    index = os.path.join(TEMPLATES, 'ancom', 'index.html')
    q2templates.render(index, output_dir, context=context)
Example #3
0
def ancom(output_dir: str,
          table: pd.DataFrame,
          metadata: qiime2.CategoricalMetadataColumn,
          transform_function: str = 'clr',
          difference_function: str = None) -> None:
    """Run ANCOM on ``table`` and render the report into ``output_dir``.

    The metadata column is first restricted to the IDs in ``table.index``.
    Written into ``output_dir``: the contents of the ``ancom`` template
    directory, ``ancom.tsv``, ``percent-abundances.tsv`` and the rendered
    ``index.html``.

    Parameters
    ----------
    output_dir : str
        Directory the report files are written to.
    table : pd.DataFrame
        Feature table passed to ANCOM; its index supplies the sample IDs
        used to filter ``metadata``.
    metadata : qiime2.CategoricalMetadataColumn
        Grouping column; must have a value for every sample in ``table``.
    transform_function : str
        Key into ``_transform_functions``; also used as the x-axis title
        and column name of the volcano plot data.
    difference_function : str
        Key into ``_difference_functions``. When ``None``,
        ``'mean_difference'`` is used for exactly two categories and
        ``'f_statistic'`` otherwise.

    Raises
    ------
    ValueError
        If the filtered metadata column has missing values.
    """
    metadata = metadata.filter_ids(table.index)
    if metadata.has_missing_values():
        missing_data_sids = metadata.get_ids(where_values_missing=True)
        missing_data_sids = ', '.join(sorted(missing_data_sids))
        raise ValueError('Metadata column is missing values for the '
                         'following samples. Values need to be added for '
                         'these samples, or the samples need to be removed '
                         'from the table: %s' % missing_data_sids)
    ancom_results = skbio_ancom(table,
                                metadata.to_series(),
                                significance_test=f_oneway)
    ancom_results[0].sort_values(by='W', ascending=False, inplace=True)
    ancom_results[0].rename(columns={'reject': 'Reject null hypothesis'},
                            inplace=True)
    # Boolean-mask the rows for which the null hypothesis was rejected.
    significant_features = ancom_results[0][
        ancom_results[0]['Reject null hypothesis']]

    context = dict()
    if not significant_features.empty:
        context['significant_features'] = q2templates.df_to_html(
            significant_features['W'].to_frame())
        context['percent_abundances'] = q2templates.df_to_html(
            ancom_results[1].loc[significant_features.index])

    metadata = metadata.to_series()
    # Sort the categories: iterating a bare set gives an arbitrary order,
    # which made the operand order of the difference function (and hence
    # the sign of a two-group difference) vary between runs.
    cats = sorted(set(metadata))
    transform_function_name = transform_function
    transform_function = _transform_functions[transform_function]
    transformed_table = table.apply(
        transform_function, axis=1, result_type='broadcast')

    if difference_function is None:
        if len(cats) == 2:
            difference_function = 'mean_difference'
        else:
            difference_function = 'f_statistic'

    _d_func = _difference_functions[difference_function]

    def diff_func(x):
        # Some difference functions return a tuple; only its first element
        # is used as the plotted value.
        args = _d_func(*[x[metadata == c] for c in cats])
        if isinstance(args, tuple):
            return args[0]
        else:
            return args
    # effectively doing a groupby operation wrt to the metadata
    fold_change = transformed_table.apply(diff_func, axis=0)
    if not pd.isnull(fold_change).all():
        volcano_results = pd.DataFrame({transform_function_name: fold_change,
                                        'W': ancom_results[0].W})
        # Keep only rows with a finite fold change: json.dumps would emit
        # NaN/Infinity tokens for the others, which are not valid JSON and
        # cannot be placed on the plot's scales. The full results are still
        # written to ancom.tsv below.
        finite_rows = volcano_results[transform_function_name].replace(
            [float('inf'), float('-inf')], float('nan')).notna()
        volcano_results = volcano_results[finite_rows]
        # Name the index explicitly so that reset_index() always produces
        # an 'id' column for the tooltip, whatever the index was called.
        volcano_results.index.name = 'id'
        volcano_results = volcano_results.reset_index(drop=False)

        spec = {
            '$schema': 'https://vega.github.io/schema/vega/v4.json',
            'width': 300,
            'height': 300,
            'data': [
                {'name': 'values',
                 'values': volcano_results.to_dict(orient='records')}],
            'scales': [
                {'name': 'xScale',
                 'domain': {'data': 'values',
                            'field': transform_function_name},
                 'range': 'width'},
                {'name': 'yScale',
                 'domain': {'data': 'values', 'field': 'W'},
                 'range': 'height'}],
            'axes': [
                {'scale': 'xScale', 'orient': 'bottom',
                 'title': transform_function_name},
                {'scale': 'yScale', 'orient': 'left', 'title': 'W'}],
            'marks': [
              {'type': 'symbol',
               'from': {'data': 'values'},
               'encode': {
                   'hover': {
                       'fill': {'value': '#FF0000'},
                       'opacity': {'value': 1}},
                   'enter': {
                       'x': {'scale': 'xScale',
                             'field': transform_function_name},
                       'y': {'scale': 'yScale', 'field': 'W'}},
                   'update': {
                       'fill': {'value': 'black'},
                       'opacity': {'value': 0.3},
                       'tooltip': {
                           'signal': "{{'title': datum['id'], '{0}': "
                                     "datum['{0}'], 'W': datum['W']}}".format(
                                         transform_function_name)}}}}]}
        context['vega_spec'] = json.dumps(spec)

    copy_tree(os.path.join(TEMPLATES, 'ancom'), output_dir)
    ancom_results[0].to_csv(os.path.join(output_dir, 'ancom.tsv'),
                            header=True, index=True, sep='\t')
    ancom_results[1].to_csv(os.path.join(output_dir,
                                         'percent-abundances.tsv'),
                            header=True, index=True, sep='\t')
    index = os.path.join(TEMPLATES, 'ancom', 'index.html')
    q2templates.render(index, output_dir, context=context)