Example #1
def build_results(
        interactions: pd.DataFrame, mean_analysis: pd.DataFrame,
        percent_analysis: pd.DataFrame, clusters_means: dict,
        result_precision: int) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame):
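    """
    Sets the simple (non-complex) results data structure from method generated data.
    Results documents are defined by specs.
    """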
    core_logger.info('Building Simple results')
    interacting_pair = cpdb_statistical_analysis_helper.interacting_pair_build(
        interactions)

    interactions_data_result = pd.DataFrame(interactions[[
        'id_cp_interaction', 'name_1', 'name_2', 'ensembl_1', 'ensembl_2',
        'source'
    ]].copy())

    interactions_data_result = pd.concat(
        [interacting_pair, interactions_data_result], axis=1, sort=False)

    interactions_data_result['secreted'] = (interactions['secretion_1']
                                            | interactions['secretion_2'])
    interactions_data_result['is_integrin'] = (
        interactions['integrin_interaction_1']
        | interactions['integrin_interaction_2'])

    interactions_data_result.rename(columns={
        'name_1': 'partner_a',
        'name_2': 'partner_b',
        'ensembl_1': 'ensembl_a',
        'ensembl_2': 'ensembl_b'
    },
                                    inplace=True)

    interactions_data_result['partner_a'] = interactions_data_result[
        'partner_a'].apply(lambda name: 'simple:{}'.format(name))
    interactions_data_result['partner_b'] = interactions_data_result[
        'partner_b'].apply(lambda name: 'simple:{}'.format(name))

    significant_mean_rank, significant_means = cpdb_analysis_helper.build_significant_means(
        mean_analysis, percent_analysis)
    significant_means = significant_means.round(result_precision)

    mean_analysis = mean_analysis.round(result_precision)
    for key, cluster_means in clusters_means.items():
        clusters_means[key] = cluster_means.round(result_precision)

    # Document 2
    means_result = pd.concat([interactions_data_result, mean_analysis],
                             axis=1,
                             join='inner',
                             sort=False)

    # Document 3
    significant_means_result = pd.concat(
        [interactions_data_result, significant_mean_rank, significant_means],
        axis=1,
        join='inner',
        sort=False)

    # Document 5
    deconvoluted_result = deconvoluted_result_build(clusters_means,
                                                    interactions)

    return means_result, significant_means_result, deconvoluted_result
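

# Illustration (not part of the CellPhoneDB source): a minimal, self-contained
# sketch of the per-interaction transformations above, run on a toy frame.
# The column names mirror what this build_results variant reads; the row
# values are invented for the example.
import pandas as pd

toy = pd.DataFrame({
    'name_1': ['P01375', 'P10145'],
    'name_2': ['P19438', 'P25024'],
    'secretion_1': [True, False],
    'secretion_2': [False, False],
})

# OR the per-partner flags into one interaction-level flag, as done above for
# 'secreted' and 'is_integrin'.
toy['secreted'] = toy['secretion_1'] | toy['secretion_2']

# Prefix simple (non-complex) partner names, as done above for partner_a/partner_b.
toy['partner_a'] = toy['name_1'].apply(lambda name: 'simple:{}'.format(name))
toy['partner_b'] = toy['name_2'].apply(lambda name: 'simple:{}'.format(name))

print(toy[['partner_a', 'partner_b', 'secreted']])

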
def build_results(interactions: pd.DataFrame,
                  mean_analysis: pd.DataFrame,
                  percent_analysis: pd.DataFrame,
                  clusters_means: dict,
                  result_precision: int,
                  counts_data: str) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame):
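    """
    Sets the simple (non-complex) results data structure from method generated data,
    using the gene identifier given by counts_data. Results documents are defined by specs.
    """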
    core_logger.info('Building Simple results')
    interacting_pair = cpdb_statistical_analysis_helper.interacting_pair_build(interactions)

    gene_columns = ['{}_{}'.format(counts_data, suffix) for suffix in ('1', '2')]
    gene_renames = {column: 'gene_{}'.format(suffix) for column, suffix in zip(gene_columns, ['a', 'b'])}

    interactions_data_result = pd.DataFrame(
        interactions[
            ['id_cp_interaction', 'name_1', 'name_2', 'receptor_1', 'receptor_2', *gene_columns,
             'annotation_strategy']].copy())

    interactions_data_result = pd.concat([interacting_pair, interactions_data_result], axis=1, sort=False)

    interactions_data_result['secreted'] = (interactions['secreted_1'] | interactions['secreted_2'])
    interactions_data_result['is_integrin'] = (
            interactions['integrin_1'] | interactions['integrin_2'])
    interactions_data_result['receptor'] = (
            interactions['receptor_1'] | interactions['receptor_2'])

    interactions_data_result.rename(
        columns={'name_1': 'partner_a', 'name_2': 'partner_b', 'receptor_1': 'receptor_a', 'receptor_2': 'receptor_b',
                 **gene_renames},
        inplace=True)

    interactions_data_result['partner_a'] = interactions_data_result['partner_a'].apply(
        lambda name: 'simple:{}'.format(name))
    interactions_data_result['partner_b'] = interactions_data_result['partner_b'].apply(
        lambda name: 'simple:{}'.format(name))

    # Dedupe rows and filter only desired columns
    interactions_data_result.drop_duplicates(inplace=True)

    means_columns = ['id_cp_interaction', 'interacting_pair', 'partner_a', 'partner_b', 'gene_a', 'gene_b', 'secreted',
                     'receptor_a', 'receptor_b', 'annotation_strategy', 'is_integrin']

    interactions_data_result = interactions_data_result[means_columns]

    significant_mean_rank, significant_means = cpdb_analysis_helper.build_significant_means(mean_analysis,
                                                                                            percent_analysis)
    significant_means = significant_means.round(result_precision)

    mean_analysis = mean_analysis.round(result_precision)
    for key, cluster_means in clusters_means.items():
        clusters_means[key] = cluster_means.round(result_precision)

    # Document 2
    means_result = pd.concat([interactions_data_result, mean_analysis], axis=1, join='inner', sort=False)

    # Document 3
    significant_means_result = pd.concat([interactions_data_result, significant_mean_rank, significant_means], axis=1,
                                         join='inner', sort=False)

    # Document 5
    deconvoluted_result = deconvoluted_result_build(clusters_means, interactions, counts_data)

    return means_result, significant_means_result, deconvoluted_result
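

# Illustration (not part of the source): how gene_columns and gene_renames
# above resolve for a concrete counts_data value. Using 'ensembl' here is an
# assumption made for the example (the first variant above reads ensembl_1/_2).
counts_data = 'ensembl'

gene_columns = ['{}_{}'.format(counts_data, suffix) for suffix in ('1', '2')]
gene_renames = {column: 'gene_{}'.format(suffix)
                for column, suffix in zip(gene_columns, ['a', 'b'])}

print(gene_columns)  # ['ensembl_1', 'ensembl_2']
print(gene_renames)  # {'ensembl_1': 'gene_a', 'ensembl_2': 'gene_b'}

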
def build_results(
        interactions: pd.DataFrame, mean_analysis: pd.DataFrame,
        percent_analysis: pd.DataFrame, clusters_means: dict,
        complex_compositions: pd.DataFrame, counts: pd.DataFrame,
        genes: pd.DataFrame, result_precision: int,
        counts_data: str) -> (pd.DataFrame, pd.DataFrame, pd.DataFrame):
    """
    Sets the results data structure from method generated data. Results documents are defined by specs.
    """
    core_logger.info('Building Complex results')
    interacting_pair = cpdb_statistical_analysis_helper.interacting_pair_build(
        interactions)

    interactions = interactions.copy()

    def simple_complex_indicator(interaction: pd.Series, suffix: str) -> str:
        """
        Add simple/complex prefixes to interaction components
        """
        if interaction['is_complex{}'.format(suffix)]:
            return 'complex:{}'.format(interaction['name{}'.format(suffix)])

        return 'simple:{}'.format(interaction['name{}'.format(suffix)])

    interactions['partner_a'] = interactions.apply(
        lambda interaction: simple_complex_indicator(interaction, '_1'),
        axis=1)
    interactions['partner_b'] = interactions.apply(
        lambda interaction: simple_complex_indicator(interaction, '_2'),
        axis=1)

    significant_mean_rank, significant_means = cpdb_analysis_helper.build_significant_means(
        mean_analysis, percent_analysis)
    significant_means = significant_means.round(result_precision)

    gene_columns = [
        '{}_{}'.format(counts_data, suffix) for suffix in ('1', '2')
    ]
    gene_renames = {
        column: 'gene_{}'.format(suffix)
        for column, suffix in zip(gene_columns, ['a', 'b'])
    }

    # Remove useless columns
    interactions_data_result = pd.DataFrame(interactions[[
        'id_cp_interaction', 'partner_a', 'partner_b', 'receptor_1',
        'receptor_2', *gene_columns, 'annotation_strategy'
    ]].copy())

    interactions_data_result = pd.concat(
        [interacting_pair, interactions_data_result], axis=1, sort=False)

    interactions_data_result['secreted'] = (interactions['secreted_1']
                                            | interactions['secreted_2'])
    interactions_data_result['is_integrin'] = (interactions['integrin_1']
                                               | interactions['integrin_2'])

    interactions_data_result.rename(columns={
        **gene_renames, 'receptor_1': 'receptor_a',
        'receptor_2': 'receptor_b'
    },
                                    inplace=True)

    # Dedupe rows and filter only desired columns
    interactions_data_result.drop_duplicates(inplace=True)

    means_columns = [
        'id_cp_interaction', 'interacting_pair', 'partner_a', 'partner_b',
        'gene_a', 'gene_b', 'secreted', 'receptor_a', 'receptor_b',
        'annotation_strategy', 'is_integrin'
    ]

    interactions_data_result = interactions_data_result[means_columns]

    mean_analysis = mean_analysis.round(result_precision)

    # Round result decimals
    for key, cluster_means in clusters_means.items():
        clusters_means[key] = cluster_means.round(result_precision)

    # Document 2
    means_result = pd.concat([interactions_data_result, mean_analysis],
                             axis=1,
                             join='inner',
                             sort=False)

    # Document 3
    significant_means_result = pd.concat(
        [interactions_data_result, significant_mean_rank, significant_means],
        axis=1,
        join='inner',
        sort=False)

    # Document 5
    deconvoluted_result = deconvoluted_complex_result_build(
        clusters_means, interactions, complex_compositions, counts, genes,
        counts_data)

    return means_result, significant_means_result, deconvoluted_result
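

# Illustration (not part of the source): the axis=1, join='inner' concat used
# above for means_result and significant_means_result aligns frames on their
# shared index and keeps only rows present in every frame. The toy frames and
# cluster-pair column names below are invented for the example.
import pandas as pd

metadata = pd.DataFrame({'id_cp_interaction': ['CPI-1', 'CPI-2', 'CPI-3']},
                        index=[0, 1, 2])
means = pd.DataFrame({'clusterA|clusterB': [0.12, 0.55],
                      'clusterB|clusterA': [0.03, 0.41]},
                     index=[0, 2])

# Only index values 0 and 2 survive the inner join, mirroring how the result
# keeps the interactions for which cluster means were computed.
combined = pd.concat([metadata, means], axis=1, join='inner', sort=False)
print(combined)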