def get_involved_complex_from_counts(multidatas_counts: pd.DataFrame, clusters_names: list,
                                     complex_expanded: pd.DataFrame, complex_composition: pd.DataFrame) -> (
        pd.DataFrame, pd.DataFrame):
    """
    Finds the complexes defined in counts and calculates the counts values

    :param multidatas_counts: counts table; must contain an 'id_multidata' column
    :param clusters_names: labels of the cluster columns; they are dropped from the
        complex counts that are returned
    :param complex_expanded: complex table; its column names are forwarded to
        cluster_counts_helper.merge_complex_counts
    :param complex_composition: complex composition table; must contain a
        'protein_multidata_id' column
    :return: tuple (complex_counts, multidatas_counts_filtered). Both elements are
        empty DataFrames when complex_helper finds no complex for the given counts.
    """
    # Remove counts that can't be part of a complex.
    # Series.isin is a vectorised membership test; it avoids scanning a Python
    # list once per row, which the previous apply/lambda filter did.
    is_complex_component = multidatas_counts['id_multidata'].isin(
        complex_composition['protein_multidata_id'])
    multidatas_counts_filtered = multidatas_counts[is_complex_component]

    # Find complexes with all components defined in counts
    # NOTE(review): the "all components" guarantee lives in complex_helper, which
    # is not visible here - confirm against that helper.
    complex_composition_counts = complex_helper.get_involved_complex_from_protein(multidatas_counts_filtered,
                                                                                  complex_expanded,
                                                                                  complex_composition,
                                                                                  drop_duplicates=False)

    # Nothing to merge: hand back two empty frames so callers can still unpack a pair
    if complex_composition_counts.empty:
        return pd.DataFrame(), pd.DataFrame()

    # Remove counts that are not defined in selected complexes
    multidatas_counts_filtered = filter_counts_by_genes(multidatas_counts_filtered,
                                                        complex_composition_counts['gene'].tolist())

    # Set the counts value a complex count.
    # NOTE(review): per the original comment this is the minimum value of the
    # complex components; that logic is inside merge_complex_counts - confirm there.
    complex_counts = cluster_counts_helper.merge_complex_counts(clusters_names, complex_composition_counts,
                                                                list(complex_expanded.columns.values))

    # Removes empty counts
    complex_counts = cluster_counts_filter.filter_empty_cluster_counts(complex_counts, clusters_names)

    # Drop the cluster columns without mutating the frame returned by the helper
    complex_counts = complex_counts.drop(clusters_names, axis=1)

    return complex_counts, multidatas_counts_filtered
    def test_get_involved_complex_from_protein_empty_result(self):
        # With no protein rows at all, the helper is expected to yield a frame
        # that compares equal (by data) to an empty DataFrame.
        fixtures_dir = self.FIXTURES_SUBPATH

        # Keep the protein fixture's columns but remove every row
        proteins = pd.read_csv('{}/helper_complex_protein.csv'.format(fixtures_dir))
        proteins = proteins.drop(proteins.index)

        complexes = pd.read_csv('{}/helper_complex_complex.csv'.format(fixtures_dir))
        complex_composition = pd.read_csv(
            '{}/helper_complex_complex_composition.csv'.format(fixtures_dir))

        result = complex_helper.get_involved_complex_from_protein(
            proteins, complexes, complex_composition, drop_duplicates=False)

        expected = pd.DataFrame()
        matches = dataframe_functions.dataframes_has_same_data(result, expected)
        self.assertTrue(matches)
    def test_get_involved_complex_from_protein(self):
        # Runs the helper on the stored fixtures and compares its output with
        # the stored expected result.
        def read_fixture(path_template):
            # Fill the fixtures directory into the template and load the CSV
            return pd.read_csv(path_template.format(self.FIXTURES_SUBPATH))

        proteins = read_fixture('{}/helper_complex_protein.csv')
        complexes = read_fixture('{}/helper_complex_complex.csv')
        complex_composition = read_fixture(
            '{}/helper_complex_complex_composition.csv')

        result_expected = read_fixture('{}/helper_complex_result.csv')

        result = complex_helper.get_involved_complex_from_protein(
            proteins, complexes, complex_composition, drop_duplicates=False)

        matches = dataframe_functions.dataframes_has_same_data(
            result, result_expected)
        self.assertTrue(matches)
def get_complex_involved_in_counts(
        multidatas_counts: pd.DataFrame, clusters_names: list,
        complex_composition: pd.DataFrame,
        complex_expanded: pd.DataFrame) -> pd.DataFrame:
    """
    Gets complexes involved in counts

    Looks up the complexes related to the given counts through complex_helper,
    merges them into per-complex counts, filters the result by the cluster
    columns and returns it with a fresh 0..n-1 index.
    """
    core_logger.debug('Finding Complexes')

    # Complex composition rows matched against the provided counts
    composition_with_counts = complex_helper.get_involved_complex_from_protein(
        multidatas_counts, complex_expanded, complex_composition,
        drop_duplicates=False)

    # Column names of the complex table, forwarded to the merge step
    complex_columns = list(complex_expanded.columns.values)
    merged_counts = merge_complex_counts(
        clusters_names, composition_with_counts, complex_columns)

    kept_counts = filter_empty_cluster_counts(merged_counts, clusters_names)

    # Renumber rows in place, discarding the previous index
    kept_counts.reset_index(drop=True, inplace=True)

    return kept_counts