def filter_all(input_path, result_path):
    """Filter the generated complex/protein/gene tables by interaction partners.

    Reads ``interaction_input.csv``, ``complex_generated.csv``,
    ``protein_generated.csv`` and ``gene_generated.csv`` from ``input_path``,
    narrows them with ``_filter_complexes``, ``_filter_proteins`` and
    ``_filter_genes``, and writes ``complex_input.csv``, ``protein_input.csv``
    and ``gene_input.csv`` into the directory returned by
    ``_set_paths(output_dir, result_path)``.

    Partners that are neither a kept complex name nor a kept uniprot are
    reported in a single warning.
    """
    def _read(csv_name):
        return pd.read_csv(os.path.join(input_path, csv_name))

    interactions = _read('interaction_input.csv')
    complexes = _read('complex_generated.csv')
    proteins = _read('protein_generated.csv')
    genes = _read('gene_generated.csv')
    output_path = _set_paths(output_dir, result_path)

    # Every identifier found on either side of an interaction, listed once.
    partner_columns = [interactions['partner_a'], interactions['partner_b']]
    partners = pd.concat(partner_columns).drop_duplicates()

    kept_complexes = _filter_complexes(complexes, partners)
    write_to_file(kept_complexes, 'complex_input.csv', output_path=output_path)

    # The second returned value is not used by this function.
    kept_proteins, _ = _filter_proteins(proteins, kept_complexes, partners)
    write_to_file(kept_proteins, 'protein_input.csv', output_path=output_path)

    kept_genes = _filter_genes(genes, kept_proteins['uniprot'])
    write_to_file(kept_genes, 'gene_input.csv', output_path=output_path)

    is_kept_complex = partners.isin(kept_complexes['complex_name'])
    is_kept_protein = partners.isin(kept_proteins['uniprot'])
    rejected = partners[~(is_kept_complex | is_kept_protein)]

    if len(rejected):
        app_logger.warning(
            'There are some proteins or complexes not interacting properly: `{}`'
            .format(', '.join(rejected)))
Esempio n. 2
0
    def test_duplicated_gene_ensembl_is_not_in_interaction(self):
        """Check that no gene with a duplicated ensembl id is used by an interaction.

        Genes sharing an ``ensembl`` value are collected and narrowed to those
        whose ``id_multidata`` appears as ``multidata_1_id`` or
        ``multidata_2_id`` of any interaction. If any remain they are logged
        as CSV and the test fails.
        """
        database_manager = cellphonedb_app.cellphonedb.database_manager
        all_genes = database_manager.get_repository('gene').get_all_expanded()
        all_interactions = database_manager.get_repository(
            'interaction').get_all()

        genes_duplicated_ensembl = all_genes[all_genes.duplicated('ensembl',
                                                                  keep=False)]

        # Built once as a set: the membership test below runs once per gene
        # row, and a list lookup there makes the whole check quadratic.
        interaction_multidata_ids = set(
            all_interactions['multidata_1_id'].tolist()
            + all_interactions['multidata_2_id'].tolist())

        duplicated_gene_ensembls_in_interactions = genes_duplicated_ensembl[
            genes_duplicated_ensembl['id_multidata'].apply(
                lambda multidata_id: multidata_id in interaction_multidata_ids)]

        if not duplicated_gene_ensembls_in_interactions.empty:
            app_logger.warning(
                'Some duplicated ensembls apears in interactions')
            app_logger.warning(
                duplicated_gene_ensembls_in_interactions.to_csv(index=False))

        self.assertTrue(duplicated_gene_ensembls_in_interactions.empty,
                        'Some duplicated ensembl gene apears in interactions')
Esempio n. 3
0
    def test_interaction(self):
        """Check that every entry of ``interaction_entries`` exists in the database.

        For each expected entry the expanded interaction table is narrowed
        column by column; the entry matches when at least one row survives
        all filters. Entries without a match are logged and fail the test.
        """
        interaction_df = cellphonedb_app.cellphonedb.database_manager.get_repository(
            'interaction').get_all_expanded()

        data_not_match = False

        for interaction in interaction_entries:
            db_interaction = interaction_df
            non_match_properties = []
            for column_name in interaction:
                expected_value = interaction[column_name]
                # ``None`` in the fixture means "this column must be null".
                if expected_value is None:
                    db_interaction = db_interaction[pd.isnull(
                        db_interaction[column_name])]
                else:
                    db_interaction = db_interaction[
                        db_interaction[column_name] == expected_value]

                # Once no candidate row is left, this column and every later
                # one are recorded, because the selection stays empty.
                if len(db_interaction) < 1:
                    non_match_properties.append(column_name)

            if len(db_interaction) < 1:
                app_logger.warning('Failed cheking Interaction:')
                app_logger.warning('Expected data:')
                app_logger.warning(interaction)
                app_logger.warning('Non Match properties')
                app_logger.warning(non_match_properties)
                data_not_match = True

        self.assertFalse(data_not_match, 'Some Interactions doesnt match')
Esempio n. 4
0
def write_to_file(df: pd.DataFrame,
                  filename: str,
                  output_path: str,
                  output_format: Optional[str] = None):
    """Write ``df`` to ``output_path/filename`` without the index.

    The field separator is obtained from ``_get_separator`` for the effective
    extension:

    * no ``output_format`` and no extension in ``filename``: ``.txt`` is
      appended and used;
    * no ``output_format`` but ``filename`` has an extension: that extension
      is used;
    * ``output_format`` given: ``.<output_format>`` is used, and appended to
      ``filename`` when it differs from the current extension (a warning is
      logged if ``filename`` already carried a different one).
    """
    current_extension = os.path.splitext(filename)[1]

    if output_format is None:
        if file_has_extension := bool(current_extension):
            target_extension = current_extension
        else:
            target_extension = '.{}'.format('txt')

        separator = _get_separator(target_extension)
        if not file_has_extension:
            filename = '{}{}'.format(filename, target_extension)
    else:
        target_extension = '.{}'.format(output_format)
        separator = _get_separator(target_extension)

        if current_extension != target_extension:
            filename = '{}{}'.format(filename, target_extension)

            if current_extension:
                app_logger.warning(
                    'Selected extension missmatches output filename ({}, {}): It will be added => {}'
                    .format(target_extension, current_extension, filename))

    destination = '{}/{}'.format(output_path, filename)
    df.to_csv(destination, sep=separator, index=False)
def analysis(meta_filename: str,
             counts_filename: str,
             counts_data: str,
             project_name: str,
             threshold: float,
             result_precision: int,
             output_path: str,
             output_format: str,
             means_result_name: str,
             significant_means_result_name: str,
             deconvoluted_result_name: str,
             verbose: bool,
             database: Optional[str],
             subsampling: bool,
             subsampling_log: bool,
             subsampling_num_pc: int,
             subsampling_num_cells: Optional[int]
             ):
    """Run the local analysis method and log any failure instead of raising.

    A ``Subsampler`` is built only when ``subsampling`` is true. The work is
    delegated to ``LocalMethodLauncher.cpdb_analysis_local_method_launcher``
    on an app created with ``cpdb_app.create_app(verbose, database)``.

    Input/validation exceptions are logged as errors, ``EmptyResultException``
    as a warning, and any other ``Exception`` as ``'Unexpected error'`` (with
    a traceback on stdout when ``verbose``). ``SystemExit`` and
    ``KeyboardInterrupt`` are deliberately not intercepted, so an explicit
    exit or Ctrl-C still stops the program.
    """
    def _format_error(error: Exception) -> str:
        # ``description`` and ``hint`` are optional attributes of the
        # project exceptions; each is appended only when present and truthy.
        message = str(error)
        description = getattr(error, 'description', None)
        hint = getattr(error, 'hint', None)
        if description or hint:
            message += ':'
        if description:
            message += ' {}.'.format(description)
        if hint:
            message += ' {}.'.format(hint)
        return message

    try:
        subsampler = Subsampler(subsampling_log,
                                subsampling_num_pc,
                                subsampling_num_cells,
                                verbose) if subsampling else None

        launcher = LocalMethodLauncher(cpdb_app.create_app(verbose, database))
        launcher.cpdb_analysis_local_method_launcher(
            meta_filename,
            counts_filename,
            counts_data,
            project_name,
            threshold,
            output_path,
            output_format,
            means_result_name,
            significant_means_result_name,
            deconvoluted_result_name,
            result_precision,
            subsampler,
        )
    except (ReadFileException, ParseMetaException, ParseCountsException,
            ThresholdValueException, AllCountsFilteredException) as e:
        app_logger.error(_format_error(e))
    except EmptyResultException as e:
        app_logger.warning(_format_error(e))
    except Exception:
        app_logger.error('Unexpected error')

        if verbose:
            traceback.print_exc(file=sys.stdout)
def analysis_scanpy(adata,
                    var_names,
                    obs_names,
                    obs_key,
                    var_key=None,
                    gene_id_format=None,
                    project_name='',
                    threshold=0.1,
                    result_precision='3',
                    output_path='',
                    output_format='csv',
                    means_result_name='means',
                    significant_means_result_name='significant_means',
                    deconvoluted_result_name='deconvoluted',
                    verbose=True,
                    database='latest',
                    subsampling=False,
                    subsampling_log=True,
                    subsampling_num_pc=100,
                    subsampling_num_cells=None,
                    write=False,
                    add_to_uns=True):
    """Run the scanpy flavour of the local analysis method.

    A ``Subsampler`` is built only when ``subsampling`` is true. The work is
    delegated to
    ``LocalMethodLauncher.cpdb_analysis_local_method_launcher_scanpy`` on an
    app created with ``cpdb_app.create_app(verbose, database)``.

    Returns:
        Whatever the launcher returns, or ``None`` when a handled exception
        was logged.

    Input/validation exceptions are logged as errors, ``EmptyResultException``
    as a warning, and any other ``Exception`` as ``'Unexpected error'`` (with
    a traceback on stdout when ``verbose``). ``SystemExit`` and
    ``KeyboardInterrupt`` are deliberately not intercepted.
    """
    def _format_error(error):
        # ``description`` and ``hint`` are optional attributes of the
        # project exceptions; each is appended only when present and truthy.
        message = str(error)
        description = getattr(error, 'description', None)
        hint = getattr(error, 'hint', None)
        if description or hint:
            message += ':'
        if description:
            message += ' {}.'.format(description)
        if hint:
            message += ' {}.'.format(hint)
        return message

    try:
        subsampler = Subsampler(subsampling_log, subsampling_num_pc,
                                subsampling_num_cells,
                                verbose) if subsampling else None

        launcher = LocalMethodLauncher(cpdb_app.create_app(verbose, database))
        return launcher.cpdb_analysis_local_method_launcher_scanpy(
            adata, var_names, obs_names, obs_key, var_key, gene_id_format,
            project_name, threshold, output_path, output_format,
            means_result_name, significant_means_result_name,
            deconvoluted_result_name, result_precision, subsampler, write,
            add_to_uns)
    except (ReadFileException, ParseMetaException, ParseCountsException,
            ThresholdValueException, AllCountsFilteredException) as e:
        app_logger.error(_format_error(e))
    except EmptyResultException as e:
        app_logger.warning(_format_error(e))
    except Exception:
        app_logger.error('Unexpected error')

        if verbose:
            traceback.print_exc(file=sys.stdout)
Esempio n. 7
0
 def _set_paths(output_path, project_name):
     """Resolve, create and return the directory results are written to.

     Falls back to ``output_dir`` when ``output_path`` is falsy. When
     ``project_name`` is given it is appended as a sub-directory and the
     result is user-expanded and made absolute. The directory is created if
     missing.
     """
     target_dir = output_path or output_dir
     if project_name:
         combined = '{}/{}'.format(target_dir, project_name)
         target_dir = os.path.realpath(os.path.expanduser(combined))
     os.makedirs(target_dir, exist_ok=True)
     # NOTE(review): the helper is named `_path_is_empty` but the warning
     # talks about a non-empty directory - confirm what it actually returns.
     if LocalMethodLauncher._path_is_empty(target_dir):
         app_logger.warning(
             'Output directory ({}) exist and is not empty. Result can overwrite old results'.format(target_dir))
     return target_dir
Esempio n. 8
0
def statistical_analysis(meta_filename: str,
                         counts_filename: str,
                         project_name: str,
                         iterations: int,
                         threshold: float,
                         result_precision: int,
                         output_path: str,
                         means_result_name: str,
                         pvalues_result_name: str,
                         significant_mean_result_name: str,
                         means_pvalues_result_name: str,
                         deconvoluted_result_name: str,
                         debug_seed: int,
                         threads: int,
                         verbose: bool,
                         ) -> None:
    """Run the local statistical analysis method and log any failure.

    The work is delegated to
    ``LocalMethodLauncher.cpdb_statistical_analysis_local_method_launcher``
    on an app created with ``cpdb_app.create_app(verbose)``.

    Input/validation exceptions are logged as errors, ``EmptyResultException``
    as a warning, and any other ``Exception`` as ``'Unexpected error'`` (with
    a traceback on stdout when ``verbose``). ``SystemExit`` and
    ``KeyboardInterrupt`` are deliberately not intercepted, so an explicit
    exit or Ctrl-C still stops the program.
    """
    def _format_error(error: Exception) -> str:
        # ``description`` and ``hint`` are optional attributes of the
        # project exceptions; each is appended only when present and truthy.
        message = str(error)
        description = getattr(error, 'description', None)
        hint = getattr(error, 'hint', None)
        if description or hint:
            message += ':'
        if description:
            message += ' {}.'.format(description)
        if hint:
            message += ' {}.'.format(hint)
        return message

    try:
        launcher = LocalMethodLauncher(cpdb_app.create_app(verbose))
        launcher.cpdb_statistical_analysis_local_method_launcher(
            meta_filename,
            counts_filename,
            project_name,
            iterations,
            threshold,
            output_path,
            means_result_name,
            pvalues_result_name,
            significant_mean_result_name,
            means_pvalues_result_name,
            deconvoluted_result_name,
            debug_seed,
            threads,
            result_precision,
        )
    except (ReadFileException, ParseMetaException, ParseCountsException,
            ThresholdValueException, AllCountsFilteredException) as e:
        app_logger.error(_format_error(e))
    except EmptyResultException as e:
        app_logger.warning(_format_error(e))
    except Exception:
        app_logger.error('Unexpected error')
        if verbose:
            traceback.print_exc(file=sys.stdout)
def _set_paths(output_path, subfolder):
    """Resolve, create and return the directory results are written to.

    Falls back to ``output_dir`` when ``output_path`` is falsy. When
    ``subfolder`` is given it is appended and the result is user-expanded and
    made absolute. The directory is created if missing, and a warning is
    logged when ``_path_is_not_empty`` reports existing content.
    """
    target_dir = output_path or output_dir

    if subfolder:
        combined = '{}/{}'.format(target_dir, subfolder)
        target_dir = os.path.realpath(os.path.expanduser(combined))

    os.makedirs(target_dir, exist_ok=True)

    if _path_is_not_empty(target_dir):
        overwrite_notice = (
            'Output directory ({}) exist and is not empty. Result can overwrite old results'
        ).format(target_dir)
        app_logger.warning(overwrite_notice)

    return target_dir
Esempio n. 10
0
    def test_complex(self):
        """Check every ``complex_entries`` fixture against the complex table.

        For each expected complex the database row with the same ``name`` is
        looked up and every key of the fixture's ``data`` dict is compared
        with the first matching row. A missing complex or any differing value
        is logged and fails the test.
        """
        dataframe = cellphonedb_app.cellphonedb.database_manager.get_repository(
            'complex').get_all_expanded()

        data_not_match = False

        for complex_entry in complex_entries:
            expected_data = complex_entry['data']
            complex_name = expected_data['name']
            db_complex = dataframe[dataframe['name'] == complex_name]

            # Without this guard a missing complex surfaces as an opaque
            # IndexError from ``.iloc[0]`` instead of a test failure.
            if db_complex.empty:
                app_logger.warning(
                    'Complex \'%s\' does not exist in database' % complex_name)
                data_not_match = True
                continue

            for complex_data in expected_data:
                db_value = db_complex[complex_data].iloc[0]
                if db_value != expected_data[complex_data]:
                    app_logger.warning(
                        'Failed checking column \'%s\' of multidata/complex with name \'%s\''
                        % (complex_data, complex_name))
                    app_logger.warning('Expected value: %s' %
                                       expected_data[complex_data])
                    app_logger.warning('Database value: %s' % db_value)
                    app_logger.warning('---')
                    data_not_match = True

        self.assertFalse(data_not_match, 'Some complex doesnt match')
Esempio n. 11
0
    def test_protein(self):
        """Check every ``protein_entries`` fixture against the protein table.

        A protein that is absent from the database is reported on stdout; a
        protein whose first matching row differs from the fixture in any
        column is reported through the logger. Either case fails the test.
        """
        repository = cellphonedb_app.cellphonedb.database_manager.get_repository(
            'protein')
        dataframe = repository.get_all_expanded()

        mismatch_found = False

        for expected in protein_entries:
            protein_name = expected['name']
            db_protein = dataframe[dataframe['name'] == protein_name]

            if db_protein.empty:
                print('Protein {} dindt exist'.format(protein_name))
                mismatch_found = True
                continue

            for column_name in expected:
                db_value = db_protein[column_name].iloc[0]
                if db_value == expected[column_name]:
                    continue

                app_logger.warning(
                    "Failed checking column '%s' of multidata/protein with name '%s'"
                    % (column_name, protein_name))
                app_logger.warning('Expected value: %s' % expected[column_name])
                app_logger.warning('Database value: %s' % db_value)
                app_logger.warning('---')
                mismatch_found = True

        self.assertFalse(mismatch_found, 'Some proteins doesnt match or doesnt exist')
Esempio n. 12
0
def call(downloads_path: str,
         fetch: bool,
         save_backup: bool = True) -> pd.DataFrame:
    """Return the IUPHAR interactions table, fetched remotely or read locally.

    Args:
        downloads_path: Directory holding (and receiving) the xz-compressed
            backup ``iuphar_interaction_raw.csv.xz``.
        fetch: When true, download the CSV from guidetopharmacology.org;
            otherwise read the local copy.
        save_backup: When fetching, also store the de-duplicated table in
            ``downloads_path`` and register it via ``add_to_meta``.

    Returns:
        The interactions as a DataFrame with every column read as ``str``.

    Raises:
        CouldNotFetchFromApiException: The server answered with an empty body.
        SystemExit: A local copy is needed but none exists.
    """
    url = 'http://www.guidetopharmacology.org/DATA/interactions.csv'

    compression = 'xz'
    file_name = 'iuphar_interaction_raw.csv.{}'.format(compression)
    download_file_path = os.path.join(downloads_path, file_name)

    def best_path():
        # Prefer a freshly downloaded backup over the copy shipped in data_dir.
        saved_file_path = os.path.join(data_dir, 'sources', file_name)

        for candidate in (download_file_path, saved_file_path):
            if os.path.exists(candidate):
                return candidate

        app_logger.error('Could not find local source for iuphar')
        # Raised directly: the ``exit`` builtin is only injected by ``site``.
        raise SystemExit(1)

    def read_local():
        # Same dtype as the remote path so callers always get string columns.
        return pd.read_csv(best_path(), compression=compression, dtype=str)

    if not fetch:
        app_logger.warning('Using local version for iuphar')
        return read_local()

    try:
        response = requests.get(url)
    except (requests.exceptions.ConnectionError, requests.exceptions.SSLError):
        app_logger.warning(
            'Could not fetch remote source for iuphar, using local backup')
        return read_local()

    if not response.text:
        # An empty body is unusable whatever the status code; previously a
        # 200 with no content fell through and returned None.
        raise CouldNotFetchFromApiException()

    df = pd.read_csv(StringIO(response.text), dtype=str)
    df.drop_duplicates(inplace=True)

    if save_backup:
        df.to_csv(download_file_path, index=False, compression=compression)
        add_to_meta(file_name, os.path.join(downloads_path, 'meta.json'))

    return df
Esempio n. 13
0
    def test_gene_are_not_duplicated(self):
        """Fail when the gene table contains fully identical rows.

        All rows of the ``Gene`` query are loaded; rows duplicated across
        every column are logged as CSV (sorted by ``gene_name``) and counted.
        """
        database = cellphonedb_app.cellphonedb.database_manager.database
        gene_query = database.session.query(Gene)
        genes = pd.read_sql(gene_query.statement, database.engine)

        repeated_rows = genes[genes.duplicated(keep=False)]
        repeated_count = len(repeated_rows)

        if repeated_count:
            report = repeated_rows.sort_values('gene_name').to_csv(index=False)
            app_logger.warning(report)

        failure_message = (
            'There are %s duplicated genes in database. Please check WARNING_duplicated_genes.csv file'
            % repeated_count)
        self.assertEqual(repeated_count, 0, failure_message)
def _merge_proteins(base_protein: pd.DataFrame,
                    additional: pd.DataFrame,
                    default_values: dict,
                    default_types: dict,
                    result_columns: list,
                    log_file: str,
                    quiet: bool = False) -> pd.DataFrame:
    """Merge ``additional`` proteins into ``base_protein``, keyed by ``uniprot``.

    Both frames get defaults applied via ``generator_helper.set_defaults``,
    are reduced to ``result_columns`` and cast to ``default_types`` before
    being stacked. Fully identical rows are dropped. When the same
    ``uniprot`` still appears more than once, the row from ``additional``
    (the last one) wins.

    Args:
        base_protein: Base protein table.
        additional: Extra protein rows; not modified by this function.
        default_values: Passed to ``generator_helper.set_defaults``.
        default_types: Column -> dtype mapping applied to both frames.
        result_columns: Columns kept in the result.
        log_file: Tab-separated file receiving conflicting rows.
        quiet: Passed to ``set_defaults``; also suppresses the conflict
            warning and log file.

    Returns:
        The merged table with one row per ``uniprot``.
    """
    additional = additional.copy()

    # Here we set defaults for uniprot & curated data
    base_protein = generator_helper.set_defaults(base_protein, default_values,
                                                 quiet)
    additional = generator_helper.set_defaults(additional, default_values,
                                               quiet)

    # we will only use these columns
    additional = additional[result_columns]
    base_protein = base_protein[result_columns]

    # Type casting to ensure they are equal
    additional = additional.astype(default_types)
    base_protein = base_protein.astype(default_types)

    join_key = 'uniprot'

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    merged_protein = pd.concat([base_protein, additional],
                               ignore_index=True,
                               sort=False).drop_duplicates()

    if not quiet and merged_protein.duplicated(join_key).any():
        app_logger.warning(
            'There are differences between merged files: logged to {}'.format(
                log_file))

        log = merged_protein[merged_protein.duplicated(
            join_key, keep=False)].sort_values(join_key)
        log.to_csv(log_file, index=False, sep='\t')

    # ``additional`` was stacked last, so keep='last' lets it override base.
    merged_protein.drop_duplicates(join_key, keep='last', inplace=True)
    return merged_protein
Esempio n. 15
0
    def test_duplicated_gene_ensembl_is_not_in_interaction(self):
        """Check that only known duplicated ensembl ids take part in interactions.

        Genes sharing an ``ensembl`` value are narrowed to those whose
        ``id_multidata`` is used by an interaction. The test fails when any
        of them is missing from ``KNOWED_DUPLICATED_ENSEMBL``, or when the
        number of distinct known ids differs from the number of distinct
        ensembl ids found.
        """
        database_manager = cellphonedb_app.cellphonedb.database_manager
        all_genes = database_manager.get_repository('gene').get_all_expanded()
        all_interactions = database_manager.get_repository(
            'interaction').get_all()

        genes_duplicated_ensembl = all_genes[all_genes.duplicated('ensembl',
                                                                  keep=False)]

        # Sets are built once; the lambdas below run once per row, so list
        # lookups (and rebuilding the known list per row) were quadratic.
        interaction_multidata_ids = set(
            all_interactions['multidata_1_id'].tolist()
            + all_interactions['multidata_2_id'].tolist())
        known_duplicated_ensembls = set(KNOWED_DUPLICATED_ENSEMBL.tolist())

        duplicated_gene_ensembls_in_interactions = genes_duplicated_ensembl[
            genes_duplicated_ensembl['id_multidata'].apply(
                lambda multidata_id: multidata_id in interaction_multidata_ids)]

        unknown_duplicates = duplicated_gene_ensembls_in_interactions[
            duplicated_gene_ensembls_in_interactions['ensembl'].apply(
                lambda ensembl: ensembl not in known_duplicated_ensembls)]

        nunknowed_duplicated_ensembl = False
        if not unknown_duplicates.empty:
            app_logger.warning(
                'Some duplicated ensembls apears in interactions')
            app_logger.warning(
                duplicated_gene_ensembls_in_interactions.to_csv(index=False))
            nunknowed_duplicated_ensembl = True

        self.assertFalse(
            nunknowed_duplicated_ensembl,
            'Some duplicated ensembl gene apears in interactions')
        self.assertFalse(
            len(KNOWED_DUPLICATED_ENSEMBL.drop_duplicates()) != len(
                duplicated_gene_ensembls_in_interactions.drop_duplicates(
                    'ensembl')),
            'Some duplicated ensembl gene apears in interactions')
Esempio n. 16
0
    def test_all_protein_have_gen(self):
        """Check that the number of proteins without a gene is the expected one.

        Every ``id_protein`` that is not referenced by any ``Gene.protein_id``
        contributes the protein's name to the list. When the count differs
        from the expected constant, the list and the names missing from
        ``KNOWED_PROTEINS_WITHOUT_GENE`` are logged before the assertion.
        """
        expected_protein_without_gene = 235
        database = cellphonedb_app.cellphonedb.database_manager.database

        protein_query = database.session.query(
            Protein, Multidata.name).join(Multidata)
        protein_df = pd.read_sql(protein_query.statement, database.engine)
        protein_ids = protein_df['id_protein'].tolist()

        gene_query = database.session.query(Gene.protein_id)
        # A set keeps the per-protein membership test O(1).
        gene_protein_ids = set(
            pd.read_sql(gene_query.statement,
                        database.engine)['protein_id'].tolist())

        protein_without_gene = [
            protein_df[protein_df['id_protein'] == protein_id]['name'].iloc[0]
            for protein_id in protein_ids
            if protein_id not in gene_protein_ids
        ]

        if len(protein_without_gene) != expected_protein_without_gene:
            app_logger.warning('There are {} Proteins without gene'.format(
                len(protein_without_gene)))
            app_logger.warning(protein_without_gene)

            unknowed_proteins_without_gene = [
                protein for protein in protein_without_gene
                if protein not in KNOWED_PROTEINS_WITHOUT_GENE
            ]

            if unknowed_proteins_without_gene:
                app_logger.warning(
                    'There are {} unknowed proteins without gene'.format(
                        len(unknowed_proteins_without_gene)))
                app_logger.warning(
                    pd.Series(unknowed_proteins_without_gene).drop_duplicates(
                    ).tolist())

        self.assertEqual(expected_protein_without_gene,
                         len(protein_without_gene),
                         'There are Proteins without Gene.')
def find_database_for(value: str) -> str:
    """Resolve ``value`` to the path of a database file.

    Resolution order:

    1. ``value`` (user-expanded) is an existing path: it is returned as is.
    2. Otherwise ``value`` is treated as a version name below the
       user-expanded ``cpdb_releases`` directory; ``'latest'`` or a falsy
       value selects the first entry of ``list_local_versions()``.
    3. If that version's ``database_file`` is missing, one download is
       attempted through ``download_database``.

    Returns:
        Path of the selected database file.

    Raises:
        SystemExit: No release directory exists, no local version is
            available for ``'latest'``, or the download did not produce the
            database file.
    """
    file_candidate = os.path.expanduser(value)

    if os.path.exists(file_candidate):
        # todo: warning is perhaps not appropriate, logger doesn't allow info at this point
        app_logger.warning(
            'User selected database `{}` is available, using it'.format(
                file_candidate))
        return file_candidate

    _ensure_core_version_in_user_dbs()
    user_databases_prefix = os.path.expanduser(cpdb_releases)

    if not os.path.isdir(user_databases_prefix):
        app_logger.error(
            'No downloaded databases found, run the `database download` command from the cli first'
        )
        # Raised directly: the ``exit`` builtin is only injected by ``site``.
        raise SystemExit(1)

    if value == 'latest' or not value:
        available = list_local_versions()
        if not available:
            # Indexing an empty list would otherwise raise a bare IndexError.
            app_logger.error(
                'No downloaded databases found, run the `database download` command from the cli first'
            )
            raise SystemExit(1)

        latest_available = available[0]
        app_logger.warning(
            'Latest local available version is `{}`, using it'.format(
                latest_available))
        value = latest_available

    downloaded_candidate = os.path.join(user_databases_prefix, value,
                                        database_file)

    if not os.path.exists(downloaded_candidate):
        app_logger.warning(
            'User selected database `{}` not available, trying to download it'.
            format(value))
        download_database(value)

        # A single retry: recursing here would loop forever whenever the
        # download completes without creating the expected file.
        if not os.path.exists(downloaded_candidate):
            app_logger.error(
                'Database `{}` is still not available after download'.format(
                    value))
            raise SystemExit(1)

    # todo: warning is perhaps not appropriate, logger doesn't allow info at this point
    app_logger.warning(
        'User selected downloaded database `{}` is available, using it'.
        format(value))
    return downloaded_candidate
Esempio n. 18
0
    def test_gene(self):
        """Check that every entry of ``gene_entries`` exists in the gene table.

        For each expected gene the expanded gene table is narrowed column by
        column; the gene matches when at least one row survives all filters.
        Genes without a match are logged and fail the test.
        """
        dataframe = cellphonedb_app.cellphonedb.database_manager.get_repository(
            'gene').get_all_expanded()

        data_not_match = False

        for gene in gene_entries:
            db_gene = dataframe

            for column_name in gene:
                expected_value = gene[column_name]
                # ``None`` in the fixture means "this column must be null".
                if expected_value is None:
                    db_gene = db_gene[pd.isnull(db_gene[column_name])]
                else:
                    db_gene = db_gene[db_gene[column_name] == expected_value]

            if len(db_gene) < 1:
                app_logger.warning('Failed cheking Gene:')
                app_logger.warning('Expected data:')
                app_logger.warning(gene)
                data_not_match = True

        self.assertFalse(data_not_match, 'Some Gene doesnt match')
Esempio n. 19
0
def _merge_complex(base_complex: pd.DataFrame, additional: pd.DataFrame,
                   log_file: str) -> pd.DataFrame:
    """Merge ``additional`` complexes into ``base_complex``, keyed by ``complex_name``.

    ``additional`` gets defaults applied via ``set_defaults``; both frames
    are reduced to the columns of ``default_types`` and cast to those types
    before being stacked. Fully identical rows are dropped. When the same
    ``complex_name`` still appears more than once, the conflicting rows are
    written tab-separated to ``log_file`` and the row from ``additional``
    (the last one) wins.

    Args:
        base_complex: Base complex table.
        additional: Extra complex rows; not modified by this function.
        log_file: File receiving conflicting rows.

    Returns:
        The merged table with one row per ``complex_name``.

    Raises:
        MissingRequiredColumns: ``additional`` lacks ``complex_name``,
            ``uniprot_1`` or ``uniprot_2``.
    """
    additional = additional.copy()

    defaults = {
        'uniprot_3': np.nan,
        'uniprot_4': np.nan,
        'receptor': False,
        'integrin': False,
        'other': False,
        'other_desc': np.nan,
        'peripheral': False,
        'receptor_desc': np.nan,
        'secreted_desc': np.nan,
        'secreted_highlight': False,
        'secreted': False,
        'transmembrane': False,
        'pdb_structure': False,
        'pdb_id': np.nan,
        'stoichiometry': np.nan,
        'comments_complex': np.nan
    }

    default_types = {
        'complex_name': str,
        'uniprot_1': str,
        'uniprot_2': str,
        'uniprot_3': str,
        'uniprot_4': str,
        'transmembrane': bool,
        'peripheral': bool,
        'secreted': bool,
        'secreted_desc': str,
        'secreted_highlight': bool,
        'receptor': bool,
        'receptor_desc': str,
        'integrin': bool,
        'other': bool,
        'other_desc': str,
        'pdb_id': str,
        'pdb_structure': str,
        'stoichiometry': str,
        'comments_complex': str,
    }

    result_columns = list(default_types.keys())

    required_columns = ['complex_name', 'uniprot_1', 'uniprot_2']

    if not set(required_columns).issubset(additional):
        raise MissingRequiredColumns(
            list(set(required_columns).difference(additional)))

    # TODO: Fill NA
    # Here we set defaults for additional data
    additional = set_defaults(additional, defaults)

    # we will only use these columns
    additional = additional[result_columns]
    base_complex = base_complex[result_columns]

    # Type casting to ensure they are equal
    base_complex = base_complex.astype(default_types)
    additional = additional.astype(default_types)

    join_key = 'complex_name'

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    merged_complex = pd.concat([base_complex, additional],
                               ignore_index=True,
                               sort=False).drop_duplicates()

    if merged_complex.duplicated(join_key).any():
        app_logger.warning(
            'There are differences between merged files: logged to {}'.format(
                log_file))

        log = merged_complex[merged_complex.duplicated(
            join_key, keep=False)].sort_values(join_key)
        log.to_csv(log_file, index=False, sep='\t')

    # ``additional`` was stacked last, so keep='last' lets it override base.
    merged_complex.drop_duplicates(join_key, keep='last', inplace=True)
    return merged_complex
Esempio n. 20
0
    def test_complex_composition_table(self):
        """Check the complex_composition table against ``complex_entries``.

        For every expected complex three things are verified:

        * the number of composition rows stored for the complex equals the
          number of proteins listed in the fixture;
        * every listed protein exists in multidata;
        * every listed protein is part of the stored composition.
        """
        df_multidata = cellphonedb_app.cellphonedb.database_manager.get_repository(
            'multidata').get_all()
        df_complex_composition = cellphonedb_app.cellphonedb.database_manager.get_repository(
            'complex').get_all_compositions()

        number_compositions_not_match = False
        some_protein_didnt_exists = False
        some_protein_not_part_of_complex = False

        for complex_entry in complex_entries:
            complex_name = complex_entry['data']['name']
            expected_composition = complex_entry['composition']

            db_complex_id = df_multidata[
                df_multidata['name'] == complex_name]['id_multidata'].iloc[0]

            # Rows belonging to THIS complex. The count check previously
            # selected every other complex and compared with ``==``, so it
            # never tested what the log messages describe.
            db_composition = df_complex_composition[
                df_complex_composition['complex_multidata_id'] ==
                db_complex_id]

            if len(db_composition) != len(expected_composition):
                app_logger.warning(
                    'Failed checking number of complex_composition with name \'%s\''
                    % complex_name)
                app_logger.warning('Expected value: %s' %
                                   len(expected_composition))
                app_logger.warning('Database value: %s' % len(db_composition))
                app_logger.warning('---')
                number_compositions_not_match = True

            # Does not depend on the protein, so it is computed once per complex.
            db_complex_composition_ids = db_composition[
                'protein_multidata_id'].tolist()

            for protein_name in expected_composition:
                composition_multidata_id = df_multidata[
                    df_multidata['name'] == protein_name]['id_multidata']

                if not len(composition_multidata_id):
                    app_logger.warning(
                        'Failed finding protein \'%s\' in multidata from complex name \'%s\''
                        % (protein_name, complex_name))
                    some_protein_didnt_exists = True
                    continue

                if composition_multidata_id.iloc[
                        0] not in db_complex_composition_ids:
                    app_logger.warning(
                        'Failed finding protein \'%s\' in composition from complex name \'%s\''
                        % (protein_name, complex_name))
                    some_protein_not_part_of_complex = True

        self.assertFalse(number_compositions_not_match,
                         'Number of complex composition doesnt match')
        self.assertFalse(some_protein_didnt_exists,
                         'Some complex_composition proteins doesnt match')
        self.assertFalse(some_protein_not_part_of_complex,
                         'Complex_composition proteins doesnt match')
def _format_exception_message(error) -> str:
    """Build the log line for *error* from its text, description and hint.

    The result is ``str(error)``, followed by ``':'`` when the exception
    carries a truthy ``description`` or ``hint`` attribute, followed by each
    of those attributes rendered as ``' <value>.'``.
    """
    description = getattr(error, 'description', None)
    hint = getattr(error, 'hint', None)

    message = str(error)
    if description or hint:
        message += ':'
    if description:
        message += ' {}.'.format(description)
    if hint:
        message += ' {}.'.format(hint)
    return message


def statistical_analysis(meta_filename: str,
                         counts_filename: str,
                         counts_data='ensembl',
                         project_name='',
                         threshold=0.1,
                         result_precision='3',
                         output_path='',
                         output_format='csv',
                         means_result_name='means',
                         significant_means_result_name='significant_means',
                         deconvoluted_result_name='deconvoluted',
                         verbose=True,
                         database='latest',
                         subsampling=False,
                         subsampling_log=True,
                         subsampling_num_pc=100,
                         subsampling_num_cells=None,
                         debug_seed='-1',
                         pvalue=0.05,
                         pvalues_result_name='pvalues',
                         iterations=1000,
                         threads=4) -> None:
    """Launch the local statistical-analysis method and log any failure.

    The ``database`` value is first resolved through ``choose_database``;
    an app is then created with ``cpdb_app.create_app(verbose, database)``
    and every remaining argument is forwarded positionally to
    ``cpdb_statistical_analysis_local_method_launcher``.

    When ``subsampling`` is truthy a ``Subsampler`` is built from
    ``subsampling_log``, ``subsampling_num_pc``, ``subsampling_num_cells``
    and ``verbose``; otherwise ``None`` is passed in its place.

    Errors are reported through ``app_logger`` and never re-raised:

    * input/validation exceptions are logged at error level,
    * ``EmptyResultException`` is logged at warning level,
    * any other ``Exception`` is logged as ``'Unexpected error'`` and, when
      ``verbose`` is truthy, its traceback is printed to stdout.

    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately left to
    propagate so the process can still be interrupted or exited.
    """
    # Resolved outside the try block, exactly as before: a failure here is
    # not converted into a log message.
    database = choose_database(None, None, value=database)
    try:
        subsampler = Subsampler(subsampling_log, subsampling_num_pc,
                                subsampling_num_cells,
                                verbose) if subsampling else None

        launcher = LocalMethodLauncher(cpdb_app.create_app(verbose, database))
        # Arguments are positional; their order must match the launcher's
        # signature and therefore differs from this function's own order.
        launcher.cpdb_statistical_analysis_local_method_launcher(
            meta_filename,
            counts_filename,
            counts_data,
            project_name,
            iterations,
            threshold,
            output_path,
            output_format,
            means_result_name,
            pvalues_result_name,
            significant_means_result_name,
            deconvoluted_result_name,
            debug_seed,
            threads,
            result_precision,
            pvalue,
            subsampler,
        )
    except (ReadFileException, ParseMetaException, ParseCountsException,
            ThresholdValueException, AllCountsFilteredException) as e:
        app_logger.error(_format_exception_message(e))
    except EmptyResultException as e:
        app_logger.warning(_format_exception_message(e))
    except Exception:
        # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit are no longer swallowed and misreported.
        app_logger.error('Unexpected error')
        if verbose:
            traceback.print_exc(file=sys.stdout)