def _assert_result(self, namefile: str, data: str, iterations: str, project_name: str,
                    result_means_filename: str) -> None:
     """
     Compare a generated means file with its stored reference, then clean up.

     The reference file name is derived from ``namefile``, ``data`` and
     ``iterations`` and is looked up under ``data_test_dir``. The generated
     file is ``result_means_filename`` under ``output_test_dir/project_name``.
     Both are loaded with ``pd.read_table`` and compared through
     ``dataframe_functions.dataframes_has_same_data``.

     The generated file is handed to ``self.remove_file`` only after the
     assertion passes; when the comparison fails the assertion raises first
     and the file is left in place.

     :raises AssertionError: if the two tables are not reported as equal.
     """
     means_test_filename = 'hi_{}_result__data-{}_it-{}.txt'.format(namefile, data, iterations)
     # Built once: the same path is needed for reading and for the cleanup.
     result_means_path = '{}/{}/{}'.format(output_test_dir, project_name, result_means_filename)

     original_means = pd.read_table(os.path.realpath('{}/{}'.format(data_test_dir, means_test_filename)))
     result_means = pd.read_table(result_means_path)

     # Name both files in the failure message so a failing run is identifiable.
     self.assertTrue(dataframe_functions.dataframes_has_same_data(result_means, original_means),
                     msg='failed comparing {} with {}'.format(means_test_filename, result_means_filename))
     self.remove_file(result_means_path)
Ejemplo n.º 2
0
    def _assert_result(self, filename: str, data: str, iterations: int,
                       project_name: str, result_means_filename: str,
                       debug_seed: int, threshold: float,
                       result_precision: int) -> None:
        """
        Compare a generated means file with its stored reference, then clean up.

        The reference file name encodes ``filename``, ``data``, ``iterations``,
        ``debug_seed``, ``threshold`` and ``result_precision`` and is looked up
        under ``data_test_dir``. The generated file is ``result_means_filename``
        under ``output_test_dir/project_name``. The generated file is passed to
        ``self.remove_file`` only once the assertion has passed.

        :raises AssertionError: if the two tables are not reported as equal.
        """
        # The threshold is embedded without its decimal point (0.1 -> '01').
        threshold_tag = str(threshold).replace('.', '')

        name_template = ('statistical_analysis__{}_result__'
                         'data-{}_it-{}_seed-{}_threshold-{}_precision-{}.txt')
        means_test_filename = name_template.format(
            filename, data, iterations, debug_seed, threshold_tag,
            result_precision)

        reference_path = os.path.realpath(
            '{}/{}'.format(data_test_dir, means_test_filename))
        result_path = '{}/{}/{}'.format(
            output_test_dir, project_name, result_means_filename)

        original_means = pd.read_table(reference_path)
        result_means = pd.read_table(result_path)

        same_data = dataframe_functions.dataframes_has_same_data(
            result_means, original_means)
        failure_message = 'failed comparing {} with {}'.format(
            means_test_filename, result_means_filename)

        self.assertTrue(same_data, msg=failure_message)
        self.remove_file(result_path)
    def test_different_unsorted_columns(self):
        """Frames with differently ordered columns and one differing value must not match."""
        left = pd.DataFrame({'col2': [3, 4], 'col1': [1, 2]})
        # Same columns in another order; the second 'col2' value differs (5 vs 4).
        right = pd.DataFrame({'col1': [1, 2], 'col2': [3, 5]})

        same_data = dataframe_functions.dataframes_has_same_data(left, right)

        self.assertFalse(same_data)
    def test_equal_unsorted_columns(self):
        """Frames holding the same values must match even if their columns are ordered differently."""
        left = pd.DataFrame({'col2': [3, 4], 'col1': [1, 2]})
        # Identical values, only the column declaration order changes.
        right = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})

        same_data = dataframe_functions.dataframes_has_same_data(left, right)

        self.assertTrue(same_data)
Ejemplo n.º 5
0
    def test_apply_threshold(self):
        """Check ``apply_threshold`` with threshold 0.2 against the stored fixture result."""
        counts_path = '{}/cluster_counts_generic_cluster_counts.csv'.format(self.FIXTURES_SUBPATH)
        expected_path = '{}/cluster_counts_helper_threshold_results.csv'.format(self.FIXTURES_SUBPATH)
        cluster_counts = pd.read_csv(counts_path)
        expected_result = pd.read_csv(expected_path)

        # Every column except the gene identifier is treated as a cluster.
        gene_column_name = 'gene'
        cluster_names = list(cluster_counts.columns.values)
        cluster_names.remove(gene_column_name)

        result = cluster_counts_helper.apply_threshold(cluster_counts, cluster_names, threshold=0.2)

        matches = dataframe_functions.dataframes_has_same_data(result, expected_result)
        self.assertTrue(matches)
    def test_filter_by_multidatas(self):
        """Check ``filter_by_multidatas`` against the rows flagged in the interaction fixture."""
        multidata_path = \
            '{}/filter_interaction/filter_interaction_multidatas_multidata.csv'.format(data_test_dir)
        interaction_path = \
            '{}/filter_interaction/filter_interaction_multidatas_interaction.csv'.format(data_test_dir)
        multidatas = pd.read_csv(multidata_path)
        interactions = pd.read_csv(interaction_path)

        result = interaction_filter.filter_by_multidatas(
            multidatas, interactions)

        # The expected rows are selected by the fixture's 'test_both_enabled' column.
        keep_mask = interactions['test_both_enabled']
        expected_result = interactions[keep_mask]

        matches = dataframe_functions.dataframes_has_same_data(
            result, expected_result)
        self.assertTrue(matches)
    def test_get_involved_complex_from_protein_empty_result(self):
        """With no protein rows, ``get_involved_complex_from_protein`` must yield an empty frame."""
        fixture_template = '{}/{}'
        proteins = pd.read_csv(
            fixture_template.format(self.FIXTURES_SUBPATH, 'helper_complex_protein.csv'))
        # Keep the fixture's columns but discard every row.
        proteins = proteins.drop(proteins.index)
        complexes = pd.read_csv(
            fixture_template.format(self.FIXTURES_SUBPATH, 'helper_complex_complex.csv'))
        complex_composition = pd.read_csv(
            fixture_template.format(self.FIXTURES_SUBPATH, 'helper_complex_complex_composition.csv'))

        result = complex_helper.get_involved_complex_from_protein(
            proteins, complexes, complex_composition, drop_duplicates=False)

        empty_frame = pd.DataFrame()
        matches = dataframe_functions.dataframes_has_same_data(result, empty_frame)
        self.assertTrue(matches)
Ejemplo n.º 8
0
 def _assert_result(self,
                    namefile: str,
                    data: str,
                    project_name: str,
                    result_means_filename: str,
                    threshold: float,
                    result_precision: int,
                    ) -> None:
     """
     Compare a generated means file with its stored reference, then clean up.

     The reference file name encodes ``namefile``, ``data``, ``threshold`` and
     ``result_precision`` and is looked up under ``data_test_dir``. The
     generated file is ``result_means_filename`` under
     ``output_test_dir/project_name``. Both are loaded with ``pd.read_table``
     and compared through ``dataframe_functions.dataframes_has_same_data``.

     The generated file is handed to ``self.remove_file`` only after the
     assertion passes; when the comparison fails the assertion raises first
     and the file is left in place.

     :raises AssertionError: if the two tables are not reported as equal.
     """
     # The threshold is embedded without its decimal point (0.1 -> '01').
     str_threshold = ''.join(str(threshold).split('.'))
     test_filename = 'analysis__{}_result__data-{}_threshold-{}_precision-{}.txt'.format(namefile,
                                                                                         data,
                                                                                         str_threshold,
                                                                                         result_precision)
     # Built once: the same path is needed for reading and for the cleanup.
     result_means_path = '{}/{}/{}'.format(output_test_dir, project_name, result_means_filename)

     original_means = pd.read_table('{}/{}'.format(data_test_dir, test_filename))
     result_means = pd.read_table(result_means_path)

     # Name both files in the failure message so a failing run is identifiable.
     self.assertTrue(dataframe_functions.dataframes_has_same_data(result_means, original_means),
                     msg='failed comparing {} with {}'.format(test_filename, result_means_filename))
     self.remove_file(result_means_path)
    def test_get_involved_complex_composition_from_protein(self):
        """Check ``get_involved_complex_composition_from_protein`` against the stored fixture result."""
        protein_path = '{}/helper_complex_protein.csv'.format(self.FIXTURES_SUBPATH)
        composition_path = '{}/helper_complex_complex_composition.csv'.format(self.FIXTURES_SUBPATH)
        expected_path = '{}/helper_complex_result_drop_duplicates.csv'.format(self.FIXTURES_SUBPATH)

        # The first CSV column is used as the protein frame's index.
        proteins = pd.read_csv(protein_path, index_col=0)
        complex_composition = pd.read_csv(composition_path)
        result_expected = pd.read_csv(expected_path)

        result = complex_helper.get_involved_complex_composition_from_protein(
            proteins, complex_composition)

        matches = dataframe_functions.dataframes_has_same_data(result, result_expected)
        self.assertTrue(matches)
Ejemplo n.º 10
0
    def test_filter_empty_cluster_counts(self):
        """Check ``filter_empty_cluster_counts`` against the stored fixture result."""
        counts_path = '{}/cluster_counts_generic_cluster_counts.csv'.format(self.FIXTURES_SUBPATH)
        expected_path = '{}/cluster_counts_filter_empty_cluster_results.csv'.format(self.FIXTURES_SUBPATH)
        cluster_counts = pd.read_csv(counts_path)
        expected_result = pd.read_csv(expected_path)

        # Every column except the gene identifier is treated as a cluster.
        gene_column_name = 'gene'
        cluster_names = list(cluster_counts.columns.values)
        cluster_names.remove(gene_column_name)

        result = cluster_counts_filter.filter_empty_cluster_counts(
            cluster_counts, cluster_names)

        matches = dataframe_functions.dataframes_has_same_data(result, expected_result)
        self.assertTrue(matches)
Ejemplo n.º 11
0
    def test_merge_complex_cluster_counts(self):
        """Check ``merge_complex_counts`` against the stored fixture result."""
        composition_path = \
            '{}/cluster_counts_helper_merge_complex_cluster_counts_complex_counts_composition.csv'.format(
                self.FIXTURES_SUBPATH)
        expected_path = \
            '{}/cluster_counts_helper_merge_complex_cluster_counts_result.csv'.format(
                self.FIXTURES_SUBPATH)

        complex_counts_composition = pd.read_csv(composition_path)
        expected_result = pd.read_csv(expected_path)

        cluster_names = ['cluster_1', 'cluster_2', 'cluster_3']
        complex_column_names = ['complex_multidata_id']

        result = cluster_counts_helper.merge_complex_counts(
            cluster_names, complex_counts_composition, complex_column_names)

        # Cast the complex id columns to int32 on both sides so that values
        # such as 1 and 1.0000 are compared as equal.
        result[complex_column_names] = result[complex_column_names].astype('int32')
        expected_result[complex_column_names] = expected_result[complex_column_names].astype('int32')

        matches = dataframe_functions.dataframes_has_same_data(result, expected_result)
        self.assertTrue(matches)
    def test_get_involved_complex_from_protein_empty_result(self):
        """With no protein rows, the composition lookup must yield an empty frame with the expected columns."""
        protein_path = '{}/helper_complex_protein.csv'.format(self.FIXTURES_SUBPATH)
        composition_path = '{}/helper_complex_complex_composition.csv'.format(self.FIXTURES_SUBPATH)

        proteins = pd.read_csv(protein_path, index_col=0)
        # Keep the fixture's columns but discard every row.
        proteins = proteins.drop(proteins.index)
        complex_composition = pd.read_csv(composition_path)

        result = complex_helper.get_involved_complex_composition_from_protein(
            proteins, complex_composition)

        expected_columns = ['complex_multidata_id', 'protein_multidata_id', 'total_protein']
        expected_empty = pd.DataFrame(columns=expected_columns)

        matches = dataframe_functions.dataframes_has_same_data(result, expected_empty)
        self.assertTrue(matches)
Ejemplo n.º 13
0
    def test_get_complex_involved_in_counts(self):
        """Check ``get_complex_involved_in_counts`` against the stored fixture result."""
        counts_path = '{}/helper_cluster_counts.csv'.format(self.FIXTURES_SUBPATH)
        multidatas_counts = pd.read_csv(counts_path)

        cluster_names = ['cluster_1', 'cluster_2', 'cluster_3']

        composition_path = '{}/helper_cluster_counts_complex_composition.csv'.format(self.FIXTURES_SUBPATH)
        complex_composition = pd.read_csv(composition_path)

        complex_path = '{}/helper_cluster_counts_complex.csv'.format(self.FIXTURES_SUBPATH)
        complex_expanded = pd.read_csv(complex_path)

        expected_path = \
            '{}/cluster_counts_helper_get_complex_involved_in_counts_result.csv'.format(self.FIXTURES_SUBPATH)
        result_expected = pd.read_csv(expected_path)

        result = cluster_counts_helper.get_complex_involved_in_counts(
            multidatas_counts, cluster_names, complex_composition, complex_expanded)

        matches = dataframe_functions.dataframes_has_same_data(result, result_expected)
        self.assertTrue(
            matches,
            'get_complex_involved_in_counts result did not match with expected')
 def test_compare_empty(self):
     """Two empty frames must be reported as holding the same data."""
     first_empty = pd.DataFrame()
     second_empty = pd.DataFrame()

     same_data = dataframe_functions.dataframes_has_same_data(first_empty, second_empty)

     self.assertTrue(same_data)