Beispiel #1
0
    def _run_statistical_test_groups(self, df: pd.DataFrame, group_col: str,
                                     stats_test: str, correction_method: str,
                                     structure_pval: str, sym: bool):
        """Run a pairwise group comparison test, optionally correcting p-values.

        Args:
            df: table holding both the column named by
                ``self.DIVERSITY_INDEXES_NAME`` and the ``group_col`` column.
            group_col: name of the column of ``df`` that assigns each row to
                a group.
            stats_test: name of the statistical test, forwarded unchanged to
                ``statistical_test_groups_comparison``.
            correction_method: multiple-testing correction forwarded as
                ``method`` to ``multipletests``; ``None`` disables the
                correction.
            structure_pval: requested output structure of the p-values.
            sym: whether the returned structure should be symmetric.

        Returns:
            The p-values: straight from ``statistical_test_groups_comparison``
            when no correction is requested, otherwise the corrected values
            reshaped by ``self._structure_remodelling``.
        """
        if correction_method is None:
            # No correction: let the comparison helper build the requested
            # output structure directly.
            return statistical_test_groups_comparison(
                df[self.DIVERSITY_INDEXES_NAME],
                df[group_col],
                stats_test,
                output=structure_pval,
                sym=sym)

        # The correction works on a flat collection of p-values, so the
        # comparison is requested as a non-symmetric series and only
        # remodelled into the caller's structure at the very end.
        pval = statistical_test_groups_comparison(
            df[self.DIVERSITY_INDEXES_NAME],
            df[group_col],
            stats_test,
            output='series',
            sym=False)

        # NaN = comparison that couldn't be generated due to too few samples
        # in one or both groups; those are excluded from the correction.
        valid_pval = pval.dropna()
        missing_pval = pval[pval.isnull()]

        corrected_pval = pd.Series(
            multipletests(valid_pval,
                          alpha=0.05,
                          method=correction_method)[1])
        # Postulate that the correction hasn't changed the order of values.
        corrected_pval.index = valid_pval.index

        if missing_pval.size > 0:
            # Series.append was removed in pandas 2.0; pd.concat is the
            # supported way to put the uncorrectable comparisons back.
            corrected_pval = pd.concat([corrected_pval, missing_pval])

        # Remodelling of p-values output.
        return self._structure_remodelling(
            corrected_pval, structure=structure_pval, sym=sym)
    def test_mann_whitney_u_groups_wrong_metadata(self):
        """Mann-Whitney U comparison raises when every group gets dropped.

        The metadata only describes 'sample6' and 'sample7', one sample per
        group; the comparison is expected to fail with a RuntimeError
        carrying the message asserted below.
        """
        sample_metadata = pd.DataFrame(
            {'sex': ['F', 'M'], 'group': [2, 1]},
            index=['sample6', 'sample7'],
        )
        groups = sample_metadata['group']

        with self.assertRaises(RuntimeError) as raised:
            statistical_test_groups_comparison(self.test_df,
                                               groups,
                                               stat_test='mann_whitney_u')

        self.assertEqual(
            str(raised.exception),
            "All groups have been dropped: not enough observations by group.")
    def test_chi2_contingency_groups(self):
        """Chi2 contingency comparison yields the expected p-value matrix.

        The expected matrix is symmetric over groups 1, 2 and 3 with NaN on
        the diagonal.
        """
        group_labels = [1, 2, 3]
        # p-values named after the (row, column) pair of groups they sit at.
        p_1_2 = 0.06720551273974977
        p_1_3 = 0.9310443064194389
        p_2_3 = 0.152090

        expected_matrix = pd.DataFrame(
            [[np.nan, p_1_2, p_1_3],
             [p_1_2, np.nan, p_2_3],
             [p_1_3, p_2_3, np.nan]],
            columns=group_labels,
            index=group_labels)

        observed = statistical_test_groups_comparison(
            self.test_df, self.metadata_df, stat_test='chi2_contingency')

        pd.testing.assert_frame_equal(observed,
                                      expected_matrix,
                                      check_dtype=False)
    def test_ttest_independance_groups(self):
        """Independent t-test comparison yields the expected p-value matrix.

        Five samples are split across groups 1 and 2; the expected matrix is
        symmetric with NaN on the diagonal.
        """
        sample_metadata = pd.DataFrame(
            {
                'sex': ['F', 'M', 'F', 'M', 'M'],
                'group': [2, 1, 1, 1, 2],
            },
            index=['sample1', 'sample2', 'sample3', 'sample4', 'sample5'],
        )

        group_labels = [1, 2]
        p_1_2 = 0.483379
        expected_matrix = pd.DataFrame(
            [[np.nan, p_1_2],
             [p_1_2, np.nan]],
            columns=group_labels,
            index=group_labels)

        observed = statistical_test_groups_comparison(
            self.test_df,
            sample_metadata['group'],
            stat_test='ttest_independence')

        pd.testing.assert_frame_equal(observed,
                                      expected_matrix,
                                      check_dtype=False)
    def test_mann_whitney_u_groups_nonsym_series_output(self):
        """Mann-Whitney U with series output and sym=False gives one entry.

        The expected series holds a single p-value keyed by the group pair
        (1, 2), with both index levels named 'Group'.
        """
        sample_metadata = pd.DataFrame(
            {
                'sex': ['F', 'M', 'F', 'M', 'M'],
                'group': [2, 1, 1, 1, 2],
            },
            index=['sample1', 'sample2', 'sample3', 'sample4', 'sample5'],
        )

        expected_series = pd.Series({(1, 2): 0.3742593192802244})
        expected_series.index.names = ['Group', 'Group']

        observed = statistical_test_groups_comparison(
            self.test_df,
            sample_metadata['group'],
            stat_test='mann_whitney_u',
            output='series',
            sym=False)

        pd.testing.assert_series_equal(observed,
                                       expected_series,
                                       check_dtype=False)
    def test_mann_whitney_u_groups_nonsym(self):
        """Mann-Whitney U with dataframe output and sym=False.

        Only the (row 1, column 2) cell of the expected matrix carries a
        p-value; every other cell, including the mirrored one, is NaN.
        """
        sample_metadata = pd.DataFrame(
            {
                'sex': ['F', 'M', 'F', 'M', 'M'],
                'group': [2, 1, 1, 1, 2],
            },
            index=['sample1', 'sample2', 'sample3', 'sample4', 'sample5'],
        )

        group_labels = [1, 2]
        expected_matrix = pd.DataFrame(
            [[np.nan, 0.374259],
             [np.nan, np.nan]],
            columns=group_labels,
            index=group_labels)

        observed = statistical_test_groups_comparison(
            self.test_df,
            sample_metadata['group'],
            stat_test='mann_whitney_u',
            output='dataframe',
            sym=False)

        pd.testing.assert_frame_equal(observed,
                                      expected_matrix,
                                      check_dtype=False)