def dabest_net_measures(self):
    """Compute per-frequency statistics on the graph measures and save them.

    Loads the pickled graph-measure table for ``self.net_version`` into
    ``self.Net_df``. For every key of ``self.FrequencyBands`` it then runs
    ``self._parallel_net_dabest(measure, Freq)`` for each key of
    ``self.GraphMeasures`` in a 10-worker pool and concatenates the returned
    frames. Bonferroni- and FDR-corrected versions of the
    ``pvalue_students_t``, ``pvalue_welch`` and ``pvalue_mann_whitney``
    columns are added per frequency band (alpha = 0.05). The combined table
    is pickled below ``self.NetMeasuresDir``.

    Returns
    -------
    None
        The result is only written to disk.
    """
    measures_path = self.find(suffix='Graph-Measures-' + self.net_version,
                              filetype='.pkl')
    self.Net_df = pd.read_pickle(measures_path)

    # (uncorrected p-value column, Bonferroni column, FDR column)
    correction_columns = (
        ('pvalue_students_t', 't_bon_corrected', 't_fdr_corrected'),
        ('pvalue_welch', 'welch_bon_corrected', 'welch_fdr_corrected'),
        ('pvalue_mann_whitney', 'mann_whit_bon_corrected',
         'mann_whit_fdr_corrected'),
    )

    band_frames = []
    print('Started Graph Measure Stats.')
    for Freq in self.FrequencyBands.keys():
        job_args = [(measure, Freq) for measure in self.GraphMeasures.keys()]
        with Pool(10) as workers:
            measure_frames = workers.starmap(self._parallel_net_dabest,
                                             job_args)
        band_df = pd.concat(measure_frames)
        band_df['Frequency'] = Freq
        # The list keeps a reference to band_df, so the columns added below
        # are still part of the final concatenation.
        band_frames.append(band_df)

        # Correct the p-values within this frequency band only
        for pval_col, bon_col, fdr_col in correction_columns:
            bon_corrected = bonferroni_correction(band_df[pval_col],
                                                  alpha=0.05)[1]
            fdr_corrected = fdr_correction(band_df[pval_col], alpha=0.05,
                                           method='indep')[1]
            band_df[bon_col] = bon_corrected
            band_df[fdr_col] = fdr_corrected

    # Stack all frequency bands into one table
    dabest_df = pd.concat(band_frames)

    # Save the table to file
    FileName = self.createFileName(
        suffix='Graph-Measures-DABEST-' + self.net_version, filetype='.pkl')
    FilePath = self.createFilePath(self.NetMeasuresDir, self.net_version,
                                   FileName)
    dabest_df.to_pickle(FilePath)
    print('Graph Measure Statistics done.')
def test_multi_pval_correction():
    """Test p-value correction for multiple comparisons (FDR and Bonferroni).

    Builds random data of shape (10, 1000, 10) in which 50 of the tests get a
    constant offset of 4.0, runs a one-sample t-test against 0 and checks:

    * Bonferroni output is 2D and its rejection rate is close to ``alpha``
      (to one decimal place);
    * FDR (``'indep'`` and ``'negcorr'``) rejects at least the 50 shifted
      tests and at most 50 * 1.05 more, with an effective t-threshold that
      lies between the uncorrected and the Bonferroni thresholds.
    """
    # Plain ``assert`` is used instead of ``assert_true`` so the test does not
    # depend on an external assertion helper; pytest reports the compared
    # values for bare asserts.
    rng = np.random.RandomState(0)
    X = rng.randn(10, 1000, 10)
    X[:, :50, 0] += 4.0  # 50 significant tests
    alpha = 0.05

    T, pval = stats.ttest_1samp(X, 0)

    n_samples = X.shape[0]
    n_tests = X.size / n_samples
    thresh_uncorrected = stats.t.ppf(1.0 - alpha, n_samples - 1)

    reject_bonferroni, pval_bonferroni = bonferroni_correction(pval, alpha)
    thresh_bonferroni = stats.t.ppf(1.0 - alpha / n_tests, n_samples - 1)
    assert pval_bonferroni.ndim == 2
    assert reject_bonferroni.ndim == 2

    fwer = np.mean(reject_bonferroni)
    assert_almost_equal(fwer, alpha, 1)

    reject_fdr, pval_fdr = fdr_correction(pval, alpha=alpha, method='indep')
    assert pval_fdr.ndim == 2
    assert reject_fdr.ndim == 2
    # Smallest |T| that is still rejected acts as the effective FDR threshold
    thresh_fdr = np.min(np.abs(T)[reject_fdr])
    assert 0 <= (reject_fdr.sum() - 50) <= 50 * 1.05
    assert thresh_uncorrected <= thresh_fdr <= thresh_bonferroni

    reject_fdr, pval_fdr = fdr_correction(pval, alpha=alpha, method='negcorr')
    thresh_fdr = np.min(np.abs(T)[reject_fdr])
    assert 0 <= (reject_fdr.sum() - 50) <= 50 * 1.05
    assert thresh_uncorrected <= thresh_fdr <= thresh_bonferroni
def test_multi_pval_correction():
    """Check Bonferroni and FDR corrections on synthetic t-test p-values.

    Random data of shape (10, 1000, 10) gets a constant offset of 4.0 on 50
    of its tests. The test then verifies the shape and scaling of the
    Bonferroni output, the number of FDR rejections for both supported
    methods, the ordering of the effective thresholds, and that an unknown
    FDR method raises ``ValueError``.
    """
    alpha = 0.05
    rng = np.random.RandomState(0)
    X = rng.randn(10, 1000, 10)
    X[:, :50, 0] += 4.0  # 50 significant tests

    T, pval = stats.ttest_1samp(X, 0)
    abs_t = np.abs(T)

    n_samples = X.shape[0]
    n_tests = X.size / n_samples  # 10000 tests
    dof = n_samples - 1
    thresh_uncorrected = stats.t.ppf(1.0 - alpha, dof)
    thresh_bonferroni = stats.t.ppf(1.0 - alpha / n_tests, dof)

    # --- Bonferroni -------------------------------------------------------
    reject_bonferroni, pval_bonferroni = bonferroni_correction(pval, alpha)
    assert pval_bonferroni.ndim == 2
    assert reject_bonferroni.ndim == 2
    # Corrected p-values are the raw ones scaled by the number of tests
    assert_allclose(pval_bonferroni / n_tests, pval)
    assert_array_equal(reject_bonferroni, pval_bonferroni < alpha)
    assert_almost_equal(np.mean(reject_bonferroni), alpha, 1)

    # --- FDR, independent tests -------------------------------------------
    reject_fdr, pval_fdr = fdr_correction(pval, alpha=alpha, method='indep')
    assert pval_fdr.ndim == 2
    assert reject_fdr.ndim == 2
    thresh_fdr = abs_t[reject_fdr].min()
    assert 0 <= (reject_fdr.sum() - 50) <= 50 * 1.05
    assert thresh_uncorrected <= thresh_fdr <= thresh_bonferroni

    # Unknown method names are rejected; alpha=0 rejects nothing
    with pytest.raises(ValueError):
        fdr_correction(pval, alpha, method='blah')
    assert not np.any(fdr_correction(pval, alpha=0)[0])

    # --- FDR, negatively correlated tests ---------------------------------
    reject_fdr, pval_fdr = fdr_correction(pval, alpha=alpha, method='negcorr')
    thresh_fdr = abs_t[reject_fdr].min()
    assert 0 <= (reject_fdr.sum() - 50) <= 50 * 1.05
    assert thresh_uncorrected <= thresh_fdr <= thresh_bonferroni
# Epoch the recording around events with id 1. `raw`, `events`, `tmin`, `tmax`
# and `picks` are defined earlier in the script (outside this fragment).
event_id = 1
# Rejection thresholds handed to mne.Epochs via `reject=`
# (presumably peak-to-peak limits per channel type -- confirm in MNE docs).
reject = dict(grad=4000e-13, eog=150e-6)
epochs = mne.Epochs(raw, events, event_id, tmin, tmax, picks=picks,
                    baseline=(None, 0), reject=reject)
X = epochs.get_data()  # as 3D matrix
X = X[:, 0, :]  # take only one channel to get a 2D array

###############################################################################
# Compute statistic

# One-sample t-test of X against 0; X is unpacked below as
# (n_samples, n_tests), so one T / p-value is produced per test.
T, pval = stats.ttest_1samp(X, 0)
alpha = 0.05

n_samples, n_tests = X.shape
# Critical t-value at level alpha without any correction
threshold_uncorrected = stats.t.ppf(1.0 - alpha, n_samples - 1)

# Bonferroni: the critical t-value uses alpha divided by the number of tests
reject_bonferroni, pval_bonferroni = bonferroni_correction(pval, alpha=alpha)
threshold_bonferroni = stats.t.ppf(1.0 - alpha / n_tests, n_samples - 1)

# FDR: the smallest |T| among the rejected tests serves as the threshold
reject_fdr, pval_fdr = fdr_correction(pval, alpha=alpha, method='indep')
threshold_fdr = np.min(np.abs(T)[reject_fdr])

###############################################################################
# Plot

# Scale the epoch time axis by 1e3 (presumably seconds -> milliseconds)
times = 1e3 * epochs.times

import matplotlib.pyplot as plt
plt.close('all')
plt.plot(times, T, 'k', label='T-stat')
# Reuse the current x-limits so the threshold line spans the whole plot
xmin, xmax = plt.xlim()
plt.hlines(threshold_uncorrected, xmin, xmax, linestyle='--', colors='k',
           label='p=0.05 (uncorrected)', linewidth=2)
# which we want here: T_obs, clusters, p_values, H0 = \ spatio_temporal_cluster_1samp_test(X, n_jobs=1, threshold=threshold, connectivity=connectivity, tail=1, n_permutations=n_permutations) # Let's put the cluster data in a readable format ps = np.zeros(width * width) for cl, p in zip(clusters, p_values): ps[cl[1]] = -np.log10(p) ps = ps.reshape((width, width)) T_obs = T_obs.reshape((width, width)) # To do a Bonferroni correction on these data is simple: p = stats.distributions.t.sf(T_obs, n_subjects - 1) p_bon = -np.log10(bonferroni_correction(p)[1]) # Now let's do some clustering using the standard method with "hat": stat_fun = partial(ttest_1samp_no_p, sigma=sigma) T_obs_hat, clusters, p_values, H0 = \ spatio_temporal_cluster_1samp_test(X, n_jobs=1, threshold=threshold, connectivity=connectivity, tail=1, n_permutations=n_permutations, stat_fun=stat_fun) # Let's put the cluster data in a readable format ps_hat = np.zeros(width * width) for cl, p in zip(clusters, p_values): ps_hat[cl[1]] = -np.log10(p) ps_hat = ps_hat.reshape((width, width)) T_obs_hat = T_obs_hat.reshape((width, width))
# \mathrm{E}(\frac{N_{\mathrm{type\ I}}}{N_{\mathrm{reject}}} # \mid N_{\mathrm{reject}} > 0) \cdot # \mathrm{P}(N_{\mathrm{reject}} > 0 \mid H_0) # # We cover some techniques that control FWER and FDR below. # # Bonferroni correction # ^^^^^^^^^^^^^^^^^^^^^ # Perhaps the simplest way to deal with multiple comparisons, `Bonferroni # correction <https://en.wikipedia.org/wiki/Bonferroni_correction>`__ # conservatively multiplies the p-values by the number of comparisons to # control the FWER. titles.append('Bonferroni') ts.append(ts[-1]) ps.append(bonferroni_correction(ps[0])[1]) mccs.append(True) plot_t_p(ts[-1], ps[-1], titles[-1], mccs[-1]) ############################################################################### # False discovery rate (FDR) correction # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # Typically FDR is performed with the Benjamini-Hochberg procedure, which # is less restrictive than Bonferroni correction for large numbers of # comparisons (fewer type II errors), but provides less strict control of type # I errors. titles.append('FDR') ts.append(ts[-1]) ps.append(fdr_correction(ps[0])[1]) mccs.append(True)
def testwise_correction_mcp(x, x_p, tail=1, mcp='maxstat'): """Test-wise correction for MCP using non-parametric statistics. This function can be used to correct the p-values for multiple comparisons at the test level (i.e at each time point, each roi, each frequencies etc.). This kind of correction usually suffers from a low statistical power (i.e if an effect is present, you might miss it because the correction is to conservative). Parameters ---------- x : array_like Array of true effect x_p : array_like Array of permutations of shape (n_perm, ...) where the other dimensions should be the same as `x` tail : {-1, 0, 1} Type of comparison. Use -1 for the lower part of the distribution, 1 for the higher part and 0 for both mcp : {'maxstat', 'fdr', 'bonferroni'} Method to use for correcting p-values for the multiple comparison problem. By default, maximum statistics is used Returns ------- pvalues : array_like Array of pvalues corrected for MCP with the same shape as the input `x` """ assert tail in [-1, 0, 1] assert mcp in ['maxstat', 'fdr', 'bonferroni'] assert isinstance(x, np.ndarray) and isinstance(x_p, np.ndarray) n_perm = x_p.shape[0] logger.info(f" Perform correction for MCP (mcp={mcp}; tail={tail})") # ------------------------------------------------------------------------- # change the distribution according to the tail (support inplace operation) if tail == 1: # upper part of the distribution pass elif tail == -1: # bottom part of the distribution x, x_p = -x, -x_p elif tail == 0: # both part of the distribution x, x_p = np.abs(x), np.abs(x_p) x = x[np.newaxis, ...] # ------------------------------------------------------------------------- # mcp correction if mcp == 'maxstat': x_p_sh = tuple([n_perm] + [1] * (x.ndim - 1)) # maximum over all dimensions except the perm one x_p = x_p.reshape(n_perm, -1).max(1).reshape(*x_p_sh) pv = (x <= x_p).sum(0) / n_perm pv = np.clip(pv, 1. / n_perm, 1.) 
else: pv = (x <= x_p).sum(0) / n_perm if mcp == 'fdr': pv = fdr_correction(pv, .05)[1] if mcp == 'bonferroni': pv = bonferroni_correction(pv, .05)[1] pv = np.clip(pv, 0., 1.) return pv
def test_bonferroni_pval_clip():
    """Check that Bonferroni-corrected p-values never exceed 1.0."""
    uncorrected = (0.2, 0.9)
    corrected = bonferroni_correction(uncorrected)[1]
    assert corrected.max() <= 1.0
def test_region_GBC(self):
    """Run region-wise group tests on global connectivity (GBC) values.

    Loads the pickled GBC table and, for every region in
    ``self.RegionNames`` and every key of ``self.FrequencyBands``, compares
    the rows with ``Group == 'FEP'`` against those with
    ``Group == 'Control'`` using

    * Levene's test for equal variances,
    * Welch's t-test (``equal_var=False``),
    * the standard two-sample t-test (``equal_var=True``).

    The p-values of the standard t-test are then corrected across regions,
    separately for each frequency band, with Bonferroni and FDR
    (``method='indep'``) at alpha = 0.05. The resulting table is pickled
    below ``self.EdgeStatsDir``.

    Returns
    -------
    None
        The result is only written to disk.
    """
    from mne.stats import bonferroni_correction, fdr_correction

    gbc_df = pd.read_pickle(
        self.find(suffix='GBC', filetype='.pkl', Freq=self.Frequencies))

    # Result Dictionary
    testdict = {
        'Frequency': [],
        'Region': [],
        't-value': [],
        'p-value': [],
        'welch-t-value': [],
        'welch-p-value': [],
        'levene-p-value': []
    }
    print('Started Statstical Test.')
    for Region in self.RegionNames:
        print(f'Testing {Region}')
        # One row per (Subject, Group), one column per frequency band
        df_pivot = gbc_df.pivot_table(index=['Subject', 'Group'],
                                      columns='Frequency',
                                      values=Region).reset_index()
        df_control = df_pivot[df_pivot['Group'] == 'Control']
        df_fep = df_pivot[df_pivot['Group'] == 'FEP']

        for Freq in self.FrequencyBands.keys():
            testdict['Frequency'].append(Freq)
            testdict['Region'].append(Region)

            # Test for equal variance, levene test
            _, pval = scipy.stats.levene(df_fep[Freq], df_control[Freq])
            testdict['levene-p-value'].append(pval)

            # Welch test, valid if variances are not equal
            t, pval = scipy.stats.ttest_ind(df_fep[Freq], df_control[Freq],
                                            equal_var=False)
            testdict['welch-t-value'].append(t)
            testdict['welch-p-value'].append(pval)

            # Standard t-test
            t, pval = scipy.stats.ttest_ind(df_fep[Freq], df_control[Freq],
                                            equal_var=True)
            testdict['t-value'].append(t)
            testdict['p-value'].append(pval)

    # Transform to DataFrame
    stats_df = pd.DataFrame(testdict)

    print('Bonferroni Correction.')
    # Set up the output columns. ``np.nan`` is used because the ``np.NaN``
    # alias no longer exists in NumPy 2.0.
    stats_df['Bonferroni'] = stats_df['FDR'] = np.nan
    # Correct across regions, one frequency band at a time
    for Freq in self.FrequencyBands.keys():
        band_mask = stats_df['Frequency'] == Freq
        band_pvals = stats_df.loc[band_mask, 'p-value']
        _, p_bon = bonferroni_correction(band_pvals, alpha=0.05)
        _, p_fdr = fdr_correction(band_pvals, alpha=0.05, method='indep')
        stats_df.loc[band_mask, 'Bonferroni'] = p_bon
        stats_df.loc[band_mask, 'FDR'] = p_fdr

    # Save Results
    FileName = self.createFileName(suffix='GBC-Region-T-Test',
                                   filetype='.pkl',
                                   Freq=self.Frequencies)
    FilePath = self.createFilePath(self.EdgeStatsDir, 'GBC', FileName)
    stats_df.to_pickle(FilePath)