def test_multi_pval_correction():
    """Check Bonferroni and FDR p-value corrections on simulated t-tests."""
    alpha = 0.05
    rng = np.random.RandomState(0)
    X = rng.randn(10, 1000, 10)
    X[:, :50, 0] += 4.0  # 50 significant tests
    T, pval = stats.ttest_1samp(X, 0)

    n_samples = X.shape[0]
    n_tests = X.size / n_samples
    dof = n_samples - 1
    thresh_uncorrected = stats.t.ppf(1.0 - alpha, dof)
    thresh_bonferroni = stats.t.ppf(1.0 - alpha / n_tests, dof)

    # Bonferroni: outputs keep the 2D shape of the p-value map.
    reject_bonferroni, pval_bonferroni = bonferroni_correction(pval, alpha)
    assert_true(pval_bonferroni.ndim == 2)
    assert_true(reject_bonferroni.ndim == 2)
    fwer = np.mean(reject_bonferroni)
    assert_almost_equal(fwer, alpha, 1)

    # FDR: the same threshold/count checks are applied to both methods; the
    # dimensionality check is only made for the 'indep' variant.
    for method in ('indep', 'negcorr'):
        reject_fdr, pval_fdr = fdr_correction(pval, alpha=alpha,
                                              method=method)
        if method == 'indep':
            assert_true(pval_fdr.ndim == 2)
            assert_true(reject_fdr.ndim == 2)
        thresh_fdr = np.min(np.abs(T)[reject_fdr])
        assert_true(0 <= (reject_fdr.sum() - 50) <= 50 * 1.05)
        assert_true(thresh_uncorrected <= thresh_fdr <= thresh_bonferroni)
def dabest_net_measures(self):
    """Compute statistics on the graph measures and pickle the result.

    Loads the graph-measure table for ``self.net_version``, runs
    ``self._parallel_net_dabest`` for every (graph measure, frequency band)
    pair in a 10-worker pool, adds Bonferroni- and FDR-corrected columns for
    the three p-value columns of each band, and writes the concatenated
    frame to a pickle file.
    """
    self.Net_df = pd.read_pickle(
        self.find(suffix='Graph-Measures-' + self.net_version,
                  filetype='.pkl'))

    # (output column prefix, source p-value column)
    pvalue_columns = (
        ('t', 'pvalue_students_t'),
        ('welch', 'pvalue_welch'),
        ('mann_whit', 'pvalue_mann_whitney'),
    )

    band_frames = []
    print('Started Graph Measure Stats.')
    for Freq in self.FrequencyBands.keys():
        measures = self.GraphMeasures.keys()
        with Pool(10) as p:
            freq_list = p.starmap(self._parallel_net_dabest,
                                  zip(measures, [Freq] * len(measures)))
        freq_df = pd.concat(freq_list)
        freq_df['Frequency'] = Freq
        # The frame is appended before the corrected columns are added; the
        # columns are added in place, so the appended object carries them.
        band_frames.append(freq_df)

        # Corrections are applied within each frequency band.
        for prefix, source in pvalue_columns:
            _, bon_corrected = bonferroni_correction(freq_df[source],
                                                     alpha=0.05)
            _, fdr_corrected = fdr_correction(freq_df[source], alpha=0.05,
                                              method='indep')
            freq_df[prefix + '_bon_corrected'] = bon_corrected
            freq_df[prefix + '_fdr_corrected'] = fdr_corrected

    dabest_df = pd.concat(band_frames)

    # Save DataFrame to file.
    FileName = self.createFileName(
        suffix='Graph-Measures-DABEST-' + self.net_version, filetype='.pkl')
    FilePath = self.createFilePath(self.NetMeasuresDir, self.net_version,
                                   FileName)
    dabest_df.to_pickle(FilePath)
    print('Graph Measure Statistics done.')
def plot_alpha_deciles_vs_pheontypes(ax_outer,pheno_df=None):
    """Draw one box plot per phenotype across alpha-diversity deciles.

    Rows are binned into ten quantile groups of the 'alpha' column. For each
    phenotype column a box plot per decile is drawn in its own row of a grid
    nested inside ``ax_outer``; the last row shows 'alpha' itself. Pairwise
    rank-sum p-values between deciles are written to one CSV per phenotype,
    and decile 1 vs decile 10 rank-sum / KS p-values (with FDR-corrected
    values) are written to 'Figure2_stats.csv' in FIGURES_DIR.

    ax_outer: subplot spec that the nested grid is created in.
    pheno_df: phenotype table; when None it is read from a CSV under
        ``basepath``.
    """
    if pheno_df is None:
        pheno_df= pd.read_csv(os.path.join(basepath, 'Phenotype-Alpha-Shannon__il__il_validation.csv'),index_col=0)
    pheno_df = pheno_df.sort_values('alpha')
    # Decile labels are the strings '1'..'10'.
    pheno_df['alpha-decile']=pd.qcut(pheno_df['alpha'],10,labels=[str(x) for x in range(1,11)])
    pheno_df=pheno_df[['alpha-decile', 'age', 'bmi', 'hba1c', 'bt__fasting_glucose', 'bt__fasting_triglycerides', 'bt__hdl_cholesterol', 'alpha']]
    # NOTE(review): presumably triglycerides are stored as log10 values and
    # this converts them back - confirm against the data source.
    pheno_df['bt__fasting_triglycerides']=pheno_df['bt__fasting_triglycerides'].apply(lambda x: 10**x)
    # One grid row per column except 'alpha-decile'.
    ax_all = gridspec.GridSpecFromSubplotSpec(pheno_df.shape[1]-1, 1, ax_outer,hspace=0.55)
    ax_a = plt.subplot(ax_all[0, 0])
    # Panel letter placed relative to the first row's axes.
    plt.text(-.35, 1.1, 'a', ha='center', va='center', transform=ax_a.transAxes, fontsize=16)
    phenotype='alpha'
    # The alpha-diversity plot goes in the last grid row; plt.subplot makes
    # it the current axes, which is where sns.boxplot draws.
    ax_alpha = plt.subplot(ax_all[pheno_df.shape[1]-2, 0])
    ax_alpha = sns.boxplot(x=pheno_df['alpha-decile'].values.astype(int), y=pheno_df[phenotype].values, color='white',fliersize=0,whis=[5, 95],width=0.5)
    ax_alpha.set_xlabel('Alpha diversity decile',labelpad=2)
    ax_alpha.set_ylabel('Alpha\ndiversity',labelpad=2)
    ax_alpha.set_ylim([1,7])
    ax_alpha.set_yticks([1,4, 7])
    ax_alpha.set_yticklabels([1, 4, 7])
    ax_alpha.spines['right'].set_visible(False)
    ax_alpha.spines['top'].set_visible(False)
    ax_alpha.set_title('')
    ax_alpha.tick_params(top=False, right=False, pad=2)
    # NOTE(review): pvals is never used below.
    pvals=[]
    # phenotype -> [rank-sum p-value, KS p-value] for decile 1 vs decile 10
    stats = {}
    for i,phenotype in enumerate(pheno_df.columns):
        if phenotype == 'alpha-decile' or phenotype=='alpha':
            continue
        # 'alpha-decile' is column 0, so phenotype column i maps to row i-1.
        ax_p = plt.subplot(ax_all[i-1, 0])
        # One column per decile, one row per original index entry.
        decile_df=pheno_df[['alpha-decile', phenotype]].pivot_table(values=phenotype, index=pheno_df[['alpha-decile', phenotype]].index, columns='alpha-decile', aggfunc='first')
        # print(decile_df[['1','10']].describe())
        # Pairwise rank-sum p-values: all_stats[j][k] compares decile k+1
        # with decile j+1 for k < j; the remaining entries stay 0.
        all_stats = {}
        for j in range(10):
            all_stats[j] = [0]*10
            for k in range(j):
                res_rank = ranksums(decile_df[str(k+1)].dropna(), decile_df[str(j+1)].dropna())
                all_stats[j][k] = res_rank[1]
        pd.DataFrame(all_stats).to_csv(os.path.join(FIGURES_DIR,"fig2_stats_%s.csv"%phenotype))
        # Extreme deciles: lowest vs highest alpha diversity.
        res_rank = ranksums(decile_df['1'].dropna(),decile_df['10'].dropna())
        res_ks = ks_2samp(decile_df['1'].dropna(),decile_df['10'].dropna())
        stats[phenotype] = [res_rank[1],res_ks[1]]
        ax_p = sns.boxplot(x=pheno_df['alpha-decile'], y=pheno_df[phenotype], color='white',fliersize=0,whis=[5, 95],width=0.6)
        ax_p.set_ylabel(rename[phenotype].replace(' ','\n'),labelpad=2)
        # Ticks at the lower limit, the midpoint and the upper limit.
        ax_p.set_yticks([limits[phenotype][0],(limits[phenotype][0]+limits[phenotype][1])/2,limits[phenotype][1]])
        ax_p.set_ylim(limits[phenotype])
        params_for_subplots(ax_p,plot_asterix=True)
    plt.subplots_adjust(left=0.3)
    # Rows are phenotypes; the q-value columns are FDR-corrected p-values.
    stats_df=pd.DataFrame(stats,index=['RankSum_Pvalue','KS_Pvalue']).T
    stats_df['RankSum_Qvalue']=fdr_correction(stats_df['RankSum_Pvalue'].values)[1]
    stats_df['KS_Qvalue']=fdr_correction(stats_df['KS_Pvalue'].values)[1]
    stats_df.to_csv(os.path.join(FIGURES_DIR,"Figure2_stats.csv"))
def fc_visual(fcfile_pickle):
    """Plot the FDR-significant connectivity graph stored in a pickle file.

    The pickle must provide the keys 'imcohs', 'pvals' and 'chnAreas'.
    Entries whose p-value is rejected by FDR correction (alpha=0.05,
    method 'indep') keep their 'imcohs' value; every other entry is 0. The
    figure is saved next to the pickle as 'visual_<name>_all.png'.
    """
    with open(fcfile_pickle, 'rb') as handle:
        fc = pickle.load(handle)
    imcohs, pvals, chnAreas = fc['imcohs'], fc['pvals'], fc['chnAreas']

    # Multiple comparison correction: keep only the significant weights.
    reject, _ = fdr_correction(pvals, alpha=0.05, method='indep')
    sig_rows, sig_cols = np.nonzero(reject)
    weight = np.zeros(imcohs.shape)
    if sig_rows.size:
        weight[sig_rows, sig_cols] = imcohs[sig_rows, sig_cols]

    # The last condition name contained in the path wins.
    for candidate in ('normal', 'mild', 'moderate'):
        if candidate in fcfile_pickle:
            cond = candidate

    folder, filename = os.path.split(fcfile_pickle)
    stem = filename[:-len('.pickle')]
    saveFCGraph = os.path.join(folder, 'visual_' + stem + '_' + 'all' + '.png')

    # Label text -> list of three numbers handed to weight_visual_save.
    texts = {cond: [-80, 40, 15], animal: [80, 20, 20]}

    channel_info = assign_coord2chnArea(area_coord_file=area_coord_file,
                                        chnAreas=chnAreas)
    weight_visual_save(weight,
                       chnInf=channel_info,
                       savefile=saveFCGraph,
                       texts=texts,
                       threds_edge=None)
def parallel_stats(X, function=_my_wilcoxon, correction='FDR', n_jobs=2):
    """Run a univariate test on every column of X in parallel.

    Parameters
    ----------
    X : array-like, shape (n_observations, ...)
        Data; all trailing dimensions are treated as independent tests.
    function : callable
        Test forwarded to ``_loop`` together with each chunk of columns.
    correction : 'FDR' | None | False
        Multiple-comparison correction applied to the p-values.
    n_jobs : int
        Number of jobs handed to ``parallel_func``.

    Returns
    -------
    p_values : ndarray, shape ``X.shape[1:]``
        The (optionally FDR-corrected) p-values.

    Raises
    ------
    ValueError
        If ``correction`` is not one of the supported values.
    """
    # check if correction method was provided
    if correction not in [False, None, 'FDR']:
        raise ValueError('Unknown correction')

    # Flatten the trailing dimensions with a C-order reshape. The previous
    # in-place ndarray.resize worked in memory order, so Fortran-ordered
    # input was laid out differently from the C-order np.reshape used on the
    # p-values below. reshape never mutates the input, so no copy is needed
    # and the shape does not have to be restored afterwards.
    X = np.asarray(X)
    dims = X.shape
    X_2d = X.reshape(dims[0], int(np.prod(dims[1:])))

    # prepare parallel: one chunk of columns per job
    n_cols = X_2d.shape[1]
    parallel, pfunc, n_jobs = parallel_func(_loop, n_jobs)
    n_chunks = min(n_cols, n_jobs)
    chunks = np.array_split(range(n_cols), n_chunks)
    p_values = parallel(pfunc(X_2d[:, chunk], function) for chunk in chunks)
    p_values = np.reshape(np.hstack(p_values), dims[1:])

    # apply correction
    if correction == 'FDR':
        _, corrected = fdr_correction(p_values)
        p_values = np.reshape(corrected, p_values.shape)
    return p_values
def truncate_dynfc(dynciCOH, pvals):
    """
        Set the non-significant entries of a dynamic fc array to 0.

        Significance is decided by FDR correction of pvals (alpha=0.05,
        method 'indep').

        Arg:
            dynciCOH: dynamic ciCOHs [nchns * nchns * ntemp]

            pvals: p-value for each value in dynciCOH, shape = dynciCOH.shape

        Return:
            trunc_dynfc: array with the shape of dynciCOH holding
                abs(dynciCOH) where the corrected test rejects and 0
                elsewhere (the values are magnitudes, not a 0/1 mask)
    """
    # multiple comparison correction, get truncated dynfc
    reject, _ = fdr_correction(pvals, alpha=0.05, method='indep')
    rows, cols, ts = np.nonzero(reject)
    trunc_dynfc = np.zeros(dynciCOH.shape)
    if rows.size:
        trunc_dynfc[rows, cols, ts] = abs(dynciCOH[rows, cols, ts])
    return trunc_dynfc
def cohort_paired_t_tests(configs):
    """Run a paired t-test per group CSV and print FDR-corrected p-values.

    Every ``*.csv`` file under
    ``<base_path>/for_<grouping>_<comparison_metric>_stats_importance`` is
    one group. The first row is skipped as a header; columns 1 and 2 hold
    the paired values. Rows where either value is empty are skipped, and
    groups whose test yields NaN are left out of the correction.

    Parameters
    ----------
    configs : mapping
        Must provide the keys "base_path", "grouping" and
        "comparison_metric".
    """
    all_pvals = []
    groupnames = []
    basepath = os.path.join(
        configs["base_path"],
        "for_{}_{}_stats_importance".format(configs["grouping"],
                                            configs["comparison_metric"]))
    for fpath in sorted(glob.glob(os.path.join(basepath, "*.csv"))):
        # basename/splitext also works with Windows path separators.
        groupname = os.path.splitext(os.path.basename(fpath))[0]
        a = []
        b = []
        with open(fpath, 'r', newline='') as fp:
            csv_fp = csv.reader(fp, delimiter=',')
            next(csv_fp, None)  # skip the header; tolerate an empty file
            for line in csv_fp:
                # csv.reader yields [] for blank lines; skip short rows too.
                if len(line) < 3 or line[1] == "" or line[2] == "":
                    continue
                a.append(float(line[1]))
                b.append(float(line[2]))
        _, pval = sp_stats.ttest_rel(a, b)
        if np.isnan(pval):
            continue
        groupnames.append(groupname)
        all_pvals.append(pval)

    _, corr = mne_stats.fdr_correction(all_pvals, method="indep")
    for gname, corrected in zip(groupnames, corr):
        print("Group; {}, Corrected pval: {}".format(gname, corrected))
def perform_test(self, genotype_info, clip_reads, clip_coverage,
                 rna_reads, rna_coverage):
    """Run Fisher's exact test at every eligible site and FDR-correct it.

    A site is tested when its third genotype field is not "none" and both
    its CLIP and RNA coverage reach ``self.minimum_coverage``. The 2x2
    table holds the CLIP and RNA read counts of the two alleles named by
    the first two genotype fields.

    Returns the FDR-corrected p-values (alpha=0.1, method 'indep') and the
    list of odds ratios, both in site iteration order.
    """
    asprin_pvalues = []
    asprin_odds_ratio = []
    for chrom, sites in genotype_info.items():
        for pos, genotype in sites.items():
            eligible = (genotype[2] != "none"
                        and clip_coverage[chrom][pos] >= self.minimum_coverage
                        and rna_coverage[chrom][pos] >= self.minimum_coverage)
            if not eligible:
                continue
            first_allele, second_allele = genotype[0], genotype[1]
            clip_counts = clip_reads[chrom][pos]
            rna_counts = rna_reads[chrom][pos]
            contingency = [
                [clip_counts[first_allele], clip_counts[second_allele]],
                [rna_counts[first_allele], rna_counts[second_allele]],
            ]
            odds_ratio, p_value = stats.fisher_exact(contingency,
                                                     'two-sided')
            asprin_pvalues.append(p_value)
            asprin_odds_ratio.append(odds_ratio)

    alpha = 0.1
    _, asprin_qvalues = fdr_correction(asprin_pvalues, alpha=alpha,
                                       method='indep')
    return asprin_qvalues, asprin_odds_ratio
def digitized_dynfc(dynciCOH, pvals):
    """
        Binarize a dynamic fc array by significance.

        Arg:
            dynciCOH: dynamic ciCOHs [nchns * nchns * ntemp]

            pvals: p-value for each value in dynciCOH, shape = dynciCOH.shape

        Return:
            digi_dynfc: array with the shape of dynciCOH that is 1 where the
                FDR-corrected test (alpha=0.05, 'indep') rejects and 0
                elsewhere
    """
    reject, _ = fdr_correction(pvals, alpha=0.05, method='indep')
    sig_rows, sig_cols, sig_times = np.nonzero(reject)

    digi_dynfc = np.zeros(dynciCOH.shape)
    if sig_rows.size:
        digi_dynfc[sig_rows, sig_cols, sig_times] = 1
    return digi_dynfc
def unpair_fdr_t_test(cond1, cond2):
    """Return FDR-corrected p-values of an unpaired t-test of two samples."""
    from scipy.stats import ttest_ind
    from mne.stats import fdr_correction

    # Only the p-values of the t-test are needed.
    uncorrected = ttest_ind(cond1, cond2)[1]
    return fdr_correction(uncorrected)[1]
def JoinAndParse():
    """Merge the per-phenotype LMM result files into 'LMM_results.csv'.

    Reads every '*_LMM_results.txt' (tab separated) in ``output_dir``,
    indexes the rows by 'Phenotype', sorts them by descending
    'microbiome-association index', splits the '95% CI' text
    ('<low> - <high>') into numeric bounds, adds FDR-corrected Q values and
    writes the selected columns to ``output_dir``.
    """
    results = glob.glob(os.path.join(output_dir, '*_LMM_results.txt'))
    objs = [pd.read_csv(res, sep='\t') for res in results]
    obj = (pd.concat(objs)
           .set_index('Phenotype')
           .sort_values('microbiome-association index', ascending=False))
    obj['H2'] = obj['microbiome-association index']
    # The builtin float replaces the np.float alias, which was removed in
    # NumPy 1.24 and raises AttributeError there.
    obj['CI_low'] = obj['95% CI'].apply(lambda x: float(x.split(' - ')[0]))
    obj['CI_high'] = obj['95% CI'].apply(lambda x: float(x.split(' - ')[1]))
    obj['Q value'] = fdr_correction(obj['P value'])[1]
    columns = ['H2', 'CI_low', 'CI_high', 'P value', 'Q value',
               'Sample size', 'V(G)', 'V(e)', 'mean', 'age', 'gender']
    obj[columns].to_csv(os.path.join(output_dir, 'LMM_results.csv'))
def fc_visual_subAreas(fcfile_pickle,
                       subareas=('M1', 'STN', 'GP'),
                       subtitle='M1DBS'):
    """Plot the FDR-significant connectivity graph of selected sub-areas.

    Parameters
    ----------
    fcfile_pickle : str
        Pickle file providing the keys 'imcohs', 'pvals' and 'chnAreas'.
    subareas : sequence of str
        A channel is kept when its area name contains any of these strings
        (case-insensitive).
    subtitle : str
        Suffix of the saved file name, 'visual_<name>_<subtitle>.png'.
    """
    with open(fcfile_pickle, 'rb') as handle:
        fc = pickle.load(handle)
    imcohs = fc['imcohs']
    pvals = fc['pvals']
    chnAreas = fc['chnAreas']

    # Keep each matching channel exactly once, even when its area name
    # matches several requested sub-areas (the old nested loop appended it
    # once per match, duplicating rows/columns).
    wanted = [sarea.lower() for sarea in subareas]
    kept = [(ci, carea) for ci, carea in enumerate(chnAreas)
            if any(sarea in carea.lower() for sarea in wanted)]
    # An explicit integer dtype keeps an empty selection a valid index.
    idxs_remain = np.array([ci for ci, _ in kept], dtype=int)
    chnAreas = [carea for _, carea in kept]
    submatrix = np.ix_(idxs_remain, idxs_remain)
    imcohs = imcohs[submatrix]
    pvals = pvals[submatrix]

    # multiple comparison correction, get weights
    reject, _ = fdr_correction(pvals, alpha=0.05, method='indep')
    rows, cols = np.nonzero(reject)
    weight = np.zeros(imcohs.shape)
    if rows.size:
        weight[rows, cols] = imcohs[rows, cols]

    # The last condition name contained in the path wins; cond stays
    # unbound when the path contains none of them.
    for co in ('normal', 'mild', 'moderate'):
        if co in fcfile_pickle:
            cond = co

    folder, filename = os.path.split(fcfile_pickle)
    saveFCGraph = os.path.join(
        folder,
        'visual_' + filename[:-len('.pickle')] + '_' + subtitle + '.png')

    texts = {cond: [-80, 40, 15], animal: [80, 20, 20]}

    weight_visual_save(weight,
                       chnInf=assign_coord2chnArea(
                           area_coord_file=area_coord_file,
                           chnAreas=chnAreas),
                       savefile=saveFCGraph,
                       texts=texts,
                       threds_edge=None)
def fdr_correction_matrix(p_value_matrix, template=None):
    """
    Return the FDR (False Discovery Rate) corrected p-values of a matrix.

    If not all statistical tests have been performed (typically in DTI at an
    absent connection), a template matrix with the same shape as
    p_value_matrix can be given: a binary matrix with 1 where the test was
    performed and 0 elsewhere. With a template, only the entries above the
    diagonal that are marked 1 are corrected; every other entry keeps a
    corrected p-value of 1 and is not rejected.

    Returns the tuple (reject_test, p_value_corrected).
    """
    import numpy as np
    from mne.stats import fdr_correction

    # Exact type equality (not isinstance) decides whether a template is used.
    has_template = type(template) == type(p_value_matrix)

    if has_template and p_value_matrix.shape != template.shape:
        raise IOError(
            'p_value_matrix and template should have the same shape.')

    if not has_template:
        if not template:
            reject_test, p_value_corrected = fdr_correction(p_value_matrix)
            return reject_test, p_value_corrected
        raise IOError('template input should be an numpy array or None.')

    # Collect the performed tests of the upper triangle.
    positions = [(row, col)
                 for col in np.arange(0, p_value_matrix.shape[0])
                 for row in np.arange(0, col)
                 if template[row, col] == 1]
    performed = [p_value_matrix[row, col] for row, col in positions]

    reject, p_corrected = fdr_correction(performed)

    # Write the corrected results back to their matrix positions.
    p_value_corrected = np.ones(p_value_matrix.shape)
    reject_test = np.zeros(p_value_matrix.shape, dtype=bool)
    for n, corrected in enumerate(p_corrected):
        row, col = positions[n]
        p_value_corrected[row, col] = corrected
        reject_test[row, col] = reject[n]
    return reject_test, p_value_corrected
def test_multi_pval_correction():
    """Check Bonferroni and FDR p-value corrections on simulated t-tests."""
    alpha = 0.05
    rng = np.random.RandomState(0)
    X = rng.randn(10, 1000, 10)
    X[:, :50, 0] += 4.0  # 50 significant tests
    T, pval = stats.ttest_1samp(X, 0)

    n_samples = X.shape[0]
    n_tests = X.size / n_samples
    dof = n_samples - 1
    thresh_uncorrected = stats.t.ppf(1.0 - alpha, dof)
    thresh_bonferroni = stats.t.ppf(1.0 - alpha / n_tests, dof)

    def check_fdr(method):
        """Run one FDR method and verify rejection count and threshold."""
        reject, corrected = fdr_correction(pval, alpha=alpha, method=method)
        thresh_fdr = np.min(np.abs(T)[reject])
        assert 0 <= (reject.sum() - 50) <= 50 * 1.05
        assert thresh_uncorrected <= thresh_fdr <= thresh_bonferroni
        return reject, corrected

    # Bonferroni: corrected values are the raw ones scaled by the number of
    # tests, and rejection is a plain threshold on the corrected values.
    reject_bonferroni, pval_bonferroni = bonferroni_correction(pval, alpha)
    assert pval_bonferroni.ndim == 2
    assert reject_bonferroni.ndim == 2
    assert_allclose(pval_bonferroni / 10000, pval)
    assert_array_equal(reject_bonferroni, pval_bonferroni < alpha)
    fwer = np.mean(reject_bonferroni)
    assert_almost_equal(fwer, alpha, 1)

    # FDR assuming independence.
    reject_fdr, pval_fdr = fdr_correction(pval, alpha=alpha, method='indep')
    assert pval_fdr.ndim == 2
    assert reject_fdr.ndim == 2
    thresh_fdr = np.min(np.abs(T)[reject_fdr])
    assert 0 <= (reject_fdr.sum() - 50) <= 50 * 1.05
    assert thresh_uncorrected <= thresh_fdr <= thresh_bonferroni

    # Invalid method name and the degenerate alpha=0 case.
    with pytest.raises(ValueError):
        fdr_correction(pval, alpha, method='blah')
    assert np.all(fdr_correction(pval, alpha=0)[0] == 0)

    # FDR under negative correlation.
    check_fdr('negcorr')
def subArea_dailyfc_visual(files):
    """Save one sub-area connectivity figure per recording file.

    Files with fewer than 80 entries along the third axis of their lfp data
    are skipped. For each remaining file the ciCOH matrix is restricted to
    the left thalamus / SMA / M1 areas, two-sided p-values are computed from
    a normal distribution fitted by the permutation test of the channel pair
    with the largest ciCOH, and only FDR-significant entries (alpha=0.05,
    'indep') are drawn.
    """
    save_prefix = 'leftThaCor_'
    areas_used = ['lVA', 'lVLo/VPLo', 'lSMA', 'rSMA', 'M1']

    for onefile in files:
        lfpdata, chnAreas, fs = lfp_extract([onefile])
        if lfpdata.shape[2] < 80:
            continue
        print(onefile)

        ciCOHs = calc_ciCOHs_rest(lfpdata)

        # permutation test: use the lfp data whose ciCOHs are the largest
        # to get the distribution
        i, j = np.unravel_index(np.argmax(ciCOHs), ciCOHs.shape)
        _, mu, std = pval_permciCOH_rest(lfpdata[i, :, :], lfpdata[j, :, :],
                                         ciCOHs[i, j], shuffleN=1000)

        # The condition is parsed from the first file name, the date from
        # the current one.
        cond = re.search('_[a-z]*_[0-9]{8}', files[0]).group()[1:-9]
        datestr = re.search('[0-9]{8}', os.path.basename(onefile)).group()

        # subareas selection
        ciCOH_new, chnAreas_new = ciCOH_select(ciCOHs, chnAreas, areas_used)

        # multiple comparison correction, get weight matrix
        pvals = norm.sf(abs(ciCOH_new), loc=mu, scale=std) * 2
        reject, _ = fdr_correction(pvals, alpha=0.05, method='indep')
        sig_rows, sig_cols = np.nonzero(reject)
        weight = np.zeros(ciCOH_new.shape)
        if sig_rows.size:
            weight[sig_rows, sig_cols] = ciCOH_new[sig_rows, sig_cols]

        # visual and save; no text labels are passed to the plot
        saveFCGraph = os.path.join(
            savefolder, cond + '_' + save_prefix + '_' + datestr + '.png')
        weight_visual_save(weight,
                           chnInf=assign_coord2chnArea(area_coord_file,
                                                       chnAreas_new),
                           savefile=saveFCGraph,
                           texts=None,
                           threds_edge=None)
def dailyfc_visual(files):
    """Save one whole-array connectivity figure per recording file.

    Files with fewer than 80 entries along the third axis of their lfp data
    are skipped. P-values come from a normal distribution fitted by the
    permutation test of the channel pair with the largest ciCOH; only
    FDR-significant entries (alpha=0.05, 'indep') are drawn. Frequency band,
    condition and date are parsed from the file name.
    """
    save_prefix = 'all'

    for onefile in files:
        lfpdata, chnAreas, fs = lfp_extract([onefile])
        if lfpdata.shape[2] < 80:
            continue
        print(onefile)

        ciCOHs = calc_ciCOHs_rest(lfpdata)

        # permutation test: use the lfp data whose ciCOHs are the largest
        # to get the distribution
        i, j = np.unravel_index(np.argmax(ciCOHs), ciCOHs.shape)
        _, mu, std = pval_permciCOH_rest(lfpdata[i, :, :], lfpdata[j, :, :],
                                         ciCOHs[i, j], shuffleN=1000)
        pvals = norm.sf(abs(ciCOHs), loc=mu, scale=std) * 2

        # multiple comparison correction, get weights
        reject, _ = fdr_correction(pvals, alpha=0.05, method='indep')
        sig_rows, sig_cols = np.nonzero(reject)
        weight = np.zeros(ciCOHs.shape)
        if sig_rows.size:
            weight[sig_rows, sig_cols] = ciCOHs[sig_rows, sig_cols]

        # Parse the naming parts of the output file.
        filename = os.path.basename(onefile)
        datestr = re.search('[0-9]{8}', filename).group()
        cond = re.search('_[a-z]*_[0-9]{8}', filename).group()[1:-9]
        band = re.search('_filtered[0-9]*_[0-9]*', filename).group()
        freqstr = 'freq' + band[len('_filtered'):]

        # visual and save
        saveFCGraph = os.path.join(
            savefolder,
            freqstr + '_' + cond + '_' + save_prefix + '_' + datestr + '.png')
        texts = {cond + ',' + datestr: [-80, 50, 15]}
        weight_visual_save(weight,
                           chnInf=assign_coord2chnArea(
                               area_coord_file=area_coord_file,
                               chnAreas=chnAreas),
                           savefile=saveFCGraph,
                           texts=texts,
                           threds_edge=None)
def mass_univ_corrected(X):
    """Test every feature of X against zeros and mask non-significant ones.

    Each column of X (flattened over all axes but the first) is compared
    with an all-zero sample of the same length using a one-sided
    ('greater') Mann-Whitney U test. The p-values are reshaped to
    ``X.shape[1:]`` and returned as a masked array in which entries that do
    not survive FDR correction (alpha=0.05) are masked.
    """
    from scipy.stats import mannwhitneyu
    from mne.stats import fdr_correction

    columns = X.reshape(X.shape[0], -1)
    zeros_sample = np.zeros(X.shape[0])

    pvals = np.zeros(columns.shape[-1])
    for col in range(columns.shape[-1]):
        pvals[col] = mannwhitneyu(columns[:, col], zeros_sample,
                                  alternative='greater')[1]

    pvals = pvals.reshape(*X.shape[1:])
    significant, _ = fdr_correction(pvals, alpha=0.05)
    return np.ma.masked_where(~significant, pvals)
def fc_metrics(fcfile_pickle):
    """Compute graph metrics of the FDR-significant connectivity graph.

    The pickle must provide 'imcohs', 'pvals' and 'chnAreas'. An undirected
    graph with one node per channel is built; an edge is added for every
    upper-triangle entry whose p-value survives FDR correction (alpha=0.05,
    'indep') and whose absolute imcoh is positive.

    The following metrics are pickled to 'metric_<filename>' next to the
    input:
        cc: average clustering coefficient
        nbcs: degree centrality of every node as returned by
            nx.degree_centrality (the stored key name suggests betweenness
            centrality, but that is not what is computed)
        chnAreas: copied from the input pickle

    Returns the path of the written metric file.
    """
    with open(fcfile_pickle, 'rb') as handle:
        fc = pickle.load(handle)
    imcohs = fc['imcohs']
    pvals = fc['pvals']

    # multiple comparison correction, get weights
    reject, _ = fdr_correction(pvals, alpha=0.05, method='indep')
    rows, cols = np.nonzero(reject)
    weight = np.zeros(imcohs.shape)
    if rows.size:
        weight[rows, cols] = imcohs[rows, cols]
    weight = abs(weight)

    n_nodes = weight.shape[0]
    G = nx.Graph()
    G.add_nodes_from(np.arange(0, n_nodes))
    # Only the upper triangle is read, so each pair is considered once.
    for i in range(0, n_nodes - 1):
        for j in range(i + 1, n_nodes):
            if weight[i, j] > 0:
                G.add_edge(i, j, weight=weight[i, j])

    metrics = {
        'cc': nx.average_clustering(G),
        'nbcs': nx.degree_centrality(G),
        'chnAreas': fc['chnAreas'],
    }

    folder, filename = os.path.split(fcfile_pickle)
    metricfile = os.path.join(folder, 'metric_' + filename)
    with open(metricfile, 'wb') as f:
        pickle.dump(metrics, f)
    return metricfile
def comb_fc(filepatt):
    """
        Combine all fc figures matching a file pattern into one image.

        Every file in savefolder matching filepatt is read with cv2 and the
        images are concatenated along axis 2; the result is written to
        savefolder under a name starting with 'comb_'.

        NOTE(review): the statements after the image is written reference
        names that are not defined in this function (ciCOH, ntrials, ntemp,
        f, t, rejs), so unless they exist at module level the function
        raises NameError there - looks unfinished, confirm intent.
    """
    files = glob.glob(os.path.join(savefolder, filepatt))
    print(filepatt)
    if files == []:
        imgs = []
        print('No files found for ' + filepatt)
        return
    # Placeholder; replaced by the first image that is read.
    imgs = np.empty((600, 600, 3))
    for fi, file in enumerate(files):
        img = cv2.imread(file)
        if fi == 0:
            imgs = img
        else:
            # NOTE(review): axis 2 is the third axis of the image array, so
            # this stacks along it rather than placing images side by side -
            # confirm this is intended.
            imgs = np.concatenate((imgs, img), axis = 2)
    # Output name: the pattern from 'freq' onwards without its last four
    # characters (the length of '.mat').
    idx = filepatt.find('freq')
    comb_fcGraph = os.path.join(savefolder, 'comb_' + filepatt[idx: -len('.mat')])
    cv2.imwrite(comb_fcGraph, imgs)
    print(comb_fcGraph)
    # find lowweight
    # NOTE(review): pvals_vec is unused, ciCOH_vec is an empty list indexed
    # with the undefined name rejs, and reject/pval_corr are never used.
    pvals_vec = []
    ciCOH_vec = []
    pvals = pvals_fc_overtime(ciCOH = ciCOH, ntrials = ntrials, ntemp = ntemp, f = f, t = t)
    reject, pval_corr = fdr_correction(pvals, alpha=0.05, method='indep')
    lowweight = min(ciCOH_vec[rejs])
    return lowweight
def binomial(self, df_freqs_test, df_freqs_control, epsilon_p, apply_fdr):
    """Binomial enrichment test of each row's presence in the test samples.

    For every row, the expected presence probability is the fraction of
    control columns with a non-missing value (replaced by ``epsilon_p`` when
    that fraction is 0). The observed count is the number of non-missing
    test columns, and the p-value is the one-sided ('greater') binomial
    probability of seeing at least that many. Rows of the two frames are
    paired by position.

    Parameters
    ----------
    df_freqs_test, df_freqs_control : pandas.DataFrame
        Rows are features, columns are samples; missing values mark absence.
    epsilon_p : float
        Probability used for rows never present in the controls.
    apply_fdr : bool
        Also return FDR-corrected values (alpha=0.05, 'indep'; NaN p-values
        are treated as 1 for the correction).

    Returns
    -------
    pandas.Series 'pvals' indexed like ``df_freqs_test``, or the tuple
    ('pvals', 'FDR') when ``apply_fdr`` is true.
    """
    # expected probability for binomial distribution based on control samples
    num_controls = len(df_freqs_control.columns)
    p = df_freqs_control.count(axis=1) / num_controls
    p[p == 0] = epsilon_p

    N = len(df_freqs_test.columns)
    counts = df_freqs_test.count(axis=1)

    def _pvalue(k, prob):
        """One-sided binomial p-value; NaN probability gives NaN."""
        if np.isnan(prob):
            return np.nan
        # binom_test was removed in SciPy 1.12; binomtest exists since 1.7
        # and requires a true integer count.
        if hasattr(stats, 'binomtest'):
            return stats.binomtest(int(k), N, prob,
                                   alternative="greater").pvalue
        return stats.binom_test(int(k), N, prob, alternative="greater")

    pvals = pd.Series([_pvalue(k, prob) for k, prob in zip(counts, p)],
                      index=df_freqs_test.index, name='pvals', dtype=float)

    if apply_fdr:
        _, corrected = fdr_correction(pvals.fillna(1), alpha=0.05,
                                      method='indep')
        fdr = pd.Series(np.asarray(corrected), index=df_freqs_test.index,
                        name='FDR')
        return pvals, fdr
    return pvals
def check_whiteness_and_consistency(X, E, alpha=0.05):
    """
    Check the whiteness and consistency of the MVAR model.

    Whiteness is tested with Durbin-Watson statistics, using FDR correction
    for multiple comparisons.

    Parameters:
    -----------
    X : np.array of shape (n_sources, n_times, n_epochs)
        The data array.
    E : np.array
        Serially uncorrelated residuals.
    alpha : float
        Significance level passed to the FDR correction.

    Returns:
    --------
    whi : bool
        Whiteness after FDR correction; False as soon as the corrected test
        rejects for at least one residual.
    cons : float
        Result of the consistency test.
    dw : np.array
        The Durbin-Watson statistics.
    pval : float
        The uncorrected p-values corresponding to the DW-statistics.
    """
    from mne.stats import fdr_correction

    dw, pval = dw_whiteness(X, E)
    reject, _ = fdr_correction(pval, alpha=alpha)
    # Any rejection means serial correlation is present in some residual.
    whi = not reject.any()
    cons = consistency(X, E)
    return whi, cons, dw, pval
def segfc_visual(onefile):
    """Compute per-segment ciCOHs of one file and save a connectivity figure.

    NOTE(review): the per-segment result is stored in seg_ciCOHs, but the
    permutation test and everything after it read ciCOHs, which is never
    assigned in this function. Unless a module-level ciCOHs exists this
    raises NameError; presumably seg_ciCOHs or a reduction of it was
    intended - confirm.
    """
    # lfpdata: nchns * ntemp * nsegs
    lfpdata, chnAreas, fs = lfp_extract([onefile])
    nchns, _, nsegs = lfpdata.shape
    # One nchns x nchns ciCOH matrix per segment.
    seg_ciCOHs = np.zeros(shape=(nchns, nchns, nsegs))
    for segi in range(nsegs):
        # expand_dims restores the third axis dropped by selecting a segment.
        seg_ciCOHs[:, :, segi] = calc_ciCOHs_rest(
            np.expand_dims(lfpdata[:, :, segi], axis=2))
    # permutation test: use the lfp data whose ciCOHs are the largest to get distribution
    [i, j] = np.unravel_index(np.argmax(ciCOHs), shape=ciCOHs.shape)
    lfp1, lfp2 = lfpdata[i, :, :], lfpdata[j, :, :]
    _, mu, std = pval_permciCOH_rest(lfp1, lfp2, ciCOHs[i, j], shuffleN=1000)
    # Two-sided p-values from the normal distribution with the fitted mu/std.
    pvals = norm.sf(abs(ciCOHs), loc=mu, scale=std) * 2
    # multiple comparison correction, get weights
    reject, pval_corr = fdr_correction(pvals, alpha=0.05, method='indep')
    [rows, cols] = np.where(reject == True)
    weight = np.zeros(ciCOHs.shape)
    if len(rows) > 0:
        weight[rows, cols] = ciCOHs[rows, cols]
    # visual and save
    filename = os.path.basename(onefile)
    datestr = re.search('[0-9]{8}', filename).group()
    # Strip the leading '_' and the trailing '_<8 digits>' from the match.
    cond = re.search('_[a-z]*_[0-9]{8}', filename).group()[1:-9]
    save_prefix = 'all'
    saveFCGraph = os.path.join(
        savefolder, cond + '_' + save_prefix + '_' + datestr + '.png')
    weight_visual_save(weight,
                       chnInf=assign_coord2chnArea(
                           area_coord_file=area_coord_file,
                           chnAreas=chnAreas),
                       savefile=saveFCGraph,
                       texts=None,
                       threds_edge=None)
def parallel_stats(X, function=_my_wilcoxon, correction="FDR", n_jobs=-1):
    """Run a univariate test on every column of X in parallel.

    Parameters
    ----------
    X : array-like, shape (n_observations, ...)
        Data; all trailing dimensions are treated as independent tests.
    function : callable
        Test forwarded to ``_loop`` together with each chunk of columns.
    correction : "FDR" | None | False
        Multiple-comparison correction applied to the p-values.
    n_jobs : int
        Number of jobs handed to ``parallel_func``; the value it returns is
        used as the number of chunks.

    Returns
    -------
    p_values : ndarray, shape ``X.shape[1:]``
        The (optionally FDR-corrected) p-values.

    Raises
    ------
    ValueError
        If ``correction`` is not one of the supported values.
    """
    from mne.parallel import parallel_func

    if correction not in [False, None, "FDR"]:
        raise ValueError("Unknown correction")

    # Flatten the trailing dimensions with a C-order reshape. The previous
    # in-place ndarray.resize worked in memory order, so Fortran-ordered
    # input was laid out differently from the C-order np.reshape used on the
    # p-values below. reshape never mutates the input, so no copy is needed
    # and the shape does not have to be restored afterwards.
    X = np.asarray(X)
    dims = X.shape
    X_2d = X.reshape(dims[0], int(np.prod(dims[1:])))

    # prepare parallel: one chunk of columns per job
    n_cols = X_2d.shape[1]
    parallel, pfunc, n_jobs = parallel_func(_loop, n_jobs)
    n_chunks = min(n_cols, n_jobs)
    chunks = np.array_split(range(n_cols), n_chunks)
    p_values = parallel(pfunc(X_2d[:, chunk], function) for chunk in chunks)
    p_values = np.reshape(np.hstack(p_values), dims[1:])

    # apply correction
    if correction == "FDR":
        _, corrected = fdr_correction(p_values)
        p_values = np.reshape(corrected, p_values.shape)
    return p_values
stat, stat_extra = STAT_FUN(TRANSFORM_FUN(z_score), d, return_extra=True) h0_distribution = simulate_h0_distribution(n=len(unique_blocks), d=d, transform=TRANSFORM_FUN, stat_fun=STAT_FUN, verbose=False, sim_verbose=True) ps.append(P_VAL_FUN(stat, h0_distribution)) stats.append(stat) stats_extra.append(stat_extra) z_scores.append(z_score) ds.append(d) stats_all_th.append(stats) p_all_th.append(ps) stats_extra_all_th.append(stats_extra) z_scores_all_th.append(z_scores) stats_all_th = np.array(stats_all_th).T _, p_corrected = fdr_correction(np.array(p_all_th).T) stats_all_metrics[metric_type] = stats_all_th p_vals_all_metrics[metric_type] = p_corrected z_scores_all_metrics[metric_type] = np.array(z_scores_all_th) stats_extra_all_metrics[metric_type] = np.array(stats_extra_all_th) # print not-normal samples shapiro_p_vals = np.array(shapiro_p_vals).ravel() fdr_p_shapiro = fdr_correction(shapiro_p_vals) shapiro_names = np.array(shapiro_names).ravel() print('FDR shapiro', shapiro_names[fdr_p_shapiro[0]]) print('Bonferroni shapiro', shapiro_names[shapiro_p_vals < 0.05/len(shapiro_p_vals)]) # plt.figure() # [plt.scatter(x, k*np.ones_like(x), alpha=0.2, color='k') for k, x in enumerate(np.array(shapiro_samples).flatten()[fdr_p_shapiro[0]])] # plt.yticks(np.arange(len(shapiro_names[fdr_p_shapiro[0]])), shapiro_names[fdr_p_shapiro[0]]) # plt.tight_layout()
os.chdir(cwd) emp_p = np.zeros(emp_c.shape) #using PMF calculated across perms for a given contrast #calculated in CombinePMF.py pmf = slab.LoadPermResults(OutputPath,'PMF','msgpack',0)[1] for i in xrange(0,len(emp_c)): if (emp_c[i]>len(pmf)): emp_p[i] = pmf[-1]/np.round(np.sum(pmf)) else: emp_p[i] = np.sum(pmf[int(emp_c[i]):])/np.round(np.sum(pmf)) #FDR correct h, fdr_p = fdr_correction(emp_p,method='indep') slab.SavePermResults(OutputPath,'fdr','msgpack',h.tolist(),fdr_p.tolist(),emp_c.tolist(),emp_p.tolist()) #messy output stuff #select p-value bins, and concatenate rows of FWE #clusters and which are FDR clusters ps = [] pbins = [-0.1,0.00001,0.0001,0.001,0.01,0.05] for ip in xrange(0,len(pbins)-1): fwe = sum(np.logical_and(fwe_p>pbins[ip],fwe_p<=pbins[ip+1])) temp = sum(fwe_p<=0.05) ps.append([fwe,sum(h[np.logical_and(fwe_p>pbins[ip],fwe_p<=pbins[ip+1])])]) all_fdr.append(fdr_p) all_fwe.append(fwe_p)
def _fdr_significant_weights(ciCOH, ntrials, ntemp, fs, alpha=0.1):
    """Return a copy of ciCOH with non FDR-significant entries set to 0."""
    pvals = pvals_fc_overtime(ciCOH=ciCOH, ntrials=ntrials, ntemp=ntemp,
                              f=(freq[0] + freq[1]) / 2, t=ntemp / fs)
    reject, _ = fdr_correction(pvals, alpha=alpha, method='indep')
    rows, cols = np.where(reject)
    weight = np.zeros(ciCOH.shape)
    if len(rows) > 0:
        weight[rows, cols] = ciCOH[rows, cols]
    return weight


def _save_fc_figure(weight, chnAreas, savefile):
    """Draw the weight matrix for the given channel areas and save it."""
    weight_visual_save(weight,
                       chnInf=assign_coord2chnArea(area_coord_file, chnAreas),
                       savefile=savefile,
                       texts=None,
                       threds_edge=None)


def dailyfc_visual(files):
    """Accumulate recordings until enough trials exist, then plot fc graphs.

    Files are concatenated along the trial axis. As soon as at least 100
    trials are available, overlapping 500-sample windows are cut, 500 trials
    are drawn at random (with replacement), and absolute ciCOH values are
    computed. One figure is saved for all channels (its average clustering
    coefficient is appended to 'avgCC.csv') and one for each thalamus /
    cortex / GP sub-area group. The accumulator is then reset.

    Each sub-area figure is built from the sub-area matrix returned by
    ciCOH_select. The previous code indexed the full matrix with indices
    obtained from the sub-area p-values, which mismatched the sub-area
    channel list used for plotting.
    """
    # (file name prefix, areas kept) for every sub-area figure
    subarea_groups = (
        ('leftThaCor_', ['lVA', 'lVLo/VPLo', 'lSMA', 'rSMA', 'M1']),
        ('rightThaCor', ['rVA', 'rVLo/VPLo', 'lSMA', 'rSMA', 'M1']),
        ('gpRightTha', ['rVA', 'rVLo/VPLo', 'GP']),
        ('gpLeftTha', ['lVA', 'lVLo/VPLo', 'GP']),
    )

    cond = None      # parsed once, from the first file
    lfpdatas = None  # trials accumulated since the last figure
    datestrs = None  # dates of the accumulated files joined by '_'

    for onefile in files:
        ### fc extract ###
        filename = os.path.basename(onefile)
        datestr = re.search('[0-9]{8}', filename).group()
        lfpdata, chnAreas, fs = lfp_extract([onefile])
        if cond is None:
            cond = re.search('_[a-z]*_[0-9]{8}', filename).group()[1:-9]
        if lfpdatas is None:
            lfpdatas = lfpdata
            datestrs = datestr
        else:
            lfpdatas = np.concatenate((lfpdatas, lfpdata), axis=2)
            datestrs = datestrs + '_' + datestr

        ### if enough lfpdatas
        if lfpdatas.shape[2] < 500 / 5:
            continue

        # Overlapping 500-sample windows stacked as additional trials.
        windows = (lfpdatas[:, 0:500, :], lfpdatas[:, 125:625, :],
                   lfpdatas[:, 250:750, :], lfpdatas[:, 375:875, :],
                   lfpdatas[:, 500:, :])
        lfpdatas = np.concatenate(windows, axis=2)
        idx_ntrials = np.random.randint(lfpdatas.shape[2], size=500)
        lfpdatas = lfpdatas[:, :, idx_ntrials]
        nchns, ntemp, ntrials = lfpdatas.shape

        ### calc ciCOH for each cond ###
        ciCOH = abs(calc_ciCOHs_rest(lfpdatas))

        ### all ###
        weight = _fdr_significant_weights(ciCOH, ntrials, ntemp, fs)
        saveFCname = cond + '_' + 'all' + '_' + datestrs + '.png'
        _save_fc_figure(weight, chnAreas,
                        os.path.join(savefolder, saveFCname))

        # network metric
        avg_CC = graph_metrics(weight)
        with open(os.path.join(savefolder, 'avgCC.csv'), 'a+') as f:
            f.write("%s,%s\n" % (saveFCname, avg_CC))

        ### sub-area figures ###
        for save_prefix, areas_used in subarea_groups:
            ciCOH_new, chnAreas_new = ciCOH_select(ciCOH, chnAreas,
                                                   areas_used)
            weight = _fdr_significant_weights(ciCOH_new, ntrials, ntemp, fs)
            saveFCGraph = os.path.join(
                savefolder,
                cond + '_' + save_prefix + '_' + datestrs + '.png')
            _save_fc_figure(weight, chnAreas_new, saveFCGraph)

        # Start a fresh accumulation for the following files.
        lfpdatas = None
        datestrs = None
def _expression_matrix(study, profile, platform, genes, sample_ids):
    """Build a (genes x samples) array of expression values from ``geo_data``.

    Entries that ``geo_data`` reports as the string ``'None'`` are left at
    0.0, which is the value the array is initialised with.
    """
    exprs = zeros((len(genes), len(sample_ids)))
    for g_index, gene in enumerate(genes):
        for s_index, sample_id in enumerate(sample_ids):
            expr_value = geo_data.get_gene_expression_value(
                study, profile, platform, sample_id, gene)
            if expr_value == 'None':
                continue
            exprs[g_index, s_index] = expr_value
    return exprs


def statistics(request):
    """
    This function is called when the Statistics button is pressed by the
    user. Its purpose is to take the selected platforms as well as some
    statistical parameters and perform two statistical functions: a T-Test
    and an FDR analysis.

    Query parameters read from ``request.GET``:

    * ``cutoff_type``  -- ``'p-value'`` or ``'fdr-value'``; any other value
      disables the cutoff.
    * ``cutoff_value`` -- threshold, parsed with ``float``.
    * ``spps``         -- comma separated entries, each made of four
      ``|``-separated fields; the last three are study, display profile
      and platform.

    For every entry the genes are ranked by ascending p-value and collected
    as ``[symbol, p_value, fdr_value]`` rows until the chosen cutoff is
    exceeded. The rows are stored in ``request.session['display_values']``
    under the display profile.

    :param request: the incoming HTTP request.
    :return: a rendered HTML page (``statistics.html``) whose context holds
        the rows of the last processed entry.
    """
    cutoff_type = request.GET.get('cutoff_type')
    cutoff_value = float(request.GET.get('cutoff_value'))
    display_values = request.session.get('display_values', {})
    spps = request.GET.get('spps').split(',')

    combined_series = []
    display_profile = None
    for spp in spps:
        _, study, display_profile, platform = spp.split('|')
        profile = display_profile.replace('_', '-')

        # Split the samples into the two groups compared by the t-test.
        control_sample_ids = []
        diseased_sample_ids = []
        for sample_id in geo_data.get_sample_ids(study, profile, platform):
            sample_attributes = geo_data.get_sample_attributes(
                study, profile, platform, sample_id)
            if sample_attributes['control']:
                control_sample_ids.append(sample_id)
            else:
                diseased_sample_ids.append(sample_id)

        genes = geo_data.get_all_gene_symbols(study, profile, platform)
        control_exprs = _expression_matrix(
            study, profile, platform, genes, control_sample_ids)
        diseased_exprs = _expression_matrix(
            study, profile, platform, genes, diseased_sample_ids)

        # One t-test per gene (rows are genes, columns are samples).
        _, p_values = ttest_ind(control_exprs, diseased_exprs, axis=1)

        # The FDR correction needs the p-values of all genes at once.
        _, pval_fdr = fdr_correction(p_values, method='indep')

        # Keep raw and corrected p-values in one table so they stay aligned
        # row by row after sorting, even when a gene symbol occurs twice.
        ranked = DataFrame({'p_value': p_values, 'fdr_value': pval_fdr},
                           index=genes,
                           columns=['p_value', 'fdr_value'])
        ranked = ranked.sort_values('p_value', ascending=True)

        combined_series = []
        for symbol, p_value, fdr_value in zip(ranked.index,
                                              ranked['p_value'],
                                              ranked['fdr_value']):
            if cutoff_type == 'p-value' and p_value > cutoff_value:
                break
            if cutoff_type == 'fdr-value' and fdr_value > cutoff_value:
                break
            combined_series.append([symbol, p_value, fdr_value])
        display_values[display_profile] = combined_series

    request.session['display_values'] = display_values
    response = render_to_string('statistics.html',
                                {display_profile: combined_series})
    return HttpResponse(response)
reject=reject) X = epochs.get_data() # as 3D matrix X = X[:, 0, :] # take only one channel to get a 2D array ############################################################################### # Compute statistic T, pval = stats.ttest_1samp(X, 0) alpha = 0.05 n_samples, n_tests = X.shape threshold_uncorrected = stats.t.ppf(1.0 - alpha, n_samples - 1) reject_bonferroni, pval_bonferroni = bonferroni_correction(pval, alpha=alpha) threshold_bonferroni = stats.t.ppf(1.0 - alpha / n_tests, n_samples - 1) reject_fdr, pval_fdr = fdr_correction(pval, alpha=alpha, method='indep') threshold_fdr = np.min(np.abs(T)[reject_fdr]) ############################################################################### # Plot times = 1e3 * epochs.times import matplotlib.pyplot as plt plt.close('all') plt.plot(times, T, 'k', label='T-stat') xmin, xmax = plt.xlim() plt.hlines(threshold_uncorrected, xmin, xmax, linestyle='--', colors='k',
"rb")) elif condition is "tone": tmp = pickle.load(open( "MI_tone_zscore_DKT_-05-0_resample_crop_deg.p", "rb")) filter_keys = ['pval', 'area', 'obsDiff'] filtered_dict = [] for d in tmp: filtered_dict += [{key: d[key] for key in filter_keys if key in d}] result = pd.DataFrame(columns=filter_keys) result = result.append(filtered_dict, ignore_index=True) result["condition"] = condition result["band"] = band result["rejected"], result["pval_corr"] = fdr_correction(result["pval"]) exec("result_%s_%s=%s" % (condition, band, "result")) for band in bands: for condition in conditions: exec("%s=result_%s_%s" % ("result", condition, band)) print "\nCondition: %s" % (condition) print "\nBand: %s" % band print result[(result["obsDiff"] != 0) & (result["rejected"] == True)] bands=["theta", "alpha", "beta", "gamma_low", "gamma_high"] # bands = ["beta"] conditions = ["degrees"]
# Shared axis limits of both images: time on x, frequency on y.
tf_extent = [times[0], times[-1], frequencies[0], frequencies[-1]]

# Hide everything outside the selected significant cluster(s).
cluster_mask = clusters[np.squeeze(good_clusers)]
T_obs_plot = np.ma.masked_array(T_obs, ~cluster_mask)

plt.figure()
# Grey full map first, then the masked (significant) part in colour on top.
for f_image, cmap in ((T_obs, plt.cm.gray), (T_obs_plot, 'RdBu_r')):
    plt.imshow(f_image, cmap=cmap, extent=tf_extent, aspect='auto',
               origin='lower')
plt.xlabel('time (ms)')
plt.ylabel('Frequency (Hz)')
cluster_title = ('Time-locked response for \'modality by location\' (%s)\n'
                 ' cluster-level corrected (p <= 0.05)' % ch_name)
plt.title(cluster_title)
plt.show()

# now using FDR
mask, _ = fdr_correction(pvals[2])
T_obs_plot2 = np.ma.masked_array(T_obs, ~mask)

plt.figure()
for f_image, cmap in ((T_obs, plt.cm.gray), (T_obs_plot2, 'RdBu_r')):
    plt.imshow(f_image, cmap=cmap, extent=tf_extent, aspect='auto',
               origin='lower')
plt.xlabel('time (ms)')
plt.ylabel('Frequency (Hz)')
fdr_title = ('Time-locked response for \'modality by location\' (%s)\n'
             ' FDR corrected (p <= 0.05)' % ch_name)
plt.title(fdr_title)
plt.show()

# Both, cluster level and FDR correction help getting rid of
from mne.stats import fdr_correction

# change data into dataframe (one row per subject and time point)
data = {'subject': sub_all, 'time': timePoints_all, 'scores': scores_all}
df = pd.DataFrame(data=data)

# one-sample t-test with chance level (0.125), one test per time point
chance_level = 0.125
t_all = np.zeros(n_times_resample)
p_uncorrected = np.zeros(n_times_resample)
for i in range(n_times_resample):
    current_scores = df.loc[df['time'] == time_points_resample[i], 'scores']
    t_all[i], p_uncorrected[i] = stats.ttest_1samp(current_scores,
                                                   chance_level)

# FDR correction only has an effect when it sees the whole family of tests;
# correcting one p-value at a time returns that p-value unchanged. Run it
# once over all time points instead.
p_all = fdr_correction(p_uncorrected)[1]
sig_all = (p_all <= .05).astype(float)

# record significant time points for plot
# sig_all has one entry per resampled time point, so the matching times are
# looked up in time_points_resample (timePoints_all has one entry per
# observation, i.e. per subject *and* time point).
x_sig = np.asarray(time_points_resample)[np.nonzero(sig_all)]
y_sig = np.repeat(0.45, len(x_sig))
# s_sig = t_all[np.nonzero(sig_all)]
def fc_metrics_subareas(fcfile_pickle, subareas=('M1', 'STN', 'GP'),
                        subtitle='M1DBS'):
    """Compute graph metrics of a functional-connectivity matrix restricted
    to selected brain areas and save them next to the input file.

    The pickle at ``fcfile_pickle`` must hold a dict with the keys
    ``'imcohs'``, ``'pvals'`` and ``'chnAreas'``. Channels whose area name
    contains one of ``subareas`` (case-insensitive) are kept. Connections
    surviving FDR correction (alpha = 0.05) of ``pvals`` become graph edges
    weighted by the absolute ``imcohs`` value.

    Args:
        fcfile_pickle: path of the pickled functional-connectivity dict.
        subareas: iterable of area-name substrings selecting the channels.
        subtitle: tag inserted into the name of the output file.

    Returns:
        Path of the written pickle ``metric_<subtitle>_<input file name>``,
        located in the folder of ``fcfile_pickle``. It stores a dict with

        cc: average clustering coefficient of the graph
        nbcs: degree centrality of every node (dict node index -> value)
        chnAreas: area name of every node, in node-index order

    Raises:
        ValueError: if no channel matches any of ``subareas``.
    """
    with open(fcfile_pickle, 'rb') as handle:
        fc = pickle.load(handle)

    # Keep each matching channel exactly once, even when its area name
    # contains several of the requested sub-areas.
    wanted = [sarea.lower() for sarea in subareas]
    idxs_remain = [ci for ci, carea in enumerate(fc['chnAreas'])
                   if any(sarea in carea.lower() for sarea in wanted)]
    if not idxs_remain:
        raise ValueError('no channel of %s belongs to the sub-areas %s'
                         % (fcfile_pickle, list(subareas)))
    chnAreas = [fc['chnAreas'][ci] for ci in idxs_remain]

    # Square sub-matrices (selected rows and columns) of both inputs.
    selection = np.ix_(idxs_remain, idxs_remain)
    imcohs = fc['imcohs'][selection]
    pvals = fc['pvals'][selection]

    # multiple comparison correction, get weights
    reject, pval_corr = fdr_correction(pvals, alpha=0.05, method='indep')
    weight = np.zeros(imcohs.shape)
    weight[reject] = imcohs[reject]
    weight = abs(weight)

    # Undirected graph built from the upper triangle of the weight matrix.
    n_nodes = weight.shape[0]
    G = nx.Graph()
    G.add_nodes_from(np.arange(0, n_nodes))
    for i in range(0, n_nodes - 1):
        for j in range(i + 1, n_nodes):
            if weight[i, j] > 0:
                G.add_edge(i, j, weight=weight[i, j])

    metrics = dict()
    metrics['cc'] = nx.average_clustering(G)
    metrics['nbcs'] = nx.degree_centrality(G)
    # Store the filtered area names: node i of the graph is chnAreas[i].
    metrics['chnAreas'] = chnAreas

    folder, filename = os.path.split(fcfile_pickle)
    metricfile = os.path.join(folder, 'metric_' + subtitle + '_' + filename)
    with open(metricfile, 'wb') as f:
        pickle.dump(metrics, f)

    return metricfile
# Map each level of the predictor (as a string label) to its numeric value.
colors = dict((str(val), val) for val in df[name].unique())
epochs.metadata = df.assign(Intercept=1)  # Add an intercept for later

# One averaged response per level, selected with a metadata query string.
evokeds = dict()
for val in colors:
    query = name + " == " + val
    evokeds[val] = epochs[query].average()
plot_compare_evokeds(evokeds, colors=colors, split_legend=True,
                     cmap=(name + " Percentile", "viridis"))

##############################################################################
# We observe that there appears to be a monotonic dependence of EEG on
# concreteness. We can also conduct a continuous analysis: single-trial level
# regression with concreteness as a continuous (although here, binned)
# feature. We can plot the resulting regression coefficient just like an
# Event-related Potential.
names = ["Intercept", name]
design = epochs.metadata[names]
res = linear_regression(epochs, design, names=names)
for cond in names:
    beta = res[cond].beta
    beta.plot_joint(title=cond, ts_args={'time_unit': 's'},
                    topomap_args={'time_unit': 's'})

##############################################################################
# Because the `linear_regression` function also estimates p values, we can --
# after applying FDR correction for multiple comparisons -- also visualise the
# statistical significance of the regression of word concreteness.
# The :func:`mne.viz.plot_evoked_image` function takes a `mask` parameter.
# If we supply it with a boolean mask of the positions where we can reject
# the null hypothesis, points that are not significant will be shown
# transparently, and if desired, in a different colour palette and surrounded
# by dark contour lines.
concreteness = res["Concreteness"]
reject_H0, fdr_pvals = fdr_correction(concreteness.p_val.data)
evoked = concreteness.beta
evoked.plot_image(mask=reject_H0, time_unit='s')
# Same statistic as before, p-values Bonferroni-corrected from the
# uncorrected ones in ps[0].
ts.append(ts[-1])
p_bonferroni = bonferroni_correction(ps[0])[1]
ps.append(p_bonferroni)
mccs.append(True)
plot_t_p(ts[-1], ps[-1], titles[-1], mccs[-1])

###############################################################################
# False discovery rate (FDR) correction
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# Typically FDR is performed with the Benjamini-Hochberg procedure, which
# is less restrictive than Bonferroni correction for large numbers of
# comparisons (fewer type II errors), but provides less strict control of type
# I errors.

titles.append('FDR')
ts.append(ts[-1])
p_fdr = fdr_correction(ps[0])[1]
ps.append(p_fdr)
mccs.append(True)
plot_t_p(ts[-1], ps[-1], titles[-1], mccs[-1])

###############################################################################
# Non-parametric resampling test with a maximum statistic
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# **Non-parametric resampling tests** can also be used to correct for multiple
# comparisons. In its simplest form, we again do permutations using
# exchangeability under the null hypothesis, but this time we take the
# *maximum statistic across all voxels* in each permutation to form the
# null distribution. The p-value for each voxel from the veridical data
# is then given by the proportion of null distribution values
# that were smaller.
#
# This method has two important features:
vol_data = sio.loadmat("p2_vol_post_plv.mat")["vol_results"]
invol_data = sio.loadmat("p2_invol_post_plv.mat")["inv_results"]

# Indices of the strictly lower triangle of a 68 x 68 matrix.
res_inx = np.tril_indices(68, k=-1)

# One row per entry along the first axis: the lower-triangle values of the
# [:, :, 0, 3] slice of that entry.
vol_test = np.asarray(
    [vol_data[j, :, :, 0, 3][res_inx] for j in range(len(vol_data))])
invol_test = np.asarray(
    [invol_data[j, :, :, 0, 3][res_inx] for j in range(len(invol_data))])

# Independent-samples t-test per matrix entry, then FDR over all entries.
t_stat, pval = stats.ttest_ind(vol_test, invol_test, axis=0)
rejected, pval_fdr = fdr_correction(pval)

# Put the corrected p-values back into matrix form (lower triangle only).
foo = np.zeros((68, 68))
foo[res_inx] = pval_fdr

## Extract labels
labels = []
for lbl in ff["RowNames"]:
    parts = lbl[0][0].split()
    labels.append(parts[0] + "_" + parts[1])
topomap_args = dict( cmap='RdBu_r', # keep values scale scalings=dict(eeg=1), average=0.05) # plot t-values fig = t_vals[predictor].plot_joint(ts_args=ts_args, topomap_args=topomap_args, title='T-values for predictor ' + predictor, times=[.13, .23]) fig.axes[0].set_ylabel('T-value') ############################################################################### # correct p-values for multiple testing and create a mask for non-significant # time point dor each channel. reject_H0, fdr_pvals = fdr_correction(p_vals[predictor], alpha=0.01) # plot t-values, masking non-significant time points. fig = t_vals[predictor].plot_image( time_unit='s', mask=reject_H0, unit=False, # keep values scale scalings=dict(eeg=1)) fig.axes[1].set_title('T-value') ############################################################################### # plot surprise-values as "erp" # only show electrode `B8` pick = epochs.info['ch_names'].index('B8') fig, ax = plt.subplots(figsize=(7, 4)) plot_compare_evokeds(s_vals[predictor],
labels, X, snr=1, wsize = wsize, tstep = tstep) plt.figure() plt.imshow(np.real(F_array[1,:,:])) plt.colorbar() plt.show() df1, df2 = p-1, n_trials-p import scipy p_array = np.zeros(np.hstack([F_array.shape,2])) # compute the p-values, for the F test p_array[:,:,:,0] = 1.0-scipy.stats.f.cdf(np.real(F_array),df1,df2) p_array[:,:,:,1] = 1.0-scipy.stats.f.cdf(np.imag(F_array),df1,df2) # plus FDR comparison? from mne.stats import fdr_correction p_array_ravaled = np.ravel(p_array, order = 'C') reject_array, p_val_corrected = fdr_correction(p_array_ravaled) reject_array = np.reshape(reject_array, p_array.shape, order = 'C') i = 40 plt.figure() plt.subplot(2,3,1) plt.imshow((reject_array[i,:,:,0])) plt.title("real fdr results") plt.subplot(2,3,2) plt.imshow(np.real(F[i,:,:])) plt.colorbar() plt.title("real F") plt.subplot(2,3,3) plt.imshow(np.real(coef[i,:,:,0]))
baseline=(None, 0), reject=reject) X = epochs.get_data() # as 3D matrix X = X[:, 0, :] # take only one channel to get a 2D array ############################################################################### # Compute statistic T, pval = stats.ttest_1samp(X, 0) alpha = 0.05 n_samples, n_tests = X.shape threshold_uncorrected = stats.t.ppf(1.0 - alpha, n_samples - 1) reject_bonferroni, pval_bonferroni = bonferroni_correction(pval, alpha=alpha) threshold_bonferroni = stats.t.ppf(1.0 - alpha / n_tests, n_samples - 1) reject_fdr, pval_fdr = fdr_correction(pval, alpha=alpha, method='indep') threshold_fdr = np.min(np.abs(T)[reject_fdr]) ############################################################################### # Plot times = 1e3 * epochs.times import matplotlib.pyplot as plt plt.close('all') plt.plot(times, T, 'k', label='T-stat') xmin, xmax = plt.xlim() plt.hlines(threshold_uncorrected, xmin, xmax, linestyle='--', colors='k', label='p=0.05 (uncorrected)', linewidth=2) plt.hlines(threshold_bonferroni, xmin, xmax, linestyle='--', colors='r', label='p=0.05 (Bonferroni)', linewidth=2) plt.hlines(threshold_fdr, xmin, xmax, linestyle='--', colors='b',
# The ANOVA returns a tuple f-values and p-values, we will pick the former.
pthresh = 0.00001  # set threshold rather high to save some time
f_thresh = f_threshold_mway_rm(n_replications, factor_levels, effects,
                               pthresh)
tail = 1  # f-test, so tail > 0
n_permutations = 256  # Save some time (the test won't be too sensitive ...)
cluster_test_kwargs = dict(stat_fun=stat_fun, threshold=f_thresh, tail=tail,
                           n_jobs=1, n_permutations=n_permutations,
                           buffer_size=None)
T_obs, clusters, cluster_p_values, h0 = mne.stats.permutation_cluster_test(
    epochs_power, **cluster_test_kwargs)

# FDR-based mask of the statistic map, built from pvals[2].
mask, _ = fdr_correction(pvals[2])
T_obs_plot2 = np.ma.masked_array(T_obs, ~mask)

plt.figure()
tf_extent = [times[0], times[-1], freqs[0], freqs[-1]]
# Grey full map first, then the masked (significant) part in colour on top.
for f_image, cmap in ((T_obs, plt.cm.gray), (T_obs_plot2, 'RdBu_r')):
    plt.imshow(f_image, cmap=cmap, extent=tf_extent, aspect='auto',
               origin='lower')

plt.xlabel('Time (ms)')
plt.ylabel('Frequency (Hz)')
fdr_title = ("Time-locked response for 'modality by location' (%s)\n"
             " FDR corrected (p <= 0.05)" % ch_name)
plt.title(fdr_title)
plt.show()