Example #1
def test_multi_pval_correction():
    """Test pval correction for multi comparison (FDR and Bonferroni)."""
    rng = np.random.RandomState(0)
    X = rng.randn(10, 1000, 10)
    X[:, :50, 0] += 4.0  # 50 significant tests
    alpha = 0.05

    T, pval = stats.ttest_1samp(X, 0)

    n_samples = X.shape[0]
    n_tests = X.size / n_samples
    thresh_uncorrected = stats.t.ppf(1.0 - alpha, n_samples - 1)

    reject_bonferroni, pval_bonferroni = bonferroni_correction(pval, alpha)
    thresh_bonferroni = stats.t.ppf(1.0 - alpha / n_tests, n_samples - 1)
    assert_true(pval_bonferroni.ndim == 2)
    assert_true(reject_bonferroni.ndim == 2)

    fwer = np.mean(reject_bonferroni)
    assert_almost_equal(fwer, alpha, 1)

    reject_fdr, pval_fdr = fdr_correction(pval, alpha=alpha, method='indep')
    assert_true(pval_fdr.ndim == 2)
    assert_true(reject_fdr.ndim == 2)
    thresh_fdr = np.min(np.abs(T)[reject_fdr])
    assert_true(0 <= (reject_fdr.sum() - 50) <= 50 * 1.05)
    assert_true(thresh_uncorrected <= thresh_fdr <= thresh_bonferroni)

    reject_fdr, pval_fdr = fdr_correction(pval, alpha=alpha, method='negcorr')
    thresh_fdr = np.min(np.abs(T)[reject_fdr])
    assert_true(0 <= (reject_fdr.sum() - 50) <= 50 * 1.05)
    assert_true(thresh_uncorrected <= thresh_fdr <= thresh_bonferroni)
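# For reference, a minimal sketch of what the Bonferroni step computes
# (illustrative only; mne.stats.bonferroni_correction is the maintained
# implementation): every p-value is scaled by the number of tests and
# rejection is then tested at the original alpha.
import numpy as np

def bonferroni_sketch(pval, alpha=0.05):
    # scale each p-value by the family size for family-wise error control
    pval = np.asarray(pval)
    pval_corrected = pval * float(pval.size)
    return pval_corrected < alpha, pval_corrected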
Example #2
    def dabest_net_measures(self):
        """Computes statistics on graph measures."""
        self.Net_df = pd.read_pickle(
            self.find(suffix='Graph-Measures-' + self.net_version,
                      filetype='.pkl'))
        # result list, one DataFrame per frequency band
        dabest_list = []
        print('Started Graph Measure Stats.')
        for Freq in self.FrequencyBands.keys():
            with Pool(10) as p:
                freq_list = p.starmap(
                    self._parallel_net_dabest,
                    zip(self.GraphMeasures.keys(),
                        [Freq] * len(self.GraphMeasures.keys())))

            freq_df = pd.concat(freq_list)
            freq_df['Frequency'] = Freq
            dabest_list.append(freq_df)

            # Correct Bootstrapped p-values
            _, t_bon_corrected = bonferroni_correction(
                freq_df['pvalue_students_t'], alpha=0.05)
            _, t_fdr_corrected = fdr_correction(freq_df['pvalue_students_t'],
                                                alpha=0.05,
                                                method='indep')
            freq_df['t_bon_corrected'] = t_bon_corrected
            freq_df['t_fdr_corrected'] = t_fdr_corrected

            _, welch_bon_corrected = bonferroni_correction(
                freq_df['pvalue_welch'], alpha=0.05)
            _, welch_fdr_corrected = fdr_correction(freq_df['pvalue_welch'],
                                                    alpha=0.05,
                                                    method='indep')
            freq_df['welch_bon_corrected'] = welch_bon_corrected
            freq_df['welch_fdr_corrected'] = welch_fdr_corrected

            _, mann_whit_bon_corrected = bonferroni_correction(
                freq_df['pvalue_mann_whitney'], alpha=0.05)
            _, mann_whit_fdr_corrected = fdr_correction(
                freq_df['pvalue_mann_whitney'], alpha=0.05, method='indep')
            freq_df['mann_whit_bon_corrected'] = mann_whit_bon_corrected
            freq_df['mann_whit_fdr_corrected'] = mann_whit_fdr_corrected

        # Dabest Dataframe
        dabest_df = pd.concat(dabest_list)
        # save DataFrame to File
        FileName = self.createFileName(suffix='Graph-Measures-DABEST-' +
                                       self.net_version,
                                       filetype='.pkl')
        FilePath = self.createFilePath(self.NetMeasuresDir, self.net_version,
                                       FileName)
        dabest_df.to_pickle(FilePath)
        print('Graph Measure Statistics done.')
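# Note: in dabest_net_measures above, each frequency band is corrected as its
# own family of tests. To control the error rate across all bands jointly,
# the correction would instead be applied once to the concatenated frame
# after the loop, e.g. (sketch):
#     _, dabest_df['t_fdr_global'] = fdr_correction(
#         dabest_df['pvalue_students_t'], alpha=0.05, method='indep')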
Example #3
def plot_alpha_deciles_vs_pheontypes(ax_outer, pheno_df=None):
    if pheno_df is None:
        pheno_df = pd.read_csv(
            os.path.join(basepath,
                         'Phenotype-Alpha-Shannon__il__il_validation.csv'),
            index_col=0)
    pheno_df = pheno_df.sort_values('alpha')
    pheno_df['alpha-decile'] = pd.qcut(pheno_df['alpha'], 10,
                                       labels=[str(x) for x in range(1, 11)])
    pheno_df = pheno_df[['alpha-decile', 'age', 'bmi', 'hba1c',
                         'bt__fasting_glucose', 'bt__fasting_triglycerides',
                         'bt__hdl_cholesterol', 'alpha']]
    pheno_df['bt__fasting_triglycerides'] = pheno_df[
        'bt__fasting_triglycerides'].apply(lambda x: 10 ** x)
    ax_all = gridspec.GridSpecFromSubplotSpec(pheno_df.shape[1] - 1, 1,
                                              ax_outer, hspace=0.55)
    ax_a = plt.subplot(ax_all[0, 0])
    plt.text(-.35, 1.1, 'a', ha='center', va='center', transform=ax_a.transAxes, fontsize=16)

    phenotype='alpha'
    ax_alpha = plt.subplot(ax_all[pheno_df.shape[1]-2, 0])
    ax_alpha = sns.boxplot(x=pheno_df['alpha-decile'].values.astype(int), y=pheno_df[phenotype].values,
                           color='white',fliersize=0,whis=[5, 95],width=0.5)
    ax_alpha.set_xlabel('Alpha diversity decile',labelpad=2)
    ax_alpha.set_ylabel('Alpha\ndiversity',labelpad=2)
    ax_alpha.set_ylim([1,7])
    ax_alpha.set_yticks([1,4, 7])
    ax_alpha.set_yticklabels([1, 4, 7])
    ax_alpha.spines['right'].set_visible(False)
    ax_alpha.spines['top'].set_visible(False)
    ax_alpha.set_title('')
    ax_alpha.tick_params(top=False, right=False, pad=2)
    pvals=[]
    stats = {}
    for i,phenotype in enumerate(pheno_df.columns):
        if phenotype == 'alpha-decile' or phenotype=='alpha':
            continue
        ax_p = plt.subplot(ax_all[i-1, 0])
        decile_df=pheno_df[['alpha-decile', phenotype]].pivot_table(values=phenotype,
               index=pheno_df[['alpha-decile', phenotype]].index,
               columns='alpha-decile', aggfunc='first')
        # print(decile_df[['1','10']].describe())
        all_stats = {}
        for j in range(10):
            all_stats[j] = [0]*10
            for k in range(j):
                res_rank = ranksums(decile_df[str(k+1)].dropna(), decile_df[str(j+1)].dropna())
                all_stats[j][k] = res_rank[1]
        pd.DataFrame(all_stats).to_csv(os.path.join(FIGURES_DIR,"fig2_stats_%s.csv"%phenotype))
        res_rank = ranksums(decile_df['1'].dropna(),decile_df['10'].dropna())
        res_ks = ks_2samp(decile_df['1'].dropna(),decile_df['10'].dropna())
        stats[phenotype] = [res_rank[1],res_ks[1]]
        ax_p = sns.boxplot(x=pheno_df['alpha-decile'], y=pheno_df[phenotype],
                           color='white',fliersize=0,whis=[5, 95],width=0.6)
        ax_p.set_ylabel(rename[phenotype].replace(' ','\n'),labelpad=2)
        ax_p.set_yticks([limits[phenotype][0],(limits[phenotype][0]+limits[phenotype][1])/2,limits[phenotype][1]])
        ax_p.set_ylim(limits[phenotype])
        params_for_subplots(ax_p,plot_asterix=True)
        plt.subplots_adjust(left=0.3)
    stats_df = pd.DataFrame(stats, index=['RankSum_Pvalue', 'KS_Pvalue']).T
    stats_df['RankSum_Qvalue'] = fdr_correction(
        stats_df['RankSum_Pvalue'].values)[1]
    stats_df['KS_Qvalue'] = fdr_correction(stats_df['KS_Pvalue'].values)[1]
    stats_df.to_csv(os.path.join(FIGURES_DIR, "Figure2_stats.csv"))
Example #4
def fc_visual(fcfile_pickle):

    with open(fcfile_pickle, 'rb') as handle:
        fc = pickle.load(handle)

    imcohs = fc['imcohs']
    pvals = fc['pvals']
    chnAreas = fc['chnAreas']

    # multiple comparison correction, get weights
    reject, pval_corr = fdr_correction(pvals, alpha=0.05, method='indep')
    rows, cols = np.where(reject)
    weight = np.zeros(imcohs.shape)
    if len(rows) > 0:
        weight[rows, cols] = imcohs[rows, cols]

    for co in ['normal', 'mild', 'moderate']:
        if co in fcfile_pickle:
            cond = co

    save_prefix = 'all'
    folder, filename = os.path.split(fcfile_pickle)[0], os.path.split(
        fcfile_pickle)[1]
    saveFCGraph = os.path.join(
        folder,
        'visual_' + filename[:-len('.pickle')] + '_' + save_prefix + '.png')
    texts = dict()
    texts[cond] = [-80, 40, 15]
    texts[animal] = [80, 20, 20]
    weight_visual_save(weight,
                       chnInf=assign_coord2chnArea(
                           area_coord_file=area_coord_file, chnAreas=chnAreas),
                       savefile=saveFCGraph,
                       texts=texts,
                       threds_edge=None)
def parallel_stats(X, function=_my_wilcoxon, correction='FDR', n_jobs=2):

    # check if correction method was provided
    if correction not in [False, None, 'FDR']:
        raise ValueError('Unknown correction')

    # reshape to 2D
    X = np.array(X)
    dims = X.shape
    X.resize([dims[0], np.prod(dims[1:])])

    # prepare parallel
    n_cols = X.shape[1]
    parallel, pfunc, n_jobs = parallel_func(_loop, n_jobs)
    n_chunks = min(n_cols, n_jobs)
    chunks = np.array_split(range(n_cols), n_chunks)
    p_values = parallel(pfunc(X[:, chunk], function) for chunk in chunks)
    p_values = np.reshape(np.hstack(p_values), dims[1:])
    X.resize(dims)

    # apply correction
    if correction == 'FDR':
        dims = p_values.shape
        _, p_values = fdr_correction(p_values)
        p_values = np.reshape(p_values, dims)

    return p_values
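# The helpers `_my_wilcoxon` and `_loop` are not shown in this snippet;
# plausible minimal versions (hypothetical, for illustration only) could be:
import numpy as np
from scipy.stats import wilcoxon

def _my_wilcoxon(x):
    # p-value of a one-sample Wilcoxon signed-rank test against zero
    return wilcoxon(x)[1]

def _loop(X_chunk, function):
    # apply `function` to every column of a 2D chunk
    return [function(X_chunk[:, i]) for i in range(X_chunk.shape[1])]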
Example #6
def truncate_dynfc(dynciCOH, pvals):
    """Truncate fc to 0 where not significant.

    Args:
        dynciCOH: dynamic ciCOHs [nchns * nchns * ntemp]
        pvals: p-value for each value in dynciCOH, shape = dynciCOH.shape

    Returns:
        trunc_dynfc: truncated dynamic fc (0 where not significant,
            abs(dynciCOH) elsewhere)
    """

    # multiple comparison correction, get truncate dynfc
    reject, _ = fdr_correction(pvals, alpha=0.05, method='indep')
    rows, cols, ts = np.where(reject)
    trunc_dynfc = np.zeros(dynciCOH.shape)
    if len(rows) > 0:
        trunc_dynfc[rows, cols, ts] = abs(dynciCOH[rows, cols, ts])

    return trunc_dynfc
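# Illustrative usage with synthetic data (shapes as in the docstring above):
import numpy as np
rng = np.random.RandomState(0)
dynciCOH = rng.randn(4, 4, 100)            # nchns * nchns * ntemp
pvals = rng.uniform(size=dynciCOH.shape)   # one p-value per entry
trunc_dynfc = truncate_dynfc(dynciCOH, pvals)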
Example #7
def cohort_paired_t_tests(configs):
    all_pvals = []
    groupnames = []
    basepath = os.path.join(
        configs["base_path"],
        "for_{}_{}_stats_importance".format(configs["grouping"],
                                            configs["comparison_metric"]))

    for fpath in sorted(glob.glob(os.path.join(basepath, "*.csv"))):
        groupname = fpath.split("/")[-1][:-4]
        a = []
        b = []
        with open(fpath, 'r') as fp:
            csv_fp = csv.reader(fp, delimiter=',')
            next(csv_fp)
            for line in csv_fp:
                if line[1] == "" or line[2] == "":
                    continue
                a.append(float(line[1]))
                b.append(float(line[2]))

        stats, pval = sp_stats.ttest_rel(a, b)
        if np.isnan(pval):
            continue
        groupnames.append(groupname)
        all_pvals.append(pval)

    rej, corr = mne_stats.fdr_correction(all_pvals, method="indep")

    for idx, gname in enumerate(groupnames):
        print("Group; {}, Corrected pval: {}".format(gname, corr[idx]))
Example #8
    def perform_test(self, genotype_info, clip_reads, clip_coverage, rna_reads,
                     rna_coverage):
        asprin_test = defaultdict(lambda: defaultdict(list))
        asprin_pvalues = []
        asprin_odds_ratio = []
        for chrom in genotype_info:
            for pos in genotype_info[chrom]:
                if (genotype_info[chrom][pos][2] != "none" and
                        clip_coverage[chrom][pos] >= self.minimum_coverage and
                        rna_coverage[chrom][pos] >= self.minimum_coverage):
                    ref, alt = genotype_info[chrom][pos][:2]
                    asprin_test[chrom][pos] = stats.fisher_exact(
                        [[clip_reads[chrom][pos][ref],
                          clip_reads[chrom][pos][alt]],
                         [rna_reads[chrom][pos][ref],
                          rna_reads[chrom][pos][alt]]],
                        'two-sided')
                    asprin_pvalues.append(asprin_test[chrom][pos][1])
                    asprin_odds_ratio.append(asprin_test[chrom][pos][0])

        alpha = 0.1
        reject_fdr, asprin_qvalues = fdr_correction(asprin_pvalues,
                                                    alpha=alpha,
                                                    method='indep')

        return asprin_qvalues, asprin_odds_ratio
Example #9
def digitized_dynfc(dynciCOH, pvals):
    """Digitize fc to be 1 or 0.

    Args:
        dynciCOH: dynamic ciCOHs [nchns * nchns * ntemp]
        pvals: p-value for each value in dynciCOH, shape = dynciCOH.shape

    Returns:
        digi_dynfc: digitized dynamic fc (value is 0 or 1)
    """

    # multiple comparison correction, get digitized dynfc
    reject, _ = fdr_correction(pvals, alpha=0.05, method='indep')
    rows, cols, ts = np.where(reject)
    digi_dynfc = np.zeros(dynciCOH.shape)
    if len(rows) > 0:
        digi_dynfc[rows, cols, ts] = 1

    return digi_dynfc
Example #10
def unpair_fdr_t_test(cond1, cond2):
    """Unpaired t-test with FDR correction for multiple comparisons."""
    from scipy.stats import ttest_ind
    from mne.stats import fdr_correction

    stat, p_vals = ttest_ind(cond1, cond2)
    _, p_vals_corr = fdr_correction(p_vals)
    return p_vals_corr
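# Illustrative usage with synthetic data (20 observations per group,
# 100 tests):
import numpy as np
rng = np.random.RandomState(0)
cond1 = rng.randn(20, 100) + 0.5  # shifted mean in the first group
cond2 = rng.randn(20, 100)
p_vals_corr = unpair_fdr_t_test(cond1, cond2)
print((p_vals_corr < 0.05).sum(), 'tests significant after FDR')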
Example #11
def JoinAndParse():
    results = glob.glob(os.path.join(output_dir, '*_LMM_results.txt'))
    objs = [pd.read_csv(res, sep='\t') for res in results]
    obj = pd.concat(objs).set_index('Phenotype').sort_values('microbiome-association index', ascending=False)
    obj['H2'] = obj['microbiome-association index']
    obj['CI_low'] = obj['95% CI'].apply(lambda x: float(x.split(' - ')[0]))
    obj['CI_high'] = obj['95% CI'].apply(lambda x: float(x.split(' - ')[1]))
    obj['Q value'] = fdr_correction(obj['P value'])[1]
    obj[['H2', 'CI_low', 'CI_high', 'P value', 'Q value', 'Sample size', 'V(G)', 'V(e)', 'mean', 'age',
         'gender']].to_csv(os.path.join(output_dir, 'LMM_results.csv'))
Example #12
def fc_visual_subAreas(fcfile_pickle,
                       subareas=['M1', 'STN', 'GP'],
                       subtitle='M1DBS'):

    with open(fcfile_pickle, 'rb') as handle:
        fc = pickle.load(handle)

    imcohs = fc['imcohs']
    pvals = fc['pvals']
    chnAreas = fc['chnAreas']

    idxs_remain = []
    chnAreas_new = []
    for ci, carea in enumerate(chnAreas):
        for sarea in subareas:
            if sarea.lower() in carea.lower():
                idxs_remain.append(ci)
                chnAreas_new.append(carea)

    idxs_remain = np.array(idxs_remain)

    tmp = imcohs[idxs_remain, :]
    tmp = tmp[:, idxs_remain]
    imcohs = tmp
    tmp = pvals[idxs_remain, :]
    tmp = tmp[:, idxs_remain]
    pvals = tmp

    chnAreas = chnAreas_new

    # multiple comparison correction, get weights
    reject, pval_corr = fdr_correction(pvals, alpha=0.05, method='indep')
    rows, cols = np.where(reject)
    weight = np.zeros(imcohs.shape)
    if len(rows) > 0:
        weight[rows, cols] = imcohs[rows, cols]

    for co in ['normal', 'mild', 'moderate']:
        if co in fcfile_pickle:
            cond = co

    folder, filename = os.path.split(fcfile_pickle)[0], os.path.split(
        fcfile_pickle)[1]
    saveFCGraph = os.path.join(
        folder,
        'visual_' + filename[:-len('.pickle')] + '_' + subtitle + '.png')
    texts = dict()
    texts[cond] = [-80, 40, 15]
    texts[animal] = [80, 20, 20]
    weight_visual_save(weight,
                       chnInf=assign_coord2chnArea(
                           area_coord_file=area_coord_file, chnAreas=chnAreas),
                       savefile=saveFCGraph,
                       texts=texts,
                       threds_edge=None)
Example #13
def fdr_correction_matrix(p_value_matrix, template=None):
    """
    Take a matrix of p-values and return the p-values corrected for
    False Discovery Rate (FDR).
    If not all statistical tests have been performed (typically in DTI at
    an absent connection), a template matrix with the same shape as
    p_value_matrix (a binary matrix with 1 where a test was performed and
    0 elsewhere) can be provided.
    """
    
    import numpy as np
    from mne.stats import fdr_correction

    if isinstance(template, np.ndarray):
        if p_value_matrix.shape != template.shape:
            raise IOError(
                'p_value_matrix and template should have the same shape.')
        p_value_corrected = np.ones(p_value_matrix.shape)
        reject_test = np.zeros(p_value_matrix.shape, dtype=bool)
        eff_p_value = []
        index_of_eff_p_value = []
        for i in np.arange(0, p_value_matrix.shape[0]):
            for j in np.arange(0, i):
                if template[j, i] == 1:
                    eff_p_value += [p_value_matrix[j, i]]
                    index_of_eff_p_value += [(j, i)]
        reject, p_corrected = fdr_correction(eff_p_value)     
        for i, corrected in enumerate(p_corrected):
            p_value_corrected[
                index_of_eff_p_value[i][0], index_of_eff_p_value[i][1]
            ] = corrected
            reject_test[
                index_of_eff_p_value[i][0], index_of_eff_p_value[i][1]
            ] = reject[i]
    elif template is None:
        reject_test, p_value_corrected = fdr_correction(p_value_matrix)
    else:
        raise IOError('template input should be an numpy array or None.')
    return reject_test, p_value_corrected
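# Illustrative usage (synthetic upper-triangular p-values; the template marks
# which tests were actually performed):
import numpy as np
rng = np.random.RandomState(0)
n = 5
p_mat = np.ones((n, n))
template = np.zeros((n, n))
iu = np.triu_indices(n, k=1)
p_mat[iu] = rng.uniform(size=len(iu[0]))
template[iu] = 1
reject, p_corr = fdr_correction_matrix(p_mat, template=template)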
Example #14
def test_multi_pval_correction():
    """Test pval correction for multi comparison (FDR and Bonferroni)."""
    rng = np.random.RandomState(0)
    X = rng.randn(10, 1000, 10)
    X[:, :50, 0] += 4.0  # 50 significant tests
    alpha = 0.05

    T, pval = stats.ttest_1samp(X, 0)

    n_samples = X.shape[0]
    n_tests = X.size / n_samples
    thresh_uncorrected = stats.t.ppf(1.0 - alpha, n_samples - 1)

    reject_bonferroni, pval_bonferroni = bonferroni_correction(pval, alpha)
    thresh_bonferroni = stats.t.ppf(1.0 - alpha / n_tests, n_samples - 1)
    assert pval_bonferroni.ndim == 2
    assert reject_bonferroni.ndim == 2
    assert_allclose(pval_bonferroni / 10000, pval)
    reject_expected = pval_bonferroni < alpha
    assert_array_equal(reject_bonferroni, reject_expected)

    fwer = np.mean(reject_bonferroni)
    assert_almost_equal(fwer, alpha, 1)

    reject_fdr, pval_fdr = fdr_correction(pval, alpha=alpha, method='indep')
    assert pval_fdr.ndim == 2
    assert reject_fdr.ndim == 2
    thresh_fdr = np.min(np.abs(T)[reject_fdr])
    assert 0 <= (reject_fdr.sum() - 50) <= 50 * 1.05
    assert thresh_uncorrected <= thresh_fdr <= thresh_bonferroni
    pytest.raises(ValueError, fdr_correction, pval, alpha, method='blah')
    assert np.all(fdr_correction(pval, alpha=0)[0] == 0)

    reject_fdr, pval_fdr = fdr_correction(pval, alpha=alpha, method='negcorr')
    thresh_fdr = np.min(np.abs(T)[reject_fdr])
    assert 0 <= (reject_fdr.sum() - 50) <= 50 * 1.05
    assert thresh_uncorrected <= thresh_fdr <= thresh_bonferroni
Example #15
def subArea_dailyfc_visual(files):
    
    for onefile in files:
        lfpdata, chnAreas, fs = lfp_extract([onefile])

        if lfpdata.shape[2] < 80:
            continue


        print(onefile)
        ciCOHs = calc_ciCOHs_rest(lfpdata)




        # permutation test: use the lfp data whose ciCOHs are the largest to get  distribution
        [i, j] = np.unravel_index(np.argmax(ciCOHs), shape = ciCOHs.shape)
        lfp1, lfp2 = lfpdata[i, :, :], lfpdata[j, :, :]
        _, mu, std = pval_permciCOH_rest(lfp1, lfp2, ciCOHs[i, j], shuffleN = 1000)


        cond = re.search('_[a-z]*_[0-9]{8}', files[0]).group()[1:-9]
        datestr = re.search('[0-9]{8}', os.path.basename(onefile)).group()


        ### left thalamus and SMA/M1 ###
        save_prefix = 'leftThaCor_' 
        areas_used = ['lVA', 'lVLo/VPLo', 'lSMA', 'rSMA','M1']

        # subareas selection
        ciCOH_new, chnAreas_new = ciCOH_select(ciCOHs, chnAreas, areas_used)
        
        
        # multiple comparison correction, get weight matrix
        pvals = norm.sf(abs(ciCOH_new), loc=mu, scale=std) * 2
        reject, pval_corr = fdr_correction(pvals, alpha=0.05, method='indep')
        rows, cols = np.where(reject)
        weight = np.zeros(ciCOH_new.shape)
        if len(rows) > 0:
            weight[rows, cols] = ciCOH_new[rows, cols]

        # visual and save
        saveFCGraph = os.path.join(savefolder, cond + '_' + save_prefix + '_' + datestr + '.png')
        texts = dict()
        texts[datestr] = [80, 50, 15]
        weight_visual_save(weight, chnInf = assign_coord2chnArea(area_coord_file, chnAreas_new), 
                            savefile = saveFCGraph, texts = None, threds_edge = None)
        del ciCOH_new, chnAreas_new, save_prefix, areas_used
        del saveFCGraph, weight
Example #16
def dailyfc_visual(files):

    for onefile in files:
        lfpdata, chnAreas, fs = lfp_extract([onefile])

        if lfpdata.shape[2] < 80:
            continue

        print(onefile)
        ciCOHs = calc_ciCOHs_rest(lfpdata)

        # permutation test: use the lfp data whose ciCOHs are the largest to get  distribution
        [i, j] = np.unravel_index(np.argmax(ciCOHs), shape=ciCOHs.shape)
        lfp1, lfp2 = lfpdata[i, :, :], lfpdata[j, :, :]
        _, mu, std = pval_permciCOH_rest(lfp1,
                                         lfp2,
                                         ciCOHs[i, j],
                                         shuffleN=1000)
        pvals = norm.sf(abs(ciCOHs), loc=mu, scale=std) * 2

        # multiple comparison correction, get weights
        reject, pval_corr = fdr_correction(pvals, alpha=0.05, method='indep')
        rows, cols = np.where(reject)
        weight = np.zeros(ciCOHs.shape)
        if len(rows) > 0:
            weight[rows, cols] = ciCOHs[rows, cols]

        # visual and save
        filename = os.path.basename(onefile)
        datestr = re.search('[0-9]{8}', filename).group()
        cond = re.search('_[a-z]*_[0-9]{8}', filename).group()[1:-9]
        freqstr = 'freq' + re.search('_filtered[0-9]*_[0-9]*',
                                     filename).group()[len('_filtered'):]

        save_prefix = 'all'
        saveFCGraph = os.path.join(
            savefolder,
            freqstr + '_' + cond + '_' + save_prefix + '_' + datestr + '.png')
        texts = dict()
        texts[cond + ',' + datestr] = [-80, 50, 15]
        weight_visual_save(weight,
                           chnInf=assign_coord2chnArea(
                               area_coord_file=area_coord_file,
                               chnAreas=chnAreas),
                           savefile=saveFCGraph,
                           texts=texts,
                           threds_edge=None)

        del texts, datestr, cond, weight
Example #17
def mass_univ_corrected(X):
    from scipy.stats import mannwhitneyu
    from mne.stats import fdr_correction
    X0 = np.zeros(X.shape)
    X_flat = X.reshape(X.shape[0], -1)
    X0_flat = X0.reshape(X0.shape[0], -1)
    pvals = np.zeros(X_flat.shape[-1])
    for i in range(len(pvals)):
        stat, p = mannwhitneyu(
            X_flat[:, i], X0_flat[:, i],
            alternative='greater')  #alternative='two-sided')
        pvals[i] = p
    pvals = pvals.reshape(*X.shape[1:])
    mask, _ = fdr_correction(pvals, alpha=0.05)
    pvalsm = np.ma.masked_where(~mask, pvals)
    return pvalsm
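# Illustrative usage (synthetic positive effects; X is trials x space x time):
import numpy as np
rng = np.random.RandomState(0)
X = np.abs(rng.randn(20, 8, 8))
pvalsm = mass_univ_corrected(X)  # masked wherever FDR does not reject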
Example #18
def fc_metrics(fcfile_pickle):
    """
        cc: average clustering coefficient

        nbcs: node degree centrality
    """

    with open(fcfile_pickle, 'rb') as handle:
        fc = pickle.load(handle)

    imcohs = fc['imcohs']
    pvals = fc['pvals']

    # multiple comparison correction, get weights
    reject, pval_corr = fdr_correction(pvals, alpha=0.05, method='indep')
    rows, cols = np.where(reject)
    weight = np.zeros(imcohs.shape)
    if len(rows) > 0:
        weight[rows, cols] = imcohs[rows, cols]

    weight = abs(weight)

    G = nx.Graph()
    G.add_nodes_from(np.arange(0, weight.shape[0]))

    for i in range(0, weight.shape[0] - 1):
        for j in range(i + 1, weight.shape[0]):
            if weight[i, j] > 0:
                G.add_edge(i, j, weight=weight[i, j])

    cc = nx.average_clustering(G)
    nbcs = nx.degree_centrality(G)

    folder, filename = os.path.split(fcfile_pickle)[0], os.path.split(
        fcfile_pickle)[1]
    metricfile = os.path.join(folder, 'metric_' + filename)

    metrics = dict()
    metrics['cc'] = cc
    metrics['nbcs'] = nbcs
    metrics['chnAreas'] = fc['chnAreas']

    with open(metricfile, 'wb') as f:
        pickle.dump(metrics, f)

    return metricfile
Example #19
def comb_fc(filepatt):
    """
        combine all fc figures belonging to the same pattern
    """

    files = glob.glob(os.path.join(savefolder, filepatt))
    print(filepatt)

    if files == []:
        print('No files found for ' + filepatt)
        return

    for fi, file in enumerate(files):
        img = cv2.imread(file)
        if fi == 0:
            imgs = img
        else:
            # stack the figures vertically (axis=0); concatenating along
            # axis=2 would stack color channels instead of tiling images
            imgs = np.concatenate((imgs, img), axis=0)

    idx = filepatt.find('freq')
    comb_fcGraph = os.path.join(savefolder,
                                'comb_' + filepatt[idx:-len('.mat')])
    cv2.imwrite(comb_fcGraph, imgs)
    print(comb_fcGraph)

    # find lowweight: the smallest |ciCOH| surviving FDR correction.
    # ciCOH, ntrials, ntemp, f and t are assumed to come from the enclosing
    # module scope, as in the other examples on this page.
    pvals = pvals_fc_overtime(ciCOH=ciCOH, ntrials=ntrials, ntemp=ntemp,
                              f=f, t=t)
    reject, pval_corr = fdr_correction(pvals, alpha=0.05, method='indep')
    ciCOH_vec = np.abs(ciCOH)[reject]
    lowweight = ciCOH_vec.min() if ciCOH_vec.size > 0 else 0

    return lowweight
Example #20
    def binomial(self, df_freqs_test, df_freqs_control, epsilon_p, apply_fdr):

        # expected probability for binomial distribution based on control samples
        num_controls = len(df_freqs_control.columns)
        p = df_freqs_control.count(axis=1) / num_controls
        p[p == 0] = epsilon_p
        p.index = df_freqs_control.index

        N = len(df_freqs_test.columns)
        b_test_df = pd.DataFrame(list(zip(df_freqs_test.count(axis=1), p)))

        b_test_df['pvals'] = b_test_df.apply(lambda x: stats.binom_test(x[0], N, x[1], alternative="greater"), axis=1)
        if apply_fdr:
            (dummy, b_test_df['FDR']) = fdr_correction(b_test_df['pvals'].replace(np.nan, 1), alpha=0.05,
                                                       method='indep')
        b_test_df.index = df_freqs_test.index

        if apply_fdr:
            return b_test_df['pvals'], b_test_df['FDR']
        else:
            return b_test_df['pvals']
Example #21
def check_whiteness_and_consistency(X, E, alpha=0.05):
    """
    Check the whiteness and consistency of the MVAR model.

    Test whiteness with Durbin-Watson and FDR correction
    for multiple comparisons.

    Parameters:
    ----------
    X : np.array of shape (n_sources, n_times, n_epochs)
        The data array.
    E : np.array
        Serially uncorrelated residuals.

    Returns:
    --------
    whi : bool
        Whiteness after FDR correction.
    cons: float
        Result of the consistency test.
    dw : np.array
        The Durbin-Watson statistics.
    pval : float
        The uncorrected p-values corresponding to the DW-statistics.
    """

    whi = True
    dw, pval = dw_whiteness(X, E)

    from mne.stats import fdr_correction
    reject, pval_corrected = fdr_correction(pval, alpha=alpha)
    if reject.any():
        # test if serial correlation is present in at least one residual
        whi = False

    cons = consistency(X, E)

    return whi, cons, dw, pval
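# `dw_whiteness` and `consistency` come from the surrounding MVAR code and
# are not shown here. For a single residual series, the Durbin-Watson
# statistic itself is simple (a minimal sketch):
import numpy as np

def durbin_watson_sketch(e):
    # ratio of squared successive differences to the residual sum of squares;
    # values near 2 indicate no first-order serial correlation
    return np.sum(np.diff(e) ** 2) / np.sum(e ** 2)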
Example #22
def segfc_visual(onefile):

    # lfpdata: nchns * ntemp * nsegs
    lfpdata, chnAreas, fs = lfp_extract([onefile])

    nchns, _, nsegs = lfpdata.shape
    seg_ciCOHs = np.zeros(shape=(nchns, nchns, nsegs))
    for segi in range(nsegs):
        seg_ciCOHs[:, :, segi] = calc_ciCOHs_rest(
            np.expand_dims(lfpdata[:, :, segi], axis=2))
    # the overall ciCOHs used below are taken to be the average across
    # segments (an assumption; this snippet does not define ciCOHs itself)
    ciCOHs = np.mean(seg_ciCOHs, axis=2)

    # permutation test: use the lfp data whose ciCOHs are the largest to get  distribution
    [i, j] = np.unravel_index(np.argmax(ciCOHs), shape=ciCOHs.shape)
    lfp1, lfp2 = lfpdata[i, :, :], lfpdata[j, :, :]
    _, mu, std = pval_permciCOH_rest(lfp1, lfp2, ciCOHs[i, j], shuffleN=1000)
    pvals = norm.sf(abs(ciCOHs), loc=mu, scale=std) * 2

    # multiple comparison correction, get weights
    reject, pval_corr = fdr_correction(pvals, alpha=0.05, method='indep')
    rows, cols = np.where(reject)
    weight = np.zeros(ciCOHs.shape)
    if len(rows) > 0:
        weight[rows, cols] = ciCOHs[rows, cols]

    # visual and save
    filename = os.path.basename(onefile)
    datestr = re.search('[0-9]{8}', filename).group()
    cond = re.search('_[a-z]*_[0-9]{8}', filename).group()[1:-9]

    save_prefix = 'all'
    saveFCGraph = os.path.join(
        savefolder, cond + '_' + save_prefix + '_' + datestr + '.png')
    weight_visual_save(weight,
                       chnInf=assign_coord2chnArea(
                           area_coord_file=area_coord_file, chnAreas=chnAreas),
                       savefile=saveFCGraph,
                       texts=None,
                       threds_edge=None)
Example #23
def parallel_stats(X, function=_my_wilcoxon, correction="FDR", n_jobs=-1):
    from mne.parallel import parallel_func

    if correction not in [False, None, "FDR"]:
        raise ValueError("Unknown correction")
    # reshape to 2D
    X = np.array(X)
    dims = X.shape
    X.resize([dims[0], np.prod(dims[1:])])
    # prepare parallel
    n_cols = X.shape[1]
    parallel, pfunc, n_jobs = parallel_func(_loop, n_jobs)
    n_chunks = min(n_cols, n_jobs)
    chunks = np.array_split(range(n_cols), n_chunks)
    p_values = parallel(pfunc(X[:, chunk], function) for chunk in chunks)
    p_values = np.reshape(np.hstack(p_values), dims[1:])
    X.resize(dims)
    # apply correction
    if correction == "FDR":
        dims = p_values.shape
        _, p_values = fdr_correction(p_values)
        p_values = np.reshape(p_values, dims)
    return p_values
Example #24
            stat, stat_extra = STAT_FUN(TRANSFORM_FUN(z_score), d, return_extra=True)
            h0_distribution = simulate_h0_distribution(n=len(unique_blocks), d=d, transform=TRANSFORM_FUN,
                                                       stat_fun=STAT_FUN, verbose=False, sim_verbose=True)
            ps.append(P_VAL_FUN(stat, h0_distribution))
            stats.append(stat)
            stats_extra.append(stat_extra)
            z_scores.append(z_score)
            ds.append(d)
        stats_all_th.append(stats)
        p_all_th.append(ps)
        stats_extra_all_th.append(stats_extra)
        z_scores_all_th.append(z_scores)


    stats_all_th = np.array(stats_all_th).T
    _, p_corrected = fdr_correction(np.array(p_all_th).T)
    stats_all_metrics[metric_type] = stats_all_th
    p_vals_all_metrics[metric_type] = p_corrected
    z_scores_all_metrics[metric_type] = np.array(z_scores_all_th)
    stats_extra_all_metrics[metric_type] = np.array(stats_extra_all_th)

# print not-normal samples
shapiro_p_vals = np.array(shapiro_p_vals).ravel()
fdr_p_shapiro = fdr_correction(shapiro_p_vals)
shapiro_names = np.array(shapiro_names).ravel()
print('FDR shapiro', shapiro_names[fdr_p_shapiro[0]])
print('Bonferroni shapiro', shapiro_names[shapiro_p_vals < 0.05/len(shapiro_p_vals)])
# plt.figure()
# [plt.scatter(x, k*np.ones_like(x), alpha=0.2, color='k') for k, x in enumerate(np.array(shapiro_samples).flatten()[fdr_p_shapiro[0]])]
# plt.yticks(np.arange(len(shapiro_names[fdr_p_shapiro[0]])), shapiro_names[fdr_p_shapiro[0]])
# plt.tight_layout()
Example #25
            os.chdir(cwd)

            emp_p = np.zeros(emp_c.shape)
            
            #using PMF calculated across perms for a given contrast
            #calculated in CombinePMF.py
            pmf = slab.LoadPermResults(OutputPath,'PMF','msgpack',0)[1]
            for i in range(len(emp_c)):
                if (emp_c[i]>len(pmf)):
                    emp_p[i] = pmf[-1]/np.round(np.sum(pmf))
                else:
                    emp_p[i] = np.sum(pmf[int(emp_c[i]):])/np.round(np.sum(pmf))

            #FDR correct
            h, fdr_p = fdr_correction(emp_p,method='indep')

            slab.SavePermResults(OutputPath,'fdr','msgpack',h.tolist(),fdr_p.tolist(),emp_c.tolist(),emp_p.tolist())
            
            #messy output stuff
            #select p-value bins, and concatenate rows of FWE
            #clusters and which are FDR clusters
            ps = []
            pbins = [-0.1,0.00001,0.0001,0.001,0.01,0.05]
            for ip in range(len(pbins) - 1):
                fwe = sum(np.logical_and(fwe_p>pbins[ip],fwe_p<=pbins[ip+1]))
                temp = sum(fwe_p<=0.05)
                ps.append([fwe,sum(h[np.logical_and(fwe_p>pbins[ip],fwe_p<=pbins[ip+1])])])

            all_fdr.append(fdr_p)
            all_fwe.append(fwe_p)
Example #26
def dailyfc_visual(files):

    ### fc extract ###
    for onefile in files:
        
        filename = os.path.basename(onefile)
        datestr = re.search('[0-9]{8}', filename).group()

        lfpdata, chnAreas, fs = lfp_extract([onefile])

        if 'cond' not in locals():
            cond = re.search('_[a-z]*_[0-9]{8}', filename).group()[1:-9]
        
        if 'lfpdatas' not in locals():
            lfpdatas = lfpdata
            datestrs = datestr
        else:
            lfpdatas = np.concatenate((lfpdatas, lfpdata), axis = 2)
            datestrs = datestrs + '_' + datestr 


        del lfpdata, datestr
        

        ### if enough lfpdatas
        if lfpdatas.shape[2] >= 500 / 5:

            lfp1, lfp2 = lfpdatas[:, 0:500, :], lfpdatas[:, 125:625, :]
            lfp3, lfp4 = lfpdatas[:, 250:750, :], lfpdatas[:, 375:875, :]
            lfp5= lfpdatas[:, 500:, :]
            lfpdatas = np.concatenate((lfp1, lfp2, lfp3, lfp4, lfp5), axis=2)

            idx_ntrials = np.random.randint(lfpdatas.shape[2], size = 500)
            lfpdatas = lfpdatas[:, :, idx_ntrials]
            nchns, ntemp, ntrials = lfpdatas.shape
            
            
            ### calc ciCOH for each cond ###
            ciCOH = calc_ciCOHs_rest(lfpdatas)
            ciCOH = abs(ciCOH)

            
            ### all ##
            save_prefix = 'all'
            
            # get weight matrix
            pvals = pvals_fc_overtime(ciCOH = ciCOH, ntrials = ntrials, ntemp = ntemp, f = (freq[0] + freq[1])/2, t = ntemp/fs)
            reject, pval_corr = fdr_correction(pvals, alpha = 0.1, method='indep')
            rows, cols = np.where(reject)
            weight = np.zeros(ciCOH.shape)
            if len(rows) > 0:
                weight[rows, cols] = ciCOH[rows, cols]

            # visual and save
            saveFCname = cond + '_'  + save_prefix + '_' + datestrs + '.png'
            saveFCGraph = os.path.join(savefolder, saveFCname)
            weight_visual_save(weight, chnInf = assign_coord2chnArea(area_coord_file, chnAreas), 
                                savefile = saveFCGraph, texts = None, threds_edge = None)


            # network metric
            avg_CC = graph_metrics(weight)
            d = {saveFCname: avg_CC}
            with open(os.path.join(savefolder, 'avgCC.csv'), 'a+') as f:
                for key in d.keys():
                    f.write("%s,%s\n"%(key,d[key]))
            
            del avg_CC, d
            del pvals, reject, pval_corr, rows, cols
            del saveFCGraph, weight, save_prefix, saveFCname

            
            
            ### left thalamus and SMA/M1 ###
            save_prefix = 'leftThaCor_' 
            areas_used = ['lVA', 'lVLo/VPLo', 'lSMA', 'rSMA','M1']

            # subareas selection
            ciCOH_new, chnAreas_new = ciCOH_select(ciCOH, chnAreas, areas_used)
            
            
            # get weight matrix
            pvals = pvals_fc_overtime(ciCOH = ciCOH_new, ntrials = ntrials, ntemp = ntemp, f = (freq[0] + freq[1])/2, t = ntemp/fs)
            reject, pval_corr = fdr_correction(pvals, alpha = 0.1, method='indep')
            rows, cols = np.where(reject)
            weight = np.zeros(ciCOH_new.shape)
            if len(rows) > 0:
                weight[rows, cols] = ciCOH_new[rows, cols]

            # visual and save
            saveFCGraph = os.path.join(savefolder, cond + '_' + save_prefix + '_' + datestrs + '.png')
            weight_visual_save(weight, chnInf = assign_coord2chnArea(area_coord_file, chnAreas_new), 
                                savefile = saveFCGraph, texts = None, threds_edge = None)
            del ciCOH_new, chnAreas_new, save_prefix, areas_used
            del saveFCGraph, weight




            ### right thalamus and SMA/M1 ###
            save_prefix = 'rightThaCor'
            areas_used = ['rVA', 'rVLo/VPLo', 'lSMA', 'rSMA','M1']
            
            # subareas selection
            ciCOH_new, chnAreas_new = ciCOH_select(ciCOH, chnAreas, areas_used)

            # get weight matrix
            pvals = pvals_fc_overtime(ciCOH = ciCOH_new, ntrials = ntrials, ntemp = ntemp, f = (freq[0] + freq[1])/2, t = ntemp/fs)
            reject, pval_corr = fdr_correction(pvals, alpha = 0.1, method='indep')
            rows, cols = np.where(reject)
            weight = np.zeros(ciCOH_new.shape)
            if len(rows) > 0:
                weight[rows, cols] = ciCOH_new[rows, cols]

            # visual and save
            saveFCGraph = os.path.join(savefolder, cond + '_' + save_prefix + '_' + datestrs + '.png')
            weight_visual_save(weight, chnInf = assign_coord2chnArea(area_coord_file, chnAreas_new), 
                                savefile = saveFCGraph, texts = None, threds_edge = None)
            del ciCOH_new, chnAreas_new, save_prefix, areas_used
            del saveFCGraph, weight
           

            
            ### right thalamus and GP ###
            save_prefix = 'gpRightTha'
            areas_used = ['rVA', 'rVLo/VPLo', 'GP']
            
            # subareas selection
            ciCOH_new, chnAreas_new = ciCOH_select(ciCOH, chnAreas, areas_used)

            # get weight matrix
            pvals = pvals_fc_overtime(ciCOH = ciCOH_new, ntrials = ntrials, ntemp = ntemp, f = (freq[0] + freq[1])/2, t = ntemp/fs)
            reject, pval_corr = fdr_correction(pvals, alpha = 0.1, method='indep')
            rows, cols = np.where(reject)
            weight = np.zeros(ciCOH_new.shape)
            if len(rows) > 0:
                weight[rows, cols] = ciCOH_new[rows, cols]

            # visual and save
            saveFCGraph = os.path.join(savefolder, cond + '_' + save_prefix + '_' + datestrs + '.png')
            weight_visual_save(weight, chnInf = assign_coord2chnArea(area_coord_file, chnAreas_new), 
                                savefile = saveFCGraph, texts = None, threds_edge = None)
            del ciCOH_new, chnAreas_new, save_prefix, areas_used
            del saveFCGraph, weight



            ### left thalamus and GP ###
            save_prefix = 'gpLeftTha'
            areas_used = ['lVA', 'lVLo/VPLo', 'GP']
            
            # subareas selection
            ciCOH_new, chnAreas_new = ciCOH_select(ciCOH, chnAreas, areas_used)

            # get weight matrix
            pvals = pvals_fc_overtime(ciCOH = ciCOH_new, ntrials = ntrials, ntemp = ntemp, f = (freq[0] + freq[1])/2, t = ntemp/fs)
            reject, pval_corr = fdr_correction(pvals, alpha = 0.1, method='indep')
            rows, cols = np.where(reject)
            weight = np.zeros(ciCOH_new.shape)
            if len(rows) > 0:
                weight[rows, cols] = ciCOH_new[rows, cols]

            # visual and save
            saveFCGraph = os.path.join(savefolder, cond + '_' + save_prefix + '_' + datestrs + '.png')
            weight_visual_save(weight, chnInf = assign_coord2chnArea(area_coord_file, chnAreas_new), 
                                savefile = saveFCGraph, texts = None, threds_edge = None)
            del ciCOH_new, chnAreas_new, save_prefix, areas_used
            del saveFCGraph, weight



            del lfpdatas, idx_ntrials, datestrs
            del ciCOH
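# The four sub-network blocks above differ only in `save_prefix` and
# `areas_used`; a sketch of an equivalent loop (same helpers assumed):
#
#     subnets = [('leftThaCor_', ['lVA', 'lVLo/VPLo', 'lSMA', 'rSMA', 'M1']),
#                ('rightThaCor', ['rVA', 'rVLo/VPLo', 'lSMA', 'rSMA', 'M1']),
#                ('gpRightTha', ['rVA', 'rVLo/VPLo', 'GP']),
#                ('gpLeftTha', ['lVA', 'lVLo/VPLo', 'GP'])]
#     for save_prefix, areas_used in subnets:
#         ciCOH_new, chnAreas_new = ciCOH_select(ciCOH, chnAreas, areas_used)
#         pvals = pvals_fc_overtime(ciCOH=ciCOH_new, ntrials=ntrials,
#                                   ntemp=ntemp, f=(freq[0] + freq[1]) / 2,
#                                   t=ntemp / fs)
#         reject, _ = fdr_correction(pvals, alpha=0.1, method='indep')
#         weight = np.where(reject, ciCOH_new, 0)
#         saveFCGraph = os.path.join(
#             savefolder, cond + '_' + save_prefix + '_' + datestrs + '.png')
#         weight_visual_save(weight,
#                            chnInf=assign_coord2chnArea(area_coord_file,
#                                                        chnAreas_new),
#                            savefile=saveFCGraph, texts=None,
#                            threds_edge=None)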
Example #27
def statistics(request):
    """
    This function is called when the Statistics button is pressed by the user.
    Its purpose is to take the selected platforms, together with some
    statistical parameters, and perform two statistical analyses: a t-test
    and an FDR correction.

    :param request:
    :return: a rendered HTML page.
    """
    cutoff_type = request.GET.get('cutoff_type')
    cutoff_value = float(request.GET.get('cutoff_value'))
    display_values = request.session.get('display_values', {})
    spps = request.GET.get('spps')
    spps = spps.split(',')
    combined_series = []
    display_profile = None
    for spp in spps:
        _, study, display_profile, platform = spp.split('|')
        profile = display_profile.replace('_', '-')
        sample_ids = geo_data.get_sample_ids(study, profile, platform)
        control_sample_ids = []
        diseased_sample_ids = []
        for sample_id in sample_ids:
            sample_attributes = geo_data.get_sample_attributes(study, profile, platform, sample_id)
            if sample_attributes['control']:
                control_sample_ids.append(sample_id)
            else:
                diseased_sample_ids.append(sample_id)

        genes = geo_data.get_all_gene_symbols(study, profile, platform)
        no_of_genes = len(genes)
        control_exprs = zeros((no_of_genes, len(control_sample_ids)))
        diseased_exprs = zeros((no_of_genes, len(diseased_sample_ids)))

        for (g_index, gene) in enumerate(genes):
            gene_exprs = zeros(len(control_sample_ids))
            for (s_index, sample_id) in enumerate(control_sample_ids):
                expr_value = geo_data.get_gene_expression_value(study, profile, platform, sample_id, gene)
                if expr_value == 'None':
                    continue
                gene_exprs[s_index] = expr_value
            control_exprs[g_index] = gene_exprs

            gene_exprs = zeros(len(diseased_sample_ids))
            for (s_index, sample_id) in enumerate(diseased_sample_ids):
                expr_value = geo_data.get_gene_expression_value(study, profile, platform, sample_id, gene)
                if expr_value == 'None':
                    continue
                gene_exprs[s_index] = expr_value
            diseased_exprs[g_index] = gene_exprs

        control_df = DataFrame(control_exprs, index=genes, columns=control_sample_ids)
        diseased_df = DataFrame(diseased_exprs, index=genes, columns=diseased_sample_ids)

        # Perform the t-test and create a pandas Series
        t_statistics, p_values = ttest_ind(control_df.T, diseased_df.T)
        p_values_series = Series(p_values, index=genes)

        # Perform the fdr analysis, create a pandas Series and sort the series
        reject_fdr, pval_fdr = fdr_correction(p_values_series, method='indep')
        fdr_values_series = Series(pval_fdr, index=genes)
        p_values_series = p_values_series.sort_values(ascending=True)

        combined_series = []
        for i in range(len(p_values_series)):
            symbol = p_values_series.index[i]
            p_value = p_values_series.iloc[i]
            if cutoff_type == 'p-value' and p_value > cutoff_value:
                break
            fdr_value = fdr_values_series.get(symbol)
            if cutoff_type == 'fdr-value' and fdr_value > cutoff_value:
                break
            combined_series.append([symbol, p_value, fdr_value])

        display_values[display_profile] = combined_series

    request.session['display_values'] = display_values
    response = render_to_string('statistics.html',
                                {display_profile: combined_series})

    return HttpResponse(response)
Example #28
                    reject=reject)
X = epochs.get_data()  # as 3D matrix
X = X[:, 0, :]  # take only one channel to get a 2D array

###############################################################################
# Compute statistic
T, pval = stats.ttest_1samp(X, 0)
alpha = 0.05

n_samples, n_tests = X.shape
threshold_uncorrected = stats.t.ppf(1.0 - alpha, n_samples - 1)

reject_bonferroni, pval_bonferroni = bonferroni_correction(pval, alpha=alpha)
threshold_bonferroni = stats.t.ppf(1.0 - alpha / n_tests, n_samples - 1)

reject_fdr, pval_fdr = fdr_correction(pval, alpha=alpha, method='indep')
threshold_fdr = np.min(np.abs(T)[reject_fdr])

###############################################################################
# Plot
times = 1e3 * epochs.times

import matplotlib.pyplot as plt
plt.close('all')
plt.plot(times, T, 'k', label='T-stat')
xmin, xmax = plt.xlim()
plt.hlines(threshold_uncorrected, xmin, xmax, linestyle='--', colors='k',
           label='p=0.05 (uncorrected)', linewidth=2)
        elif condition == "tone":
            tmp = pickle.load(open(
                "MI_tone_zscore_DKT_-05-0_resample_crop_deg.p", "rb"))

        filter_keys = ['pval', 'area', 'obsDiff']
        filtered_dict = []
        for d in tmp:
            filtered_dict += [{key: d[key] for key in filter_keys if key in d}]

        result = pd.DataFrame(filtered_dict, columns=filter_keys)
        result["condition"] = condition
        result["band"] = band

        result["rejected"], result["pval_corr"] = fdr_correction(result["pval"])

        exec("result_%s_%s=%s" % (condition, band, "result"))

for band in bands:
    for condition in conditions:
            exec("%s=result_%s_%s" % ("result", condition, band))
            print("\nCondition: %s" % condition)
            print("\nBand: %s" % band)
            print(result[(result["obsDiff"] != 0)
                         & result["rejected"]])

bands=["theta", "alpha", "beta", "gamma_low", "gamma_high"]
# bands = ["beta"]
conditions = ["degrees"]
T_obs_plot = np.ma.masked_array(T_obs,
                                np.invert(clusters[np.squeeze(good_clusers)]))

plt.figure()
for f_image, cmap in zip([T_obs, T_obs_plot], [plt.cm.gray, 'RdBu_r']):
    plt.imshow(f_image, cmap=cmap, extent=[times[0], times[-1],
               frequencies[0], frequencies[-1]], aspect='auto',
               origin='lower')
plt.xlabel('time (ms)')
plt.ylabel('Frequency (Hz)')
plt.title('Time-locked response for \'modality by location\' (%s)\n'
          ' cluster-level corrected (p <= 0.05)' % ch_name)
plt.show()

# now using FDR
mask, _ = fdr_correction(pvals[2])
T_obs_plot2 = np.ma.masked_array(T_obs, np.invert(mask))

plt.figure()
for f_image, cmap in zip([T_obs, T_obs_plot2], [plt.cm.gray, 'RdBu_r']):
    plt.imshow(f_image, cmap=cmap, extent=[times[0], times[-1],
               frequencies[0], frequencies[-1]], aspect='auto',
               origin='lower')

plt.xlabel('time (ms)')
plt.ylabel('Frequency (Hz)')
plt.title('Time-locked response for \'modality by location\' (%s)\n'
          ' FDR corrected (p <= 0.05)' % ch_name)
plt.show()

# Both, cluster level and FDR correction help getting rid of
from mne.stats import fdr_correction

# change data into dataframe
data = {'subject': sub_all, 'time': timePoints_all, 'scores': scores_all}
df = pd.DataFrame(data=data)

# one-sample t-test against chance level (0.125)
t_all = np.zeros(n_times_resample)
p_all = np.zeros(n_times_resample)
for i in range(n_times_resample):
    current_time = time_points_resample[i]
    current_scores = df[df['time'].isin([current_time])]
    t, p_twoTail = stats.ttest_1samp(current_scores['scores'], 0.125)
    t_all[i] = t
    p_all[i] = p_twoTail

# FDR correction is applied jointly across all time points; correcting a
# single p-value at a time would leave it unchanged
reject, p_all = fdr_correction(p_all, alpha=.05)
sig_all = reject.astype(float)

# record significant time points for plot
x_sig = timePoints_all[np.nonzero(sig_all)]
y_sig = np.repeat(0.45, len(x_sig))
# s_sig = t_all[np.nonzero(sig_all)]
Example #32
def fc_metrics_subareas(fcfile_pickle,
                        subareas=['M1', 'STN', 'GP'],
                        subtitle='M1DBS'):
    """
        cc: average clustering coefficient

        nbcs: node degree centrality
    """

    with open(fcfile_pickle, 'rb') as handle:
        fc = pickle.load(handle)

    imcohs = fc['imcohs']
    pvals = fc['pvals']
    chnAreas = fc['chnAreas']

    idxs_remain = []
    chnAreas_new = []
    for ci, carea in enumerate(chnAreas):
        for sarea in subareas:
            if sarea.lower() in carea.lower():
                idxs_remain.append(ci)
                chnAreas_new.append(carea)

    idxs_remain = np.array(idxs_remain)

    tmp = imcohs[idxs_remain, :]
    tmp = tmp[:, idxs_remain]
    imcohs = tmp

    tmp = pvals[idxs_remain, :]
    tmp = tmp[:, idxs_remain]
    pvals = tmp

    chnAreas = chnAreas_new

    # multiple comparison correction, get weights
    reject, pval_corr = fdr_correction(pvals, alpha=0.05, method='indep')
    rows, cols = np.where(reject)
    weight = np.zeros(imcohs.shape)
    if len(rows) > 0:
        weight[rows, cols] = imcohs[rows, cols]

    weight = abs(weight)

    G = nx.Graph()
    G.add_nodes_from(np.arange(0, weight.shape[0]))

    for i in range(0, weight.shape[0] - 1):
        for j in range(i + 1, weight.shape[0]):
            if weight[i, j] > 0:
                G.add_edge(i, j, weight=weight[i, j])

    cc = nx.average_clustering(G)
    nbcs = nx.degree_centrality(G)

    folder, filename = os.path.split(fcfile_pickle)[0], os.path.split(
        fcfile_pickle)[1]
    metricfile = os.path.join(folder, 'metric_' + subtitle + '_' + filename)

    metrics = dict()
    metrics['cc'] = cc
    metrics['nbcs'] = nbcs
    metrics['chnAreas'] = fc['chnAreas']

    with open(metricfile, 'wb') as f:
        pickle.dump(metrics, f)

    return metricfile
ts.append(ts[-1])
ps.append(bonferroni_correction(ps[0])[1])
mccs.append(True)
plot_t_p(ts[-1], ps[-1], titles[-1], mccs[-1])

###############################################################################
# False discovery rate (FDR) correction
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# Typically FDR is performed with the Benjamini-Hochberg procedure, which
# is less restrictive than Bonferroni correction for large numbers of
# comparisons (fewer type II errors), but provides less strict control of type
# I errors.

titles.append('FDR')
ts.append(ts[-1])
ps.append(fdr_correction(ps[0])[1])
mccs.append(True)
plot_t_p(ts[-1], ps[-1], titles[-1], mccs[-1])
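# For reference, a minimal sketch of the Benjamini-Hochberg step-up procedure
# on a flat array of p-values (illustrative only; mne.stats.fdr_correction is
# the maintained implementation):
import numpy as np

def bh_fdr_sketch(pvals, alpha=0.05):
    pvals = np.asarray(pvals)
    n = len(pvals)
    order = np.argsort(pvals)
    # compare sorted p-values against the growing threshold alpha * k / n
    below = pvals[order] <= alpha * np.arange(1, n + 1) / n
    reject = np.zeros(n, dtype=bool)
    if below.any():
        k = np.max(np.nonzero(below)[0])  # largest rank passing the test
        reject[order[:k + 1]] = True
    return reject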

###############################################################################
# Non-parametric resampling test with a maximum statistic
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# **Non-parametric resampling tests** can also be used to correct for multiple
# comparisons. In its simplest form, we again do permutations using
# exchangeability under the null hypothesis, but this time we take the
# *maximum statistic across all voxels* in each permutation to form the
# null distribution. The p-value for each voxel from the veridical data
# is then given by the proportion of null distribution values
# that were smaller.
#
# This method has two important features:
vol_data = sio.loadmat("p2_vol_post_plv.mat")["vol_results"]
invol_data = sio.loadmat("p2_invol_post_plv.mat")["inv_results"]

res_inx = np.tril_indices(68, k=-1)

vol_test = []

for j in range(len(vol_data)):
    tmp = vol_data[j, :, :, 0, 3]
    vol_test.append(tmp[res_inx])

invol_test = []

for j in range(len(invol_data)):
    tmp = invol_data[j, :, :, 0, 3]
    invol_test.append(tmp[res_inx])

vol_test = np.asarray(vol_test)
invol_test = np.asarray(invol_test)

t_stat, pval = stats.ttest_ind(vol_test, invol_test, axis=0)

rejected, pval_fdr = fdr_correction(pval)

foo = np.zeros([68, 68])
foo[res_inx] = pval_fdr

## Extract labels
labels = [lbl[0][0].split()[0] + "_" + lbl[0][0].split()[1]
          for lbl in ff["RowNames"]]
Example #36
topomap_args = dict(
    cmap='RdBu_r',
    # keep values scale
    scalings=dict(eeg=1),
    average=0.05)
# plot t-values
fig = t_vals[predictor].plot_joint(ts_args=ts_args,
                                   topomap_args=topomap_args,
                                   title='T-values for predictor ' + predictor,
                                   times=[.13, .23])
fig.axes[0].set_ylabel('T-value')

###############################################################################
# correct p-values for multiple testing and create a mask of non-significant
# time points for each channel.
reject_H0, fdr_pvals = fdr_correction(p_vals[predictor], alpha=0.01)
# plot t-values, masking non-significant time points.
fig = t_vals[predictor].plot_image(
    time_unit='s',
    mask=reject_H0,
    unit=False,
    # keep values scale
    scalings=dict(eeg=1))
fig.axes[1].set_title('T-value')

###############################################################################
# plot surprise-values as "erp"
# only show electrode `B8`
pick = epochs.info['ch_names'].index('B8')
fig, ax = plt.subplots(figsize=(7, 4))
plot_compare_evokeds(s_vals[predictor],
                                labels, X, snr=1, wsize = wsize, tstep = tstep)
plt.figure()
plt.imshow(np.real(F_array[1,:,:]))
plt.colorbar()
plt.show()  

df1, df2 = p-1, n_trials-p
import scipy
p_array = np.zeros(np.hstack([F_array.shape,2]))
# compute the p-values, for the F test
p_array[:,:,:,0] = 1.0-scipy.stats.f.cdf(np.real(F_array),df1,df2) 
p_array[:,:,:,1] = 1.0-scipy.stats.f.cdf(np.imag(F_array),df1,df2)
# plus FDR comparison?
from mne.stats import fdr_correction
p_array_ravaled = np.ravel(p_array, order = 'C')
reject_array, p_val_corrected = fdr_correction(p_array_ravaled)
reject_array = np.reshape(reject_array, p_array.shape, order = 'C')



i = 40
plt.figure()
plt.subplot(2,3,1)
plt.imshow((reject_array[i,:,:,0]))
plt.title("real fdr results")
plt.subplot(2,3,2)
plt.imshow(np.real(F[i,:,:]))
plt.colorbar()
plt.title("real F")
plt.subplot(2,3,3)
plt.imshow(np.real(coef[i,:,:,0]))
                    baseline=(None, 0), reject=reject)
X = epochs.get_data()  # as 3D matrix
X = X[:, 0, :]  # take only one channel to get a 2D array

###############################################################################
# Compute statistic
T, pval = stats.ttest_1samp(X, 0)
alpha = 0.05

n_samples, n_tests = X.shape
threshold_uncorrected = stats.t.ppf(1.0 - alpha, n_samples - 1)

reject_bonferroni, pval_bonferroni = bonferroni_correction(pval, alpha=alpha)
threshold_bonferroni = stats.t.ppf(1.0 - alpha / n_tests, n_samples - 1)

reject_fdr, pval_fdr = fdr_correction(pval, alpha=alpha, method='indep')
threshold_fdr = np.min(np.abs(T)[reject_fdr])

###############################################################################
# Plot
times = 1e3 * epochs.times

import matplotlib.pyplot as plt
plt.close('all')
plt.plot(times, T, 'k', label='T-stat')
xmin, xmax = plt.xlim()
plt.hlines(threshold_uncorrected, xmin, xmax, linestyle='--', colors='k',
           label='p=0.05 (uncorrected)', linewidth=2)
plt.hlines(threshold_bonferroni, xmin, xmax, linestyle='--', colors='r',
           label='p=0.05 (Bonferroni)', linewidth=2)
plt.hlines(threshold_fdr, xmin, xmax, linestyle='--', colors='b',
           label='p=0.05 (FDR)', linewidth=2)
# The ANOVA returns a tuple f-values and p-values, we will pick the former.
pthresh = 0.00001  # set threshold rather high to save some time
f_thresh = f_threshold_mway_rm(n_replications, factor_levels, effects, pthresh)
tail = 1  # f-test, so tail > 0
n_permutations = 256  # Save some time (the test won't be too sensitive ...)
T_obs, clusters, cluster_p_values, h0 = mne.stats.permutation_cluster_test(
    epochs_power,
    stat_fun=stat_fun,
    threshold=f_thresh,
    tail=tail,
    n_jobs=1,
    n_permutations=n_permutations,
    buffer_size=None)

mask, _ = fdr_correction(pvals[2])
T_obs_plot2 = np.ma.masked_array(T_obs, np.invert(mask))

plt.figure()
for f_image, cmap in zip([T_obs, T_obs_plot2], [plt.cm.gray, 'RdBu_r']):
    plt.imshow(f_image,
               cmap=cmap,
               extent=[times[0], times[-1], freqs[0], freqs[-1]],
               aspect='auto',
               origin='lower')

plt.xlabel('Time (ms)')
plt.ylabel('Frequency (Hz)')
plt.title("Time-locked response for 'modality by location' (%s)\n"
          " FDR corrected (p <= 0.05)" % ch_name)
plt.show()
Example #40
colors = {str(val): val for val in df[name].unique()}
epochs.metadata = df.assign(Intercept=1)  # Add an intercept for later
evokeds = {val: epochs[name + " == " + val].average() for val in colors}
plot_compare_evokeds(evokeds, colors=colors, split_legend=True,
                     cmap=(name + " Percentile", "viridis"))

##############################################################################
# We observe that there appears to be a monotonic dependence of EEG on
# concreteness. We can also conduct a continuous analysis: single-trial level
# regression with concreteness as a continuous (although here, binned)
# feature. We can plot the resulting regression coefficient just like an
# Event-related Potential.
names = ["Intercept", name]
res = linear_regression(epochs, epochs.metadata[names], names=names)
for cond in names:
    res[cond].beta.plot_joint(title=cond, ts_args=dict(time_unit='s'),
                              topomap_args=dict(time_unit='s'))

##############################################################################
# Because the `linear_regression` function also estimates p values, we can --
# after applying FDR correction for multiple comparisons -- also visualise the
# statistical significance of the regression of word concreteness.
# The :func:`mne.viz.plot_evoked_image` function takes a `mask` parameter.
# If we supply it with a boolean mask of the positions where we can reject
# the null hypothesis, points that are not significant will be shown
# transparently, and if desired, in a different colour palette and surrounded
# by dark contour lines.
reject_H0, fdr_pvals = fdr_correction(res["Concreteness"].p_val.data)
evoked = res["Concreteness"].beta
evoked.plot_image(mask=reject_H0, time_unit='s')