def mne_permutation_ttest(group, threshold, fc, tail=1):
    """Return thresholded ``-log10`` p-values of a permutation t-test for one group.

    Parameters
    ----------
    group
        Key into ``dataset.group_indices`` (``dataset`` comes from the
        enclosing scope); the indices found there select entries of ``fc``.
    threshold
        p-value cut-off. Results whose ``-log10(p)`` is smaller than
        ``-log10(threshold)`` are set to 0.
    fc
        Indexable collection; the entries selected for ``group`` are stacked
        into one array and passed to ``permutation_t_test``.
    tail
        Forwarded unchanged to ``permutation_t_test``.

    Returns
    -------
    numpy.ndarray
        ``-log10`` of the p-values returned by ``permutation_t_test``, with
        sub-threshold entries zeroed.
    """
    # Stack the selected entries so the test receives a single array.
    group_fc = np.array([fc[idx] for idx in dataset.group_indices[group]])
    _, p_values, _ = permutation_t_test(group_fc, tail=tail, n_jobs=3)

    neg_log_p = -np.log10(p_values)
    # A small -log10(p) means a large p: blank out everything that does not
    # reach the requested threshold.
    neg_log_p[neg_log_p < -np.log10(threshold)] = 0
    return neg_log_p
def pair_perm_max_stat_t_test(cond1, cond2):
    """Paired permutation t-test with maximum statistic correction for MC.

    The element-wise difference ``cond1 - cond2`` is passed to
    ``mne.stats.permutation_t_test`` with 100000 permutations.

    Parameters
    ----------
    cond1, cond2 : numpy.ndarray
        The two paired conditions; both must expose ``.shape`` and the shapes
        must be equal.

    Returns
    -------
    p_vals_corr
        The second value returned by ``permutation_t_test`` (the p-values).

    Raises
    ------
    AttributeError
        If either input has no ``shape`` attribute.
    ValueError
        If the two inputs have different shapes.
    """
    # Validate explicitly instead of with ``assert``: assertions are removed
    # under ``python -O``, which would silently disable these checks.
    try:
        shape1, shape2 = cond1.shape, cond2.shape
    except AttributeError:
        raise AttributeError('Both conditions should be of type numpy.ndarray')
    if shape1 != shape2:
        raise ValueError('Different sizes for cond1 with shape {} '
                         'and cond2 with shape {}'.format(shape1, shape2))

    # Imported only after validation so that bad input fails fast.
    from mne.stats import permutation_t_test

    diff = cond1 - cond2
    T_obs, p_vals_corr, H0 = permutation_t_test(diff, n_permutations=100000)
    return p_vals_corr
include = []  # or stim channel ['STI 014']
raw.info['bads'] += ['MEG 2443', 'EEG 053']  # bads + 2 more

# pick MEG Gradiometers
picks = mne.pick_types(
    raw.info,
    meg='grad',
    eeg=False,
    stim=False,
    eog=True,
    include=include,
    exclude='bads',
)
epochs = mne.Epochs(
    raw,
    events,
    event_id,
    tmin,
    tmax,
    picks=picks,
    baseline=(None, 0),
    reject={'grad': 4000e-13, 'eog': 150e-6},
)
data = epochs.get_data()
times = epochs.times

# Average every epoch/channel over the samples between 0.04 and 0.06
# (inclusive, on the ``epochs.times`` axis), leaving a 2-D array.
temporal_mask = (0.04 <= times) & (times <= 0.06)
data = data[:, :, temporal_mask].mean(axis=2)

n_permutations = 50000
T0, p_values, H0 = permutation_t_test(data, n_permutations, n_jobs=2)

# ``picks`` holds channel indices into ``raw``; keep those with p <= 0.05.
significant_sensors = picks[p_values <= 0.05]
significant_sensors_names = []
for k in significant_sensors:
    significant_sensors_names.append(raw.ch_names[k])

print("Number of significant sensors : %d" % len(significant_sensors_names))
print("Sensors names : %s" % significant_sensors_names)

###############################################################################
# View location of significantly active sensors

# One column of -log10(p) per channel, wrapped as a single-sample Evoked.
evoked = mne.EvokedArray(-np.log10(p_values)[:, None], epochs.info, tmin=0.)

# Extract mask and indices of active sensors in layout
stats_picks = mne.pick_channels(evoked.ch_names, significant_sensors_names)
include = []  # or stim channel ['STI 014']
raw.info['bads'] += ['MEG 2443', 'EEG 053']  # bads + 2 more

# pick MEG Gradiometers
picks = fiff.pick_types(raw.info, meg='grad', eeg=False, stim=False, eog=True,
                        include=include, exclude='bads')
epochs = mne.Epochs(raw, events, event_id, tmin, tmax, picks=picks,
                    baseline=(None, 0), reject=dict(grad=4000e-13, eog=150e-6))
data = epochs.get_data()
times = epochs.times

temporal_mask = np.logical_and(0.04 <= times, times <= 0.06)
# Averaging over axis 2 already yields a 2-D array. No ``np.squeeze`` here:
# it would also drop the first or second axis whenever its length is 1,
# and ``permutation_t_test`` needs a 2-D input.
data = np.mean(data[:, :, temporal_mask], axis=2)

n_permutations = 50000
T0, p_values, H0 = permutation_t_test(data, n_permutations, n_jobs=2)

# ``picks`` holds channel indices into ``raw``; keep those with p <= 0.05.
significant_sensors = picks[p_values <= 0.05]
significant_sensors_names = [raw.info['ch_names'][k]
                             for k in significant_sensors]

# Single-argument ``print(...)`` gives the same output on Python 2 and 3.
print("Number of significant sensors : %d" % len(significant_sensors))
print("Sensors names : %s" % significant_sensors_names)

###############################################################################
# View location of significantly active sensors
import pylab as pl

# load sensor layout
from mne.layouts import read_layout
layout = read_layout('Vectorview-grad')
X, y, scoring="accuracy", cv=3, n_permutations=100, n_jobs=1) #scores = cross_val_multiscore(time_decod, X, y, cv=5, n_jobs=1) scores = cross_val_multiscore(time_decod, X, y, cv=cv, n_jobs=1) # Mean scores across cross-validation splits scores = np.mean(scores, axis=0) class_balance = np.mean(y == y[0]) class_balance = max(class_balance, 1. - class_balance) # Plot fig, ax = plt.subplots() ax.plot(epochs.times, scores, label='score') ax.axhline(class_balance, color='k', linestyle='--', label='chance') ax.set_xlabel('Times') ax.set_ylabel('AUC') # Area Under the Curve ax.legend() ax.axvline(.0, color='k', linestyle='-') ax.set_title('Sensor space decoding') plt.show() from mne.stats import permutation_t_test n_permutations = 50000 T0, p_values, H0 = permutation_t_test(scores, n_permutations, n_jobs=1)
# (which is :math:`2^{N_{samp}}-1` for a one-tailed and
# :math:`2^{N_{samp}-1}-1` for a two-tailed test, not counting the
# veridical distribution), instead of randomly exchanging conditions
# the null is formed from using all possible exchanges. This is known
# as a permutation test (or exact test).

# Here we have to do a bit of gymnastics to get our function to do
# a permutation test without correcting for multiple comparisons:

X.shape = (n_subjects, n_src)  # flatten the array for simplicity
titles.append('Permutation')
ts.append(np.zeros(width * width))
ps.append(np.zeros(width * width))
mccs.append(False)
for ii in range(n_src):
    # One single-column test per source, so nothing is corrected across
    # sources. Each result is a length-1 array: take its element explicitly
    # instead of relying on array-to-scalar conversion (deprecated in NumPy).
    t_ii, p_ii = permutation_t_test(X[:, [ii]], verbose=False)[:2]
    ts[-1][ii], ps[-1][ii] = t_ii[0], p_ii[0]
plot_t_p(ts[-1], ps[-1], titles[-1], mccs[-1])

###############################################################################
# Multiple comparisons
# --------------------
# So far, we have done no correction for multiple comparisons. This is
# potentially problematic for these data because there are
# :math:`40 \cdot 40 = 1600` tests being performed. If we use a threshold
# p < 0.05 for each individual test, we would expect many voxels to be declared
# significant even if there were no true effect. In other words, we would make
# many **type I errors** (adapted from `here <errors_>`_):
#
# .. rst-class:: skinnytable
#
# +----------+--------+------------------+------------------+
# instead of randomly exchanging conditions the null is formed
# from using all possible exchanges. This is known as a permutation
# test (or exact test) form of a non-parametric resampling test.

# Here we have to do a bit of gymnastics to get our function to do
# a permutation test without correcting for multiple comparisons:

# Let's flatten the array for simplicity
X.shape = (n_subjects, n_src)
titles.append('Permutation')
ts.append(np.zeros(width * width))
ps.append(np.zeros(width * width))
mccs.append(False)
for ii in range(n_src):
    # One single-column test per source, verbose only on the first call.
    # Each result is a length-1 array: take its element explicitly instead of
    # relying on array-to-scalar conversion (deprecated in NumPy).
    t_ii, p_ii = permutation_t_test(X[:, [ii]], verbose=(ii == 0))[:2]
    ts[-1][ii], ps[-1][ii] = t_ii[0], p_ii[0]
plot_t_p(ts[-1], ps[-1], titles[-1], mccs[-1])

###############################################################################
# Multiple comparisons
# --------------------
# So far, we have done no correction for multiple comparisons. This is
# potentially problematic for these data because there are
# :math:`40 \times 40 = 1600` tests being performed. If we just use
# a threshold ``p < 0.05`` for all of our tests, we would expect many
# voxels to be declared significant even if there were no true effect.
# In other words, we would make many **type I errors** (adapted from
# `here <https://en.wikipedia.org/wiki/Type_I_and_type_II_errors>`_):
#
# .. rst-class:: skinnytable
#
def test_measures_corr(subjects):
    """Build a per-subject measures table, correlate its columns and plot.

    Steps, in order:

    1. Read the trial table ``tables/s_trial_dat.csv`` under ``study_path``
       and pass it through ``set_dif_and_rt_exp``.
    2. Load decoding results with ``load_decoding(subjects)`` and derive, per
       subject, the diagonal of the score matrix, its mean in the
       (-0.2, 0.7) window of ``times``, its peak value and the peak latency.
    3. Plot histograms of ``rt_exp`` / ``dif`` per condition (90 and 70) and
       the mean diagonal decoding curve.
    4. Combine ``tables/pow_table.csv``, ``tables/conn_x_subj_lon_sho.csv``
       and the behavioural / decoding summaries into one table and write it
       to ``tables/corr_table.csv``.
    5. Run ``permutation_pearson`` on several column pairs, draw regression
       plots and save them under ``figures/``.
    6. Run a one-tailed ``permutation_t_test`` on the ``abs`` column.

    Parameters
    ----------
    subjects
        Passed unchanged to ``load_decoding``.

    Notes
    -----
    Nothing is returned by the visible part of the function; results are
    written to disk or drawn on matplotlib figures.
    """
    from pandas.plotting import scatter_matrix
    from statsmodels.sandbox.stats import multicomp
    from mne.stats import permutation_t_test
    from scipy.stats import wilcoxon

    plt.style.use('ggplot')

    # --- behavioural trial table -------------------------------------------
    log_df = pd.read_csv(op.join(study_path, 'tables', 's_trial_dat.csv'))
    log_df = log_df.drop(['Unnamed: 0', 'Unnamed: 0.1', 'Unnamed: 0.1.1',
                          'a', 'b', 'c', 'd', 'e', 'f'], axis=1)
    log_df = set_dif_and_rt_exp(log_df)

    def _per_subject(trials, column, func):
        # Aggregate one column of ``trials`` within each subject.
        grouped = trials[[column]].groupby(log_df['subject'], as_index=False)
        return grouped.agg(func)

    def _median_rt(trials):
        # Per-subject nan-median of RT as a plain list.
        grouped = trials[['RT']].groupby(log_df['subject'], as_index=False)
        return grouped.agg([np.nanmedian])['RT']['nanmedian'].tolist()

    # --- decoding summaries -------------------------------------------------
    clf_scores = load_decoding(subjects)
    scores = [sc.scores_ for sc in clf_scores]
    times = clf_scores[0].train_times_['times']
    # Diagonal of each subject's score matrix.
    tds = np.array([np.diag(sc) for sc in scores])
    mean_td = np.mean(tds, axis=0)
    mean_scores = np.mean(tds[:, (times > -0.2) & (times < 0.7)], axis=1)
    clf_peak = np.max(tds, axis=1)
    clf_lat = times[np.argmax(tds, axis=1)]

    # --- overview figure ----------------------------------------------------
    fig, ax = plt.subplots(3, 1, figsize=(18, 5), sharex=True, sharey=False)
    # NOTE(review): the source lost its indentation; only the two ``hist``
    # calls are assumed to belong to this loop -- confirm.
    for c_mark, color in zip([90, 70], ['blue', 'red']):
        ax[0].hist(log_df.loc[log_df.condition == c_mark, 'rt_exp'],
                   bins=np.linspace(-1, 5, 31), align='left', alpha=0.5,
                   color=color)
        ax[1].hist(log_df.loc[log_df.condition == c_mark, 'dif'],
                   bins=np.linspace(-1, 5, 61), align='left', alpha=0.5,
                   color=color)
    ax[1].set_xticks(np.linspace(-1, 5, 31))
    ax[1].set_ylim([0, 200])
    ax[1].set_xlim([-1.2, 5])
    ax[1].legend(['S2 Longer', 'S2 Shorter'])
    ax[2].plot(times[times > -0.7], mean_td[times > -0.7])
    ax[2].set_ylim([0.5, 0.65])
    ax[0].vlines(0, ymin=0, ymax=200, linestyles='--')
    ax[1].vlines(0, ymin=0, ymax=200, linestyles='--')
    ax[2].vlines(0, ymin=0.5, ymax=0.65, linestyles='--')

    # --- per-subject table --------------------------------------------------
    corr_dat = pd.read_csv(op.join(study_path, 'tables', 'pow_table.csv'))
    corr_dat = corr_dat.drop('Unnamed: 0', axis=1)
    conn_dat = pd.read_csv(op.join(study_path, 'tables',
                                   'conn_x_subj_lon_sho.csv'), header=None)
    corr_dat['conn_lon'] = conn_dat[0]
    corr_dat['conn_sho'] = conn_dat[1]
    corr_dat['conn_dif'] = corr_dat['conn_lon'] - corr_dat['conn_sho']

    trials_lon = log_df[log_df.condition == 90]
    trials_sho = log_df[log_df.condition == 70]

    rt_lon = _median_rt(trials_lon)
    rt_sho = _median_rt(trials_sho)
    rt = _median_rt(log_df)

    corr_dat['rt_sho'] = rt_sho
    corr_dat['rt_lon'] = rt_lon
    corr_dat['rt'] = rt
    corr_dat['clf'] = mean_scores
    corr_dat['clf_pk'] = clf_peak
    corr_dat['clf_lat'] = clf_lat
    corr_dat['acc_lon'] = _per_subject(trials_lon, 'Accuracy', np.nanmean)
    corr_dat['acc_sho'] = _per_subject(trials_sho, 'Accuracy', np.nanmean)
    corr_dat['acc'] = _per_subject(log_df, 'Accuracy', np.nanmean)
    corr_dat['rt_exp'] = _per_subject(trials_sho, 'rt_exp', np.nanmedian)
    corr_dat['abs'] = np.abs(corr_dat['clf_lat'] - corr_dat['rt_exp'])
    corr_dat['pow_dif'] = np.abs(corr_dat['pow_lon'] - corr_dat['pow_sho'])
    corr_dat.corr()  # result is discarded; only useful interactively
    corr_dat.to_csv(op.join(study_path, 'tables', 'corr_table.csv'))

    # plt.style.use('classic')
    # scatter_matrix(corr_dat[['clf_pk', 'rt_lon', 'rt_sho']])
    # corr_dat.corr()

    # --- permutation correlations -------------------------------------------
    n_perm = 10000
    # r_sho, p_sho = permutation_pearson(corr_dat['rt_sho'], corr_dat['clf_pk'], n_perm)
    # r_lon, p_lon = permutation_pearson(corr_dat['rt_lon'], corr_dat['clf_pk'], n_perm)

    r_dec_rt, p_dec_rt = permutation_pearson(
        corr_dat['rt'], corr_dat['clf_pk'], n_perm)
    r_conn_rt, p_conn_rt = permutation_pearson(
        corr_dat['rt'], corr_dat['conn_dif'], n_perm)
    r_tf_rt, p_tf_rt = permutation_pearson(
        corr_dat['rt'], corr_dat['pow_dif'], n_perm)

    r_dec_lon, p_dec_lon = permutation_pearson(
        corr_dat['acc_lon'], corr_dat['clf_pk'], n_perm)
    r_dec_sho, p_dec_sho = permutation_pearson(
        corr_dat['acc_sho'], corr_dat['clf_pk'], n_perm)

    r_conn_lon, p_conn_lon = permutation_pearson(
        corr_dat['acc_lon'], corr_dat['conn_lon'], n_perm)
    r_conn_sho, p_conn_sho = permutation_pearson(
        corr_dat['acc_sho'], corr_dat['conn_sho'], n_perm)

    r_tf_lon, p_tf_lon = permutation_pearson(
        corr_dat['acc_lon'], corr_dat['pow_lon'], n_perm)
    r_tf_sho, p_tf_sho = permutation_pearson(
        corr_dat['acc_sho'], corr_dat['pow_sho'], n_perm)

    all_p = [p_dec_rt, p_conn_rt, p_tf_rt, p_dec_lon, p_dec_sho,
             p_conn_lon, p_conn_sho, p_tf_lon, p_tf_sho]
    # NOTE(review): only the three RT-related p-values are FDR-corrected
    # although ``all_p`` collects all nine -- confirm which set is intended.
    p_corr = multicomp.multipletests([p_dec_rt, p_conn_rt, p_tf_rt],
                                     method='fdr_bh')

    # --- regression plots ---------------------------------------------------
    # seaborn >= 0.12 accepts ``x`` / ``y`` only as keywords, so they are
    # passed by name (older releases accept the keywords as well).
    # NOTE(review): calls without ``ax=`` draw on whatever axes is current,
    # i.e. on a panel of a previously created figure -- confirm intended.
    seaborn.regplot(x=corr_dat['rt'], y=corr_dat['clf_pk'], ci=None)
    plt.title('r = %0.3f p = %0.3f' % (r_dec_rt, p_dec_rt))
    plt.savefig(op.join(study_path, 'figures', 'clf_pk_vs_RT_all.eps'),
                dpi=300)

    fig, ax = plt.subplots(1, 2, sharey=True, sharex=True)
    # NOTE(review): x is accuracy and y is ``conn_dif`` here, while the axis
    # labels below say the opposite and the titles report the
    # ``conn_lon`` / ``conn_sho`` correlations -- confirm.
    seaborn.regplot(x=corr_dat['acc_lon'], y=corr_dat['conn_dif'], ax=ax[0],
                    ci=None)
    seaborn.regplot(x=corr_dat['acc_sho'], y=corr_dat['conn_dif'], ax=ax[1],
                    ci=None)
    for axis, r, p in zip(ax, [r_conn_lon, r_conn_sho],
                          [p_conn_lon, p_conn_sho]):
        axis.set_title('r = {} p = {}' .format(round(r, 3), round(p, 3)))
    for axis in ax:
        axis.set_ylabel('Accuracy')
    for axis, lab in zip(ax, ['S2 Longer', 'S2 Shorter']):
        axis.set_xlabel('Connectivity Difference (wSMI) \n %s' % lab)
    # ax[0].set_xlim(0, 0.05)
    fig.savefig(op.join(study_path, 'figures', 'conn_vs_acc.eps'), dpi=300)

    seaborn.regplot(x=corr_dat['pow_lon'], y=corr_dat['acc_lon'], ci=None)
    plt.title('r = %0.3f p = %0.3f' % (r_tf_lon, p_tf_lon))
    plt.savefig(op.join(study_path, 'figures', 'pow_lon_vs_acc_lon.eps'),
                dpi=300)

    seaborn.regplot(x=corr_dat['conn_lon'], y=corr_dat['acc_lon'], ci=None)

    plt.violinplot([corr_dat['conn_lon'], corr_dat['conn_sho']])

    # --- |peak latency - rt_exp| against zero -------------------------------
    # ``ndmin=2`` plus transpose turns the column into an (n_subjects, 1)
    # array, i.e. a single test.
    T_obs, p_values, H0 = permutation_t_test(
        np.array(corr_dat['abs'], ndmin=2).T, n_permutations=10000, tail=1)
    corr_dat['abs'].plot(kind='box')
    plt.title('p = %.8f' % p_values[0])
# Parametric pass: test every value against the whole column and report the
# rows that are significantly below the column mean.
col_mean = np.mean(col_values)  # invariant inside the loop, compute once
for i, val in enumerate(col_values):
    t, p = ttest_1samp(col_values, val)
    if p < 0.05 and val < col_mean:
        print(names[i])
        print(p)

# Permutation pass: same question, answered with a sign-flip permutation test
# on the column shifted by each value in turn. The test needs a 2-D input,
# hence the reshape to a single column.
col_values.shape = (np.shape(col_values)[0], 1)
col_mean = np.mean(col_values)
for i, val in enumerate(col_values):
    print("perm")
    values = np.subtract(col_values, val)
    T, p, HO = permutation_t_test(values, n_permutations=1000)
    if p < 0.05 and val < col_mean:
        print(names[i])
        print(p)

##choose column with the top hit, and then identify any row name with significant diff by t-test
# NOTE(review): ``ldskjf`` is not defined anywhere in the visible code; if it
# is undefined this line raises NameError and stops the script here. It looks
# like a deliberate debugging stop -- confirm before removing.
print(ldskjf)

#create or get path single expression value vs all,excluding itself
res_top_hit_path = run_sim_single(top_hit[0], top_hit[1], top_hit[2], exclude=exclude)