def behavior_classification(behavior='tinnitus_side', covariates=True):
    """Run EEG classification of `behavior` over every classifier/resampler combo.

    Loads the feature matrix and target labels, then sweeps three classifier
    types against four resampling strategies, saving results for each pair.
    """
    ml_data, side_data = load_data(behavior, covariates=covariates)

    classifier_types = ['SVM', 'ExtraTrees', 'KNN']
    resamplers = [None, 'under', 'over', 'smote']

    for classifier_type in classifier_types:
        for resampler in resamplers:
            prog = '%s %s' % (classifier_type, resampler)
            print('%s: Running %s classification with %s' % (pu.ctime(), behavior, prog))

            clf = EEG_Classifier(n_splits=10, seed=seed,
                                 classifier_type=classifier_type,
                                 resample_type=resampler)
            scores, confusion_matrices, features, grid_df = clf.classify(
                eeg_data=ml_data, target_data=side_data)

            print('%s: Saving output for %s' % (pu.ctime(), prog))
            save_output(output_dir=output_dir, behavior=behavior,
                        scores=scores, confusion_matrices=confusion_matrices,
                        features=features, grid_df=grid_df,
                        model=classifier_type, resamp_method=resampler,
                        covariates=covariates)
def main():
    """Driver: prepare output dirs, then run graph-theory analyses per band."""
    logging.info('%s: Starting script' % proj_utils.ctime())

    frequency_bands = ['delta', 'theta', 'alpha', 'beta', 'gamma']
    data_df = proj_utils.load_connectivity_data(drop_behavior=True)

    adjacency_dir = os.path.abspath('./../data/subject_adjacency_matrices/')
    if not os.path.isdir(adjacency_dir):
        os.mkdir(adjacency_dir)

    # Run a quick smoke test; its keys double as the result-table columns.
    test_res = test_graph_functions()

    logging.info('%s: Running graph theory analyses' % proj_utils.ctime())
    measure_columns = list(test_res)
    subjects = np.arange(0, len(data_df.index))

    outpath = './../data/graph_theory_res/'
    if not os.path.isdir(outpath):
        os.mkdir(outpath)

    for band in frequency_bands:
        band_files = [os.path.join(adjacency_dir, f)
                      for f in os.listdir(adjacency_dir) if band in f]
        run_graph_theory(band, sorted(band_files), subjects,
                         measure_columns, outpath)

    logging.info('%s: Finished' % proj_utils.ctime())
def test_gridsearch():
    """Smoke-test a feature-selection + SVC grid-search pipeline.

    SMOTE-resamples the tinnitus-side data, runs a stratified-k-fold
    GridSearchCV over C for an RBF SVC, and prints the best params/score.
    """

    def gridsearch_pipe(cv=None):
        # Local imports keep the heavy sklearn machinery out of module import.
        from sklearn.pipeline import Pipeline
        from sklearn.preprocessing import StandardScaler
        from sklearn.feature_selection import SelectFromModel
        from sklearn.ensemble import ExtraTreesClassifier
        from sklearn.model_selection import GridSearchCV
        from sklearn.svm import SVC

        c_range = [1, 10, 100]
        param_grid = {'C': c_range}

        pipe = Pipeline([
            ('preprocess_data', StandardScaler()),
            ('feature_selection', SelectFromModel(
                ExtraTreesClassifier(random_state=13), threshold="2*mean")),
            ('grid', GridSearchCV(SVC(kernel='rbf'), param_grid=param_grid,
                                  cv=cv, scoring='balanced_accuracy')),
        ])
        return pipe

    print('%s: Loading data' % pu.ctime())
    behavior_data, conn_data = pu.load_data_full_subjects()
    ml_data_without_covariates = conn_data.astype(float)
    side_data = pu.convert_tin_to_str(
        behavior_data['tinnitus_side'].values.astype(float), 'tinnitus_side')

    resampler = SMOTE(sampling_strategy='not majority', random_state=seed)
    x_res, y_res = resampler.fit_resample(ml_data_without_covariates, side_data)

    n_splits = 10
    # BUG FIX: random_state is ignored (and newer sklearn raises ValueError)
    # unless shuffle=True is set on StratifiedKFold.
    skf = model_selection.StratifiedKFold(n_splits=n_splits, shuffle=True,
                                          random_state=seed)
    skf.get_n_splits(x_res, y_res)

    pipe = gridsearch_pipe(cv=skf).fit(x_res, y_res)
    gridsearch = pipe[-1]  # the GridSearchCV step is last in the pipeline

    best_params = gridsearch.best_params_
    print(best_params)
    best_score = gridsearch.best_score_
    print(best_score)
    print('%s: Finished' % pu.ctime())
def run_pls(x, y, output_dir, n_iters=10000, scaling='ss1'):
    """Run PLSC permutation and bootstrap tests on (x, y) and pickle the results.

    Returns the (permutation, bootstrap) result dicts; also writes them to
    `output_dir`/pls_sleep.pkl.
    """
    plsc = pls_functions.PLSC(n_iters=n_iters, center_scale=scaling)

    logging.info('%s: Running permutation tests' % pu.ctime())
    perm_results = plsc.permutation_tests(x, y)

    logging.info('%s: Running bootstrap tests' % pu.ctime())
    boot_results = plsc.bootstrap_tests(x, y)

    combined = {'permutation tests': perm_results,
                'bootstrap_tests': boot_results}
    with open(output_dir + '/pls_sleep.pkl', 'wb') as fh:
        pkl.dump(combined, fh)

    return perm_results, boot_results
def first_level_ppc(phase_amp_file, meg_subj, meg_sess, rois, attn_rois):
    """First-level analysis, BOLD phase-phase coupling version.

    For each session/subject, reads the BOLD-bandpass phase data from the
    HDF5 file, restricts it to the attention ROIs, and computes phase-phase
    coupling. Returns a DataFrame (subjects x session-connection columns).
    """
    # Column indices of the attention ROIs within the full ROI list.
    attn_indices = [r for aroi in attn_rois
                    for r, roi in enumerate(rois) if aroi == roi]

    conns = ['%s-%s' % (r1, r2) for r2 in attn_rois for r1 in attn_rois]
    res = []
    for sess in meg_sess:
        sess_conns = ['%s %s' % (sess, c) for c in conns]
        sess_res = pd.DataFrame(index=meg_subj, columns=sess_conns)
        for subj in meg_subj:
            logging.info(' %s: Phase-phase coupling for %s %s' %
                         (pu.ctime(), sess, subj))
            # BUG FIX: open read-only via context manager. The old call used
            # h5py's legacy default mode and leaked the handle if an
            # exception occurred before f.close().
            with h5py.File(phase_amp_file, 'r') as f:
                dset = f[subj + '/' + sess + '/BOLD bandpass/phase_data'][...]
            attn_data = dset[:, attn_indices]
            sess_res.loc[subj] = ppc(attn_data)
        res.append(sess_res)

    return pd.concat(res, axis=1)
def pca_by_band(data, n_iters=1000, res_dir=None):
    """Per-band PCA (97% variance) with a permutation null; saves scree + tables.

    For each frequency band: z-score the connectivity columns, fit PCA,
    compare observed components against `n_iters` permutations, and write a
    scree plot and an Excel table into `res_dir`.
    """
    if res_dir is None:
        res_dir = os.path.dirname(__file__)

    conn_by_band = split_connectivity_by_band(data)
    band_results = {}
    for band_name in conn_by_band:
        band_df = conn_by_band[band_name]
        print(pu.ctime() + 'Running PCA on %s' % band_name)

        standardized = StandardScaler().fit_transform(band_df)
        pca = PCA(.97)
        pca.fit(standardized)

        observed_res = pretty_pca_res(pca)
        band_results[band_name] = observed_res
        print(pca.n_components_)
        del pca  # free the fitted model before the permutation loop

        perm_res = perm_pca(data=band_df, n_iters=n_iters)
        p_values = p_from_perm_data(observed_df=observed_res,
                                    perm_data=perm_res)

        plot_scree(observed_res, pvals=p_values, percent=False,
                   fname=os.path.join(res_dir, '%s_pca_scree.png' % band_name))
        observed_res.to_excel(
            os.path.join(res_dir, '%s_pca_res.xlsx' % band_name))
def full_matrix_second_level(data_df, output_path=None):
    """Run the second-level analysis over all subjects; optionally pickle it."""
    logging.info('%s: Running second level for all subjects' % proj_utils.ctime())
    results = run_second_level(data_df)
    if output_path is None:
        return
    with open(output_path, 'wb') as f:
        pkl.dump(results, f)
def type_classification_drop_mixed(ml_data, behavior_data, output_dir,
                                   models=None):
    """Classify tinnitus type after dropping mixed-type (PT_and_NBN) subjects."""
    print(
        '%s: Running classification on tinnitus type, dropping mixed type subjects'
        % pu.ctime())

    data_copy = deepcopy(ml_data)
    if models is None:
        models = ['extra_trees']
    resample_methods = [None, 'over', 'under']

    type_labels = pu.convert_tin_to_str(
        behavior_data['tinnitus_type'].values.astype(float), 'tinnitus_type')
    label_df = pd.DataFrame(type_labels, index=data_copy.index)

    # Positions of mixed-type subjects; drop them from both features and labels.
    mixed_positions = [i for i, lab in enumerate(type_labels)
                       if lab == 'PT_and_NBN']
    mixed_rows = data_copy.iloc[mixed_positions]
    data_copy.drop(index=mixed_rows.index, inplace=True)
    label_df.drop(index=mixed_rows.index, inplace=True)
    target_cleaned = np.ravel(label_df.values)

    for model in models:
        for res in resample_methods:
            eeg_classify(data_copy, target_cleaned, 'tinnitus_type_no_mixed',
                         model, output_dir, resample=res)
def side_classification_drop_asym(ml_data, behavior_data, output_dir,
                                  models=None):
    """Classify tinnitus side after dropping asymmetric (Right>Left, Left>Right) subjects."""
    print(
        '%s: Running classification on tinnitus side, dropping asymmetrical subjects'
        % pu.ctime())

    data_copy = deepcopy(ml_data)
    if models is None:
        models = ['extra_trees']
    resample_methods = [None, 'over', 'under']

    side_labels = pu.convert_tin_to_str(
        behavior_data['tinnitus_side'].values.astype(float), 'tinnitus_side')
    label_df = pd.DataFrame(side_labels, index=data_copy.index)

    # Positions of asymmetric-side subjects; drop from features and labels.
    asym_positions = []
    for asym_label in ('Right>Left', 'Left>Right'):
        asym_positions.extend(
            i for i, lab in enumerate(side_labels) if lab == asym_label)

    asym_rows = data_copy.iloc[asym_positions]
    data_copy.drop(index=asym_rows.index, inplace=True)
    label_df.drop(index=asym_rows.index, inplace=True)
    target_cleaned = np.ravel(label_df.values)

    for model in models:
        for res in resample_methods:
            eeg_classify(data_copy, target_cleaned, 'tinnitus_side_no_asym',
                         model, output_dir, resample=res)
def pls_psqi_with_power(sessions, rois, fig_dir, run_check=False):
    """PLSC of PSQI sleep components against MEG infraslow power.

    If `run_check` is true the PLSC is recomputed, otherwise previously
    pickled results are loaded from `fig_dir`. Writes scree plot, behavior
    and brain salience tables, and ROI salience plots into `fig_dir`.
    """
    logging.info('%s: Running PLSC on PSQI components with power' % pu.ctime())
    if not os.path.isdir(fig_dir):
        os.mkdir(fig_dir)

    meg_df = pd.read_excel('../data/MEG_infraslow_power.xlsx',
                           sheet_name='location', index_col=0)
    sleep_df, sleep_variables = load_psqi_data()

    if run_check:
        pres, bres = run_pls(x=meg_df.values, y=sleep_df.values,
                             output_dir=fig_dir)
    else:
        logging.info('%s: Loading raw output' % pu.ctime())
        with open(fig_dir + '/pls_sleep.pkl', 'rb') as file:
            res = pkl.load(file)
        pres, bres = res['permutation tests'], res['bootstrap_tests']

    # Number of latent variables significant at alpha.
    alpha = .001
    nv = len(np.where(pres['p_values'] < alpha)[0])
    latent_vars = ['LV_%d' % (v + 1) for v in range(nv)]

    pls_functions.plot_scree(eigs=pres['true_eigs'], pvals=pres['p_values'],
                             alpha=alpha, fname=fig_dir + '/scree.png')

    behavior_df = pd.DataFrame(bres['y_zscores'][:nv, :],
                               index=latent_vars, columns=sleep_variables)
    behavior_df.to_excel(fig_dir + '/behavior_res.xlsx')

    brain_res = organize_brain_sals(bres['x_zscores'], rois, sessions,
                                    latent_vars, comp='sign')
    pu.save_xls(brain_res, fig_dir + '/brain_res.xlsx')

    conj = brain_res['brain_conjunction']
    plot_roi_saliences(rois, conj, fig_dir, maxv=120, create_rois=False)
    logging.info('%s: Finished' % pu.ctime())
def group_matrices_second_level(data_df, index_dict, output_dir=None):
    """Run the second-level analysis once per subject group; pickle each result."""
    for group_name, index_list in index_dict.items():
        logging.info('%s: Running second level for %s' %
                     (proj_utils.ctime(), group_name))
        group_df = create_new_df_from_indices(index_list, data_df)
        res_df = run_second_level(group_df)
        outfile = os.path.join(output_dir, '%s.pkl' % group_name)
        with open(outfile, 'wb') as file:
            pkl.dump(res_df, file)
def grand_pca(data, res_dir=None):
    """Whitened PCA keeping 99% variance on z-scored data; saves scree + table."""
    if res_dir is None:
        res_dir = os.path.dirname(__file__)
    print(pu.ctime() + 'Running grand PCA')

    standardized = StandardScaler().fit_transform(data)
    pca = PCA(n_components=.99, whiten=True)
    pca.fit(standardized)
    print(pca.n_components_)

    results_df = pretty_pca_res(pca)
    plot_scree(results_df, percent=False,
               fname=os.path.join(res_dir, 'grand_pca_scree.png'))
    results_df.to_excel(os.path.join(res_dir, 'grand_pca_res.xlsx'))
def run_ancova(connectivity_data, covariates, where_zero='ind',
               output_path=None):
    """Per-connection OLS F-test with covariates; returns an F/P DataFrame.

    where_zero='ind': regress each connectivity variable on the covariates
    plus a zero "intercept" column. where_zero='dep': regress a zero vector
    on the covariates plus the connectivity variable as predictor.

    NOTE: mutates `covariates` in place by adding an 'intercept' or
    'predictor' column (matches original behavior).
    """
    logging.info('%s: Running second level analysis' % proj_utils.ctime())
    intercept = np.zeros(len(covariates.index))

    res_df = pd.DataFrame(index=['F', 'P'], columns=list(connectivity_data))
    for conn_var in list(connectivity_data):
        # BUG FIX: the original used `is` for string comparison, which tests
        # object identity and only works by CPython interning accident.
        if where_zero == 'ind':
            dep_ = connectivity_data[conn_var].values
            covariates['intercept'] = intercept
            ind_ = covariates.values
        elif where_zero == 'dep':
            dep_ = intercept
            covariates['predictor'] = connectivity_data[conn_var].values
            ind_ = covariates.values

        model = sm.OLS(dep_, ind_, hasconst=False)
        results = model.fit()
        # .at avoids the chained-indexing assignment of loc[...].iloc[...].
        res_df.at['F', conn_var] = results.fvalue
        res_df.at['P', conn_var] = results.f_pvalue

    if output_path is not None:
        with open(output_path, 'wb') as file:
            pkl.dump(res_df, file)

    logging.info('%s: Finished second level analysis' % proj_utils.ctime())
    return res_df
def run_graph_theory(band, filelist, subjects, columns, outpath):
    """Graph-theory measures for one band across proportional thresholds.

    For each threshold, loads every matching adjacency matrix, computes the
    graph measures, and pickles one (subjects x measures) DataFrame.
    """
    thresholds = [0, .1, .2, .3, .4, .5, .6, .7, .8, .9]
    for thresh in thresholds:
        logging.info('%s: Running %s at %.2f' %
                     (proj_utils.ctime(), band, thresh))

        graph_df = pd.DataFrame(index=subjects, columns=columns)
        row = 0
        for adj_file in filelist:
            if band not in adj_file:
                continue
            with open(adj_file, 'rb') as f:
                data_df = pkl.load(f)
            conn_res = calc_graph_measures(clean_df_to_numpy(data_df), thresh)
            for col, res_key in enumerate(conn_res):
                graph_df.iloc[row, col] = conn_res[res_key]
            row += 1

        outfile = os.path.join(
            outpath, 'graph_results_%s_%.2f_thresh.pkl' % (band, thresh))
        with open(outfile, 'wb') as f:
            pkl.dump(graph_df, f)
def first_level_pac(phase_amp_file, meg_subj, meg_sess, rois, attn_rois):
    """First-level analysis, BOLD - Alpha phase-amplitude coupling version.

    For each session/subject, reads the BOLD-bandpass phase data and Alpha
    amplitude data from the HDF5 file, restricts both to the attention ROIs,
    and computes phase-amplitude coupling. Returns a DataFrame
    (subjects x session-connection columns).
    """
    # Column indices of the attention ROIs within the full ROI list.
    attn_indices = [r for aroi in attn_rois
                    for r, roi in enumerate(rois) if aroi == roi]

    conns = ['%s_%s' % (r1, r2) for r2 in attn_rois for r1 in attn_rois]
    res = []
    for sess in meg_sess:
        sess_conns = ['%s %s' % (sess, c) for c in conns]
        sess_res = pd.DataFrame(index=meg_subj, columns=sess_conns)
        for subj in meg_subj:
            logging.info(' %s: Phase-amplitude coupling for %s %s' %
                         (pu.ctime(), sess, subj))
            # BUG FIX: open read-only via context manager. The old call used
            # h5py's legacy default mode and leaked the handle if an
            # exception occurred before f.close().
            with h5py.File(phase_amp_file, 'r') as f:
                bold_dset = f[subj + '/' + sess +
                              '/BOLD bandpass/phase_data'][...]
                alpha_dset = f[subj + '/' + sess +
                               '/Alpha/amplitude_data'][...]
            bold_data = bold_dset[:, attn_indices]
            alpha_data = alpha_dset[:, attn_indices]

            res_df = pac(bold_data, alpha_data, attn_rois)
            sess_res.loc[subj] = res_df.loc['pac'].values
        res.append(sess_res)

    return pd.concat(res, axis=1)
# NOTE(review): this chunk holds trailing statements of an unseen driver, a
# grand_pca definition, and a script entry point (whitespace reconstructed
# from a collapsed paste).
score_df = find_n_components(data, step=5)
score_df.to_excel('./pca_cross_val_scores_test.xlsx')


def grand_pca(data, res_dir=None):
    """Whitened PCA keeping 99% variance on z-scored data; saves scree + table."""
    if res_dir is None:
        res_dir = os.path.dirname(__file__)
    print(pu.ctime() + 'Running grand PCA')
    pca = PCA(n_components=.99, whiten=True)
    zdata = StandardScaler().fit_transform(data)
    pca.fit(zdata)
    print(pca.n_components_)
    true_df = pretty_pca_res(pca)
    plot_scree(true_df, percent=False,
               fname=os.path.join(res_dir, 'grand_pca_scree.png'))
    true_df.to_excel(os.path.join(res_dir, 'grand_pca_res.xlsx'))


if __name__ == "__main__":
    print(pu.ctime() + 'Loading data')
    data = pu.load_connectivity_data()
    res_dir = os.path.abspath('./../results/pca')
    if not os.path.isdir(res_dir):
        os.mkdir(res_dir)
    # NOTE(review): res_dir is not passed here, so grand_pca writes its
    # outputs next to this module instead of ./../results/pca — confirm intent.
    grand_pca(data)
    pca_by_band(data, n_iters=0, res_dir=res_dir)
# Script body: run EEG regression for several behavioral targets.
output_dir = './../data/eeg_regression/extra_trees/'
if not os.path.isdir(output_dir):
    os.mkdir(output_dir)

behavior_data, conn_data = pu.load_data_full_subjects()
# NOTE(review): astype returns a new object; this call's result is discarded,
# so conn_data is unchanged — confirm whether assignment was intended.
conn_data.astype(float)

categorical_variables = [
    'smoking', 'deanxit_antidepressants', 'rivotril_antianxiety', 'sex'
]
categorical_data = behavior_data[categorical_variables]
dummy_coded_categorical = pu.dummy_code_binary(categorical_data)
covariate_data = pd.concat([behavior_data['age'], dummy_coded_categorical],
                           axis=1)

ml_data = pd.concat([conn_data, covariate_data], axis=1)
# NOTE(review): this `target` is never used — it is immediately shadowed by
# the loop variable below.
target = behavior_data['distress_TQ'].values.astype(float)

targets = [
    'loudness_VAS', 'distress_TQ', 'distress_VAS', 'anxiety_score',
    'depression_score'
]
for target in targets:
    target_vect = behavior_data[target].values.astype(float)
    logging.info('%s Running regression on %s' % (pu.ctime(), target))
    eeg_regression(eeg_data=ml_data, target_data=target_vect,
                   target_type=target, outdir=output_dir)
def eeg_multilabel_classify(ml_data, target_data, target_type, model, outdir):
    """Multilabel (anxiety + depression) classification with stratified k-fold CV.

    Resamples the data (via label-powerset transformation), trains the chosen
    model per fold, and writes per-target performance tables, confusion
    matrices, feature coefficients, and pickled classifier objects under
    `outdir/target_type`.

    NOTE(review): `model is 'extra_trees'` / `model is 'knn'` below compare
    strings by identity (relies on CPython interning) — should be `==`.
    Also StratifiedKFold(random_state=...) without shuffle=True has no effect
    and raises in newer sklearn versions.
    """
    target_outdir = join(outdir, target_type)
    if not isdir(target_outdir):
        mkdir(target_outdir)

    feature_names = list(ml_data)

    # Create score dataframes, k-fold splitter
    n_splits = 10
    skf = model_selection.StratifiedKFold(n_splits=n_splits, random_state=seed)

    # Oversample connectivity data, apply k-fold splitter
    """Note: LP-transformation has to be applied for resampling, even though we're not treating it as a OVR problem"""
    x_res, y_res, y_res_lp_transformed = resample_multilabel(
        ml_data, target_data)
    skf.get_n_splits(x_res, y_res_lp_transformed)

    fold_count = 0
    classifier_objects, classifier_coefficients = {}, {}
    anx_balanced_acc, anx_chance_acc, anx_f1_scores = [], [], []
    dep_balanced_acc, dep_chance_acc, dep_f1_scores = [], [], []
    anx_cm_dict, anx_norm_cm_dict, dep_cm_dict, dep_norm_cm_dict = {}, {}, {}, {}

    for train_idx, test_idx in skf.split(x_res, y_res_lp_transformed):
        fold_count += 1
        print('%s: Running FOLD %d for %s' %
              (pu.ctime(), fold_count, target_type))
        foldname = 'Fold %02d' % fold_count

        # Stratified k-fold splitting
        x_train, x_test = x_res[train_idx], x_res[test_idx, :]
        y_train, y_test = y_res[train_idx], y_res[test_idx, :]

        # Covariate columns are flagged with a 'categorical' prefix; route to
        # the covariate-aware feature selection when they are present.
        if "categorical_sex_male" in feature_names:
            continuous_features = [
                f for f in feature_names if 'categorical' not in f
            ]
            continuous_indices = [
                ml_data.columns.get_loc(cont) for cont in continuous_features
            ]
            categorical_features = [
                f for f in feature_names if 'categorical' in f
            ]
            categorical_indices = [
                ml_data.columns.get_loc(cat) for cat in categorical_features
            ]
            x_train_feature_selected, x_test_feature_selected, cleaned_features = feature_selection_with_covariates(
                x_train, x_test, y_train, continuous_indices,
                categorical_indices, feature_names)
        else:
            x_train_feature_selected, x_test_feature_selected, cleaned_features = feature_selection_without_covariates(
                x_train, x_test, y_train, feature_names)

        if model is 'extra_trees':
            predicted, feature_importances, clf = extra_trees(
                x_train_feature_selected, y_train, x_test_feature_selected,
                cleaned_features)
            classifier_coefficients[foldname] = feature_importances
        elif model is 'knn':
            predicted, clf = knn(x_train_feature_selected, y_train,
                                 x_test_feature_selected)
        classifier_objects[foldname] = clf

        # Anxiety predictions (label column 0)
        yt, pred = y_test[:, 0], predicted[:, 0]
        balanced, chance, f1 = calc_scores(yt, pred)
        anx_balanced_acc.append(balanced)
        anx_chance_acc.append(chance)
        anx_f1_scores.append(f1)

        # Calculating fold confusion matrix (row-normalized copy as well)
        anx_cm = metrics.confusion_matrix(yt, pred)
        anx_normalized_cm = anx_cm.astype('float') / anx_cm.sum(
            axis=1)[:, np.newaxis]

        # Flatten the per-label class lists the classifier exposes, then split
        # into anxiety vs depression class names.
        classes = []
        for subclass_list in clf.classes_:
            classes.extend(list(subclass_list))
        anx_classes = [c for c in classes if 'anxiety' in c]
        dep_classes = [c for c in classes if 'depression' in c]

        anx_cm_dict[foldname] = pd.DataFrame(anx_cm, index=anx_classes,
                                             columns=anx_classes)
        anx_norm_cm_dict[foldname] = pd.DataFrame(anx_normalized_cm,
                                                  index=anx_classes,
                                                  columns=anx_classes)

        # Depression predictions (label column 1)
        yt, pred = y_test[:, 1], predicted[:, 1]
        balanced, chance, f1 = calc_scores(yt, pred)
        dep_balanced_acc.append(balanced)
        dep_chance_acc.append(chance)
        dep_f1_scores.append(f1)

        # Calculating fold confusion matrix
        dep_cm = metrics.confusion_matrix(yt, pred)
        dep_normalized_cm = dep_cm.astype('float') / dep_cm.sum(
            axis=1)[:, np.newaxis]
        dep_cm_dict[foldname] = pd.DataFrame(dep_cm, index=dep_classes,
                                             columns=dep_classes)
        dep_norm_cm_dict[foldname] = pd.DataFrame(dep_normalized_cm,
                                                  index=dep_classes,
                                                  columns=dep_classes)

    # Saving anxiety performance scores (per-fold rows plus an 'Average' row)
    anx_f1_array = np.asarray(anx_f1_scores)
    anx_f1_class_averages = np.mean(anx_f1_array, axis=0)
    anx_f1_data = np.vstack((anx_f1_array, anx_f1_class_averages))
    balanced_acc_avg = np.mean(anx_balanced_acc)
    chance_acc_avg = np.mean(anx_chance_acc)
    anx_balanced_acc.append(balanced_acc_avg)
    anx_chance_acc.append(chance_acc_avg)
    accuracy_data = np.asarray([anx_balanced_acc, anx_chance_acc]).T

    rownames = ['Fold %02d' % (n + 1) for n in range(n_splits)]
    rownames.append('Average')
    score_df = pd.DataFrame(data=accuracy_data, index=rownames,
                            columns=['Balanced accuracy', 'Chance accuracy'])
    f1_df = pd.DataFrame(data=np.asarray(anx_f1_data), index=rownames,
                         columns=anx_classes)
    scores_dict = {'accuracy scores': score_df, 'f1 scores': f1_df}
    pu.save_xls(scores_dict, join(target_outdir, 'anxiety_performance.xlsx'))

    # Saving performance scores (depression)
    dep_f1_array = np.asarray(dep_f1_scores)
    dep_f1_class_averages = np.mean(dep_f1_array, axis=0)
    dep_f1_data = np.vstack((dep_f1_array, dep_f1_class_averages))
    balanced_acc_avg = np.mean(dep_balanced_acc)
    chance_acc_avg = np.mean(dep_chance_acc)
    dep_balanced_acc.append(balanced_acc_avg)
    dep_chance_acc.append(chance_acc_avg)
    accuracy_data = np.asarray([dep_balanced_acc, dep_chance_acc]).T

    rownames = ['Fold %02d' % (n + 1) for n in range(n_splits)]
    rownames.append('Average')
    score_df = pd.DataFrame(data=accuracy_data, index=rownames,
                            columns=['Balanced accuracy', 'Chance accuracy'])
    f1_df = pd.DataFrame(data=np.asarray(dep_f1_data), index=rownames,
                         columns=dep_classes)
    scores_dict = {'accuracy scores': score_df, 'f1 scores': f1_df}
    pu.save_xls(scores_dict,
                join(target_outdir, 'depression_performance.xlsx'))

    # Saving coefficients (only populated for the extra_trees model)
    if bool(classifier_coefficients):
        pu.save_xls(classifier_coefficients,
                    join(target_outdir, 'coefficients.xlsx'))

    # Saving confusion matrices
    pu.save_xls(anx_cm_dict,
                join(target_outdir, 'anxiety_confusion_matrices.xlsx'))
    pu.save_xls(
        anx_norm_cm_dict,
        join(target_outdir, 'anxiety_confusion_matrices_normalized.xlsx'))
    pu.save_xls(dep_cm_dict,
                join(target_outdir, 'depression_confusion_matrices.xlsx'))
    pu.save_xls(
        dep_norm_cm_dict,
        join(target_outdir, 'depression_confusion_matrices_normalized.xlsx'))

    # Saving classifier object
    with open(join(target_outdir, 'classifier_object.pkl'), 'wb') as file:
        pkl.dump(classifier_objects, file)
# NOTE(review): this fragment begins mid-function — the save calls below
# reference names (target_outdir, anx_norm_cm_dict, ...) from an enclosing
# definition that is not visible here; layout reconstructed from a collapsed
# paste, indentation of the tail is a best guess.
pu.save_xls(
    anx_norm_cm_dict,
    join(target_outdir, 'anxiety_confusion_matrices_normalized.xlsx'))
pu.save_xls(dep_cm_dict,
            join(target_outdir, 'depression_confusion_matrices.xlsx'))
pu.save_xls(
    dep_norm_cm_dict,
    join(target_outdir, 'depression_confusion_matrices_normalized.xlsx'))

# Saving classifier object
with open(join(target_outdir, 'classifier_object.pkl'), 'wb') as file:
    pkl.dump(classifier_objects, file)

print('%s: Loading data' % pu.ctime())
behavior_data, conn_data = pu.load_data_full_subjects()
# NOTE(review): astype result discarded; conn_data is unchanged.
conn_data.astype(float)

categorical_variables = [
    'smoking', 'deanxit_antidepressants', 'rivotril_antianxiety', 'sex'
]
categorical_data = behavior_data[categorical_variables]
dummy_coded_categorical = pu.dummy_code_binary(categorical_data)
covariate_data = pd.concat([behavior_data['age'], dummy_coded_categorical],
                           axis=1)
ml_data = pd.concat([conn_data, covariate_data], axis=1)

multilabel_models = ['extra_trees', 'knn']
for model in multilabel_models:
    # NOTE(review): loop body is cut off after this line in the source.
    output_dir = './../data/%s/' % model
# NOTE(review): fragment — begins inside an unseen plotting function
# (ax/fig/fname come from its scope); reconstructed from a collapsed paste.
ax.tick_params(axis='both', labelsize='large')
title = "ROI distribution of Cronbach's alpha values"
ax.set_title(title, fontsize='xx-large')
if fname is not None:
    fig.savefig(fname, bbox_inches='tight')


if __name__ == "__main__":
    import mPLSC_functions as mf
    import sys
    sys.path.append("..")
    import proj_utils as pu

    print('%s: Starting...' % pu.ctime())
    pdir = pu._get_proj_dir()
    pdObj = pu.proj_data()
    rois = pdObj.roiLabels
    meg_subj, meg_sess = pdObj.get_meg_metadata()
    mri_subj, mri_sess = pdObj.get_mri_metadata()
    # Subjects present in both MRI and MEG datasets.
    subj_overlap = [s for s in mri_subj if s in meg_subj]

    meg_path = pdir + '/data/downsampled_MEG_truncated.hdf5'
    mri_path = pdir + '/data/multimodal_HCP.hdf5'
    roi_path = pdir + '/data/glasser_atlas/'
    fig_path = pdir + '/figures/cron_alpha'

    print('%s: Extracting average power in each ROI and subject, MRI' % pu.ctime())
    # NOTE(review): this call is cut off mid-arguments in the source.
    mri_data = _extract_average_power(mri_path, mri_sess, subj_overlap, rois,
import matplotlib.pyplot as plt
import sys
sys.path.append("..")
import proj_utils as pu


def cron_alpha(array):
    """Cronbach's alpha for a 2-D array whose columns are the groups/items."""
    k = array.shape[1]  # Columns are the groups
    # Sum of per-item variances vs variance of the per-subject totals.
    variances_sum = np.sum(np.var(array, axis=0, ddof=1))
    variances_total = np.var(np.sum(array, axis=1), ddof=1)
    return (k / (k - 1)) * (1 - (variances_sum / variances_total))


print('%s: Starting' % pu.ctime())
print('%s: Getting metadata, parameters' % pu.ctime())
pdir = pu._get_proj_dir()
pdObj = pu.proj_data()
meg_subj, meg_sess = pdObj.get_meg_metadata()
mri_subj, mri_sess = pdObj.get_mri_metadata()
# Subjects present in both MRI and MEG datasets.
subj_overlap = [s for s in mri_subj if s in meg_subj]

pData = pdObj.get_data()
rois = pData['roiLabels']
band_dict = pData['bands']
slow_bands = ['BOLD', 'Slow 4', 'Slow 3', 'Slow 2', 'Slow 1']  # rows
supra_bands = ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma']  # cols
# NOTE(review): fragment — tail of an unseen power-extraction function; the
# `sess`/subject loops and the `def` line are outside this view, so the
# indentation below is reconstructed and must be confirmed.
            average_power = np.mean(fft_power, axis=0)
            session_data.append(average_power)
        session_df = pd.DataFrame(np.asarray(session_data), index=subjects,
                                  columns=rois)
        power_data[sess] = session_df
    return power_data


import sys
sys.path.append("..")
import proj_utils as pu

print('%s: Starting...' % pu.ctime())
pdir = pu._get_proj_dir()
pdObj = pu.proj_data()
rois = pdObj.roiLabels
meg_subj, meg_sess = pdObj.get_meg_metadata()
mri_subj, mri_sess = pdObj.get_mri_metadata()
# Subjects present in both MRI and MEG datasets.
subj_overlap = [s for s in mri_subj if s in meg_subj]

meg_path = pdir + '/data/downsampled_MEG_truncated.hdf5'
mri_path = pdir + '/data/multimodal_HCP.hdf5'
roi_path = pdir + '/data/glasser_atlas/'
fig_path = pdir + '/figures/cron_alpha'

print('%s: BP - Extracting average power in each ROI and subject, MRI' % pu.ctime())
# NOTE(review): this call is cut off mid-arguments in the source.
mri_data = _extract_average_power(mri_path, mri_sess, subj_overlap, rois,
def calc_phase_amp_power(how='location'):
    """Infraslow (0.01-0.1 Hz) MEG power per subject/session/ROI.

    how='location': per-ROI FFT of the raw timeseries, averaging power inside
    the infraslow band. how='bandpass'/'truncated': butterworth-filter the
    full timeseries (from the raw or truncated HDF5 file respectively) and
    average the FFT power across frequencies.

    Returns a DataFrame (subjects x "session ROI" columns) concatenated
    across sessions.

    NOTE(review): `how is 'location'` etc. compare strings by identity and
    only work via CPython interning — should be `==`. The 'location' branch
    also opens the HDF5 file in read-write ('r+') although it only reads.
    """
    p_data = pu.proj_data()
    subjects, sessions = p_data.get_meg_metadata()
    rois = p_data.roiLabels
    fs = 500  # sampling frequency in Hz (per the rfftfreq call below)
    if how is 'location':
        df_list = []
        for session in sessions:
            session_df = pd.DataFrame(index=subjects)
            for subject in subjects:
                prog = "%s - %s" % (session, subject)
                print('%s: Calculating infraslow power for %s with location' %
                      (pu.ctime(), prog))
                database = h5py.File('../data/multimodal_HCP.hdf5', 'r+')
                dset = database[subject + '/MEG/' + session +
                                '/timeseries'][...]
                for ROIindex in range(len(rois)):
                    data = dset[:, ROIindex]
                    label = rois[ROIindex]
                    # Get real amplitudes of FFT (only in postive frequencies)
                    # Squared to get power
                    fft_power = np.absolute(np.fft.rfft(data)) ** 2
                    # Get frequencies for amplitudes in Hz
                    fft_freq = np.fft.rfftfreq(len(data), 1.0 / fs)
                    infraslow_band = (.01, .1)  # ('BOLD bandpass', (.01, .1))
                    freq_ix = np.where((fft_freq >= infraslow_band[0]) &
                                       (fft_freq <= infraslow_band[1]))[0]
                    colname = '%s %s' % (session, label)
                    if colname not in session_df:
                        session_df[colname] = np.nan
                    avg_power = np.mean(fft_power[freq_ix])
                    session_df.loc[subject][colname] = avg_power
                database.close()
            df_list.append(session_df)
        grand_df = pd.concat(df_list, axis=1)
        return grand_df
    elif how is 'bandpass':
        df_list = []
        for sess in sessions:
            session_colnames = ['%s %s' % (sess, r) for r in rois]
            session_df = pd.DataFrame(index=subjects,
                                      columns=session_colnames)
            for subj in subjects:
                prog = "%s - %s" % (sess, subj)
                print('%s: Calculating infraslow power for %s with bandpass' %
                      (pu.ctime(), prog))
                f = h5py.File('../data/multimodal_HCP.hdf5', 'r')
                data = f[subj + '/MEG/' + sess + '/timeseries'][...]
                f.close()
                data = _butter_filter(data, fs=500, cutoffs=[.01, .1])
                fft_power = np.absolute(np.fft.rfft(data, axis=0)) ** 2
                average_power = np.mean(fft_power, axis=0)
                session_df.loc[subj] = average_power
            df_list.append(session_df)
        grand_df = pd.concat(df_list, axis=1)
        return grand_df
    elif how is 'truncated':
        df_list = []
        for sess in sessions:
            session_colnames = ['%s %s' % (sess, r) for r in rois]
            session_df = pd.DataFrame(index=subjects,
                                      columns=session_colnames)
            for subj in subjects:
                f = h5py.File('../data/downsampled_MEG_truncated.hdf5', 'r')
                data = f[subj + '/MEG/' + sess + '/resampled_truncated'][...]
                f.close()
                data = _butter_filter(data, fs=500, cutoffs=[.01, .1])
                fft_power = np.absolute(np.fft.rfft(data, axis=0)) ** 2
                average_power = np.mean(fft_power, axis=0)
                session_df.loc[subj] = average_power
            df_list.append(session_df)
        grand_df = pd.concat(df_list, axis=1)
        return grand_df


print('%s: Finished' % pu.ctime())
# NOTE(review): fragment — tail of an unseen table-building function
# (tables/table_name/cfc_table/outfile come from its scope); indentation
# reconstructed from a collapsed paste.
    tables[table_name] = cfc_table
    if outfile is not None:
        mf.save_xls(tables, outfile)
    return tables


if __name__ == '__main__':
    from boredStats import pls_tools
    import sys
    sys.path.append("..")
    import proj_utils as pu

    print('%s: Loading data' % pu.ctime())
    pdir = pu._get_proj_dir()
    ddir = pdir + '/data/'
    roi_path = ddir + '/glasser_atlas/'
    fig_path = pdir + '/figures/mPLSC_delta_theta/'

    pdObj = pu.proj_data()
    rois = pdObj.roiLabels
    colors = pdObj.colors
    meg_subj, meg_sessions = pdObj.get_meg_metadata()
    mri_subj, mri_sess = pdObj.get_mri_metadata()
    # Subjects present in both MRI and MEG datasets.
    subj = [s for s in mri_subj if s in meg_subj]
    meg_sess = ['Session1', 'Session2', 'Session3']

    pls_path = ddir + 'mPLSC_delta_theta_cfc.pkl'
    # check_0 = input('Run mPLSC? y/n ')
Created on Mon Mar 25 14:39:26 2019

NOTE(review): this chunk starts inside the module docstring — its opening
triple-quote is outside this view.
"""
import os
import h5py
import numpy as np
import pandas as pd
import pickle as pkl

import mPLSC_functions as mf
from boredStats import pls_tools

import sys
sys.path.append("..")
import proj_utils as pu

print('%s: Loading data' % pu.ctime())
pdir = pu._get_proj_dir()
pdObj = pu.proj_data()
rois = pdObj.roiLabels
colors = pdObj.colors
meg_subj, meg_sessions = pdObj.get_meg_metadata()
mri_subj, mri_sess = pdObj.get_mri_metadata()
# Subjects present in both MRI and MEG datasets.
subjects = [s for s in mri_subj if s in meg_subj]
bands = ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma']
meg_sess = ['Session1', 'Session2', 'Session3']

ddir = pdir + '/data'
roi_path = ddir + '/glasser_atlas/'
fig_path = pdir + '/figures/mPLSC_cfc/'
# NOTE(review): fragment — script body running PLSC of MEG power against PSQI
# sleep components; `meg_data` comes from earlier, unseen code.
meg_list = [meg_data[sess] for sess in list(meg_data)]
meg_df = pd.concat(meg_list, axis=1)
x = meg_df.values

sleep_variables = [
    'PSQI_Comp1', 'PSQI_Comp2', 'PSQI_Comp3', 'PSQI_Comp4', 'PSQI_Comp5',
    'PSQI_Comp6', 'PSQI_Comp7'
]
behavior_raw = pd.read_excel('../data/hcp_behavioral.xlsx', index_col=0,
                             sheet_name='cleaned')
sleep_df = behavior_raw[sleep_variables]
y = sleep_df.values.astype(float)

logging.info('%s: Running PLSC' % pu.ctime())
p = PLSC(n_iters=1000, center_scale='ss1')
# pres = p.permutation_tests(x, y)
#
# eigs = pres['true_eigs']
# print(eigs)
# pvals = pres['p_values']
# print(pvals)
# plot_scree(eigs=eigs, pvals=pvals)
bres = p.bootstrap_tests(x, y)
print(bres['y_zscores'])
print(bres['x_zscores'])
logging.info('%s: Finished' % pu.ctime())
# NOTE(review): fragment — script setup for multitable PLS-C; the source is
# cut off mid-call at the end.
pdObj = pu.proj_data()
rois = pdObj.roiLabels
colors = pdObj.colors
meg_subj, meg_sessions = pdObj.get_meg_metadata()
print(len(meg_subj))
mri_subj, mri_sess = pdObj.get_mri_metadata()
print(len(mri_subj))
# Subjects present in both MRI and MEG datasets.
subject_overlap = [s for s in mri_subj if s in meg_subj]

output_dir = ddir + '/mPLSC/'
alpha = .001
z_test = 0
output_file = ddir + '/mPLSC/mPLSC_power_all_sessions.pkl'
# '/mPLSC/mPLSC_power_all_sessions_sleep_only.pkl'
# '/mPLSC/mPLSC_power_all_sessions_sustained_attention.pkl'

check = input('Run multitable PLS-C? y/n ')
# NOTE(review): `is` compares identity; a string returned by input() is a new
# object, so this branch can never be taken reliably — should be `check == 'y'`.
if check is 'y':
    print('%s: Building subtables of power data for MEG' % pu.ctime())
    meg_data = mf.extract_average_power(
        hdf5_file=ddir + '/downsampled_MEG_truncated.hdf5',
        sessions=meg_sessions,
        subjects=subject_overlap,
        rois=rois,
        image_type='MEG',
        bp=True)
    x_tables = [meg_data[session] for session in list(meg_data)]

    print('%s: Building subtables of behavior data' % pu.ctime())
    behavior_metadata = pd.read_csv(ddir + '/sustained_attention_vars.txt',
                                    delimiter='\t', header=None)
    # NOTE(review): this call is cut off mid-arguments in the source.
    behavior_metadata.rename(dict(zip([0, 1], ['category', 'name'])),
                             axis='columns',
# NOTE(review): fragment — tail of an unseen function collecting per-ROI
# r-values; outer sess/subj loops and the `def` line are outside this view,
# so indentation is reconstructed and must be confirmed.
        for roi in rois:
            hdf5 = h5py.File(hdf5_path, 'r')
            rval_path = sess + '/' + subj + '/' + roi + '/' + 'r_vals'
            # NOTE(review): Dataset.value was removed in h5py 3.x — use [()].
            dset = hdf5.get(rval_path).value
            within_subj_data.append(dset[:, :])
            hdf5.close()
        # Fisher z-transform the correlation values before aggregating.
        within_subj_array = np.arctanh(np.asarray(within_subj_data))
        between_subj_data.append(within_subj_array)
    between_subj_array = np.asarray(between_subj_data)
    return between_subj_array


print('%s: Getting metadata, parameters' % pu.ctime())
pdir = pu._get_proj_dir()
pdObj = pu.proj_data()
meg_subj, meg_sess = pdObj.get_meg_metadata()
mri_subj, mri_sess = pdObj.get_mri_metadata()
# Subjects present in both MRI and MEG datasets.
subj_overlap = [s for s in mri_subj if s in meg_subj]

slow_bands = ['BOLD', 'Slow 4', 'Slow 3', 'Slow 2', 'Slow 1']  # rows
reg_bands = ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma']  # cols

pData = pdObj.get_data()
rois = pData['roiLabels']

print('%s: Getting behavior data' % pu.ctime())
# NOTE(review): this with-statement is cut off before its body in the source.
with open(pdir + '/data/cog_emotion_variables.txt', 'r') as boi:
def _plot_violin(dataframe):
    """Log-scaled violin plot of cross-frequency coupling by phase/amplitude bands."""
    sns.set_style('darkgrid')
    sns.set_context('notebook', font_scale=2)
    fig = sns.catplot(x='Phase bands', y='Cross-Frequency Coupling', height=15,
                      aspect=1.78, data=dataframe, hue='Amplitude bands',
                      kind='violin')
    fig.set(yscale='log')
    fig.set(ylim=(.001, .1))


print('%s: Getting metadata, parameters' % pu.ctime())
pdir = pu._get_proj_dir()
pdObj = pu.proj_data()
meg_subj, meg_sess = pdObj.get_meg_metadata()
mri_subj, mri_sess = pdObj.get_mri_metadata()
# Subjects present in both MRI and MEG datasets.
subj_overlap = [s for s in mri_subj if s in meg_subj]

pData = pdObj.get_data()
rois = pData['roiLabels']
band_dict = pData['bands']
slow_bands = ['BOLD', 'Slow 4', 'Slow 3', 'Slow 2', 'Slow 1']  # rows
reg_bands = ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma']  # cols

# --- Infraslow results ---#
# NOTE(review): fragment — tail of an unseen per-ROI phase-amplitude-coupling
# function (meg_dataset/slow_ts/roi_index/r_mat/p_mat come from its scope);
# indentation reconstructed from a collapsed paste.
    for reg_index, reg in enumerate(reg_bands):
        reg_group = meg_dataset.get(reg)
        reg_ts = reg_group.get('amplitude_data')[:, roi_index]
        r_val, p_val = pac.circCorr(slow_ts, reg_ts)
        r_mat[reg_index] = r_val
        p_mat[reg_index] = p_val
    return r_mat, p_mat


import sys
sys.path.append("..")
import proj_utils as pu

start = pu.ctime()
print('%s: Starting' % pu.ctime())
print('%s: Getting metadata, parameters' % pu.ctime())
pdir = pu._get_proj_dir()
pdObj = pu.proj_data()
meg_subj, meg_sess = pdObj.get_meg_metadata()
mri_subj, mri_sess = pdObj.get_mri_metadata()
# Subjects present in both MRI and MEG datasets.
subj_overlap = [s for s in mri_subj if s in meg_subj]

pData = pdObj.get_data()
rois = pData['roiLabels']
database = pData['database']
band_dict = pData['bands']

min_meg_length = 111980  # 118088