Example #1
def behavior_classification(behavior='tinnitus_side', covariates=True):
    ml_data, side_data = load_data(behavior, covariates=covariates)
    models = ['SVM', 'ExtraTrees', 'KNN']
    resample_methods = [None, 'under', 'over', 'smote']

    for model in models:
        for resamp in resample_methods:
            prog = '%s %s' % (model, resamp)
            print('%s: Running %s classification with %s' %
                  (pu.ctime(), behavior, prog))
            EC = EEG_Classifier(n_splits=10,
                                seed=seed,
                                classifier_type=model,
                                resample_type=resamp)
            scores, confusion_matrices, features, grid_df = EC.classify(
                eeg_data=ml_data, target_data=side_data)
            print('%s: Saving output for %s' % (pu.ctime(), prog))
            save_output(output_dir=output_dir,
                        behavior=behavior,
                        scores=scores,
                        confusion_matrices=confusion_matrices,
                        features=features,
                        grid_df=grid_df,
                        model=model,
                        resamp_method=resamp,
                        covariates=covariates)
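

# Hedged usage sketch (not in the original file): assumes the module-level
# helpers referenced above (load_data, save_output, EEG_Classifier, seed,
# output_dir) are defined as in the project.
if __name__ == '__main__':
    for b in ['tinnitus_side', 'tinnitus_type']:
        behavior_classification(behavior=b, covariates=True)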
Example #2
def main():
    logging.info('%s: Starting script' % proj_utils.ctime())

    bands = ['delta', 'theta', 'alpha', 'beta', 'gamma']
    data_df = proj_utils.load_connectivity_data(drop_behavior=True)

    dpath = os.path.abspath('./../data/subject_adjacency_matrices/')
    if not os.path.isdir(dpath):
        os.mkdir(dpath)

    # print('%s: Creating adjacency dicts' % proj_utils.ctime())
    # adj_dict = create_adjacency_dict(data_df, bands)

    # print('%s: Creating subject adjacency matrices' % proj_utils.ctime())
    # rois = parse_roi_names(list(data_df))
    # create_subj_adjacency_mats(adj_dict, bands, rois, dpath)

    test_res = test_graph_functions()

    logging.info('%s: Running graph theory analyses' % proj_utils.ctime())
    columns = list(test_res)
    subjects = np.arange(0, len(data_df.index))
    outpath = './../data/graph_theory_res/'
    if not os.path.isdir(outpath):
        os.mkdir(outpath)

    for band in bands:
        filelist = sorted([os.path.join(dpath, f) for f in os.listdir(dpath) if band in f])
        run_graph_theory(band, filelist, subjects, columns, outpath)

    logging.info('%s: Finished' % proj_utils.ctime())
Example #3
def test_gridsearch():
    def gridsearch_pipe(cv=None):
        from sklearn.pipeline import Pipeline
        from sklearn.preprocessing import StandardScaler
        from sklearn.feature_selection import SelectFromModel
        from sklearn.ensemble import ExtraTreesClassifier
        from sklearn.model_selection import GridSearchCV
        from sklearn.svm import SVC
        # kernel_range = ('linear', 'rbf', 'poly')
        c_range = [1, 10, 100]  # or: np.arange(start=1, stop=100, step=10, dtype=int)
        # gamma_range = np.arange(.01, 1, .01)
        param_grid = {'C': c_range}  # optionally add 'gamma': gamma_range, 'kernel': kernel_range

        pipe = Pipeline([
            ('preprocess_data', StandardScaler()),
            ('feature_selection',
             SelectFromModel(ExtraTreesClassifier(random_state=13),
                             threshold="2*mean")),
            ('grid',
             GridSearchCV(SVC(kernel='rbf'),
                          param_grid=param_grid,
                          cv=cv,
                          scoring='balanced_accuracy'))
        ])

        return pipe

    print('%s: Loading data' % pu.ctime())
    behavior_data, conn_data = pu.load_data_full_subjects()
    ml_data_without_covariates = conn_data.astype(float)

    side_data = pu.convert_tin_to_str(
        behavior_data['tinnitus_side'].values.astype(float), 'tinnitus_side')

    resampler = SMOTE(sampling_strategy='not majority', random_state=seed)

    x_res, y_res = resampler.fit_resample(ml_data_without_covariates,
                                          side_data)

    n_splits = 10
    skf = model_selection.StratifiedKFold(n_splits=n_splits, shuffle=True,
                                          random_state=seed)
    skf.get_n_splits(x_res, y_res)

    pipe = gridsearch_pipe(cv=skf).fit(x_res, y_res)
    gridsearch = pipe[-1]
    best_params = gridsearch.best_params_
    print(best_params)
    best_score = gridsearch.best_score_
    print(best_score)

    print('%s: Finished' % pu.ctime())
def run_pls(x, y, output_dir, n_iters=10000, scaling='ss1'):
    p = pls_functions.PLSC(n_iters=n_iters, center_scale=scaling)
    logging.info('%s: Running permutation tests' % pu.ctime())
    pres = p.permutation_tests(x, y)
    logging.info('%s: Running bootstrap tests' % pu.ctime())
    bres = p.bootstrap_tests(x, y)

    res = {'permutation tests': pres, 'bootstrap_tests': bres}
    with open(output_dir + '/pls_sleep.pkl', 'wb') as file:
        pkl.dump(res, file)

    return pres, bres
Example #5
def first_level_ppc(phase_amp_file, meg_subj, meg_sess, rois, attn_rois):
    # First-level analysis, BOLD phase-phase coupling version
    attn_indices = []
    for aroi in attn_rois:
        for r, roi in enumerate(rois):
            if aroi == roi:
                attn_indices.append(r)

    conns = ['%s-%s' % (r1, r2) for r2 in attn_rois for r1 in attn_rois]
    res = []
    for sess in meg_sess:
        sess_conns = ['%s %s' % (sess, c) for c in conns]
        sess_res = pd.DataFrame(index=meg_subj, columns=sess_conns)
        count = 0
        for subj in meg_subj:
            logging.info(' %s: Phase-phase coupling for %s %s' %
                         (pu.ctime(), sess, subj))
            f = h5py.File(phase_amp_file, 'r')
            dset = f[subj + '/' + sess + '/BOLD bandpass/phase_data'][...]
            attn_data = dset[:, attn_indices]
            f.close()
            sess_res.loc[subj] = ppc(attn_data)
            count += 1
        res.append(sess_res)
    return pd.concat(res, axis=1)
def pca_by_band(data, n_iters=1000, res_dir=None):
    if res_dir is None:
        res_dir = os.path.dirname(__file__)
    conn_by_band = split_connectivity_by_band(data)
    band_results = {}

    for b in conn_by_band:
        band_df = conn_by_band[b]
        print('%s: Running PCA on %s' % (pu.ctime(), b))

        # scaled_data = norm_to_ss1(band_df.values)
        # scaled_data = RobustScaler().fit_transform(band_df.values)
        scaled_data = StandardScaler().fit_transform(band_df)
        pca = PCA(.97)
        pca.fit(scaled_data)

        band_res = pretty_pca_res(pca)
        band_results[b] = band_res
        print(pca.n_components_)
        del pca

        perm_res = perm_pca(data=band_df, n_iters=n_iters)
        p_values = p_from_perm_data(observed_df=band_res, perm_data=perm_res)

        plot_scree(band_res,
                   pvals=p_values,
                   percent=False,
                   fname=os.path.join(res_dir, '%s_pca_scree.png' % b))
        band_res.to_excel(os.path.join(res_dir, '%s_pca_res.xlsx' % b))
def full_matrix_second_level(data_df, output_path=None):
    logging.info('%s: Running second level for all subjects' %
                 proj_utils.ctime())
    full_matrix_res = run_second_level(data_df)
    if output_path is not None:
        with open(output_path, 'wb') as f:
            pkl.dump(full_matrix_res, f)
Example #8
def type_classification_drop_mixed(ml_data,
                                   behavior_data,
                                   output_dir,
                                   models=None):
    print(
        '%s: Running classification on tinnitus type, dropping mixed type subjects'
        % pu.ctime())
    ml_copy = deepcopy(ml_data)
    if models is None:
        models = ['extra_trees']
    resample_methods = [None, 'over', 'under']
    t = pu.convert_tin_to_str(
        behavior_data['tinnitus_type'].values.astype(float), 'tinnitus_type')
    t_df = pd.DataFrame(t, index=ml_copy.index)
    mixed_indices = [i for i, s in enumerate(t) if s == 'PT_and_NBN']

    type_data = ml_copy.iloc[mixed_indices]
    ml_copy.drop(index=type_data.index, inplace=True)
    t_df.drop(index=type_data.index, inplace=True)
    target_cleaned = np.ravel(t_df.values)
    for model in models:
        for res in resample_methods:
            eeg_classify(ml_copy,
                         target_cleaned,
                         'tinnitus_type_no_mixed',
                         model,
                         output_dir,
                         resample=res)
Example #9
def side_classification_drop_asym(ml_data,
                                  behavior_data,
                                  output_dir,
                                  models=None):
    print(
        '%s: Running classification on tinnitus side, dropping asymmetrical subjects'
        % pu.ctime())
    ml_copy = deepcopy(ml_data)
    if models is None:
        models = ['extra_trees']
    resample_methods = [None, 'over', 'under']
    t = pu.convert_tin_to_str(
        behavior_data['tinnitus_side'].values.astype(float), 'tinnitus_side')
    t_df = pd.DataFrame(t, index=ml_copy.index)
    asym_indices = []
    for asym in ['Right>Left', 'Left>Right']:
        asym_indices.extend([i for i, s in enumerate(t) if asym == s])

    asym_data = ml_copy.iloc[asym_indices]
    ml_copy.drop(index=asym_data.index, inplace=True)
    t_df.drop(index=asym_data.index, inplace=True)
    target_cleaned = np.ravel(t_df.values)

    for model in models:
        for res in resample_methods:
            eeg_classify(ml_copy,
                         target_cleaned,
                         'tinnitus_side_no_asym',
                         model,
                         output_dir,
                         resample=res)
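

# A minimal alternative sketch (assumption, not from the project): the same
# subject-dropping step expressed as a boolean mask, avoiding the positional
# iloc/drop round-trip used above.
def drop_subjects_by_label(ml_data, labels, to_drop):
    import pandas as pd
    label_series = pd.Series(labels, index=ml_data.index)
    keep = ~label_series.isin(to_drop)
    return ml_data.loc[keep], label_series.loc[keep].values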
def pls_psqi_with_power(sessions, rois, fig_dir, run_check=False):
    logging.info('%s: Running PLSC on PSQI components with power' % pu.ctime())
    if not os.path.isdir(fig_dir):
        os.mkdir(fig_dir)

    meg_df = pd.read_excel('../data/MEG_infraslow_power.xlsx',
                           sheet_name='location',
                           index_col=0)
    sleep_df, sleep_variables = load_psqi_data()

    if run_check:
        pres, bres = run_pls(x=meg_df.values,
                             y=sleep_df.values,
                             output_dir=fig_dir)
    else:
        logging.info('%s: Loading raw output' % pu.ctime())
        with open(fig_dir + '/pls_sleep.pkl', 'rb') as file:
            res = pkl.load(file)
        pres = res['permutation tests']
        bres = res['bootstrap_tests']

    alpha = .001
    nv = len(np.where(pres['p_values'] < alpha)[0])
    latent_vars = ['LV_%d' % (v + 1) for v in range(nv)]
    pls_functions.plot_scree(eigs=pres['true_eigs'],
                             pvals=pres['p_values'],
                             alpha=alpha,
                             fname=fig_dir + '/scree.png')

    behavior_df = pd.DataFrame(bres['y_zscores'][:nv, :],
                               index=latent_vars,
                               columns=sleep_variables)
    behavior_df.to_excel(fig_dir + '/behavior_res.xlsx')
    brain_res = organize_brain_sals(bres['x_zscores'],
                                    rois,
                                    sessions,
                                    latent_vars,
                                    comp='sign')
    pu.save_xls(brain_res, fig_dir + '/brain_res.xlsx')

    conj = brain_res['brain_conjunction']
    plot_roi_saliences(rois, conj, fig_dir, maxv=120, create_rois=False)

    logging.info('%s: Finished' % pu.ctime())
def group_matrices_second_level(data_df, index_dict, output_dir=None):
    for key in index_dict:
        logging.info('%s: Running second level for %s' %
                     (proj_utils.ctime(), key))
        index_list = index_dict[key]
        res_df = run_second_level(
            create_new_df_from_indices(index_list, data_df))

        with open(os.path.join(output_dir, '%s.pkl' % key), 'wb') as file:
            pkl.dump(res_df, file)
def grand_pca(data, res_dir=None):
    if res_dir is None:
        res_dir = os.path.dirname(__file__)
    print('%s: Running grand PCA' % pu.ctime())
    pca = PCA(n_components=.99, whiten=True)
    zdata = StandardScaler().fit_transform(data)
    pca.fit(zdata)
    print(pca.n_components_)
    true_df = pretty_pca_res(pca)

    plot_scree(true_df,
               percent=False,
               fname=os.path.join(res_dir, 'grand_pca_scree.png'))
    true_df.to_excel(os.path.join(res_dir, 'grand_pca_res.xlsx'))
def run_ancova(connectivity_data,
               covariates,
               where_zero='ind',
               output_path=None):
    logging.info('%s: Running second level analysis' % proj_utils.ctime())
    intercept = np.zeros(len(covariates.index))

    # if where_zero is 'ind':
    #     covariates['intercept'] = intercept
    # elif where_zero is 'dep':
    #     pass

    res_df = pd.DataFrame(index=['F', 'P'], columns=list(connectivity_data))
    for c, conn_var in enumerate(list(connectivity_data)):
        if where_zero == 'ind':
            dep_ = connectivity_data[conn_var].values
            covariates['intercept'] = intercept
            ind_ = covariates.values
        elif where_zero == 'dep':
            dep_ = intercept
            covariates['predictor'] = connectivity_data[conn_var].values
            ind_ = covariates.values

        model = sm.OLS(dep_, ind_, hasconst=False)
        results = model.fit()

        res_df.at['F', conn_var] = results.fvalue
        res_df.at['P', conn_var] = results.f_pvalue

    if output_path is not None:
        with open(output_path, 'wb') as file:
            pkl.dump(res_df, file)

    logging.info('%s: Finished second level analysis' % proj_utils.ctime())

    return res_df
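

# Usage sketch with synthetic data (illustrative assumption, not from the
# project; it shows only the expected input shapes). Assumes the module
# context above (statsmodels as sm, proj_utils, logging) is available.
def _ancova_demo():
    rng = np.random.RandomState(13)
    conn = pd.DataFrame(rng.normal(size=(50, 4)),
                        columns=['c1', 'c2', 'c3', 'c4'])
    covs = pd.DataFrame(rng.normal(size=(50, 2)), columns=['age', 'sex'])
    return run_ancova(conn, covs, where_zero='ind')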
Example #14
def run_graph_theory(band, filelist, subjects, columns, outpath):
    thresholds = [0, .1, .2, .3, .4, .5, .6, .7, .8, .9]
    for thresh in thresholds:
        logging.info('%s: Running %s at %.2f' % (proj_utils.ctime(), band, thresh))
        s = 0
        graph_df = pd.DataFrame(index=subjects, columns=columns)
        for adj_file in filelist:
            if band in adj_file:
                with open(adj_file, 'rb') as f:
                    data_df = pkl.load(f)

                conn_res = calc_graph_measures(clean_df_to_numpy(data_df), thresh)
                for r, res_key in enumerate(conn_res):
                    graph_df.iloc[s, r] = conn_res[res_key]
                s += 1

        outfile = os.path.join(outpath, 'graph_results_%s_%.2f_thresh.pkl' % (band, thresh))
        with open(outfile, 'wb') as f:
            pkl.dump(graph_df, f)
Example #15
def first_level_pac(phase_amp_file, meg_subj, meg_sess, rois, attn_rois):
    # First-level analysis, BOLD-Alpha phase-amplitude coupling version
    attn_indices = []
    for aroi in attn_rois:
        for r, roi in enumerate(rois):
            if aroi == roi:
                attn_indices.append(r)

    # f = h5py.File(phase_amp_file)
    # subj_level = f[meg_subj[0]]
    # sess_level = subj_level[meg_sess[0]]
    # band_level = f[meg_subj[0] + '/' + meg_sess[0] + '/BOLD bandpass']  # sess_level['BOLD bandpass']
    # d_level = band_level['phase_data']
    # # print(list(band_level))
    # print(d_level[...].shape)
    # f.close()

    conns = ['%s_%s' % (r1, r2) for r2 in attn_rois for r1 in attn_rois]
    res = []
    for sess in meg_sess:
        sess_conns = ['%s %s' % (sess, c) for c in conns]
        sess_res = pd.DataFrame(index=meg_subj, columns=sess_conns)
        for subj in meg_subj:
            logging.info(' %s: Phase-amplitude coupling for %s %s' %
                         (pu.ctime(), sess, subj))
            f = h5py.File(phase_amp_file, 'r')
            bold_dset = f[subj + '/' + sess + '/BOLD bandpass/phase_data'][...]
            bold_data = bold_dset[:, attn_indices]

            alpha_dset = f[subj + '/' + sess + '/Alpha/amplitude_data'][...]
            alpha_data = alpha_dset[:, attn_indices]
            f.close()

            res_df = pac(bold_data, alpha_data, attn_rois)
            sess_res.loc[subj] = res_df.loc['pac'].values

        res.append(sess_res)

    return pd.concat(res, axis=1)


def grand_pca(data, res_dir=None):
    if res_dir is None:
        res_dir = os.path.dirname(__file__)
    print('%s: Running grand PCA' % pu.ctime())
    pca = PCA(n_components=.99, whiten=True)
    zdata = StandardScaler().fit_transform(data)
    pca.fit(zdata)
    print(pca.n_components_)
    true_df = pretty_pca_res(pca)

    plot_scree(true_df,
               percent=False,
               fname=os.path.join(res_dir, 'grand_pca_scree.png'))
    true_df.to_excel(os.path.join(res_dir, 'grand_pca_res.xlsx'))


if __name__ == "__main__":
    print('%s: Loading data' % pu.ctime())
    data = pu.load_connectivity_data()
    res_dir = os.path.abspath('./../results/pca')
    if not os.path.isdir(res_dir):
        os.mkdir(res_dir)

    grand_pca(data, res_dir=res_dir)

    pca_by_band(data, n_iters=0, res_dir=res_dir)
Example #17
    output_dir = './../data/eeg_regression/extra_trees/'
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    behavior_data, conn_data = pu.load_data_full_subjects()
    conn_data = conn_data.astype(float)

    categorical_variables = [
        'smoking', 'deanxit_antidepressants', 'rivotril_antianxiety', 'sex'
    ]
    categorical_data = behavior_data[categorical_variables]
    dummy_coded_categorical = pu.dummy_code_binary(categorical_data)
    covariate_data = pd.concat([behavior_data['age'], dummy_coded_categorical],
                               axis=1)

    ml_data = pd.concat([conn_data, covariate_data], axis=1)

    targets = [
        'loudness_VAS', 'distress_TQ', 'distress_VAS', 'anxiety_score',
        'depression_score'
    ]
    for target in targets:
        target_vect = behavior_data[target].values.astype(float)
        logging.info('%s: Running regression on %s' % (pu.ctime(), target))
        eeg_regression(eeg_data=ml_data,
                       target_data=target_vect,
                       target_type=target,
                       outdir=output_dir)
def eeg_multilabel_classify(ml_data, target_data, target_type, model, outdir):
    target_outdir = join(outdir, target_type)
    if not isdir(target_outdir):
        mkdir(target_outdir)

    feature_names = list(ml_data)

    # Create score dataframes, k-fold splitter
    n_splits = 10
    skf = model_selection.StratifiedKFold(n_splits=n_splits, shuffle=True,
                                          random_state=seed)

    # Oversample connectivity data, apply k-fold splitter
    """Note: LP-transformation has to be applied for resampling, even though we're not treating it as a OVR problem"""
    x_res, y_res, y_res_lp_transformed = resample_multilabel(
        ml_data, target_data)
    skf.get_n_splits(x_res, y_res_lp_transformed)
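    # The LP (label powerset) transformation maps each observed combination of
    # the anxiety and depression labels onto a single synthetic class, giving
    # the resampler and StratifiedKFold a flat 1-D target to stratify on;
    # y_res keeps the original two-column labels for per-domain scoring.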

    fold_count = 0
    classifier_objects, classifier_coefficients = {}, {}
    anx_balanced_acc, anx_chance_acc, anx_f1_scores = [], [], []
    dep_balanced_acc, dep_chance_acc, dep_f1_scores = [], [], []
    anx_cm_dict, anx_norm_cm_dict, dep_cm_dict, dep_norm_cm_dict = {}, {}, {}, {}

    for train_idx, test_idx in skf.split(x_res, y_res_lp_transformed):
        fold_count += 1
        print('%s: Running FOLD %d for %s' %
              (pu.ctime(), fold_count, target_type))
        foldname = 'Fold %02d' % fold_count

        # Stratified k-fold splitting
        x_train, x_test = x_res[train_idx], x_res[test_idx, :]
        y_train, y_test = y_res[train_idx], y_res[test_idx, :]

        if "categorical_sex_male" in feature_names:
            continuous_features = [
                f for f in feature_names if 'categorical' not in f
            ]
            continuous_indices = [
                ml_data.columns.get_loc(cont) for cont in continuous_features
            ]

            categorical_features = [
                f for f in feature_names if 'categorical' in f
            ]
            categorical_indices = [
                ml_data.columns.get_loc(cat) for cat in categorical_features
            ]

            x_train_feature_selected, x_test_feature_selected, cleaned_features = feature_selection_with_covariates(
                x_train, x_test, y_train, continuous_indices,
                categorical_indices, feature_names)
        else:
            x_train_feature_selected, x_test_feature_selected, cleaned_features = feature_selection_without_covariates(
                x_train, x_test, y_train, feature_names)

        if model == 'extra_trees':
            predicted, feature_importances, clf = extra_trees(
                x_train_feature_selected, y_train, x_test_feature_selected,
                cleaned_features)
            classifier_coefficients[foldname] = feature_importances

        elif model == 'knn':
            predicted, clf = knn(x_train_feature_selected, y_train,
                                 x_test_feature_selected)

        classifier_objects[foldname] = clf

        # Anxiety predictions
        yt, pred = y_test[:, 0], predicted[:, 0]
        balanced, chance, f1 = calc_scores(yt, pred)
        anx_balanced_acc.append(balanced)
        anx_chance_acc.append(chance)
        anx_f1_scores.append(f1)

        # Calculating fold confusion matrix
        anx_cm = metrics.confusion_matrix(yt, pred)
        anx_normalized_cm = anx_cm.astype('float') / anx_cm.sum(
            axis=1)[:, np.newaxis]

        classes = []
        for subclass_list in clf.classes_:
            classes.extend(list(subclass_list))
        anx_classes = [c for c in classes if 'anxiety' in c]
        dep_classes = [c for c in classes if 'depression' in c]

        anx_cm_dict[foldname] = pd.DataFrame(anx_cm,
                                             index=anx_classes,
                                             columns=anx_classes)
        anx_norm_cm_dict[foldname] = pd.DataFrame(anx_normalized_cm,
                                                  index=anx_classes,
                                                  columns=anx_classes)

        # Depression predictions
        yt, pred = y_test[:, 1], predicted[:, 1]
        balanced, chance, f1 = calc_scores(yt, pred)
        dep_balanced_acc.append(balanced)
        dep_chance_acc.append(chance)
        dep_f1_scores.append(f1)

        # Calculating fold confusion matrix
        dep_cm = metrics.confusion_matrix(yt, pred)
        dep_normalized_cm = dep_cm.astype('float') / dep_cm.sum(
            axis=1)[:, np.newaxis]

        dep_cm_dict[foldname] = pd.DataFrame(dep_cm,
                                             index=dep_classes,
                                             columns=dep_classes)
        dep_norm_cm_dict[foldname] = pd.DataFrame(dep_normalized_cm,
                                                  index=dep_classes,
                                                  columns=dep_classes)

    # Saving anxiety performance scores
    anx_f1_array = np.asarray(anx_f1_scores)
    anx_f1_class_averages = np.mean(anx_f1_array, axis=0)
    anx_f1_data = np.vstack((anx_f1_array, anx_f1_class_averages))

    balanced_acc_avg = np.mean(anx_balanced_acc)
    chance_acc_avg = np.mean(anx_chance_acc)

    anx_balanced_acc.append(balanced_acc_avg)
    anx_chance_acc.append(chance_acc_avg)

    accuracy_data = np.asarray([anx_balanced_acc, anx_chance_acc]).T

    rownames = ['Fold %02d' % (n + 1) for n in range(n_splits)]
    rownames.append('Average')
    score_df = pd.DataFrame(data=accuracy_data,
                            index=rownames,
                            columns=['Balanced accuracy', 'Chance accuracy'])

    f1_df = pd.DataFrame(data=np.asarray(anx_f1_data),
                         index=rownames,
                         columns=anx_classes)
    scores_dict = {'accuracy scores': score_df, 'f1 scores': f1_df}

    pu.save_xls(scores_dict, join(target_outdir, 'anxiety_performance.xlsx'))

    # Saving performance scores
    dep_f1_array = np.asarray(dep_f1_scores)
    dep_f1_class_averages = np.mean(dep_f1_array, axis=0)
    dep_f1_data = np.vstack((dep_f1_array, dep_f1_class_averages))

    balanced_acc_avg = np.mean(dep_balanced_acc)
    chance_acc_avg = np.mean(dep_chance_acc)

    dep_balanced_acc.append(balanced_acc_avg)
    dep_chance_acc.append(chance_acc_avg)

    accuracy_data = np.asarray([dep_balanced_acc, dep_chance_acc]).T

    rownames = ['Fold %02d' % (n + 1) for n in range(n_splits)]
    rownames.append('Average')
    score_df = pd.DataFrame(data=accuracy_data,
                            index=rownames,
                            columns=['Balanced accuracy', 'Chance accuracy'])

    f1_df = pd.DataFrame(data=np.asarray(dep_f1_data),
                         index=rownames,
                         columns=dep_classes)
    scores_dict = {'accuracy scores': score_df, 'f1 scores': f1_df}

    pu.save_xls(scores_dict, join(target_outdir,
                                  'depression_performance.xlsx'))

    # Saving coefficients
    if bool(classifier_coefficients):
        pu.save_xls(classifier_coefficients,
                    join(target_outdir, 'coefficients.xlsx'))

    # Saving confusion matrices
    pu.save_xls(anx_cm_dict,
                join(target_outdir, 'anxiety_confusion_matrices.xlsx'))
    pu.save_xls(
        anx_norm_cm_dict,
        join(target_outdir, 'anxiety_confusion_matrices_normalized.xlsx'))

    pu.save_xls(dep_cm_dict,
                join(target_outdir, 'depression_confusion_matrices.xlsx'))
    pu.save_xls(
        dep_norm_cm_dict,
        join(target_outdir, 'depression_confusion_matrices_normalized.xlsx'))

    # Saving classifier object
    with open(join(target_outdir, 'classifier_object.pkl'), 'wb') as file:
        pkl.dump(classifier_objects, file)


print('%s: Loading data' % pu.ctime())
behavior_data, conn_data = pu.load_data_full_subjects()
conn_data = conn_data.astype(float)

categorical_variables = [
    'smoking', 'deanxit_antidepressants', 'rivotril_antianxiety', 'sex'
]
categorical_data = behavior_data[categorical_variables]
dummy_coded_categorical = pu.dummy_code_binary(categorical_data)
covariate_data = pd.concat([behavior_data['age'], dummy_coded_categorical],
                           axis=1)

ml_data = pd.concat([conn_data, covariate_data], axis=1)
multilabel_models = ['extra_trees', 'knn']
for model in multilabel_models:
    output_dir = './../data/%s/' % model
Example #20
    ax.tick_params(axis='both', labelsize='large')

    title = "ROI distribution of Cronbach's alpha values"
    ax.set_title(title, fontsize='xx-large')

    if fname is not None:
        fig.savefig(fname, bbox_inches='tight')


if __name__ == "__main__":
    import mPLSC_functions as mf
    import sys
    sys.path.append("..")
    import proj_utils as pu

    print('%s: Starting...' % pu.ctime())
    pdir = pu._get_proj_dir()
    pdObj = pu.proj_data()
    rois = pdObj.roiLabels
    meg_subj, meg_sess = pdObj.get_meg_metadata()
    mri_subj, mri_sess = pdObj.get_mri_metadata()
    subj_overlap = [s for s in mri_subj if s in meg_subj]

    meg_path = pdir + '/data/downsampled_MEG_truncated.hdf5'
    mri_path = pdir + '/data/multimodal_HCP.hdf5'
    roi_path = pdir + '/data/glasser_atlas/'
    fig_path = pdir + '/figures/cron_alpha'

    print('%s: Extracting average power in each ROI and subject, MRI' %
          pu.ctime())
    mri_data = _extract_average_power(mri_path, mri_sess, subj_overlap, rois,
Example #21
import matplotlib.pyplot as plt

import sys
sys.path.append("..")
import proj_utils as pu


def cron_alpha(array):
    k = array.shape[1]  #Columns are the groups
    variances_sum = np.sum(np.var(array, axis=0, ddof=1))
    variances_total = np.var(np.sum(array, axis=1), ddof=1)

    return (k / (k - 1)) * (1 - (variances_sum / variances_total))
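

# Illustrative sanity check (not in the original script; assumes numpy is
# imported as np above, as cron_alpha requires): columns measuring the same
# underlying signal should yield an alpha close to 1.
_rng = np.random.RandomState(0)
_base = _rng.normal(size=(100, 1))
_demo = np.hstack([_base + .1 * _rng.normal(size=(100, 1)) for _ in range(5)])
print('%s: cron_alpha sanity check: %.3f' % (pu.ctime(), cron_alpha(_demo)))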


print('%s: Starting' % pu.ctime())

print('%s: Getting metadata, parameters' % pu.ctime())
pdir = pu._get_proj_dir()

pdObj = pu.proj_data()
meg_subj, meg_sess = pdObj.get_meg_metadata()
mri_subj, mri_sess = pdObj.get_mri_metadata()
subj_overlap = [s for s in mri_subj if s in meg_subj]

pData = pdObj.get_data()
rois = pData['roiLabels']
band_dict = pData['bands']

slow_bands = ['BOLD', 'Slow 4', 'Slow 3', 'Slow 2', 'Slow 1']  #rows
supra_bands = ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma']  #cols
Example #22
            average_power = np.mean(fft_power, axis=0)
            session_data.append(average_power)

        session_df = pd.DataFrame(np.asarray(session_data),
                                  index=subjects,
                                  columns=rois)
        power_data[sess] = session_df

    return power_data


import sys
sys.path.append("..")
import proj_utils as pu

print('%s: Starting...' % pu.ctime())
pdir = pu._get_proj_dir()
pdObj = pu.proj_data()
rois = pdObj.roiLabels
meg_subj, meg_sess = pdObj.get_meg_metadata()
mri_subj, mri_sess = pdObj.get_mri_metadata()
subj_overlap = [s for s in mri_subj if s in meg_subj]

meg_path = pdir + '/data/downsampled_MEG_truncated.hdf5'
mri_path = pdir + '/data/multimodal_HCP.hdf5'
roi_path = pdir + '/data/glasser_atlas/'
fig_path = pdir + '/figures/cron_alpha'

print('%s: BP - Extracting average power in each ROI and subject, MRI' %
      pu.ctime())
mri_data = _extract_average_power(mri_path, mri_sess, subj_overlap, rois,
Example #23
def calc_phase_amp_power(how='location'):
    p_data = pu.proj_data()
    subjects, sessions = p_data.get_meg_metadata()
    rois = p_data.roiLabels
    fs = 500

    if how == 'location':
        df_list = []
        for session in sessions:
            session_df = pd.DataFrame(index=subjects)
            for subject in subjects:
                prog = "%s - %s" % (session, subject)
                print('%s: Calculating infraslow power for %s with location' % (pu.ctime(), prog))

                database = h5py.File('../data/multimodal_HCP.hdf5', 'r')
                dset = database[subject + '/MEG/' + session + '/timeseries'][...]
                for ROIindex in range(len(rois)):
                    data = dset[:, ROIindex]
                    label = rois[ROIindex]

                    # Get real amplitudes of FFT (only in positive
                    # frequencies), squared to get power
                    fft_power = np.absolute(np.fft.rfft(data)) ** 2

                    # Get frequencies for amplitudes in Hz
                    fft_freq = np.fft.rfftfreq(len(data), 1.0 / fs)
                    infraslow_band = (.01, .1)  # ('BOLD bandpass', (.01, .1))

                    freq_ix = np.where((fft_freq >= infraslow_band[0]) &
                                       (fft_freq <= infraslow_band[1]))[0]
                    colname = '%s %s' % (session, label)
                    if colname not in session_df:
                        session_df[colname] = np.nan

                    avg_power = np.mean(fft_power[freq_ix])
                    session_df.loc[subject, colname] = avg_power

                database.close()

            df_list.append(session_df)

        grand_df = pd.concat(df_list, axis=1)
        return grand_df

    elif how == 'bandpass':
        df_list = []
        for sess in sessions:
            session_colnames = ['%s %s' % (sess, r) for r in rois]
            session_df = pd.DataFrame(index=subjects, columns=session_colnames)
            for subj in subjects:
                prog = "%s - %s" % (sess, subj)
                print('%s: Calculating infraslow power for %s with bandpass' % (pu.ctime(), prog))

                f = h5py.File('../data/multimodal_HCP.hdf5', 'r')
                data = f[subj + '/MEG/' + sess + '/timeseries'][...]
                f.close()

                data = _butter_filter(data, fs=500, cutoffs=[.01, .1])

                fft_power = np.absolute(np.fft.rfft(data, axis=0)) ** 2
                average_power = np.mean(fft_power, axis=0)

                session_df.loc[subj] = average_power

            df_list.append(session_df)

        grand_df = pd.concat(df_list, axis=1)
        return grand_df

    elif how == 'truncated':
        df_list = []
        for sess in sessions:
            session_colnames = ['%s %s' % (sess, r) for r in rois]
            session_df = pd.DataFrame(index=subjects, columns=session_colnames)
            for subj in subjects:
                f = h5py.File('../data/downsampled_MEG_truncated.hdf5', 'r')
                data = f[subj + '/MEG/' + sess + '/resampled_truncated'][...]
                f.close()

                data = _butter_filter(data, fs=500, cutoffs=[.01, .1])

                fft_power = np.absolute(np.fft.rfft(data, axis=0)) ** 2
                average_power = np.mean(fft_power, axis=0)

                session_df.loc[subj] = average_power

            df_list.append(session_df)

        grand_df = pd.concat(df_list, axis=1)
        return grand_df

    print('%s: Finished' % pu.ctime())
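
# Minimal standalone sketch (restates the rfft logic above; the function name
# is an assumption, not from the project) of the per-ROI infraslow power
# computation for a (samples x rois) array `ts` sampled at fs Hz:
def infraslow_power(ts, fs=500, band=(.01, .1)):
    import numpy as np
    fft_power = np.absolute(np.fft.rfft(ts, axis=0)) ** 2  # power spectrum
    fft_freq = np.fft.rfftfreq(ts.shape[0], 1.0 / fs)      # frequencies (Hz)
    freq_ix = np.where((fft_freq >= band[0]) & (fft_freq <= band[1]))[0]
    return np.mean(fft_power[freq_ix, :], axis=0)          # mean power per ROI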
            tables[table_name] = cfc_table

    if outfile is not None:
        mf.save_xls(tables, outfile)

    return tables


if __name__ == '__main__':
    from boredStats import pls_tools

    import sys
    sys.path.append("..")
    import proj_utils as pu

    print('%s: Loading data' % pu.ctime())
    pdir = pu._get_proj_dir()
    ddir = pdir + '/data/'
    roi_path = ddir + '/glasser_atlas/'
    fig_path = pdir + '/figures/mPLSC_delta_theta/'

    pdObj = pu.proj_data()
    rois = pdObj.roiLabels
    colors = pdObj.colors
    meg_subj, meg_sessions = pdObj.get_meg_metadata()
    mri_subj, mri_sess = pdObj.get_mri_metadata()
    subj = [s for s in mri_subj if s in meg_subj]
    meg_sess = ['Session1', 'Session2', 'Session3']

    pls_path = ddir + 'mPLSC_delta_theta_cfc.pkl'
    # check_0 = input('Run mPLSC? y/n ')
"""
Created on Mon Mar 25 14:39:26 2019
"""

import os
import h5py
import numpy as np
import pandas as pd
import pickle as pkl
import mPLSC_functions as mf
from boredStats import pls_tools

import sys
sys.path.append("..")
import proj_utils as pu

print('%s: Loading data' % pu.ctime())
pdir = pu._get_proj_dir()
pdObj = pu.proj_data()
rois = pdObj.roiLabels
colors = pdObj.colors
meg_subj, meg_sessions = pdObj.get_meg_metadata()
mri_subj, mri_sess = pdObj.get_mri_metadata()
subjects = [s for s in mri_subj if s in meg_subj]

bands = ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma']
meg_sess = ['Session1', 'Session2', 'Session3']

ddir = pdir + '/data'
roi_path = ddir + '/glasser_atlas/'
fig_path = pdir + '/figures/mPLSC_cfc/'
    meg_list = [meg_data[sess] for sess in list(meg_data)]
    meg_df = pd.concat(meg_list, axis=1)
    x = meg_df.values

    sleep_variables = [
        'PSQI_Comp1', 'PSQI_Comp2', 'PSQI_Comp3', 'PSQI_Comp4', 'PSQI_Comp5',
        'PSQI_Comp6', 'PSQI_Comp7'
    ]
    behavior_raw = pd.read_excel('../data/hcp_behavioral.xlsx',
                                 index_col=0,
                                 sheet_name='cleaned')

    sleep_df = behavior_raw[sleep_variables]
    y = sleep_df.values.astype(float)

    logging.info('%s: Running PLSC' % pu.ctime())
    p = PLSC(n_iters=1000, center_scale='ss1')
    # pres = p.permutation_tests(x, y)
    #
    # eigs = pres['true_eigs']
    # print(eigs)
    # pvals = pres['p_values']
    # print(pvals)
    # plot_scree(eigs=eigs, pvals=pvals)

    bres = p.bootstrap_tests(x, y)
    print(bres['y_zscores'])
    print(bres['x_zscores'])

    logging.info('%s: Finished' % pu.ctime())
Example #27
    pdObj = pu.proj_data()
    rois = pdObj.roiLabels
    colors = pdObj.colors
    meg_subj, meg_sessions = pdObj.get_meg_metadata()
    print(len(meg_subj))
    mri_subj, mri_sess = pdObj.get_mri_metadata()
    print(len(mri_subj))
    subject_overlap = [s for s in mri_subj if s in meg_subj]

    output_dir = ddir + '/mPLSC/'
    alpha = .001
    z_test = 0
    output_file = ddir + '/mPLSC/mPLSC_power_all_sessions.pkl'  #  '/mPLSC/mPLSC_power_all_sessions_sleep_only.pkl'# '/mPLSC/mPLSC_power_all_sessions_sustained_attention.pkl'
    check = input('Run multitable PLS-C? y/n ')
    if check == 'y':
        print('%s: Building subtables of power data for MEG' % pu.ctime())
        meg_data = mf.extract_average_power(hdf5_file=ddir +
                                            '/downsampled_MEG_truncated.hdf5',
                                            sessions=meg_sessions,
                                            subjects=subject_overlap,
                                            rois=rois,
                                            image_type='MEG',
                                            bp=True)
        x_tables = [meg_data[session] for session in list(meg_data)]

        print('%s: Building subtables of behavior data' % pu.ctime())
        behavior_metadata = pd.read_csv(ddir + '/sustained_attention_vars.txt',
                                        delimiter='\t',
                                        header=None)
        behavior_metadata.rename(dict(zip([0, 1], ['category', 'name'])),
                                 axis='columns',
        for roi in rois:
            hdf5 = h5py.File(hdf5_path, 'r')
            rval_path = sess + '/' + subj + '/' + roi + '/r_vals'

            dset = hdf5[rval_path][...]
            within_subj_data.append(dset)
            hdf5.close()
            
        within_subj_array = np.arctanh(np.asarray(within_subj_data))
        between_subj_data.append(within_subj_array)
        
    between_subj_array = np.asarray(between_subj_data)
    
    return between_subj_array

print('%s: Getting metadata, parameters' % pu.ctime())
pdir = pu._get_proj_dir()

pdObj = pu.proj_data()
meg_subj, meg_sess = pdObj.get_meg_metadata()
mri_subj, mri_sess = pdObj.get_mri_metadata()
subj_overlap = [s for s in mri_subj if s in meg_subj]

slow_bands = ['BOLD', 'Slow 4', 'Slow 3', 'Slow 2', 'Slow 1'] #rows
reg_bands = ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma'] #cols

pData = pdObj.get_data()
rois = pData['roiLabels']

print('%s: Getting behavior data' % pu.ctime())
with open(pdir + '/data/cog_emotion_variables.txt', 'r') as boi:
Example #29
def _plot_violin(dataframe):
    sns.set_style('darkgrid')
    sns.set_context('notebook', font_scale=2)
    fig = sns.catplot(x='Phase bands',
                      y='Cross-Frequency Coupling',
                      height=15,
                      aspect=1.78,
                      data=dataframe,
                      hue='Amplitude bands',
                      kind='violin')
    fig.set(yscale='log')
    fig.set(ylim=(.001, .1))


print('%s: Getting metadata, parameters' % pu.ctime())
pdir = pu._get_proj_dir()

pdObj = pu.proj_data()
meg_subj, meg_sess = pdObj.get_meg_metadata()
mri_subj, mri_sess = pdObj.get_mri_metadata()
subj_overlap = [s for s in mri_subj if s in meg_subj]

pData = pdObj.get_data()
rois = pData['roiLabels']
band_dict = pData['bands']

slow_bands = ['BOLD', 'Slow 4', 'Slow 3', 'Slow 2', 'Slow 1']  #rows
reg_bands = ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma']  #cols

#--- Infraslow results ---#
Example #30
    for reg_index, reg in enumerate(reg_bands):
        reg_group = meg_dataset.get(reg)
        reg_ts = reg_group.get('amplitude_data')[:, roi_index]

        r_val, p_val = pac.circCorr(slow_ts, reg_ts)
        r_mat[reg_index] = r_val
        p_mat[reg_index] = p_val

    return r_mat, p_mat
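
# pac.circCorr above comes from the project's helpers. For reference, a
# standard circular-linear correlation (Mardia & Jupp) is sketched below;
# this is an assumption about what circCorr computes, not the project's code.
def circ_corr_sketch(phase, x):
    import numpy as np
    from scipy import stats
    rxs = stats.pearsonr(x, np.sin(phase))[0]  # corr(x, sin(phase))
    rxc = stats.pearsonr(x, np.cos(phase))[0]  # corr(x, cos(phase))
    rcs = stats.pearsonr(np.sin(phase), np.cos(phase))[0]
    r2 = (rxc ** 2 + rxs ** 2 - 2 * rxc * rxs * rcs) / (1 - rcs ** 2)
    p_val = 1 - stats.chi2.cdf(len(phase) * r2, df=2)  # n*R^2 ~ chi2(2) under H0
    return np.sqrt(r2), p_val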


import sys
sys.path.append("..")
import proj_utils as pu
start = pu.ctime()
print('%s: Starting' % pu.ctime())

print('%s: Getting metadata, parameters' % pu.ctime())
pdir = pu._get_proj_dir()

pdObj = pu.proj_data()
meg_subj, meg_sess = pdObj.get_meg_metadata()
mri_subj, mri_sess = pdObj.get_mri_metadata()
subj_overlap = [s for s in mri_subj if s in meg_subj]

pData = pdObj.get_data()
rois = pData['roiLabels']
database = pData['database']
band_dict = pData['bands']
min_meg_length = 111980  #118088