# Example #1
0
def main():
    """Run the graph theory analyses for every frequency band.

    Loads the connectivity data (with ``drop_behavior=True``), makes sure the
    subject adjacency-matrix directory and the results directory exist, and
    then calls ``run_graph_theory`` once per band with the sorted list of
    files in the adjacency directory whose name contains the band name.

    The output column names are taken from iterating the result of
    ``test_graph_functions()``.
    """
    logging.info('%s: Starting script' % proj_utils.ctime())

    frequency_bands = ['delta', 'theta', 'alpha', 'beta', 'gamma']
    connectivity = proj_utils.load_connectivity_data(drop_behavior=True)

    adjacency_dir = os.path.abspath('./../data/subject_adjacency_matrices/')
    if not os.path.isdir(adjacency_dir):
        os.mkdir(adjacency_dir)

    # The adjacency-matrix generation step is currently disabled; the files in
    # adjacency_dir are expected to be there already.
    # print('%s: Creating adjacency dicts' % proj_utils.ctime())
    # adj_dict = create_adjacency_dict(data_df, bands)

    # print('%s: Creating subject adjacency matrices' % proj_utils.ctime())
    # rois = parse_roi_names(list(data_df))
    # create_subj_adjacency_mats(adj_dict, bands, rois, dpath)

    reference_result = test_graph_functions()

    logging.info('%s: Running graph theory analyses' % proj_utils.ctime())
    measure_names = list(reference_result)
    # One integer id per row of the connectivity data.
    subject_ids = np.arange(len(connectivity.index))
    results_dir = './../data/graph_theory_res/'
    if not os.path.isdir(results_dir):
        os.mkdir(results_dir)

    for band_name in frequency_bands:
        # Substring match on the file name selects this band's matrices.
        band_files = []
        for entry in os.listdir(adjacency_dir):
            if band_name in entry:
                band_files.append(os.path.join(adjacency_dir, entry))
        band_files.sort()
        run_graph_theory(band_name, band_files, subject_ids, measure_names,
                         results_dir)

    logging.info('%s: Finished' % proj_utils.ctime())
def main():
    """Run the second-level ANCOVA and print Bonferroni-corrected results.

    Steps:
      1. Ensure the output directory ``./../data/eeg_second_level/`` exists.
      2. Build the covariate table from the behavior data via
         ``dummy_code_categorical`` and drop the ML target columns from it.
      3. Keep only the connectivity rows whose index label appears in the
         covariate table's index.
      4. Run ``run_ancova`` and write/print its result; the output file name
         depends on ``wz`` ('ind' vs. anything else).
      5. Apply ``multiple_correction_ancova_res`` (Bonferroni, alpha=1e-20)
         to the saved result and print the corrected p-values and the number
         of non-zero entries in the returned truth array.
    """
    output_dir = './../data/eeg_second_level/'
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    behavior = proj_utils.load_behavior_data()
    ml_targets = ['loudness_VAS', 'distress_TQ', 'distress_VAS']

    covariates_df = dummy_code_categorical(behavior)
    covariates_df.drop(labels=ml_targets, axis=1, inplace=True)

    data_df = proj_utils.load_connectivity_data()
    data_df_filt = data_df.filter(items=covariates_df.index, axis=0)

    wz = 'dep'
    # Compare by value: `is` tests object identity, which for strings only
    # works by accident of interning and raises a SyntaxWarning on 3.8+.
    if wz == 'ind':
        output_path = os.path.join(
            output_dir, 'second_level_f_tests_zero_as_independent_var.pkl')
    else:
        output_path = os.path.join(
            output_dir, 'second_level_f_tests_zero_as_dependent_var.pkl')

    res = run_ancova(connectivity_data=data_df_filt,
                     covariates=covariates_df,
                     where_zero=wz,
                     output_path=output_path)
    print(res)

    truth, corrected_p_df = multiple_correction_ancova_res(output_path,
                                                           method='bonferroni',
                                                           alpha=1e-20)
    print(corrected_p_df)
    print(np.count_nonzero(truth))
# Example #3
0
def load_data():
    """Load behavior data and the matching subset of connectivity data.

    Returns:
        A ``(behavior, connectivity)`` pair. The connectivity frame is
        restricted to the rows whose index label also appears in the behavior
        frame's index, so EEG subjects without behavior data are dropped.
    """
    behavior = pu.load_behavior_data()
    connectivity = pu.load_connectivity_data()
    # Row-wise filter (axis=0) on the behavior frame's index labels.
    matched_connectivity = connectivity.filter(items=behavior.index, axis=0)
    return behavior, matched_connectivity
def plot_extra_trees_features():
    """Collect extra-trees feature values into per-band ROI-by-ROI matrices.

    Connectivity column names are split on ``'_'`` and read as
    ``<band>_<roi1>_<roi2>``; the distinct bands and ROIs found this way
    define the set of matrices and their row/column labels.

    For each tinnitus variable, every entry of ``./../data/<variable>/``
    whose name contains ``'extra_trees'`` is treated as a model directory:
    each sheet of its ``coefficients.xlsx`` is turned into a dict
    ``band -> DataFrame`` and the resulting ``sheet -> band -> DataFrame``
    mapping is pickled to ``connectivity_matrices.pkl`` in that directory.

    Returns:
        None. Results are written to disk.
    """
    variables = ['tinnitus_side', 'tinnitus_type', 'TQ_grade', 'TQ_high_low']

    conn_data = pu.load_connectivity_data()
    band_list, roi_list = [], []
    for column in list(conn_data):
        parts = column.split('_')
        band_list.append(parts[0])
        roi_list.append(parts[1])
        roi_list.append(parts[2])

    # Order-preserving de-duplication (first occurrence wins) that does not
    # rely on pd.unique accepting plain Python lists.
    bands = list(dict.fromkeys(band_list))
    rois = list(dict.fromkeys(roi_list))

    for tin_variable in variables:
        output_dir = './../data/%s/' % tin_variable
        for model in listdir(output_dir):
            if 'extra_trees' not in model:
                continue
            tin_dir = join(output_dir, model)

            # Context manager closes the workbook handle once all sheets
            # have been read.
            with pd.ExcelFile(join(tin_dir, 'coefficients.xlsx')) as xls:
                fold_data = {}
                for sheet in xls.sheet_names:
                    feature_df = pd.read_excel(xls,
                                               sheet_name=sheet,
                                               index_col=0)
                    fold_data[sheet] = _fold_band_matrices(
                        feature_df, bands, rois)

            with open(join(tin_dir, 'connectivity_matrices.pkl'),
                      'wb') as file:
                pkl.dump(obj=fold_data, file=file)

    # NOTE(review): the two statements below read a module-level `data` and do
    # not use anything computed above; they look like the tail of a different
    # routine. Kept unchanged -- confirm whether they belong here.
    score_df = find_n_components(data, step=5)
    score_df.to_excel('./pca_cross_val_scores_test.xlsx')


def _fold_band_matrices(feature_df, bands, rois):
    """Build one ROI-by-ROI matrix per band from one sheet of feature values.

    Args:
        feature_df: DataFrame whose column names are feature names; the value
            stored for a feature is the first row of its column.
        bands: Band names; one matrix is created per band.
        rois: Labels used for both the rows and the columns of each matrix.

    Returns:
        dict mapping each band to its own DataFrame. Cells with no matching
        feature stay empty (NaN).
    """
    # Every band gets its OWN frame. Binding all bands to one shared frame
    # (and deep-copying the dict, which preserves that sharing) would make
    # the writes of all bands land in the same matrix.
    band_matrices = {
        band: pd.DataFrame(index=rois, columns=rois) for band in bands
    }
    for feat in list(feature_df):
        feat_parts = feat.split('_')
        # Only connectivity features carry a band token in their name.
        if not any(band in feat_parts for band in bands):
            continue
        feat_band, r1, r2 = feat_parts[0], feat_parts[1], feat_parts[2]
        # A single .loc[row, col] call writes into the matrix itself; the
        # chained form .loc[row][col] may assign to a temporary row instead.
        band_matrices[feat_band].loc[r1, r2] = feature_df[feat].values[0]
    return band_matrices


def grand_pca(data, res_dir=None):
    """Fit a single PCA on the standardized data and save its summary.

    Args:
        data: Input passed to ``StandardScaler().fit_transform``.
        res_dir: Directory for the output files. When ``None``, the directory
            containing this source file is used.

    Side effects:
        Prints a progress line and the fitted number of components, writes
        ``grand_pca_scree.png`` via ``plot_scree`` and ``grand_pca_res.xlsx``
        via ``to_excel`` into the output directory.
    """
    out_dir = os.path.dirname(__file__) if res_dir is None else res_dir
    print(pu.ctime() + 'Running grand PCA')

    # NOTE(review): presumably scikit-learn's PCA, where a fractional
    # n_components selects components by explained variance -- confirm.
    model = PCA(n_components=.99, whiten=True)
    standardized = StandardScaler().fit_transform(data)
    model.fit(standardized)
    print(model.n_components_)

    summary = pretty_pca_res(model)
    scree_path = os.path.join(out_dir, 'grand_pca_scree.png')
    plot_scree(summary, percent=False, fname=scree_path)

    table_path = os.path.join(out_dir, 'grand_pca_res.xlsx')
    summary.to_excel(table_path)


if __name__ == "__main__":
    # Script entry point: load the connectivity data once, make sure the PCA
    # results directory exists, then run the grand PCA and the per-band PCA.
    print(pu.ctime() + 'Loading data')
    data = pu.load_connectivity_data()
    res_dir = os.path.abspath('./../results/pca')
    if not os.path.isdir(res_dir):
        # os.mkdir creates only the last path component, so the parent
        # directory of res_dir must already exist.
        os.mkdir(res_dir)

    # NOTE(review): res_dir is not passed here, so grand_pca uses its default
    # output location (the directory of this source file) rather than the
    # results directory prepared above -- confirm this is intended.
    grand_pca(data)

    pca_by_band(data, n_iters=0, res_dir=res_dir)