def main():
    options = tools.parse_options()

    data = options.data
    additional_data = options.additional_data

    file = open(additional_data + "/subject_name.txt", "r")
    ids = file.read().split()
    ids = [int(float(id)) for id in ids]
    gdf = pd.read_csv(additional_data + '/n300.csv')
    gdf['subject_cont'] = ids
    gdf = gdf[['KJØNN', 'subject_cont', 'ALDER']].copy()
    gdf = gdf.rename(columns={'KJØNN': 'gender', 'ALDER': 'age'})

    mat_files = os.listdir(data)
    n_back_file = list(filter(lambda x: 'nBack' in x, mat_files))[0]
    face_file = list(filter(lambda x: 'Faces' in x, mat_files))[0]
    contrasts = [n_back_file, face_file]
    t_test_scores = pd.DataFrame(
        columns=['statistic', 'pvalue', 'user group', 'task_name'])
    scoresdf = pd.DataFrame(
        columns=['beta', 'pvalue', 'Labels', 'variable', 'task_name'])
    params = ['age', 'gender']
    print(contrasts)
    for mat_file in contrasts:
        for param in params:
            df1, df2, df3, contrast_name = tools.data_extraction(
                data, 2, mat_file)
            df1.fillna(df1.mean(), inplace=True)
            df2.fillna(df2.mean(), inplace=True)
            df3.fillna(df3.mean(), inplace=True)

            df1 = pd.merge(df1, gdf, on=['subject_cont'], how='inner')
            df2 = pd.merge(df2, gdf, on=['subject_cont'], how='inner')
            df3 = pd.merge(df3, gdf, on=['subject_cont'], how='inner')

            df = df1.append(df2).append(df3)
            df = df.loc[:, df.columns.intersection([param, 'label'])]

            df12 = df1.append(df2)
            df23 = df2.append(df3)
            df31 = df3.append(df1)

            task_name = mat_file.split("_")[0]
            if param == "age":
                plot_age_box_plot(df1, df2, df3, df, task_name, options)

            scoresdf = run_glm_fit(df12, 12, scoresdf, param, task_name)
            scoresdf = run_glm_fit(df23, 23, scoresdf, param, task_name)
            scoresdf = run_glm_fit(df31, 31, scoresdf, param, task_name)

            t_test_scores = t_test(df12, "BD-Sc", t_test_scores, task_name)
            t_test_scores = t_test(df23, "Sc-Co", t_test_scores, task_name)
            t_test_scores = t_test(df31, "Co-BD", t_test_scores, task_name)

        plot_age_dist(df1, df2, df3, task_name, options)
    plot_gender_dist(options)
    print("\nGLM fit with age and gender variable used individually\n")
    print(scoresdf)

    print("\n\nT-test scores to analyse age distribution\n")
    print(t_test_scores)
def main():
    options = tools.parse_options()
    start = time.time()
    if os.path.isfile(
            options.input
    ):  # if results are already stored then use that as input
        scoresdf = pd.read_csv(options.input)
    else:  # in previous experiments, if results are not stored then create new dataframe to store the results
        scoresdf = pd.DataFrame(columns=[
            'Score', 'Type', 'Model', 'Classifier', 'Contrast_name',
            'Balanced_accuracy'
        ])

    mat_files = os.listdir(options.data)
    contrast_list = list(
        filter(None, filter(lambda x: re.search('.*_.....mat', x), mat_files)))
    n_back_list = list(
        filter(lambda x: 'nBack' in x and ('2' in x or '3' in x),
               contrast_list))
    faces_list = list(
        filter(lambda x: 'Faces' in x and ('5' in x or '4' in x or '3' in x),
               contrast_list))
    relevant_contrast_list = n_back_list + faces_list  # extracted nBack 2,3 and Faces 3,4,5 contrasts

    # Age and gender information along with subject id is extracted
    file = open(options.additional_data + "/subject_name.txt", "r")
    ids = file.read().split()
    ids = [int(float(id)) for id in ids]
    edf = pd.read_csv(options.additional_data + '/n300.csv')
    edf['subject_cont'] = ids
    edf = edf[['KJØNN', 'subject_cont', 'ALDER']]
    edf = edf.rename(columns={'KJØNN': 'gender', 'ALDER': 'age'})

    for contrast in relevant_contrast_list:
        contrast_name = contrast.split(".")[0]
        if len(scoresdf[scoresdf["Contrast_name"] == contrast_name]):
            continue

        for nClass in range(2, 4, 1):
            #  Considering all classes: Bipolar, Schizo and Control
            if nClass == 3:
                df, contrast_name = tools.data_extraction(
                    options.data, nClass, contrast, options.data_type)
                df = mlu.missing_values(df)
                df = pd.merge(df, edf, on=['subject_cont'], how='inner')
                scoresdf = run_no_gender_ml(df, options, 123, scoresdf,
                                            contrast_name)

            #  Considering combination of 2 classes: Bipolar-Schizo, Schizo-Control and Control-Bipolar
            elif nClass == 2:
                df1, df2, df3, contrast_name = tools.data_extraction(
                    options.data, nClass, contrast, options.data_type)

                # Combining two pairs off all combination
                df12 = df1.append(df2)
                df23 = df2.append(df3)
                df31 = df3.append(df1)

                # Handle missing values
                df12 = mlu.missing_values(df12)
                df23 = mlu.missing_values(df23)
                df31 = mlu.missing_values(df31)

                # Adding age and gender data for Standardization purpose. This additional data will be removed in
                # data preprocessing
                df12 = pd.merge(df12, edf, on=['subject_cont'], how='inner')
                df23 = pd.merge(df23, edf, on=['subject_cont'], how='inner')
                df31 = pd.merge(df31, edf, on=['subject_cont'], how='inner')

                scoresdf = run_no_gender_ml(df12, options, 12, scoresdf,
                                            contrast_name)
                scoresdf = run_no_gender_ml(df23, options, 23, scoresdf,
                                            contrast_name)
                scoresdf = run_no_gender_ml(df31, options, 31, scoresdf,
                                            contrast_name)

        scoresdf.to_csv(options.output + "no_gender_individual.csv",
                        index=False)

    print(
        "It took %s seconds to run %s iterations for %s model after removing gender effect"
        % (time.time() - start, options.number_iterations, options.model))
Esempio n. 3
0
        X, y = mlu.get_features_labels(shuffle(df))
        X = min_max_scaler.fit_transform(X)
        clf_cv.fit(X, y)
        coefs_.append(clf_cv.coef_.ravel().copy())

    a = np.asarray(coefs_)
    np.savetxt(out + "%s_%s_logisticLasso.csv" %
               (contrast_name, classifier_no),
               a,
               delimiter=",")


if __name__ == '__main__':
    print("Logistic Regression with Lasso penalty")

    options = tools.parse_options()

    mat_files = os.listdir(options.data)
    contrast_list = list(
        filter(None, filter(lambda x: re.search('.*_.....mat', x), mat_files)))
    n_back_list = list(
        filter(lambda x: 'nBack' in x and ('2' in x or '3' in x),
               contrast_list))
    faces_list = list(
        filter(lambda x: 'Faces' in x and ('5' in x or '4' in x),
               contrast_list))
    relevant_mat_files = n_back_list + faces_list
    start = time()
    for mat_file in relevant_mat_files:
        print(mat_file)
        df1, df2, df3, contrast_name = tools.data_extraction(
Esempio n. 4
0
def main():
    print("NI Thesis")
    options = tools.parse_options()
    start = time.time()

    if options.combine:
        o_subtitle = 'combined'
    else:
        o_subtitle = 'individual'

    if os.path.isfile(options.input):
        scoresdf = pd.read_csv(options.input)
    else:
        scoresdf = pd.DataFrame(columns=[
            'Score', 'Type', 'Model', 'Classifier', 'Contrast_name',
            'Balanced_accuracy'
        ])

    mat_files = os.listdir(options.data)
    print(mat_files)
    #To get matfiles which does not ends with 389.mat or 487.mat. Selecting only minified mat files like
    #contrast_list = ['Faces_con_0003.mat', 'Faces_con_0002.mat', 'Faces_con_0001.mat', 'Faces_con_0005.mat',
    #                 'Faces_con_0004.mat', 'nBack_con_0001.mat', 'nBack_con_0002.mat', 'nBack_con_0003.mat']

    contrast_list = list(
        filter(None, filter(lambda x: re.search('.*_.....mat', x), mat_files)))
    #TODO remove this for old Data
    contrast_list = mat_files
    combi_contrast = contrast_permutation(contrast_list)

    if options.combine:
        clist = combi_contrast
    else:
        clist = contrast_list

    for i in range(len(clist)):

        #Getting Contrast name
        if options.combine:
            c1_name = clist[i][0].split(".")[0]
            c2_name = clist[i][1].split(",")[0]
            contrast_name = c1_name + '&' + c2_name
        else:
            contrast_name = clist[i].split(".")[0]

        # Checking if the training is already made for the particular contrast
        # TODO Uncomment this for checking if contrast is present in the file
        if len(scoresdf[scoresdf['Contrast_name'] == contrast_name]):
            continue

        for nClass in range(2, 4, 1):

            if nClass == 3:

                # Read Data and put it into panda data frame. Initially considering only means
                if options.combine:
                    df, contrast_name = tools.combine_contrast(
                        options.data, nClass, clist[i][0], clist[i][1],
                        options.data_type)
                else:
                    df, contrast_name = tools.data_extraction(
                        options.data, nClass, clist[i], options.data_type)
                df = mlu.missing_values(df)
                scoresdf = run_basic_ml(df, options, 123, scoresdf,
                                        contrast_name)

            elif nClass == 2:

                if options.combine:
                    df1, df2, df3, contrast_name = tools.combine_contrast(
                        options.data, nClass, clist[i][0], clist[i][1],
                        options.data_type)

                else:
                    df1, df2, df3, contrast_name = tools.data_extraction(
                        options.data, nClass, clist[i], options.data_type)
                # Combining two pairs off all combination
                df12 = df1.append(df2)
                df23 = df2.append(df3)
                df31 = df3.append(df1)

                # Handle missing values
                df12 = mlu.missing_values(df12)
                df23 = mlu.missing_values(df23)
                df31 = mlu.missing_values(df31)

                scoresdf = run_basic_ml(df12, options, 12, scoresdf,
                                        contrast_name)
                scoresdf = run_basic_ml(df23, options, 23, scoresdf,
                                        contrast_name)
                scoresdf = run_basic_ml(df31, options, 31, scoresdf,
                                        contrast_name)

        scoresdf.to_csv(options.output + "%s.csv" % (o_subtitle), index=False)

    print("It took %s seconds to run %s iterations for %s model" %
          (time.time() - start, options.number_iterations, options.model))

    logger.info(
        "It took %s seconds to run %s iterations for all models for not normalized"
        % (time.time() - start, options.number_iterations))
Esempio n. 5
0
def main():
    options = tools.parse_options()
    start = time.time()
    if os.path.isfile(options.input):
        scoresdf = pd.read_csv(options.input)
    else:
        scoresdf = pd.DataFrame(columns=['Score', 'Type', 'Model', 'Classifier', 'Contrast_name', 'Balanced_accuracy'])

    if options.combine:
        o_subtitle = 'combined'
    else:
        o_subtitle = 'individual'

    ## Gender information and adding it as label to the data by linking the subject_cont
    file = open(options.additional_data + "/subject_name.txt", "r")
    ids = file.read().split()
    ids = [int(float(id)) for id in ids]
    gdf = pd.read_csv(options.additional_data + '/n300.csv')
    gdf['subject_cont'] = ids
    gdf = gdf[['KJØNN', 'subject_cont','ALDER']].copy()
    gdf = gdf.rename(columns={'KJØNN':'gender', 'ALDER':'age'})


    label = 'gender'
    label = 'age'
    label = options.age_gender

    mat_files = os.listdir(options.data)
    contrast_list = list(filter(None, filter(lambda x: re.search('.*_.....mat', x), mat_files)))
    n_back_list = list(filter(lambda x: 'nBack' in x and ('2' in x or '3' in x), contrast_list))
    faces_list = list(filter(lambda x: 'Faces' in x and ('5' in x or '4' in x or '3' in x), contrast_list))
    relevant_mat_files = n_back_list + faces_list
    relevant_mat_files = relevant_mat_files[0:2]
    print(relevant_mat_files)
    for mat_file in relevant_mat_files:
        print(mat_file)
        for nClass in range(2, 4, 1):
            if nClass == 3:
                df, contrast_name = tools.data_extraction(options.data, nClass, mat_file, options.data_type)
                # Adding Age and gender to the dataframe
                df = pd.merge(df, gdf, on=['subject_cont'], how='inner')
                df = mlu.missing_values(df)
                scoresdf = run_gender_cor(df, options, 123, scoresdf, contrast_name, label)

            elif nClass == 2:
                df1, df2, df3, contrast_name = tools.data_extraction(options.data, nClass, mat_file, options.data_type)

                #Adding Age and gender to the dataframe
                df1 = pd.merge(df1, gdf, on=['subject_cont'], how='inner')
                df2 = pd.merge(df2, gdf, on=['subject_cont'], how='inner')
                df3 = pd.merge(df3, gdf, on=['subject_cont'], how='inner')

                # Combining two pairs off all combination
                df12 = df1.append(df2)
                df23 = df2.append(df3)
                df31 = df3.append(df1)

                # Handle missing values
                df12 = mlu.missing_values(df12)
                df23 = mlu.missing_values(df23)
                df31 = mlu.missing_values(df31)

                df1 = mlu.missing_values(df1)
                df2 = mlu.missing_values(df2)
                df3 = mlu.missing_values(df3)


                scoresdf = run_gender_cor(df12, options, 12, scoresdf, contrast_name, label)
                scoresdf = run_gender_cor(df23, options, 23, scoresdf, contrast_name, label)
                scoresdf = run_gender_cor(df31, options, 31, scoresdf, contrast_name, label)

                scoresdf = run_gender_cor(df1, options, 1, scoresdf, contrast_name, label)
                scoresdf = run_gender_cor(df2, options, 2, scoresdf, contrast_name, label)
                scoresdf = run_gender_cor(df3, options, 3, scoresdf, contrast_name, label)
        scoresdf.to_csv(options.output + "%s.csv" % (o_subtitle), index=False)

    print("It took %s seconds to run %s iterations for %s model" % (time.time() - start, options.number_iterations,
                                                                    options.model))
Esempio n. 6
0
def main():
    options = tools.parse_options()
    start = time.time()
    if options.combine:
        o_subtitle = 'combined'
    else:
        o_subtitle = 'individual'

    if os.path.isfile(
            options.input
    ):  # if results are already stored then use that as input
        scoresdf = pd.read_csv(options.input)
    else:  # in previous experiments, if results are not stored then create new dataframe to store the results
        scoresdf = pd.DataFrame(columns=[
            'Score', 'Type', 'Model', 'Classifier', 'Contrast_name',
            'Balanced_accuracy'
        ])

    mat_files = os.listdir(options.data)
    contrast_list = list(
        filter(None, filter(lambda x: re.search('.*_.....mat', x), mat_files)))
    combi_contrast = contrast_permutation(contrast_list)

    if options.combine:
        clist = combi_contrast
    else:
        clist = contrast_list

    for i in range(len(clist)):

        #Getting Contrast name
        if options.combine:
            c1_name = clist[i][0].split(".")[0]
            c2_name = clist[i][1].split(",")[0]
            contrast_name = c1_name + '&' + c2_name
        else:
            contrast_name = clist[i].split(".")[0]

        # Checking if the training is already made for the particular contrast
        # TODO Uncomment this for checking if contrast is present in the file
        if len(scoresdf[scoresdf['Contrast_name'] == contrast_name]):
            continue

        for nClass in range(2, 4, 1):

            if nClass == 3:

                # Read Data and put it into panda data frame. Initially considering only means
                if options.combine:
                    df, contrast_name = tools.combine_contrast(
                        options.data, nClass, clist[i][0], clist[i][1],
                        options.data_type)
                else:
                    df, contrast_name = tools.data_extraction(
                        options.data, nClass, clist[i], options.data_type)
                df = mlu.missing_values(df)
                scoresdf = run_basic_ml(df, options, 123, scoresdf,
                                        contrast_name)

            elif nClass == 2:

                if options.combine:
                    df1, df2, df3, contrast_name = tools.combine_contrast(
                        options.data, nClass, clist[i][0], clist[i][1],
                        options.data_type)

                else:
                    df1, df2, df3, contrast_name = tools.data_extraction(
                        options.data, nClass, clist[i], options.data_type)
                # Combining two pairs off all combination
                df12 = df1.append(df2)
                df23 = df2.append(df3)
                df31 = df3.append(df1)

                # Handle missing values
                df12 = mlu.missing_values(df12)
                df23 = mlu.missing_values(df23)
                df31 = mlu.missing_values(df31)

                scoresdf = run_basic_ml(df12, options, 12, scoresdf,
                                        contrast_name)
                scoresdf = run_basic_ml(df23, options, 23, scoresdf,
                                        contrast_name)
                scoresdf = run_basic_ml(df31, options, 31, scoresdf,
                                        contrast_name)

        scoresdf.to_csv(options.output + "basic_%s.csv" % (o_subtitle),
                        index=False)

    print("It took %s seconds to run %s iterations for %s model" %
          (time.time() - start, options.number_iterations, options.model))

    print(
        "It took %s seconds to run %s iterations for %s model after removing gender effect"
        % (time.time() - start, options.number_iterations, options.model))
def main():

    options = tools.parse_options()
    start = time.time()

    ## Get Age, Gender and Subject_cont information ###

    file = open(options.additional_data + "subject_name.txt", "r")
    ids = file.read().split()
    ids = [int(float(id)) for id in ids]
    gdf = pd.read_csv(options.additional_data + 'n300.csv')
    gdf.loc[:, 'subject_cont'] = ids
    gdf = gdf[['KJØNN', 'subject_cont', 'ALDER']]
    gdf = gdf.rename(columns={'KJØNN': 'gender', 'ALDER': 'age'})

    mat_files = os.listdir(options.data)
    contrast_list = list(
        filter(None, filter(lambda x: re.search('.*_.....mat', x), mat_files)))
    n_back_list = list(
        filter(lambda x: 'nBack' in x and ('2' in x or '3' in x),
               contrast_list))
    faces_list = list(
        filter(lambda x: 'Faces' in x and ('5' in x or '4' in x or '3' in x),
               contrast_list))
    relevant_contrast_list = n_back_list + faces_list  # extracted nBack 2,3 and Faces 3,4,5 contrasts

    if os.path.isfile(options.input):
        scoresdf = pd.read_csv(options.input)
    else:
        scoresdf = pd.DataFrame(columns=[
            'feature', 'beta_f', 'beta_a', 'beta_g', 'pvalue_f', 'pvalue_a',
            'pvalue_g', 'Contrast_name', 'Labels'
        ])

    for contrast in relevant_contrast_list:
        contrast_name = contrast.split(".")[0]
        if len(scoresdf[scoresdf["Contrast_name"] == contrast_name]):
            continue

        df1, df2, df3, contrast_name = tools.data_extraction(
            options.data, 2, contrast, options.data_type)

        # Combining two pairs off all combination
        df12 = df1.append(df2)
        df23 = df2.append(df3)
        df31 = df3.append(df1)

        # Handle missing values
        df12 = mlu.missing_values(df12)
        df23 = mlu.missing_values(df23)
        df31 = mlu.missing_values(df31)

        # Adding age and gender data for Standardization purpose. This additional data will be removed in
        # data preprocessing
        df12 = pd.merge(df12, gdf, on=['subject_cont'], how='inner')
        df23 = pd.merge(df23, gdf, on=['subject_cont'], how='inner')
        df31 = pd.merge(df31, gdf, on=['subject_cont'], how='inner')

        scoresdf = run_glm_fit(df12, 12, contrast_name, scoresdf)
        scoresdf = run_glm_fit(df23, 23, contrast_name, scoresdf)
        scoresdf = run_glm_fit(df31, 31, contrast_name, scoresdf)

        scoresdf.to_csv(options.output + "individual.csv", index=False)

    fdr_analysis(options)