Code example #1
File: predict.py  Project: alexhanganu/nimb
def get_stats_df(len_df_X, atlas, nr_threads, env_name, time_started):
    '''
    collect the parameters used for a specific analysis into a table,
    so they can be saved together with the results
    '''

    import sklearn
    import matplotlib

    stats = Table().get_clean_df()
    d = {
        'pandas version': Table().pd_ver,
        'numpy version': np.__version__,
        'matplotlib version': matplotlib.__version__,
        'sklearn version': sklearn.__version__,
        'number of iterations': definitions.prediction_defs['NUM_ITER'],
        'atlas': atlas,
        'nr of features': len_df_X,
        'nr of threads': nr_threads,
        'remote name': env_name,
        'analysis started at': time_started,
    }
    for i, (key, value) in enumerate(d.items()):
        stats.at[i, 'stats'] = key
        stats.at[i, 'values'] = value
    return stats
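
Table() and definitions are project-internal helpers (stats.db_processing and the prediction defaults), so the function above is not runnable on its own. A minimal sketch of the same two-column parameters table with plain pandas, using made-up values for the analysis settings and assuming sklearn and matplotlib are installed:

import numpy as np
import pandas as pd
import sklearn
import matplotlib

# hypothetical analysis parameters, normally passed in by the caller
d = {
    'pandas version': pd.__version__,
    'numpy version': np.__version__,
    'matplotlib version': matplotlib.__version__,
    'sklearn version': sklearn.__version__,
    'nr of features': 68,
    'atlas': 'DK',
}
# two-column table equivalent to the Table().get_clean_df() result
stats = pd.DataFrame({'stats': list(d.keys()), 'values': list(d.values())})
print(stats)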
Code example #2
    def __init__(self, all_vars):
        self.project = all_vars.params.project
        self.project_vars = all_vars.projects[self.project]
        self.stats_paths = self.project_vars['STATS_PATHS']
        self.stats_params = self.project_vars['STATS_PARAMS']
        self.group_col = self.project_vars['group_col']
        self.dir_stats_home = self.stats_paths["STATS_HOME"]
        self.atlas = ('DK', 'DS', 'DKDS')[1]  # hard-coded: index 1 selects the 'DS' atlas
        self.get_steps(all_vars)

        print(
            f'    Performing statistical analysis in folder: {self.dir_stats_home}'
        )
        print('    materials located at: {:<50}'.format(
            self.project_vars['materials_DIR'][1]))
        print('    file for analysis: {:<50}'.format(
            self.project_vars['fname_groups']))
        print('    id column: {:<50}'.format(str(self.project_vars['id_col'])))
        print('    group column: {:<50}'.format(
            str(self.project_vars['group_col'])))
        # print('    variables to analyse: {:<50}'.format(str(self.project_vars['variables_for_glm'])))

        self.tab = Table()
        self.preproc = preprocessing.Preprocess()
        (self.df_user_stats, self.df_final_grid,
         self.df_adjusted,
         self.cols_X,
         self.groups) = MakeGrid(self.project_vars).grid()
Code example #3
def linreg_moderation_results(df_X_linreg, ls_cols_X_atlas, group_param,
                              regression_param, path_dir_save_results, group):
    '''perform moderation analysis
    Args:
        df_X_linreg:             pandas.DataFrame with the columns and data for the analysis
        ls_cols_X_atlas:         list of columns from df_X_linreg to use for the linear regression analysis
        group_param:             str, name of the column from df_X_linreg that was used to create the groups
        regression_param:        str, name of the column from df_X_linreg to use for the regression analysis (e.g., Age)
        path_dir_save_results:   abspath of the folder where the csv file is saved
        group:                   group name used in the name of the results csv file
    Return:
        None; creates a csv file
    '''
    d_result = compute_linreg_data(df_X_linreg, ls_cols_X_atlas, group_param,
                                   regression_param)
    df_result = Table().create_df_from_dict(d_result)
    Table().save_df(
        df_result,
        path.join(path_dir_save_results, f'linreg_moderation_{group}.csv'))
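
compute_linreg_data() is defined elsewhere in the project, so the exact moderation model is not visible here. A rough sketch of one common way to test moderation with statsmodels, an interaction between the group and the regression parameter, on toy data with hypothetical column names:

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

# toy data: one brain measure, a group variable and a moderator (e.g. Age)
rng = np.random.default_rng(0)
df = pd.DataFrame({
    'thickness': rng.normal(2.5, 0.2, 100),
    'group': rng.integers(0, 2, 100),
    'Age': rng.uniform(20, 80, 100),
})
# moderation: does the group change the slope of Age on the measure?
# the Age:C(group) interaction row of the summary is the moderation effect
model = smf.ols('thickness ~ Age * C(group)', data=df).fit()
print(model.summary())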
Code example #4
    def __init__(self,
                 df,
                 params_y,
                 ls_cols4anova,
                 path2save,
                 p_thresh=0.05,
                 intercept_thresh=0.05,
                 print_not_FS=False):
        self.df = df
        self.params_y = params_y
        self.ls_cols4anova = ls_cols4anova
        self.sig_cols = dict()
        self.tab = Table()
        self.print_not_FS = print_not_FS
        self.fs_struc_meas = fs_definitions.GetFSStructureMeasurement()
        self.run_anova(p_thresh, intercept_thresh, path2save)
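
run_anova() is not part of this excerpt. A rough sketch of the kind of per-column test such a class might run, keeping only the columns whose group effect passes p_thresh; the one-way layout and the column names are assumptions:

import numpy as np
import pandas as pd
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm

rng = np.random.default_rng(1)
df = pd.DataFrame({
    'group': rng.integers(0, 2, 60),
    'col_a': rng.normal(0, 1, 60),
    'col_b': rng.normal(0, 1, 60),
})

p_thresh = 0.05
sig_cols = {}
for col in ('col_a', 'col_b'):
    # one-way ANOVA of each column against the group factor
    model = ols(f'{col} ~ C(group)', data=df).fit()
    p_val = anova_lm(model).loc['C(group)', 'PR(>F)']
    if p_val < p_thresh:
        sig_cols[col] = p_val
print(sig_cols)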
Code example #5
File: stats_stats.py  Project: alexhanganu/nimb
    def __init__(self,
                 df,
                 group_col,
                 ls_cols,
                 groups,
                 path_save_res,
                 p_thresh=0.05):
        self.df = df
        self.group_col = group_col
        self.ls_cols = ls_cols
        self.groups = groups
        self.path_save_res = path_save_res
        self.ls_meas = get_names_of_measurements()
        self.ls_struct = get_names_of_structures()
        self.res_ttest = self.compute_ttest_for_col(p_thresh)
        self.tab = Table()
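
compute_ttest_for_col() is not shown either. A minimal sketch of a per-column two-group t-test with scipy.stats, assuming the class compares exactly two groups; the column name is made up:

import numpy as np
import pandas as pd
from scipy import stats

rng = np.random.default_rng(2)
df = pd.DataFrame({
    'group': ['A'] * 30 + ['B'] * 30,
    'hippocampus_volume': rng.normal(4000, 300, 60),
})

p_thresh = 0.05
res_ttest = {}
for col in ('hippocampus_volume',):
    a = df.loc[df['group'] == 'A', col]
    b = df.loc[df['group'] == 'B', col]
    t, p = stats.ttest_ind(a, b)
    if p < p_thresh:
        # keep only the significant columns, as the class appears to do
        res_ttest[col] = (round(t, 3), round(p, 3))
print(res_ttest)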
Code example #6
    def __init__(self, nimb_vars, fs_vars, proj_vars, f_ids_processed,
                 f_GLM_group, FS_GLM_dir):
        self.proj_vars = proj_vars
        self.vars_fs = fs_vars
        self.FS_SUBJECTS_DIR = fs_vars['SUBJECTS_DIR']
        self.NIMB_PROCESSED_FS = fs_vars['NIMB_PROCESSED']
        self.f_ids_processed = f_ids_processed
        self.f_GLM_group = f_GLM_group
        self.FS_GLM_dir = FS_GLM_dir
        self.archive_type = '.zip'
        self.tab = Table()
        self.miss = dict()
        self.ids_4fs_glm = dict()
        self.df = self.tab.get_df(self.f_GLM_group)
        self.bids_ids = self.df[self.proj_vars['id_col']].tolist()
        self.ids_exclude_glm = os.path.join(self.FS_GLM_dir,
                                            'excluded_from_glm.json')
Code example #7
    def __init__(self,
                 file_abspath,
                 result_abspath):
        from stats.db_processing import Table
        self.contrasts = fs_definitions.GLMcontrasts['contrasts']
        self.get_explanations()

        self.col_4constrasts = "Contrast"
        self.header = ("ClusterNo",
                      "Max", "VtxMax", "Size(mm^2)", 
                      "TalX", "TalY", "TalZ",
                      "CWP", "CWPLow", "CWPHi",
                      "NVtxs", "WghtVtx",
                      "Annot", self.col_4constrasts, "Explanation")
        self.length_matrix = len(self.header)
        with open(file_abspath, 'r') as f:
            self.content = f.readlines()
        self.result_abspath = result_abspath
        self.tab = Table()
        self.ls_vals_2chk = self.contrasts.keys()
        self.run()
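
The header above matches the cluster summary produced by FreeSurfer's mri_glmfit-sim / mri_surfcluster, and run() (not shown) parses file_abspath against it. A rough sketch of reading such a file into a DataFrame, assuming the usual format in which comment lines start with '#' and each data row has one value per header column:

import pandas as pd

cols = ("ClusterNo", "Max", "VtxMax", "Size(mm^2)",
        "TalX", "TalY", "TalZ",
        "CWP", "CWPLow", "CWPHi",
        "NVtxs", "WghtVtx", "Annot")

def read_cluster_summary(file_abspath):
    """Read the non-comment rows of a FreeSurfer cluster summary file."""
    rows = []
    with open(file_abspath, 'r') as f:
        for line in f:
            if line.startswith('#') or not line.strip():
                continue
            rows.append(line.split()[:len(cols)])
    return pd.DataFrame(rows, columns=cols)

# df_clusters = read_cluster_summary('/path/to/cluster.summary')  # hypothetical path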
Code example #8
File: predict.py  Project: alexhanganu/nimb
def feature_ranking(X_scaled, y_transform, cols_X):
    """
    get the ranking of all features
    :param X_scaled:
    :param y_transform:
    :return: the pandas Dataframe of all ranking feature in a sorted way
    """
    clf = RandomForestClassifier()
    feature_selector = RFE(clf)
    feature_selector.fit(X_scaled, y_transform)

    features_rfe_and_rank_df = Table().create_df(feature_selector.ranking_,
                                                 index_col=cols_X,
                                                 cols=['ranking']).sort_values(
                                                     ['ranking'])

    # features_rfe_and_rank_df = pd.DataFrame(feature_selector.ranking_,
    #                                     index=cols_X, columns=['ranking']).sort_values(['ranking'])
    features_rfe_and_rank_df['feature'] = features_rfe_and_rank_df.index
    return features_rfe_and_rank_df['feature'], features_rfe_and_rank_df
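
The commented-out lines show the plain pandas equivalent of Table().create_df. A self-contained sketch of the same RFE ranking on synthetic data, so the behaviour can be checked without the project's helpers:

import pandas as pd
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE

X, y = make_classification(n_samples=100, n_features=8, random_state=0)
cols_X = [f'feat_{i}' for i in range(X.shape[1])]

# rank the features by recursively eliminating the weakest ones
feature_selector = RFE(RandomForestClassifier(random_state=0))
feature_selector.fit(X, y)

ranking_df = pd.DataFrame(feature_selector.ranking_,
                          index=cols_X,
                          columns=['ranking']).sort_values(['ranking'])
ranking_df['feature'] = ranking_df.index
print(ranking_df)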
Code example #9
    def get_groups_and_variables(self, proj_vars, GLM_file_group, vars_fs):
        """creating working variables and dictionaries
        """
        cols_2use = proj_vars["variables_for_glm"] + [
            self.id_col, self.group_col
        ]
        df_groups_clin = Table().get_df_with_columns(GLM_file_group, cols_2use)

        self.ls_groups = pd.unique(df_groups_clin[self.group_col]).tolist()
        self.ids = self.get_ids_ready4glm(df_groups_clin[self.id_col].tolist(),
                                          vars_fs)

        d_init = df_groups_clin.to_dict()
        self.d_subjid = {}
        self.ls_vars_stats = [key for key in d_init if key != self.id_col]
        for rownr in d_init[self.id_col]:
            _id = d_init[self.id_col][rownr]
            if _id in self.ids:
                self.d_subjid[_id] = {}
                for var in self.ls_vars_stats:
                    self.d_subjid[_id][var] = d_init[var][rownr]
        self.ls_vars_stats.remove(self.group_col)
        self.make_subjects_per_group(df_groups_clin)
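
The nested loop over d_init builds a per-subject dict of variables. With plain pandas the same mapping can be obtained in one step; this is not a drop-in replacement (the original also tracks ls_vars_stats and group membership), just the core idiom on made-up data:

import pandas as pd

df_groups_clin = pd.DataFrame({
    'id': ['s1', 's2', 's3'],
    'group': ['ctrl', 'pat', 'ctrl'],
    'Age': [61, 72, 58],
})
ids = ['s1', 's3']  # subjects that are ready for the GLM

# {id: {column: value}} for the selected subjects only
d_subjid = (df_groups_clin[df_groups_clin['id'].isin(ids)]
            .set_index('id')
            .to_dict(orient='index'))
print(d_subjid)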
Code example #10
File: predict.py  Project: alexhanganu/nimb
def save_features(dic_feat_comps,
                  expl_variance,
                  file2save,
                  img2save,
                  lang="EN"):
    """features extracted from PCA
        are being saved to a table
        and image
    Args:
        dic_feat_comps = {feature_name: explained_variance}
        expl_variance  = PCA explained_variance_
        file2save      = abspath to the csv file to save the table
        img2save       = abspath to the image png file to save the table
        lang           = language used to describe the results
    """
    df_feat_comps = Table().create_df(dic_feat_comps.values(),
                                      index_col=dic_feat_comps.keys(),
                                      cols=['explained_variance'])
    df_feat_comps.to_csv(file2save)
    plotting.plot_simple(vals=np.cumsum(expl_variance),
                         xlabel=params_lang[lang]['nr_components'],
                         ylabel=params_lang[lang]['expl_cum_var'],
                         path_to_save_file=img2save)
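
plotting.plot_simple and params_lang are project helpers, so the function above is not runnable on its own. A minimal sketch of where dic_feat_comps and expl_variance might come from and of the equivalent saving steps, using only sklearn, pandas and matplotlib; naming the entries after the components is an assumption:

import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')          # write the png without a display
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

rng = np.random.default_rng(3)
X = rng.normal(size=(50, 6))

pca = PCA(n_components=4).fit(X)
expl_variance = pca.explained_variance_
# hypothetical mapping: one entry per retained component
dic_feat_comps = {f'PC{i + 1}': var for i, var in enumerate(expl_variance)}

pd.DataFrame(list(dic_feat_comps.values()),
             index=list(dic_feat_comps.keys()),
             columns=['explained_variance']).to_csv('pca_features.csv')

plt.plot(np.cumsum(expl_variance))
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance')
plt.savefig('pca_features.png')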
Code example #11
File: stats_stats.py  Project: alexhanganu/nimb
def save_df_Emmanuelle(df,
                       groups,
                       stats_dic,
                       cols2color_sig,
                       path2save,
                       make_with_colors,
                       extensions=('xlsx', 'csv', 'json')):

    if 'xlsx' in extensions:
        import openpyxl
        import string
        df.to_excel('stats_new.xlsx')
        ########## MERGE MEAN/STD SUB-INDEXES ################
        file = openpyxl.load_workbook('stats_new.xlsx')
        sheet = file['Sheet1']
        alpha = string.ascii_uppercase
        # merge each adjacent pair of sub-header cells in row 2
        # (columns B:C, D:E, ...), one pair per group
        for ltr in range(len(alpha))[1:(2 * len(groups)) + 1:2]:
            cell1, cell2 = alpha[ltr] + str(2), alpha[ltr + 1] + str(2)
            sheet.merge_cells(f'{cell1}:{cell2}')
        file.save('stats_new.xlsx')

    if 'json' in extensions:
        utilities.save_json(stats_dic, os.path.join(path2save, 'stats.json'))

    if 'csv' in extensions:
        tab = Table()
        tab.save_df(df,
                    os.path.join(path2save, 'stats_new.csv'),
                    sheet_name='stats')

    if make_with_colors:
        save_2xlsx_with_colors_Emmanuelle(df,
                                          'stats_new.xlsx',
                                          path2save,
                                          'stats_wcolors.xlsx',
                                          cols2color_sig=cols2color_sig)
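
The xlsx branch merges the duplicated group label under each mean/std column pair. A standalone sketch of the same openpyxl pattern on a throwaway workbook, with made-up header values and file name:

import string
import openpyxl

wb = openpyxl.Workbook()
sheet = wb.active          # plays the role of the 'Sheet1' written by to_excel
# fake header rows: test names in row 1, duplicated group labels in row 2
sheet.append(['', 'mean/std', 'mean/std', 'mean/std', 'mean/std'])
sheet.append(['', 'gr1', 'gr1', 'gr2', 'gr2'])

alpha = string.ascii_uppercase
n_groups = 2
# merge B2:C2 and D2:E2 -> one header cell per group's mean/std pair
for ltr in range(len(alpha))[1:(2 * n_groups) + 1:2]:
    sheet.merge_cells(f'{alpha[ltr]}2:{alpha[ltr + 1]}2')
wb.save('demo_merged.xlsx')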
Code example #12
File: stats_stats.py  Project: alexhanganu/nimb
def mkstatisticsf_Emmanuelle(
    df_4stats,
    groups,
    group_col,
):
    '''Creates a descriptive statistics table for publication,
        based on the provided pandas.DataFrame
        Handles 1, 2 or more groups
        author: version adjusted by Emmanuelle Mazur-Lainé 202206
    Args:
        df_4stats: pandas.DataFrame with the data to analyse
        groups: list/tuple of group names as str/int
        group_col: column name in df_4stats that contains the group names from groups
    Return:
        df_new: pandas.DataFrame with the results, with mean/std sub-indexes
        stats_dic: dict with the results
        cols2color_sig: list of columns with significant results, to be colored
    '''
    df_4stats = df_4stats.astype(float)

    tab = Table()

    groups_df = dict()
    for group in groups:
        groups_df[group] = tab.get_df_per_parameter(df_4stats, group_col,
                                                    group)

    stats_dic = dict()
    vals2chk = df_4stats.columns.tolist()

    if group_col in vals2chk:
        vals2chk.remove(group_col)

    cols2color_sig = list()
    groups = list(groups_df.keys())

    ################################
    if len(groups) == 1:
        ls_tests = ('mean', 'std', 'kurtosis', 'skewness')
    elif len(groups) <= 2:
        ls_tests = ('mean', 'std', 'kurtosis', 'skewness', 'TTest', 'Welch',
                    'MannWhitneyu')
    elif len(groups) > 2:
        ls_tests = ('mean', 'std', 'kurtosis', 'skewness', 'Bartlett',
                    'Kruskal', 'ANOVA')

    for test in ls_tests:
        for val in vals2chk:
            values_per_gr = []
            for i in range(0, len(groups)):
                gr_i = groups_df[groups[i]][val].values
                arr = np.array(gr_i)
                arr_without_nan = arr[np.logical_not(np.isnan(arr))]
                values_per_gr.append(arr_without_nan)

            results, params = get_stats_Emmanuelle(test, groups, values_per_gr)

            if test == 'mean':
                for i in range(len(groups)):
                    for tst in ('mean', 'std'):
                        results, params = get_stats_Emmanuelle(
                            tst, groups, values_per_gr)
                        key = f'{groups[i]}, {params}'
                        if key not in stats_dic:
                            stats_dic[key] = dict()
                        stats_dic[key][val] = f'{results[i]}'

            if test in ('kurtosis', 'skewness'):
                for i in range(len(groups)):
                    key = f'{groups[i]}, {params}'
                    if key not in stats_dic:
                        stats_dic[key] = dict()
                    stats_dic[key][val] = f'{results[i]}'

            elif test in ('TTest', 'Welch', 'Bartlett', 'MannWhitneyu',
                          'Kruskal', 'ANOVA'):
                for i in range(len(groups)):
                    key1 = f'{test}, {params[0]}'
                    key2 = f'{test}, {params[1]}'
                    for key in (key1, key2):
                        if key not in stats_dic:
                            stats_dic[key] = dict()
                    stats_dic[key1][val] = f'{results[0]}'
                    stats_dic[key2][val] = f'{results[1]}'

                cols2color_sig.append(key2)

    df = tab.create_df_from_dict(stats_dic)
    df = df.astype(float)

    # Creating new adjusted DataFrame with sub-indexes
    ls_tests_dup = []
    ls_param = []
    ls_keys = list(stats_dic.keys())

    mean_gr_done = False

    for test in ls_tests:
        if test in ('mean', 'std'):
            for i in range(0, len(groups)):
                ls_tests_dup.append('mean/std')
            if not mean_gr_done:
                for i in range(len(groups)):
                    # one mean and one std column per group
                    ls_param.append(f'gr{i + 1} (val={groups[i]})')
                    ls_param.append(f'gr{i + 1} (val={groups[i]})')
                mean_gr_done = True

        elif test in ('kurtosis', 'skewness'):
            for i in range(0, len(groups)):
                ls_tests_dup.append(test)
                ls_param.append(f'gr{i + 1} (val={groups[i]})')

        elif test in ('TTest', 'Welch', 'Bartlett', 'MannWhitneyu', 'Kruskal',
                      'ANOVA'):
            ls_tests_dup.append(test)
            ls_tests_dup.append(test)

    for key in ls_keys[4 * (len(groups)):]:
        ls_param.append((str(key))[-1])

    col = [ls_tests_dup, ls_param]
    tuples = list(zip(*col))

    df_new = pd.DataFrame(df.values,
                          index=pd.Index(df.index),
                          columns=pd.MultiIndex.from_tuples(tuples))

    df_new = df_new.round(3)

    return df_new, stats_dic, cols2color_sig
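
get_stats_Emmanuelle() is defined elsewhere in stats_stats.py and is not shown here. As a rough sketch, the test names used above map onto standard scipy.stats calls; toy two-group data:

import numpy as np
from scipy import stats

rng = np.random.default_rng(4)
gr1 = rng.normal(0.0, 1.0, 40)
gr2 = rng.normal(0.5, 1.0, 40)

results = {
    'mean': (gr1.mean(), gr2.mean()),
    'std': (gr1.std(), gr2.std()),
    'kurtosis': (stats.kurtosis(gr1), stats.kurtosis(gr2)),
    'skewness': (stats.skew(gr1), stats.skew(gr2)),
    'TTest': stats.ttest_ind(gr1, gr2),                   # Student t-test
    'Welch': stats.ttest_ind(gr1, gr2, equal_var=False),  # unequal variances
    'MannWhitneyu': stats.mannwhitneyu(gr1, gr2),
    'Bartlett': stats.bartlett(gr1, gr2),
    'Kruskal': stats.kruskal(gr1, gr2),
    'ANOVA': stats.f_oneway(gr1, gr2),
}
for test, res in results.items():
    print(test, np.round(res, 3))   # statistic and p-value (or the two group values)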
Code example #13
File: stats_stats.py  Project: alexhanganu/nimb
def mkstatisticsf(df_4stats,
                  groups,
                  group_col,
                  path2save,
                  make_with_colors=True):
    '''Creates a descriptive statistics file for publication,
        based on the provided pandas.DataFrame
        Works only on 2 groups
    Args:
        df_4stats: pandas.DataFrame with the data to analyse
        groups: list/tuple of the 2 group names as str/int
        group_col: str, column name in df_4stats that contains the group names from groups
        path2save: abspath of the folder where the descriptive files are saved
        make_with_colors: if True, create an additional .xlsx file with
                        significant results colored, provided xlwt is installed
    Return:
        json file with results
        .csv file with results
        .xlsx file with results, with the significant ones colored red
    '''

    tab = Table()
    ls_tests = ('mean', 'std', 'kurtosis', 'skewness', 'TTest', 'Welch',
                'ANOVA', 'Bartlett', 'MannWhitneyu', 'Kruskal')

    groups_df = dict()
    for group in groups:
        groups_df[group] = tab.get_df_per_parameter(df_4stats, group_col,
                                                    group)

    stats_dic = dict()
    vals2chk = df_4stats.columns.tolist()
    if group_col in vals2chk:
        vals2chk.remove(group_col)

    cols2color_sig = list()
    groups = list(groups_df.keys())
    group1 = groups_df[groups[0]]
    group2 = groups_df[groups[1]]
    for test in ls_tests:
        for val in vals2chk:
            results, params = get_stats(test, group1[val], group2[val])
            if test in ('mean', 'std', 'kurtosis', 'skewness'):
                key1 = f'{groups[0]}, {params[0]}'
                key2 = f'{groups[1]}, {params[0]}'
            else:
                key1 = f'{test}, {params[0]}'
                key2 = f'{test}, {params[1]}'
                cols2color_sig.append(key2)
            for key in (key1, key2):
                if key not in stats_dic:
                    stats_dic[key] = dict()
            stats_dic[key1][val] = f'{results[0]}'
            stats_dic[key2][val] = f'{results[1]}'

    df = tab.create_df_from_dict(stats_dic)
    tab.save_df(df,
                os.path.join(path2save, 'stats_general.csv'),
                sheet_name='stats')
    utilities.save_json(stats_dic, os.path.join(path2save,
                                                'stats_general.json'))
    if make_with_colors:
        save_2xlsx_with_colors(df,
                               path2save=path2save,
                               cols2color_sig=cols2color_sig)
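
save_2xlsx_with_colors() is not shown in this excerpt. A hypothetical sketch of one way to produce a comparable "colored significant results" sheet with pandas Styler alone (the column names and threshold are made up; writing xlsx requires openpyxl):

import pandas as pd

# toy stats table: one p-value column that should be highlighted
df = pd.DataFrame({'TTest, t': [2.1, 0.4], 'TTest, p': [0.03, 0.69]},
                  index=['Hippocampus', 'Amygdala'])
cols2color_sig = ['TTest, p']

def color_sig(val, p_thresh=0.05):
    """Red background for significant p-values."""
    return 'background-color: red' if val < p_thresh else ''

styled = df.style.applymap(color_sig, subset=cols2color_sig)
styled.to_excel('stats_wcolors.xlsx')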