Python Table.save_df Exemples, stats.db_processing.Table.save_df Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : stats_stats.py Projet : alexhanganu/nimb

def save_df_Emmanuelle(df,
                       groups,
                       stats_dic,
                       cols2color_sig,
                       path2save,
                       make_with_colors,
                       extensions=('xlsx', 'csv', 'json')):

    if 'xlsx' in extensions:
        import openpyxl
        import string
        df.to_excel('stats_new.xlsx')
        ########## MERGE MEAN/STD SUB-INDEXES ################
        file = openpyxl.load_workbook('stats_new.xlsx')
        sheet = file['Sheet1']
        alpha = string.ascii_uppercase
        for ltr in range(len(alpha))[1:(2 * len(groups)) + 1:2]:
            cell1, cell2 = alpha[ltr] + str(2), alpha[ltr + 1] + str(2)
            sheet.merge_cells(str(cell1 + ':' + cell2))
        file.save('stats_new.xlsx')

    if 'json' in extensions:
        utilities.save_json(stats_dic, os.path.join(path2save, 'stats.json'))

    if 'csv' in extensions:
        tab = Table()
        tab.save_df(df,
                    os.path.join(path2save, 'stats_new.csv'),
                    sheet_name='stats')

    if make_with_colors:
        save_2xlsx_with_colors_Emmanuelle(df,
                                          'stats_new.xlsx',
                                          path2save,
                                          'stats_wcolors.xlsx',
                                          cols2color_sig=cols2color_sig)

Exemple #2

0

Afficher le fichier

class ClusterFile2CSV():

    def __init__(self,
                file_abspath,
                result_abspath):
        from stats.db_processing import Table
        self.contrasts = fs_definitions.GLMcontrasts['contrasts']
        self.get_explanations()

        self.col_4constrasts = "Contrast"
        self.header = ("ClusterNo",
                      "Max", "VtxMax", "Size(mm^2)", 
                      "TalX", "TalY", "TalZ",
                      "CWP", "CWPLow", "CWPHi",
                      "NVtxs", "WghtVtx",
                      "Annot", self.col_4constrasts, "Explanation")
        self.length_matrix = len(self.header)
        self.content = open(file_abspath, 'r').readlines()
        self.result_abspath = result_abspath
        self.tab = Table()
        self.ls_vals_2chk = self.contrasts.keys()
        self.run()

    def run(self):
        d = dict()
        i = 0

        while i < len(self.content):
            line = self.content[i].replace('\n','')
            if self.chk_if_vals_in_line(line):
                expl = self.content[i+1].replace('\n','').replace(';','.')
                d[i] = ['','','','','','','','','','','','','', line, expl,]
                i += 2
            else:
                line = self.clean_nans_from_list(line.split(' '))
                i += 1
                if len(line) != 0:
                    d[i] = line + ['','']
        self.save_2table(d)

    def save_2table(self, d):
        df = self.tab.create_df_from_dict(d).T
        column_names = {i[0]:i[1] for i in list(zip(df.columns, self.header))}
        df = df.rename(columns = column_names)
        df = df.set_index(df[self.col_4constrasts])
        df = df.drop(columns = [self.col_4constrasts])
        self.tab.save_df(df, self.result_abspath)

    def chk_if_vals_in_line(self, line):
        '''will use each value from self.ls_vals_2chk
            if present in the line:
            will return True and break
            else: return False
        '''
        exists     = False

        for val_2chk in self.ls_vals_2chk:
            if val_2chk in line:
                exists = True
                break
        return exists

    def clean_nans_from_list(self, ls):
        for i in ls[::-1]:
            if i == '':
                ls.remove(i)
        return ls

    def get_explanations(self):
        self.explanations = list()
        for key in self.contrasts:
            for file_name in self.contrasts[key]:
                self.explanations.append(self.contrasts[key][file_name][1])

Exemple #3

0

Afficher le fichier

class CheckIfReady4GLM():
    def __init__(self, nimb_vars, fs_vars, proj_vars, f_ids_processed,
                 f_GLM_group, FS_GLM_dir):
        self.proj_vars = proj_vars
        self.vars_fs = fs_vars
        self.FS_SUBJECTS_DIR = fs_vars['SUBJECTS_DIR']
        self.NIMB_PROCESSED_FS = fs_vars['NIMB_PROCESSED']
        self.f_ids_processed = f_ids_processed
        self.f_GLM_group = f_GLM_group
        self.FS_GLM_dir = FS_GLM_dir
        self.archive_type = '.zip'
        self.tab = Table()
        self.miss = dict()
        self.ids_4fs_glm = dict()
        self.df = self.tab.get_df(self.f_GLM_group)
        self.bids_ids = self.df[self.proj_vars['id_col']].tolist()
        self.ids_exclude_glm = os.path.join(self.FS_GLM_dir,
                                            'excluded_from_glm.json')

    def chk_if_subjects_ready(self):

        fs_proc_ids = self.get_ids_processed()
        miss_bids_ids = [
            i for i in self.bids_ids if i not in fs_proc_ids.keys()
        ]
        if miss_bids_ids:
            print(
                f'    {len(miss_bids_ids)} IDs are missing from file: {self.f_ids_processed}'
            )
            print(f'        first 5 IDs are: {self.f_ids_processed[:5]}')
            for bids_id in miss_bids_ids:
                self.add_to_miss(bids_id, 'id_missing')

        if len(miss_bids_ids) < len(fs_proc_ids.keys()):
            for bids_id in [
                    i for i in self.bids_ids if i not in miss_bids_ids
            ]:
                fs_proc_id = fs_proc_ids[bids_id].replace(
                    self.archive_type, '')
                if os.path.exists(os.path.join(self.FS_SUBJECTS_DIR, bids_id)):
                    self.ids_4fs_glm[bids_id] = bids_id
                    self.chk_glm_files(bids_id)
                elif os.path.exists(
                        os.path.join(self.FS_SUBJECTS_DIR, fs_proc_id)):
                    self.ids_4fs_glm[bids_id] = fs_proc_id
                    self.chk_glm_files(fs_proc_id)
                else:
                    print(f'id {bids_id} or freesurfer id {fs_proc_id} \
                        are missing from the {self.FS_SUBJECTS_DIR} folder')
                    self.add_to_miss(bids_id, 'id_missing')
            if self.miss.keys():
                print("    missing files and ids: ", self.miss)
                save_json(self.miss, self.ids_exclude_glm, print_space=8)
                subjs_missing = len(self.miss.keys())
                subjs_present = len(self.ids_4fs_glm.keys())
                print(f'    Number of participants ready for FreeSurfer GLM:')
                print(f'        in the folder: {self.FS_SUBJECTS_DIR}')
                print(f'        {subjs_present} present')
                print(f'        {subjs_missing} missing')
                not_ready = [
                    i for i in self.miss if "id_missing" not in self.miss[i]
                ]
                maybe_archived = [i for i in self.miss if i not in not_ready]
                if maybe_archived:
                    print("   MAYBE archived: ", maybe_archived)
                    q = "    EXCEPTION! Some IDs are missing, but they could be archived.\n\
                    Do you want to do glm analysis with current subjects (y) or try to check the archive (n) ? (y/n)\n\
                        (note: if you answer NO, you will be asked to unarchive the \n\
                        processed folders of IDs if they are present in FREESURFER_PROCESSED)"

                    if get_yes_no(q) == 1:
                        self.create_fs_glm_df()
                        return True, list()
                    else:
                        return False, maybe_archived
                if not_ready:
                    print(
                        "    MISSING FILES: these participant CANNOT be included in the GLM analysis: ",
                        not_ready)
                    q = "    EXCEPTION! Some IDs have missing files and they MUST be excluded from analysis.\n\
                    Do you want to continue without excluded IDs ? (y/n)"

                    if get_yes_no(q) == 1:
                        self.create_fs_glm_df()
                        return True, list()
                    else:
                        return False, not_ready
            else:
                self.create_fs_glm_df()
                return True, list()
        else:
            print('    no ids found')
            return False, list()

    def chk_glm_files(self, bids_id):
        '''it is expected that the BIDS IDs are located in FREESURFER -> SUBJECTS_DIR
            script checks if subjects are present
        Args:
            bids_id: ID of the subject to chk
        Return:
            populates list of missing subjects
            populates dict with ids
        '''
        files_not_ok = fs_definitions.ChkFSQcache(self.FS_SUBJECTS_DIR,
                                                  bids_id, self.vars_fs).miss
        if files_not_ok:
            for file in files_not_ok[bids_id]:
                self.add_to_miss(bids_id, file)
            return False
        else:
            return True

    def create_fs_glm_df(self):
        self.rm_missing_ids()
        tmp_id = 'fs_id'
        print('    creating the glm file for FreeSurfer GLM analysis')
        d_ids = {
            self.proj_vars['id_col']:
            [i for i in list(self.ids_4fs_glm.keys())],
            tmp_id: [i for i in list(self.ids_4fs_glm.values())]
        }
        fs_proc_df = self.tab.create_df_from_dict(d_ids)
        fs_proc_df = self.tab.change_index(fs_proc_df,
                                           self.proj_vars['id_col'])
        grid_fs_df_pre = self.tab.change_index(self.df,
                                               self.proj_vars['id_col'])
        self.df_ids = self.tab.join_dfs(grid_fs_df_pre,
                                        fs_proc_df,
                                        how='outer')
        self.df_ids.rename(columns={tmp_id: self.proj_vars['id_col']},
                           inplace=True)
        self.df_ids = self.tab.change_index(self.df_ids,
                                            self.proj_vars['id_col'])
        self.tab.save_df(self.df_ids, self.f_GLM_group)
        PrepareForGLM(self.FS_SUBJECTS_DIR, self.FS_GLM_dir, self.f_GLM_group,
                      self.proj_vars, self.vars_fs)

    def rm_missing_ids(self):
        ls_ix_2rm = list()
        for ix in self.df.index:
            bids_id = self.df.at[ix, self.proj_vars['id_col']]
            if bids_id not in self.ids_4fs_glm.keys():
                ls_ix_2rm.append(ix)
        len_miss = len(ls_ix_2rm)
        if len_miss == 0:
            print(f'        ALL subjects are present')
        else:
            print(f'        {len_miss} subjects are missing')
            print(f'            they will be removed from futher analysis')
        self.df = self.df.drop(ls_ix_2rm)

    def get_ids_processed(self):
        '''retrieves the bids names of the IDs provided in the GLM file.
            It is expected that each project had a group of subjects that are present in the dataset
            it is expected that BIDS names are the ones used in the groups_glm file for the ids
            the f_ids.json has the BIDS names of the subjects, and for each BIDS name
            has the corresponding names of the source file/freesurfer/nilearn/dipy processed ziped files
            see nimb/example/f_ids.json
        '''
        print('    extracting list of ids that were processed with FreeSurfer')
        print(f'        in the file{self.f_ids_processed}')
        self.ids_bids_proc_all = self.read_json(self.f_ids_processed)
        return {
            i: self.ids_bids_proc_all[i][DEFAULT.freesurfer_key]
            for i in self.ids_bids_proc_all
        }
        # return {i: 'path' for i in self.ids_bids_proc_all if self.ids_bids_proc_all[i]['source'] in ids_src_glm_file} #old version

    def add_to_miss(self, bids_id, file):
        '''add to the list of missing subjects
        '''
        if bids_id not in self.miss:
            self.miss[bids_id] = list()
        self.miss[bids_id].append(file)
        if bids_id in self.ids_4fs_glm:
            self.ids_4fs_glm.pop(bids_id, None)

    def read_json(self, f):
        '''read a json file
        '''
        with open(f, 'r') as jf:
            return json.load(jf)

Exemple #4

0

Afficher le fichier

Fichier : stats_stats.py Projet : alexhanganu/nimb

def mkstatisticsf(df_4stats,
                  groups,
                  group_col,
                  path2save,
                  make_with_colors=True):
    '''Creates discriptive statistical file for publication,
        based on provided pandas.DataFrame
        Works only on 2 groups
    Args: df_4stats: pandas.DataFrame
        group: list/ tuple of groups as str/int
        group_col: str() column name in df_4stats that has the group names from group
        path_2save: abspath to save the descrptive files
        make_with_colors: will create an additional .xlsx file with 
                        colored significant results,
                        provided xlwt is installed
    Return:
        json file with results
        .csv file with results
        .xlsx file with results with red colored significant ones
    '''

    tab = Table()
    ls_tests = ('mean', 'std', 'kurtosis', 'skewness', 'TTest', 'Welch',
                'ANOVA', 'Bartlett', 'MannWhitneyu', 'Kruskal')

    groups_df = dict()
    for group in groups:
        groups_df[group] = tab.get_df_per_parameter(df_4stats, group_col,
                                                    group)

    stats_dic = dict()
    vals2chk = df_4stats.columns.tolist()
    if group_col in vals2chk:
        vals2chk.remove(group_col)

    cols2color_sig = list()
    groups = list(groups_df.keys())
    group1 = groups_df[groups[0]]
    group2 = groups_df[groups[1]]
    for test in ls_tests:
        for val in vals2chk:
            results, params = get_stats(test, group1[val], group2[val])
            if test in ('mean', 'std', 'kurtosis', 'skewness'):
                key1 = f'{groups[0]}, {params[0]}'
                key2 = f'{groups[1]}, {params[0]}'
            else:
                key1 = f'{test}, {params[0]}'
                key2 = f'{test}, {params[1]}'
                cols2color_sig.append(key2)
            for key in (key1, key2):
                if key not in stats_dic:
                    stats_dic[key] = dict()
            stats_dic[key1][val] = f'{results[0]}'
            stats_dic[key2][val] = f'{results[1]}'

    df = tab.create_df_from_dict(stats_dic)
    tab.save_df(df,
                os.path.join(path2save, 'stats_general.csv'),
                sheet_name='stats')
    utilities.save_json(stats_dic, os.path.join(path2save,
                                                'stats_general.json'))
    if make_with_colors:
        save_2xlsx_with_colors(df,
                               path2save=path2save,
                               cols2color_sig=cols2color_sig)