Example #1
    def get_local_remote_dir(self, dir_data, _dir='None'):
        location = dir_data[0]
        dir_abspath = dir_data[1]
        print(f'{LogLVL.lvl2}folder {dir_abspath}')
        print(f'{LogLVL.lvl3}is located on: {location}')
        if location == 'local':
            if not os.path.exists(dir_abspath):
                # ask the user for a new path, then create it
                dir_abspath = get_userdefined_paths(f'{_dir} folder',
                                                    dir_abspath,
                                                    '',
                                                    create=False)
                makedir_ifnot_exist(dir_abspath)
                if _dir != 'None':
                    from setup.get_credentials_home import _get_credentials_home
                    if _dir in self.all_vars.projects[self.project]:
                        # persist the new path in projects.json
                        self.all_vars.projects[
                            self.project][_dir][1] = dir_abspath
                        abs_path_projects = os.path.join(
                            _get_credentials_home(), 'projects.json')
                        save_json(self.all_vars.projects, abs_path_projects)
                    else:
                        print('    folder to change is not listed in the '
                              'projects.json variables')
                else:
                    print('    folder to change is not defined; '
                          'cannot create a new one')
            return True, dir_abspath, 'local'
        else:
            return False, dir_abspath, location
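
Every example on this page calls save_json, and several also call load_json; the helpers themselves are not shown in these excerpts. A minimal sketch of what such wrappers typically look like, assuming they are thin layers over the standard json module (the print_space parameter, seen in some calls below, is treated here as an indentation hint for the log line; the real nimb implementation may differ):

    import json

    def save_json(data, file_abspath, print_space=4):
        # hypothetical sketch: dump data to file_abspath as JSON
        with open(file_abspath, 'w') as f:
            json.dump(data, f, indent=4)
        print(f'{" " * print_space}saved: {file_abspath}')

    def load_json(file_abspath):
        # hypothetical sketch: read a JSON file into a Python object
        with open(file_abspath) as f:
            return json.load(f)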
Example #2
    def chk_stats(self):
        """
        check if variables are defined in json
        :param config_file: path to configuration json file
        :return: new version, populated with missing values
        """
        default_stats = self.load_file('stats', default=True)

        update_stats = False
        for key in [i for i in default_stats.keys() if 'EXPLANATION' not in i]:
            if key not in self.stats_vars:
                print('adding missing key {} to stats'.format(key))
                self.stats_vars[key] = default_stats[key]
                update_stats = True
            for subkey in default_stats[key]:
                if subkey not in self.stats_vars[key]:
                    print('adding missing subkey {} to stats group: {}'.format(
                        subkey, key))
                    self.stats_vars[key][subkey] = default_stats[key][subkey]
                    self.stats_vars['EXPLANATION'][subkey] = default_stats[
                        'EXPLANATION'][subkey]
                    update_stats = True
                # compare the types of the values, not of the key itself
                if isinstance(default_stats[key][subkey], list):
                    if not isinstance(self.stats_vars[key][subkey], list):
                        print('    types are different {}'.format(subkey))
        if update_stats:
            save_json(self.stats_vars,
                      path.join(self.credentials_home, 'stats.json'))
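
The pattern in chk_stats (and in the next two examples) is a one-way merge: keys present in the default JSON file but absent from the user's file are copied over, and the file is saved again only if something changed. A standalone toy version of that merge, with made-up keys and plain dicts in place of the loaded files:

    default = {'GROUPS': {'group_col': 'group', 'id_col': 'id'}}
    current = {'GROUPS': {'group_col': 'diagnosis'}}  # 'id_col' is missing

    update = False
    for key in default:
        if key not in current:
            current[key] = default[key]
            update = True
        for subkey in default[key]:
            if subkey not in current[key]:
                current[key][subkey] = default[key][subkey]
                update = True

    print(update)             # True
    print(current['GROUPS'])  # {'group_col': 'diagnosis', 'id_col': 'id'}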
Example #3
    def chk_location_vars(self):
        default_local = self.load_file('local', default=True)

        for location in self.location_vars:
            # reset per location, so only changed files are saved again
            update = False
            for key in default_local:
                if key not in self.location_vars[location]:
                    print('adding missing key {} to location: {}'.format(
                        key, location))
                    self.location_vars[location][key] = default_local[key]
                    update = True
                for subkey in default_local[key]:
                    if subkey not in self.location_vars[location][key]:
                        print(
                            'adding missing subkey {} to location: {}, key: {}'
                            .format(subkey, location, key))
                        self.location_vars[location][key][
                            subkey] = default_local[key][subkey]
                        update = True
            if location == 'local':
                self.chk_paths(self.location_vars[location])
            if update:
                self.location_vars[location]['EXPLANATION'] = default_local[
                    'EXPLANATION']
                print('must update location: {}'.format(location))
                save_json(self.location_vars[location],
                          path.join(self.credentials_home, location + '.json'))
Example #4
    def chk_project_vars(self):
        """
        check if variables are defined in json
        :param config_file: path to configuration json file
        :return: new version, populated with missing values
        """
        update = False
        if self.params:
            update = self.set_stats()

        default_project = self.load_file('projects', default=True)
        for subkey in default_project[DEFAULT.default_project]:
            if subkey not in self.projects[self.project]:
                print('adding missing subkey {} to project: {}'.format(
                    subkey, self.project))
                self.projects[self.project][subkey] = default_project[
                    DEFAULT.default_project][subkey]
                self.projects['EXPLANATION'][subkey] = default_project[
                    'EXPLANATION'][subkey]
                update = True
            # compare the types of the values, not of the key itself
            if isinstance(default_project[DEFAULT.default_project][subkey],
                          list):
                if not isinstance(self.projects[self.project][subkey], list):
                    print('types are different {}'.format(subkey))
        if update:
            save_json(self.projects,
                      path.join(self.credentials_home, 'projects.json'))

        for project in DEFAULT.project_ids:
            if project not in self.projects:
                self.projects[project] = default_project[
                    DEFAULT.default_project]
Example #5
    def make_f_subjects_2b_processed(self, location, unprocessed_d):
        NIMB_tmp_loc = self.locations[location]['NIMB_PATHS']['NIMB_tmp']
        f_abspath = os.path.join(NIMB_tmp_loc, DEFAULT.f_subjects2proc)
        print(f'{LogLVL.lvl2}creating file: {f_abspath}')
        # print(unprocessed_d)
        for _id_bids in unprocessed_d:
            unprocessed_d[_id_bids] = self.adjust_paths_2data(
                NIMB_tmp_loc, unprocessed_d[_id_bids])
            print(unprocessed_d[_id_bids])
        save_json(unprocessed_d, f_abspath)
Example #6
    def set_project(self, location):
        f_projects = path.join(self.credentials_home, 'projects.json')
        if path.exists(f_projects):
            projects = load_json(f_projects)
            projects['LOCATION'].append(location)
            save_json(projects, f_projects)
            # remote1.json serves as a template for the new location's file
            new_loc = load_json(
                path.join(self.credentials_home, 'remote1.json'))
            new_loc['USER']['user'] = self.username
            save_json(new_loc,
                      path.join(self.credentials_home, location + '.json'))
Example #7
    def chk_spaces(self):
        if self.spaces_in_paths:
            f_paths_spaces = os.path.join(self.NIMB_tmp,
                                          'paths_with_spaces.json')
            save_json(self.spaces_in_paths, f_paths_spaces)
            len_spaces = len(self.spaces_in_paths)
            # implicit string concatenation instead of backslash continuation,
            # which injected the continuation's indentation into the message
            log.info(f'    ATTENTION: ERR: paths of {len_spaces} subjects '
                     'have spaces and will not be processed by FreeSurfer')
            log.info('    ATTENTION: paths with spaces can be found here: '
                     f'{f_paths_spaces}')
            log.info('    ATTENTION: nimb can change spaces to underscores '
                     'when adding the parameter: -fix-spaces; example: '
                     'python nimb.py -process classify -project Project '
                     '-fix-spaces')
Example #8
    def update_config(self):
        """add the current sidecar's criteria to the dcm2bids config file,
        if a corresponding description entry is not already present"""
        self.add_criterion = False
        self.config = load_json(self.config_file)
        criterion1 = 'SeriesDescription'
        sidecar_crit1 = self.sidecar_content[criterion1]

        list_criteria = list()
        for des in self.config['descriptions']:
            if des['dataType'] == self.data_Type and \
                    des['modalityLabel'] == self.modalityLabel:
                list_criteria.append(des)
        if list_criteria:
            print(
                f'{" " * 12}> there is at least one configuration with dataType: {self.data_Type}'
            )
            for des in list_criteria[::-1]:
                if criterion1 in des['criteria']:
                    if des['criteria'][criterion1] == sidecar_crit1:
                        print(f'{" " * 12} sidecar is present in the config '
                              'file. Add another sidecar criterion in the '
                              'dcm2bids_helper.py script')
                        self.add_criterion = True
                        sys.exit(0)
                    else:
                        list_criteria.remove(des)
        if list_criteria:
            print(f'{" " * 12}> cannot find a correct sidecar location. '
                  'Please add more parameters.')
        else:
            print(f'{" " * 12}> updating config with value: {sidecar_crit1}')
            new_des = {
                'dataType': self.data_Type,
                'modalityLabel': self.modalityLabel,
                'criteria': {
                    criterion1: sidecar_crit1
                }
            }
            self.config['descriptions'].append(new_des)
            self.update = True

        if self.update:
            self.run_stt = 0
            save_json(self.config, self.config_file, print_space=12)
        else:
            print(f'{" " * 12}criterion {criterion1} present in config file')
Example #9
    def check_nan(self, df, err_file_abspath):
        d_err = dict()
        cols_with_nans = list()
        for col in df.columns:
            if df[col].isnull().values.any():
                # enumerate instead of list.index(): index() always returns
                # the FIRST True, so repeated NaNs pointed to the same row
                for pos, is_nan in enumerate(df[col].isnull().tolist()):
                    if is_nan:
                        ix = df.index[pos]
                        if ix not in d_err:
                            d_err[ix] = list()
                        if col not in d_err[ix]:
                            d_err[ix].append(col)
                        if col not in cols_with_nans:
                            cols_with_nans.append(col)
        save_json(d_err, err_file_abspath)
        return d_err, cols_with_nans
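
A quick illustration of what check_nan collects, on a toy DataFrame (the method body does not touch self, so the scan is reproduced here as plain code):

    import pandas as pd

    df = pd.DataFrame({'age': [25, None, 40], 'iq': [None, 100, 110]},
                      index=['sub-01', 'sub-02', 'sub-03'])
    d_err = {}
    for col in df.columns:
        for ix in df[df[col].isnull()].index:
            d_err.setdefault(ix, []).append(col)
    print(d_err)  # {'sub-02': ['age'], 'sub-01': ['iq']}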
Example #10
    def run(self):
        self.dir_2classify = self.get_dirs2classify()
        for self._dir in self.dir_2classify:
            self.archived = False
            dir_abspath = os.path.join(self.MAIN_DIR, self._dir)
            self.main = self.get_dict_4classification(dir_abspath)
            paths_2mris = self._get_MR_paths(dir_abspath)

            if paths_2mris:
                if self.archived:
                    bids_ids = self.get_bids_ids(paths_2mris)
                    for bids_id in bids_ids:
                        paths_2classify = self.get_content_per_bids_id(
                            paths_2mris, bids_id)
                        BIDS_classified = self.classify_2bids(paths_2classify)
                        self.main[bids_id] = BIDS_classified
                        self.main[bids_id]['archived'] = str(dir_abspath)
                else:
                    paths_2classify = paths_2mris
                    BIDS_classified = self.classify_2bids(paths_2classify)
                    self.main[self._dir] = BIDS_classified
                    self.main[self._dir]['archived'] = ''
                log.info("    saving classification file")
                save_json(self.main, self.f_nimb_classified)
            else:
                log.info(
                    f'    there are no files or folders to read in the provided path: {dir_abspath}'
                )
        log.info(
            f"classification of new subjects is complete, file located at: {self.f_nimb_classified}"
        )
        if self.multiple_T1 == 1:
            from classification.get_mr_params import verify_MRIs_for_similarity
            self.main = verify_MRIs_for_similarity(self.main, self.NIMB_tmp,
                                                   self.flair_t2_add)
        else:
            self.main = self.keep_only1_T1()

        self.chk_spaces()
        if os.path.exists(self.f_nimb_classified):
            return True, self.main
        else:
            return False, self.main
Example #11
def save_df_Emmanuelle(df,
                       groups,
                       stats_dic,
                       cols2color_sig,
                       path2save,
                       make_with_colors,
                       extensions=('xlsx', 'csv', 'json')):

    if 'xlsx' in extensions:
        import openpyxl
        import string
        # note: the .xlsx file is written to the current working directory
        df.to_excel('stats_new.xlsx')
        # merge the paired mean/std sub-index header cells, one pair per group
        wb = openpyxl.load_workbook('stats_new.xlsx')
        sheet = wb['Sheet1']
        alpha = string.ascii_uppercase
        for ltr in range(1, 2 * len(groups) + 1, 2):
            cell1, cell2 = alpha[ltr] + str(2), alpha[ltr + 1] + str(2)
            sheet.merge_cells(cell1 + ':' + cell2)
        wb.save('stats_new.xlsx')

    if 'json' in extensions:
        utilities.save_json(stats_dic, os.path.join(path2save, 'stats.json'))

    if 'csv' in extensions:
        tab = Table()
        tab.save_df(df,
                    os.path.join(path2save, 'stats_new.csv'),
                    sheet_name='stats')

    if make_with_colors:
        save_2xlsx_with_colors_Emmanuelle(df,
                                          'stats_new.xlsx',
                                          path2save,
                                          'stats_wcolors.xlsx',
                                          cols2color_sig=cols2color_sig)
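
The merge loop above pairs every second column starting at B, so that each group's mean/std sub-columns share a single header cell. A self-contained openpyxl illustration of the same call for two groups:

    import string
    import openpyxl

    wb = openpyxl.Workbook()
    sheet = wb.active
    alpha = string.ascii_uppercase
    groups = ['G1', 'G2']
    # columns B..E hold the mean/std pairs: merge B2:C2 and D2:E2
    for ltr in range(1, 2 * len(groups) + 1, 2):
        sheet.merge_cells(f'{alpha[ltr]}2:{alpha[ltr + 1]}2')
    wb.save('merged_headers.xlsx')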
Example #12
    def __init__(self,
                 all_vars,
                 PATHglm,
                 sig_fdr_thresh=3.0):
        '''
        sig_fdr_thresh of 3.0 corresponds to p = 0.001;
        for p = 0.05 use the value 1.3,
        but that threshold should be used ONLY for visualisation.
        '''

        vars_fs                    = all_vars.location_vars['local']["FREESURFER"]
        self.FREESURFER_HOME       = vars_fs["FREESURFER_HOME"]
        self.SUBJECTS_DIR          = vars_fs["SUBJECTS_DIR"]
        self.measurements          = vars_fs["GLM_measurements"]
        self.thresholds            = vars_fs["GLM_thresholds"]
        self.mc_cache_thresh       = vars_fs["GLM_MCz_cache"]
        param                      = fs_definitions.FSGLMParams(PATHglm)
        self.PATHglm               = PATHglm
        self.sig_fdr_thresh        = sig_fdr_thresh

        self.PATHglm_glm           = param.PATHglm_glm
        self.PATH_img              = param.PATH_img
        self.PATHglm_results       = param.PATHglm_results
        self.sig_fdr_json          = param.sig_fdr_json
        self.sig_mc_json           = param.sig_mc_json
        self.err_mris_preproc_file = param.err_mris_preproc_file
        self.mcz_sim_direction     = param.mcz_sim_direction
        self.hemispheres           = fs_definitions.hemi
        self.GLM_sim_fwhm4csd      = param.GLM_sim_fwhm4csd
        self.GLM_MCz_meas_codes    = param.GLM_MCz_meas_codes
        self.cluster_stats         = param.cluster_stats
        self.cluster_stats_2csv    = param.cluster_stats_2csv
        self.sig_contrasts         = param.sig_contrasts

        RUN = True
        # get files_glm
        try:
            files_glm = load_json(param.files_for_glm)
            print(f'    successfully loaded file: {param.files_for_glm}')
        except Exception as e:
            # was: except ImportError, which load_json would never raise
            print(e)
            print(f'    file {param.files_for_glm} is missing')
            RUN = False

        # get file with subjects per group
        try:
            subjects_per_group = load_json(param.subjects_per_group)
            print(f'    successfully loaded file: {param.subjects_per_group}')
        except Exception as e:
            print(e)
            print(f'    file {param.subjects_per_group} is missing')
            subjects_per_group = dict()
            RUN = False

        # checking that all subjects are present
        print('    subjects are located in: {}'.format(self.SUBJECTS_DIR))
        subjects_in_dir = os.listdir(self.SUBJECTS_DIR)
        for group in subjects_per_group:
            for subject in subjects_per_group[group]:
                if subject not in subjects_in_dir:
                    print(f'    subject is missing from the FreeSurfer subjects folder: {subject}')
                    RUN = False
                    break

        for subdir in (self.PATHglm_glm, self.PATHglm_results, self.PATH_img):
            if not os.path.isdir(subdir):
                os.makedirs(subdir)
        if not os.path.isfile(self.sig_contrasts):
            open(self.sig_contrasts, 'w').close()

        if RUN:
            self.err_preproc  = list()
            self.sig_fdr_data = dict()
            self.sig_mc_data  = dict()
            self.run_loop(files_glm)
            if self.err_preproc:
                save_json(self.err_preproc, self.err_mris_preproc_file)
            if self.sig_fdr_data:
                save_json(self.sig_fdr_data, self.sig_fdr_json)
            if self.sig_mc_data:
                save_json(self.sig_mc_data, self.sig_mc_json)
            if os.path.exists(self.cluster_stats):
                ClusterFile2CSV(self.cluster_stats,
                                self.cluster_stats_2csv)
            print('\n\nGLM DONE')
        else:
            sys.exit('some ERRORS were found. Cannot perform FreeSurfer GLM')
Example #13
    def setup_default_local_nimb(self):
        # use the default local.json as a template for remote1.json
        shutil.copy(
            os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         'local.json'),
            os.path.join(self.credentials_home, 'remote1.json'))

        local_vars = self.load_file('local', default=True)
        local_vars['USER']['user'] = _get_username()
        # setting NIMB paths
        NIMB_PATHS = local_vars['NIMB_PATHS']
        NIMB_HOME = os.path.abspath(
            os.path.join(os.path.dirname(__file__), '..'))
        print('NIMB_HOME is: ', NIMB_HOME)
        NIMB_PATHS['NIMB_HOME'] = NIMB_HOME
        new_NIMB_tmp = get_userdefined_paths(
            'NIMB temporary folder nimb_tmp',
            os.path.join(NIMB_HOME.replace('/nimb/nimb', ''), 'nimb_tmp'),
            'nimb_tmp')
        if not os.path.exists(new_NIMB_tmp):
            os.makedirs(new_NIMB_tmp)
        NIMB_PATHS['NIMB_tmp'] = new_NIMB_tmp
        NIMB_PATHS['NIMB_NEW_SUBJECTS'] = os.path.join(new_NIMB_tmp,
                                                       'nimb_new_subjects')
        # setting FREESURFER paths
        new_freesurfer_path = get_userdefined_paths(
            'FreeSurfer folder',
            os.path.join(NIMB_HOME.replace('/nimb/nimb', ''), 'freesurfer'),
            'freesurfer')

        new_conda_path = new_freesurfer_path.replace("freesurfer", "conda3")
        NIMB_PATHS['conda_home'] = new_conda_path
        NIMB_PATHS['miniconda_python_run'] = os.path.join(
            new_conda_path, 'bin',
            'python3.7').replace(os.path.expanduser("~"), "~")
        local_vars['NIMB_PATHS'] = NIMB_PATHS

        FS_PATHS = local_vars['FREESURFER']
        FS_PATHS['FREESURFER_HOME'] = new_freesurfer_path
        FS_PATHS['FS_SUBJECTS_DIR'] = os.path.join(new_freesurfer_path,
                                                   'subjects')
        FS_PATHS['export_FreeSurfer_cmd'] = ('export FREESURFER_HOME=' +
                                             new_freesurfer_path)
        if not os.path.exists(new_freesurfer_path):
            FreeSurfer_install = get_yes_no(
                f'do you want to install FreeSurfer at the provided location {new_freesurfer_path}? (y/n)'
            )
            FS_PATHS['FreeSurfer_install'] = FreeSurfer_install
            if FreeSurfer_install == 1:
                freesurfer_license = get_FS_license()
                FS_PATHS['freesurfer_license'] = freesurfer_license
        else:
            FS_PATHS['FreeSurfer_install'] = 1
        local_vars['FREESURFER'] = FS_PATHS
        # setting PROCESSING paths
        environ = get_yes_no(
            "Will this account use slurm or tmux for processing ? (y/n; y=slurm/ n=tmux)"
        )
        if environ == 1:
            local_vars['PROCESSING']['processing_env'] = 'slurm'
            supervisor = input(
                "For some slurm environments a supervisor account is required. Please type supervisor account name or leave blank:"
            )
            if supervisor:
                print('supervisor account name is: {}'.format(supervisor))
                local_vars['USER']['supervisor_account'] = str(supervisor)
                local_vars['PROCESSING']['supervisor_account'] = str(
                    supervisor)
                local_vars['PROCESSING']['text4_scheduler'][1] = local_vars[
                    'PROCESSING']['text4_scheduler'][1].replace(
                        'def-supervisor', supervisor)
            else:
                print('supervisor account not provided')
                local_vars['USER']['supervisor_account'] = ''
                local_vars['PROCESSING']['supervisor_account'] = ''
                local_vars['PROCESSING']['text4_scheduler'].remove(
                    local_vars['PROCESSING']['text4_scheduler'][1])
        else:
            print('environment for processing is: {}'.format(environ))
            local_vars['PROCESSING']['processing_env'] = 'tmux'
        save_json(local_vars, os.path.join(self.credentials_home,
                                           'local.json'))
        self.get_all_locations_vars()
Example #14
    def chk_if_subjects_ready(self):
        fs_proc_ids = self.get_ids_processed()
        miss_bids_ids = [
            i for i in self.bids_ids if i not in fs_proc_ids
        ]
        if miss_bids_ids:
            print(
                f'    {len(miss_bids_ids)} IDs are missing from file: {self.f_ids_processed}'
            )
            print(f'        first 5 missing IDs are: {miss_bids_ids[:5]}')
            for bids_id in miss_bids_ids:
                self.add_to_miss(bids_id, 'id_missing')

        if len(miss_bids_ids) < len(fs_proc_ids):
            for bids_id in [
                    i for i in self.bids_ids if i not in miss_bids_ids
            ]:
                fs_proc_id = fs_proc_ids[bids_id].replace(
                    self.archive_type, '')
                if os.path.exists(os.path.join(self.FS_SUBJECTS_DIR, bids_id)):
                    self.ids_4fs_glm[bids_id] = bids_id
                    self.chk_glm_files(bids_id)
                elif os.path.exists(
                        os.path.join(self.FS_SUBJECTS_DIR, fs_proc_id)):
                    self.ids_4fs_glm[bids_id] = fs_proc_id
                    self.chk_glm_files(fs_proc_id)
                else:
                    print(f'id {bids_id} or freesurfer id {fs_proc_id} '
                          f'are missing from the {self.FS_SUBJECTS_DIR} folder')
                    self.add_to_miss(bids_id, 'id_missing')
            if self.miss:
                print("    missing files and ids: ", self.miss)
                save_json(self.miss, self.ids_exclude_glm, print_space=8)
                subjs_missing = len(self.miss)
                subjs_present = len(self.ids_4fs_glm)
                print('    Number of participants ready for FreeSurfer GLM:')
                print(f'        in the folder: {self.FS_SUBJECTS_DIR}')
                print(f'        {subjs_present} present')
                print(f'        {subjs_missing} missing')
                not_ready = [
                    i for i in self.miss if "id_missing" not in self.miss[i]
                ]
                maybe_archived = [i for i in self.miss if i not in not_ready]
                if maybe_archived:
                    print("   MAYBE archived: ", maybe_archived)
                    q = "    EXCEPTION! Some IDs are missing, but they could be archived.\n\
                    Do you want to do glm analysis with current subjects (y) or try to check the archive (n) ? (y/n)\n\
                        (note: if you answer NO, you will be asked to unarchive the \n\
                        processed folders of IDs if they are present in FREESURFER_PROCESSED)"

                    if get_yes_no(q) == 1:
                        self.create_fs_glm_df()
                        return True, list()
                    else:
                        return False, maybe_archived
                if not_ready:
                    print("    MISSING FILES: these participants CANNOT be "
                          "included in the GLM analysis: ", not_ready)
                    q = ("    EXCEPTION! Some IDs have missing files and they "
                         "MUST be excluded from the analysis.\n"
                         "    Do you want to continue without the excluded IDs? (y/n)")

                    if get_yes_no(q) == 1:
                        self.create_fs_glm_df()
                        return True, list()
                    else:
                        return False, not_ready
            else:
                self.create_fs_glm_df()
                return True, list()
        else:
            print('    no ids found')
            return False, list()
Example #15
    def run_anova(self, p_thresh, intercept_thresh, path2save):
        ls_err = list()
        for param_y in self.params_y:
            x = np.array(self.df[param_y])
            df_result = self.tab.get_clean_df()
            df_result_list = df_result.copy()
            df_result[param_y] = ''
            df_result_list[param_y] = ''
            ix = 1
            ixx = 1
            # print(f'    analysing {len(self.ls_cols4anova)} features for parameter: {param_y}')
            for col in self.ls_cols4anova:
                y = np.array(self.df[col])
                data_tmp = pd.DataFrame({'x': x, col: y})
                model = ols(col + " ~ x", data=data_tmp).fit()
                # apply intercept_thresh to the intercept and p_thresh to the
                # slope; the original had the two thresholds swapped
                if model.pvalues.Intercept < intercept_thresh and \
                        model.pvalues.x < p_thresh:
                    measurement, structure, ls_err = self.fs_struc_meas.get(
                        col, ls_err)
                    if param_y not in self.sig_cols:
                        self.sig_cols[param_y] = dict()
                    self.sig_cols[param_y][col] = {
                        'rsquared': model.rsquared,
                        'rsquared-adjusted': model.rsquared_adj,
                        'F-statistic': model.fvalue,
                        'AIC': model.aic,
                        'BIC': model.bic,
                        'pvalue_slope': model.pvalues.x,
                        'pvalue_intercept': model.pvalues.Intercept,
                        'tvalue_slope': model.tvalues.x,
                        'tvalue_intercept': model.tvalues.Intercept,
                        'meas': measurement,
                        'struct': structure
                    }
                    df_result_list = self.populate_df(
                        df_result_list, ixx, {
                            param_y: structure,
                            'measure': measurement,
                            'pvalue': '%.4f' % model.pvalues.x
                        })
                    if structure not in df_result[param_y].tolist():
                        df_result = self.populate_df(
                            df_result, ix, {
                                param_y: structure,
                                measurement: '%.4f' % model.pvalues.x
                            })
                        ix += 1
                    else:
                        df_result = self.populate_df(
                            df_result,
                            df_result[param_y].tolist().index(structure),
                            {measurement: '%.4f' % model.pvalues.x})
                    ixx += 1
            self.tab.save_df_tocsv(
                df_result_list,
                path.join(path2save, f'anova_per_significance_{param_y}.csv'))
            self.tab.save_df_tocsv(
                df_result,
                path.join(path2save, f'anova_per_structure_{param_y}.csv'))
        save_json(self.sig_cols,
                  path.join(path2save, 'anova_significant_features.json'))
        if self.print_not_FS:
            print('NOT freesurfer structures: ', ls_err)
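
run_anova fits one univariate OLS model per feature and reads the slope and intercept statistics off the fitted model. A self-contained sketch of that access pattern on synthetic data:

    import numpy as np
    import pandas as pd
    from statsmodels.formula.api import ols

    rng = np.random.default_rng(0)
    x = rng.normal(size=100)
    data = pd.DataFrame({'x': x, 'feat': 2.0 * x + rng.normal(size=100)})

    model = ols('feat ~ x', data=data).fit()
    print(model.pvalues.Intercept)  # p-value of the intercept
    print(model.pvalues.x)          # p-value of the slope
    print(model.rsquared, model.fvalue, model.aic, model.bic)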
Example #16
def mkstatisticsf(df_4stats,
                  groups,
                  group_col,
                  path2save,
                  make_with_colors=True):
    '''Creates a descriptive statistics file for publication,
        based on the provided pandas.DataFrame.
        Works only on 2 groups.
    Args:
        df_4stats: pandas.DataFrame with the data
        groups: list/tuple of the 2 group names, as str/int
        group_col: name of the column in df_4stats that holds the group names
        path2save: abspath of the folder where the descriptive files are saved
        make_with_colors: if True, creates an additional .xlsx file with
                        the significant results colored,
                        provided xlwt is installed
    Returns:
        .json file with the results
        .csv file with the results
        .xlsx file with the significant results colored in red
    '''

    tab = Table()
    ls_tests = ('mean', 'std', 'kurtosis', 'skewness', 'TTest', 'Welch',
                'ANOVA', 'Bartlett', 'MannWhitneyu', 'Kruskal')

    groups_df = dict()
    for group in groups:
        groups_df[group] = tab.get_df_per_parameter(df_4stats, group_col,
                                                    group)

    stats_dic = dict()
    vals2chk = df_4stats.columns.tolist()
    if group_col in vals2chk:
        vals2chk.remove(group_col)

    cols2color_sig = list()
    groups = list(groups_df.keys())
    group1 = groups_df[groups[0]]
    group2 = groups_df[groups[1]]
    for test in ls_tests:
        for val in vals2chk:
            results, params = get_stats(test, group1[val], group2[val])
            if test in ('mean', 'std', 'kurtosis', 'skewness'):
                key1 = f'{groups[0]}, {params[0]}'
                key2 = f'{groups[1]}, {params[0]}'
            else:
                key1 = f'{test}, {params[0]}'
                key2 = f'{test}, {params[1]}'
                cols2color_sig.append(key2)
            for key in (key1, key2):
                if key not in stats_dic:
                    stats_dic[key] = dict()
            stats_dic[key1][val] = f'{results[0]}'
            stats_dic[key2][val] = f'{results[1]}'

    df = tab.create_df_from_dict(stats_dic)
    tab.save_df(df,
                os.path.join(path2save, 'stats_general.csv'),
                sheet_name='stats')
    utilities.save_json(stats_dic, os.path.join(path2save,
                                                'stats_general.json'))
    if make_with_colors:
        save_2xlsx_with_colors(df,
                               path2save=path2save,
                               cols2color_sig=cols2color_sig)
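
mkstatisticsf depends on the project's Table, get_stats and save_2xlsx_with_colors helpers, so it is not runnable on its own. The core of what it assembles, splitting the data into two groups and tabulating a descriptive plus an inferential statistic per column, can be sketched independently with pandas and scipy (a minimal stand-in, not the author's implementation):

    import pandas as pd
    from scipy import stats

    df = pd.DataFrame({'group': ['A'] * 5 + ['B'] * 5,
                       'vol': [3.1, 3.0, 3.3, 2.9, 3.2,
                               2.6, 2.7, 2.5, 2.8, 2.6]})
    g1 = df[df['group'] == 'A']
    g2 = df[df['group'] == 'B']

    stats_dic = {'A, mean': {}, 'B, mean': {}, 'TTest, t': {}, 'TTest, p': {}}
    for val in ['vol']:
        t, p = stats.ttest_ind(g1[val], g2[val])
        stats_dic['A, mean'][val] = f'{g1[val].mean():.3f}'
        stats_dic['B, mean'][val] = f'{g2[val].mean():.3f}'
        stats_dic['TTest, t'][val] = f'{t:.3f}'
        stats_dic['TTest, p'][val] = f'{p:.5f}'

    print(pd.DataFrame(stats_dic))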