def get_local_remote_dir(self, dir_data, _dir='None'):
    """check if a folder is local or remote;
        if local and missing, ask the user for a new path
        and register it in projects.json
    :param dir_data: list (location, abspath of the folder)
    :param _dir: name of the projects.json variable to update, or 'None'
    :return: (True/False if local, abspath, location)
    """
    location = dir_data[0]
    dir_abspath = dir_data[1]
    print(f'{LogLVL.lvl2}folder {dir_abspath}')
    print(f'{LogLVL.lvl3}is located on: {location}')
    if location == 'local':
        if not os.path.exists(dir_abspath):
            dir_abspath = get_userdefined_paths(f'{_dir} folder',
                                                dir_abspath, '',
                                                create=False)
            makedir_ifnot_exist(dir_abspath)
            if _dir != 'None':
                from setup.get_credentials_home import _get_credentials_home
                if _dir in self.all_vars.projects[self.project]:
                    self.all_vars.projects[self.project][_dir][1] = dir_abspath
                    abs_path_projects = os.path.join(_get_credentials_home(),
                                                     'projects.json')
                    save_json(self.all_vars.projects, abs_path_projects)
                else:
                    print('    folder to change is not located in the projects.json variables')
            else:
                print('    folder to change is not defined, cannot create a new one.')
        return True, dir_abspath, 'local'
    else:
        return False, dir_abspath, location
def chk_stats(self):
    """check that all variables from the default stats file
        are defined in the user's stats.json;
        populate missing keys and subkeys with the default values
    """
    default_stats = self.load_file('stats', default=True)
    update_stats = False
    for key in [i for i in default_stats.keys() if 'EXPLANATION' not in i]:
        if key not in self.stats_vars:
            print('adding missing key {} to stats'.format(key))
            self.stats_vars[key] = default_stats[key]
            update_stats = True
        for subkey in default_stats[key]:
            if subkey not in self.stats_vars[key]:
                print('adding missing subkey {} to stats group: {}'.format(subkey, key))
                self.stats_vars[key][subkey] = default_stats[key][subkey]
                self.stats_vars['EXPLANATION'][subkey] = default_stats['EXPLANATION'][subkey]
                update_stats = True
            # compare the type of the default value with the user-defined one;
            # the original checked isinstance(subkey, list), which is always
            # False for json keys, so the comparison is done on the values
            if isinstance(default_stats[key][subkey], list) and \
                    not isinstance(self.stats_vars[key][subkey], list):
                print('    types are different {}'.format(subkey))
    if update_stats:
        save_json(self.stats_vars,
                  path.join(self.credentials_home, 'stats.json'))
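# The defaults-merge pattern used by chk_stats() (and by the chk_* helpers
# below), shown on plain dicts; keys and values are illustrative only:
#
#   default = {'STATS': {'STATS_HOME': 'stats', 'FS_GLM_dir': 'glm'}}
#   user    = {'STATS': {'STATS_HOME': 'stats'}}
#   for key, subdict in default.items():
#       user.setdefault(key, subdict)
#       for subkey, value in subdict.items():
#           user[key].setdefault(subkey, value)
#   # user == {'STATS': {'STATS_HOME': 'stats', 'FS_GLM_dir': 'glm'}}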
def chk_location_vars(self):
    """check that all keys from the default local file are defined
        for each location; populate missing values"""
    default_local = self.load_file('local', default=True)
    for location in self.location_vars:
        # update is reset per location, so that only the locations
        # with missing values are re-saved
        update = False
        for key in default_local:
            if key not in self.location_vars[location]:
                print('adding missing key {} to location: {}'.format(key, location))
                self.location_vars[location][key] = default_local[key]
                update = True
            for subkey in default_local[key]:
                if subkey not in self.location_vars[location][key]:
                    print('adding missing subkey {} to location: {}, key: {}'.format(
                        subkey, location, key))
                    self.location_vars[location][key][subkey] = default_local[key][subkey]
                    update = True
        if location == 'local':
            self.chk_paths(self.location_vars[location])
        if update:
            self.location_vars[location]['EXPLANATION'] = default_local['EXPLANATION']
            print('must update location: {}'.format(location))
            save_json(self.location_vars[location],
                      path.join(self.credentials_home, location + '.json'))
def chk_project_vars(self):
    """check that all variables from the default projects file
        are defined for the current project in projects.json;
        populate missing values
    """
    update = False
    if self.params:
        update = self.set_stats()
    default_project = self.load_file('projects', default=True)
    for subkey in default_project[DEFAULT.default_project]:
        if subkey not in self.projects[self.project]:
            print('adding missing subkey {} to project: {}'.format(subkey, self.project))
            self.projects[self.project][subkey] = default_project[
                DEFAULT.default_project][subkey]
            self.projects['EXPLANATION'][subkey] = default_project['EXPLANATION'][subkey]
            update = True
        # compare the type of the default value with the user-defined one
        if isinstance(default_project[DEFAULT.default_project][subkey], list) and \
                not isinstance(self.projects[self.project][subkey], list):
            print('types are different {}'.format(subkey))
    # register the default project ids before saving,
    # so that newly added projects are persisted as well
    for project in DEFAULT.project_ids:
        if project not in self.projects:
            self.projects[project] = default_project[DEFAULT.default_project]
            update = True
    if update:
        save_json(self.projects,
                  path.join(self.credentials_home, 'projects.json'))
def make_f_subjects_2b_processed(self, location, unprocessed_d):
    """save the dictionary of unprocessed subjects to a json file
        in the NIMB_tmp folder of the given location"""
    NIMB_tmp_loc = self.locations[location]['NIMB_PATHS']['NIMB_tmp']
    f_abspath = os.path.join(NIMB_tmp_loc, DEFAULT.f_subjects2proc)
    print(f'{LogLVL.lvl2}creating file: {f_abspath}')
    for _id_bids in unprocessed_d:
        unprocessed_d[_id_bids] = self.adjust_paths_2data(NIMB_tmp_loc,
                                                          unprocessed_d[_id_bids])
        print(unprocessed_d[_id_bids])
    save_json(unprocessed_d, f_abspath)
def set_project(self, location):
    """register a new location in projects.json and create the
        corresponding credentials file, based on remote1.json"""
    f_projects = path.join(self.credentials_home, 'projects.json')
    if path.exists(f_projects):
        projects = load_json(f_projects)
        projects['LOCATION'].append(location)
        save_json(projects, f_projects)
        new_loc = load_json(path.join(self.credentials_home, 'remote1.json'))
        new_loc['USER']['user'] = self.username
        save_json(new_loc,
                  path.join(self.credentials_home, location + '.json'))
def chk_spaces(self):
    """warn about subject paths that contain spaces;
        FreeSurfer cannot process such paths"""
    if self.spaces_in_paths:
        f_paths_spaces = os.path.join(self.NIMB_tmp, 'paths_with_spaces.json')
        save_json(self.spaces_in_paths, f_paths_spaces)
        len_spaces = len(self.spaces_in_paths)
        log.info(f'    ATTENTION: ERR: paths of {len_spaces} subjects have spaces '
                 'and will not be processed by FreeSurfer')
        log.info(f'    ATTENTION: paths with spaces can be found here: {f_paths_spaces}')
        log.info('    ATTENTION: nimb can change spaces to underscores when adding '
                 'the parameter: -fix-spaces; '
                 'example: python nimb.py -process classify -project Project -fix-spaces')
def update_config(self):
    """check whether the current sidecar is already covered by a
        description in the dcm2bids config file;
        if not, add a new description based on SeriesDescription"""
    self.add_criterion = False
    self.config = load_json(self.config_file)
    criterion1 = 'SeriesDescription'
    sidecar_crit1 = self.sidecar_content[criterion1]
    # keep only the descriptions with the same dataType and modalityLabel
    list_criteria = list()
    for des in self.config['descriptions']:
        if des['dataType'] == self.data_Type and \
                des["modalityLabel"] == self.modalityLabel:
            list_criteria.append(des)
    if len(list_criteria) > 0:
        print(f'{" " * 12}> there is at least one configuration with dataType: {self.data_Type}')
        for des in list_criteria[::-1]:
            if criterion1 in des['criteria']:
                if des['criteria'][criterion1] == sidecar_crit1:
                    print(f'{" " * 12} sidecar is present in the config file. '
                          'Add another sidecar criterion in the dcm2bids_helper.py script')
                    self.add_criterion = True
                    sys.exit(0)
                else:
                    list_criteria.remove(des)
    if len(list_criteria) > 0:
        print(f'{" " * 12}> cannot find a correct sidecar location. Please add more parameters.')
    if len(list_criteria) == 0:
        print(f'{" " * 12}> updating config with value: {sidecar_crit1}')
        new_des = {'dataType': self.data_Type,
                   'modalityLabel': self.modalityLabel,
                   'criteria': {criterion1: sidecar_crit1}}
        self.config['descriptions'].append(new_des)
        self.update = True
    if self.update:
        self.run_stt = 0
        save_json(self.config, self.config_file, print_space=12)
    else:
        print(f'{" " * 12}criterion {criterion1} present in config file')
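# For reference, a minimal dcm2bids-style config file, as read and extended by
# update_config() above, could look like this (illustrative values; only the
# keys actually accessed in this module are shown):
#
# {
#     "descriptions": [
#         {
#             "dataType": "anat",
#             "modalityLabel": "T1w",
#             "criteria": {"SeriesDescription": "t1_mprage_sag"}
#         }
#     ]
# }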
def check_nan(self, df, err_file_abspath):
    """find the NaN values in a pandas.DataFrame
    :param df: pandas.DataFrame to check
    :param err_file_abspath: abspath of the json file to save the errors
    :return: dict {index: [columns with NaN]}, list of columns with NaNs
    """
    d_err = dict()
    cols_with_nans = list()
    for col in df.columns:
        if df[col].isnull().values.any():
            ls = df[col].isnull().tolist()
            # enumerate is used instead of ls.index(val), because
            # ls.index() always returns the FIRST occurrence, so multiple
            # NaNs in a column would all be mapped to the same row
            for i, val in enumerate(ls):
                if val:
                    ix = df.index[i]
                    if ix not in d_err:
                        d_err[ix] = list()
                    if col not in d_err[ix]:
                        d_err[ix].append(col)
                    if col not in cols_with_nans:
                        cols_with_nans.append(col)
    save_json(d_err, err_file_abspath)
    return d_err, cols_with_nans
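# A minimal usage sketch for check_nan() (toy data; save_json is assumed to
# write the dict to the given json path, as elsewhere in this module):
#
#   import pandas as pd
#   df = pd.DataFrame({'age': [34, None, 41], 'vol': [1.2, 3.4, None]},
#                     index=['sub-01', 'sub-02', 'sub-03'])
#   d_err, cols = self.check_nan(df, '/tmp/nan_errors.json')
#   # d_err -> {'sub-02': ['age'], 'sub-03': ['vol']}
#   # cols  -> ['age', 'vol']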
def run(self):
    """classify the folders with new subjects into the BIDS format
    :return: (True/False, classification dictionary)
    """
    self.dir_2classify = self.get_dirs2classify()
    for self._dir in self.dir_2classify:
        self.archived = False
        dir_abspath = os.path.join(self.MAIN_DIR, self._dir)
        self.main = self.get_dict_4classification(dir_abspath)
        paths_2mris = self._get_MR_paths(dir_abspath)
        if paths_2mris:
            if self.archived:
                bids_ids = self.get_bids_ids(paths_2mris)
                for bids_id in bids_ids:
                    paths_2classify = self.get_content_per_bids_id(paths_2mris,
                                                                   bids_id)
                    BIDS_classified = self.classify_2bids(paths_2classify)
                    self.main[bids_id] = BIDS_classified
                    self.main[bids_id]['archived'] = str(dir_abspath)
            else:
                paths_2classify = paths_2mris
                BIDS_classified = self.classify_2bids(paths_2classify)
                self.main[self._dir] = BIDS_classified
                self.main[self._dir]['archived'] = ''
            log.info("    saving classification file")
            save_json(self.main, self.f_nimb_classified)
        else:
            log.info(f'    there are no files or folders in the provided path to read: {dir_abspath}')
    log.info(f"classification of new subjects is complete, file located at: {self.f_nimb_classified}")
    if self.multiple_T1 == 1:
        from classification.get_mr_params import verify_MRIs_for_similarity
        self.main = verify_MRIs_for_similarity(self.main,
                                               self.NIMB_tmp,
                                               self.flair_t2_add)
    else:
        self.main = self.keep_only1_T1()
    self.chk_spaces()
    if os.path.exists(self.f_nimb_classified):
        return True, self.main
    else:
        return False, self.main
def save_df_Emmanuelle(df, groups, stats_dic, cols2color_sig, path2save,
                       make_with_colors, extensions=('xlsx', 'csv', 'json')):
    """save the statistics DataFrame to the requested formats"""
    if 'xlsx' in extensions:
        import openpyxl
        from openpyxl.utils import get_column_letter
        # note: the .xlsx is written to the current working directory,
        # while the .csv and .json below are written to path2save
        df.to_excel('stats_new.xlsx')
        # merge the mean/std sub-index header cells, two columns per group;
        # get_column_letter replaces the original string.ascii_uppercase
        # lookup, so that more than 26 columns are also handled correctly
        wb = openpyxl.load_workbook('stats_new.xlsx')
        sheet = wb['Sheet1']
        for col in range(2, 2 * len(groups) + 2, 2):
            sheet.merge_cells(f'{get_column_letter(col)}2:{get_column_letter(col + 1)}2')
        wb.save('stats_new.xlsx')
    if 'json' in extensions:
        utilities.save_json(stats_dic, os.path.join(path2save, 'stats.json'))
    if 'csv' in extensions:
        tab = Table()
        tab.save_df(df, os.path.join(path2save, 'stats_new.csv'),
                    sheet_name='stats')
    if make_with_colors:
        save_2xlsx_with_colors_Emmanuelle(df, 'stats_new.xlsx', path2save,
                                          'stats_wcolors.xlsx',
                                          cols2color_sig=cols2color_sig)
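# Layout assumed by the merge above: df.to_excel() writes a two-level column
# header on rows 1-2, with one (mean, std) column pair per group, and the
# sub-index cells of each pair are merged on row 2, e.g. for groups
# ('G1', 'G2') (illustrative):
#
#       A    |    B    |    C    |    D    |    E
#   1        |   G1    |         |   G2    |
#   2        |  mean      std    |  mean      std     <- B2:C2, D2:E2 merged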
def __init__(self, all_vars, PATHglm, sig_fdr_thresh=3.0):
    '''
    sig_fdr_thresh at 3.0 corresponds to p = 0.001;
    for p = 0.05 use the value 1.3,
    but it should be used ONLY for visualisation.
    '''
    vars_fs = all_vars.location_vars['local']["FREESURFER"]
    self.FREESURFER_HOME = vars_fs["FREESURFER_HOME"]
    self.SUBJECTS_DIR = vars_fs["SUBJECTS_DIR"]
    self.measurements = vars_fs["GLM_measurements"]
    self.thresholds = vars_fs["GLM_thresholds"]
    self.mc_cache_thresh = vars_fs["GLM_MCz_cache"]

    param = fs_definitions.FSGLMParams(PATHglm)
    self.PATHglm = PATHglm
    self.sig_fdr_thresh = sig_fdr_thresh
    self.PATHglm_glm = param.PATHglm_glm
    self.PATH_img = param.PATH_img
    self.PATHglm_results = param.PATHglm_results
    self.sig_fdr_json = param.sig_fdr_json
    self.sig_mc_json = param.sig_mc_json
    self.err_mris_preproc_file = param.err_mris_preproc_file
    self.mcz_sim_direction = param.mcz_sim_direction
    self.hemispheres = fs_definitions.hemi
    self.GLM_sim_fwhm4csd = param.GLM_sim_fwhm4csd
    self.GLM_MCz_meas_codes = param.GLM_MCz_meas_codes
    self.cluster_stats = param.cluster_stats
    self.cluster_stats_2csv = param.cluster_stats_2csv
    self.sig_contrasts = param.sig_contrasts

    RUN = True
    # get files_glm; a missing json raises OSError/ValueError,
    # not ImportError, so the broad Exception is caught
    try:
        files_glm = load_json(param.files_for_glm)
        print(f'    successfully loaded file: {param.files_for_glm}')
    except Exception as e:
        print(e)
        print(f'    file {param.files_for_glm} is missing')
        RUN = False
    # get the file with subjects per group
    try:
        subjects_per_group = load_json(param.subjects_per_group)
        print(f'    successfully loaded file: {param.subjects_per_group}')
    except Exception as e:
        print(e)
        print(f'    file {param.subjects_per_group} is missing')
        RUN = False

    # checking that all subjects are present;
    # guarded by RUN, since subjects_per_group is undefined if loading failed
    if RUN:
        print('    subjects are located in: {}'.format(self.SUBJECTS_DIR))
        subjects_in_dir = os.listdir(self.SUBJECTS_DIR)
        for group in subjects_per_group:
            for subject in subjects_per_group[group]:
                if subject not in subjects_in_dir:
                    print(f'    subject is missing from FreeSurfer Subjects folder: {subject}')
                    RUN = False
                    break

    for subdir in (self.PATHglm_glm, self.PATHglm_results, self.PATH_img):
        if not os.path.isdir(subdir):
            os.makedirs(subdir)
    if not os.path.isfile(self.sig_contrasts):
        open(self.sig_contrasts, 'w').close()

    if RUN:
        self.err_preproc = list()
        self.sig_fdr_data = dict()
        self.sig_mc_data = dict()
        self.run_loop(files_glm)
        if self.err_preproc:
            save_json(self.err_preproc, self.err_mris_preproc_file)
        if self.sig_fdr_data:
            save_json(self.sig_fdr_data, self.sig_fdr_json)
        if self.sig_mc_data:
            save_json(self.sig_mc_data, self.sig_mc_json)
        if os.path.exists(self.cluster_stats):
            ClusterFile2CSV(self.cluster_stats, self.cluster_stats_2csv)
        print('\n\nGLM DONE')
    else:
        sys.exit('some ERRORS were found. Cannot perform FreeSurfer GLM')
def setup_default_local_nimb(self):
    """create the default local.json and remote1.json credentials files,
        asking the user for the main paths"""
    shutil.copy(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'local.json'),
                os.path.join(self.credentials_home, 'remote1.json'))
    local_vars = self.load_file('local', default=True)
    local_vars['USER']['user'] = _get_username()

    # setting NIMB paths
    NIMB_PATHS = local_vars['NIMB_PATHS']
    NIMB_HOME = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    print('NIMB_HOME is: ', NIMB_HOME)
    NIMB_PATHS['NIMB_HOME'] = NIMB_HOME
    new_NIMB_tmp = get_userdefined_paths(
        'NIMB temporary folder nimb_tmp',
        os.path.join(NIMB_HOME.replace('/nimb/nimb', ''), 'nimb_tmp'),
        'nimb_tmp')
    if not path.exists(new_NIMB_tmp):
        makedirs(new_NIMB_tmp)
    NIMB_PATHS['NIMB_tmp'] = new_NIMB_tmp
    NIMB_PATHS['NIMB_NEW_SUBJECTS'] = os.path.join(new_NIMB_tmp,
                                                   'nimb_new_subjects')

    # setting FreeSurfer paths
    new_freesurfer_path = get_userdefined_paths(
        'FreeSurfer folder',
        os.path.join(NIMB_HOME.replace('/nimb/nimb', ''), 'freesurfer'),
        'freesurfer')
    new_conda_path = new_freesurfer_path.replace("freesurfer", "conda3")
    NIMB_PATHS['conda_home'] = new_conda_path
    NIMB_PATHS['miniconda_python_run'] = os.path.join(
        new_conda_path, 'bin', 'python3.7').replace(os.path.expanduser("~"), "~")
    local_vars['NIMB_PATHS'] = NIMB_PATHS

    FS_PATHS = local_vars['FREESURFER']
    FS_PATHS['FREESURFER_HOME'] = new_freesurfer_path
    FS_PATHS['FS_SUBJECTS_DIR'] = os.path.join(new_freesurfer_path, 'subjects')
    FS_PATHS['export_FreeSurfer_cmd'] = "export FREESURFER_HOME=" + new_freesurfer_path
    if not os.path.exists(new_freesurfer_path):
        FreeSurfer_install = get_yes_no(
            f'do you want to install FreeSurfer at the provided location {new_freesurfer_path}? (y/n)')
        FS_PATHS['FreeSurfer_install'] = FreeSurfer_install
        if FreeSurfer_install == 1:
            FS_PATHS['freesurfer_license'] = get_FS_license()
    else:
        FS_PATHS['FreeSurfer_install'] = 1
    local_vars['FREESURFER'] = FS_PATHS

    # setting PROCESSING paths
    environ = get_yes_no(
        "Will this account use slurm or tmux for processing ? (y/n; y=slurm/ n=tmux)")
    if environ == 1:
        local_vars['PROCESSING']['processing_env'] = 'slurm'
        supervisor = input(
            "For some slurm environments a supervisor account is required. "
            "Please type the supervisor account name or leave blank:")
        if supervisor:
            print('supervisor account name is: {}'.format(supervisor))
            local_vars['USER']['supervisor_account'] = str(supervisor)
            local_vars['PROCESSING']['supervisor_account'] = str(supervisor)
            local_vars['PROCESSING']['text4_scheduler'][1] = \
                local_vars['PROCESSING']['text4_scheduler'][1].replace(
                    'def-supervisor', supervisor)
        else:
            print('supervisor account not provided')
            local_vars['USER']['supervisor_account'] = ''
            local_vars['PROCESSING']['supervisor_account'] = ''
            local_vars['PROCESSING']['text4_scheduler'].remove(
                local_vars['PROCESSING']['text4_scheduler'][1])
    else:
        print('environment for processing is: tmux')
        local_vars['PROCESSING']['processing_env'] = 'tmux'
    save_json(local_vars, os.path.join(self.credentials_home, 'local.json'))
    self.get_all_locations_vars()
def chk_if_subjects_ready(self):
    """check that the processed subjects are present in SUBJECTS_DIR
        and have the files required for the GLM analysis
    :return: (True/False, list of missing or not ready ids)
    """
    fs_proc_ids = self.get_ids_processed()
    miss_bids_ids = [i for i in self.bids_ids if i not in fs_proc_ids.keys()]
    if miss_bids_ids:
        print(f'    {len(miss_bids_ids)} IDs are missing from file: {self.f_ids_processed}')
        print(f'    first 5 IDs are: {miss_bids_ids[:5]}')
        for bids_id in miss_bids_ids:
            self.add_to_miss(bids_id, 'id_missing')
    if len(miss_bids_ids) < len(fs_proc_ids.keys()):
        for bids_id in [i for i in self.bids_ids if i not in miss_bids_ids]:
            fs_proc_id = fs_proc_ids[bids_id].replace(self.archive_type, '')
            if os.path.exists(os.path.join(self.FS_SUBJECTS_DIR, bids_id)):
                self.ids_4fs_glm[bids_id] = bids_id
                self.chk_glm_files(bids_id)
            elif os.path.exists(os.path.join(self.FS_SUBJECTS_DIR, fs_proc_id)):
                self.ids_4fs_glm[bids_id] = fs_proc_id
                self.chk_glm_files(fs_proc_id)
            else:
                print(f'id {bids_id} or freesurfer id {fs_proc_id} '
                      f'are missing from the {self.FS_SUBJECTS_DIR} folder')
                self.add_to_miss(bids_id, 'id_missing')
        if self.miss.keys():
            print("    missing files and ids: ", self.miss)
            save_json(self.miss, self.ids_exclude_glm, print_space=8)
            subjs_missing = len(self.miss.keys())
            subjs_present = len(self.ids_4fs_glm.keys())
            print('    Number of participants ready for FreeSurfer GLM:')
            print(f'        in the folder: {self.FS_SUBJECTS_DIR}')
            print(f'        {subjs_present} present')
            print(f'        {subjs_missing} missing')
            not_ready = [i for i in self.miss if "id_missing" not in self.miss[i]]
            maybe_archived = [i for i in self.miss if i not in not_ready]
            if maybe_archived:
                print("    MAYBE archived: ", maybe_archived)
                q = ("    EXCEPTION! Some IDs are missing, but they could be archived.\n"
                     "    Do you want to do the glm analysis with the current subjects (y) "
                     "or try to check the archive (n) ? (y/n)\n"
                     "    (note: if you answer NO, you will be asked to unarchive the\n"
                     "    processed folders of the IDs if they are present in FREESURFER_PROCESSED)")
                if get_yes_no(q) == 1:
                    self.create_fs_glm_df()
                    return True, list()
                else:
                    return False, maybe_archived
            if not_ready:
                print("    MISSING FILES: these participants CANNOT be included "
                      "in the GLM analysis: ", not_ready)
                q = ("    EXCEPTION! Some IDs have missing files and they MUST be "
                     "excluded from the analysis.\n"
                     "    Do you want to continue without the excluded IDs ? (y/n)")
                if get_yes_no(q) == 1:
                    self.create_fs_glm_df()
                    return True, list()
                else:
                    return False, not_ready
        else:
            self.create_fs_glm_df()
            return True, list()
    else:
        print('    no ids found')
        return False, list()
def run_anova(self, p_thresh, intercept_thresh, path2save):
    """fit an OLS model (feature ~ param_y) for each feature and save
        the significant results per parameter and per structure"""
    ls_err = list()
    for param_y in self.params_y:
        x = np.array(self.df[param_y])
        df_result = self.tab.get_clean_df()
        df_result_list = df_result.copy()
        df_result[param_y] = ''
        df_result_list[param_y] = ''
        ix = 1
        ixx = 1
        # print(f'    analysing {len(self.ls_cols4anova)} features for parameter: {param_y}')
        for col in self.ls_cols4anova:
            y = np.array(self.df[col])
            data_tmp = pd.DataFrame({'x': x, col: y})
            model = ols(col + " ~ x", data=data_tmp).fit()
            # note: p_thresh is compared against the intercept p-value and
            # intercept_thresh against the slope p-value, as in the original
            # code; verify that this pairing is the intended one
            if model.pvalues.Intercept < p_thresh and model.pvalues.x < intercept_thresh:
                measurement, structure, ls_err = self.fs_struc_meas.get(col, ls_err)
                if param_y not in self.sig_cols:
                    self.sig_cols[param_y] = dict()
                self.sig_cols[param_y][col] = {
                    'rsquared': model.rsquared,
                    'rsquared-adjusted': model.rsquared_adj,
                    'F-statistic': model.fvalue,
                    'AIC': model.aic,
                    'BIC': model.bic,
                    'pvalue_slope': model.pvalues.x,
                    'pvalue_intercept': model.pvalues.Intercept,
                    'tvalue_slope': model.tvalues.x,
                    'tvalue_intercept': model.tvalues.Intercept,
                    'meas': measurement,
                    'struct': structure}
                df_result_list = self.populate_df(
                    df_result_list, ixx,
                    {param_y: structure,
                     'measure': measurement,
                     'pvalue': '%.4f' % model.pvalues.x})
                if structure not in df_result[param_y].tolist():
                    df_result = self.populate_df(
                        df_result, ix,
                        {param_y: structure,
                         measurement: '%.4f' % model.pvalues.x})
                    ix += 1
                else:
                    df_result = self.populate_df(
                        df_result,
                        df_result[param_y].tolist().index(structure),
                        {measurement: '%.4f' % model.pvalues.x})
                ixx += 1
        self.tab.save_df_tocsv(df_result_list,
                               path.join(path2save, f'anova_per_significance_{param_y}.csv'))
        self.tab.save_df_tocsv(df_result,
                               path.join(path2save, f'anova_per_structure_{param_y}.csv'))
    save_json(self.sig_cols,
              path.join(path2save, 'anova_significant_features.json'))
    if self.print_not_FS:
        print('NOT freesurfer structures: ', ls_err)
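# A minimal sketch of the per-feature model fitted in run_anova() (statsmodels
# OLS via the formula API; toy data, not from this repository):
#
#   import pandas as pd
#   from statsmodels.formula.api import ols
#
#   data = pd.DataFrame({'x': [50, 60, 70, 80],
#                        'thickness': [2.9, 2.7, 2.6, 2.4]})
#   model = ols("thickness ~ x", data=data).fit()
#   print(model.pvalues.Intercept, model.pvalues.x)  # p-values used in the filter
#   print(model.rsquared, model.fvalue, model.aic)   # metrics stored in sig_cols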
def mkstatisticsf(df_4stats, groups, group_col, path2save,
                  make_with_colors=True):
    '''Creates a descriptive statistics file for publication,
        based on a provided pandas.DataFrame.
        Works only on 2 groups.
    Args:
        df_4stats: pandas.DataFrame
        groups: list/tuple of 2 groups as str/int
        group_col: name of the column in df_4stats that contains the group names
        path2save: abspath of the folder to save the descriptive files
        make_with_colors: if True, create an additional .xlsx file with
            significant results colored, provided xlwt is installed
    Return:
        json file with the results
        .csv file with the results
        .xlsx file with the results, significant ones colored in red
    '''
    tab = Table()
    ls_tests = ('mean', 'std', 'kurtosis', 'skewness',
                'TTest', 'Welch', 'ANOVA', 'Bartlett',
                'MannWhitneyu', 'Kruskal')
    groups_df = dict()
    for group in groups:
        groups_df[group] = tab.get_df_per_parameter(df_4stats, group_col, group)
    stats_dic = dict()
    vals2chk = df_4stats.columns.tolist()
    if group_col in vals2chk:
        vals2chk.remove(group_col)
    cols2color_sig = list()
    groups = list(groups_df.keys())
    group1 = groups_df[groups[0]]
    group2 = groups_df[groups[1]]
    for test in ls_tests:
        for val in vals2chk:
            results, params = get_stats(test, group1[val], group2[val])
            if test in ('mean', 'std', 'kurtosis', 'skewness'):
                key1 = f'{groups[0]}, {params[0]}'
                key2 = f'{groups[1]}, {params[0]}'
            else:
                key1 = f'{test}, {params[0]}'
                key2 = f'{test}, {params[1]}'
                # deduplicated: key2 is the same for every val of a given test
                if key2 not in cols2color_sig:
                    cols2color_sig.append(key2)
            for key in (key1, key2):
                if key not in stats_dic:
                    stats_dic[key] = dict()
            stats_dic[key1][val] = f'{results[0]}'
            stats_dic[key2][val] = f'{results[1]}'
    df = tab.create_df_from_dict(stats_dic)
    tab.save_df(df, os.path.join(path2save, 'stats_general.csv'),
                sheet_name='stats')
    utilities.save_json(stats_dic, os.path.join(path2save, 'stats_general.json'))
    if make_with_colors:
        save_2xlsx_with_colors(df, path2save=path2save,
                               cols2color_sig=cols2color_sig)
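# A minimal usage sketch for mkstatisticsf() (toy data; Table, get_stats and
# save_2xlsx_with_colors are this repository's helpers, assumed importable in
# this module's namespace):
#
#   import pandas as pd
#   df = pd.DataFrame({'group': ['ctrl', 'ctrl', 'pat', 'pat'],
#                      'hippocampus_vol': [4.1, 4.3, 3.6, 3.5]})
#   mkstatisticsf(df, groups=('ctrl', 'pat'), group_col='group',
#                 path2save='/tmp/stats')
#   # -> /tmp/stats/stats_general.csv, /tmp/stats/stats_general.json and,
#   #    with make_with_colors=True, an additional colored .xlsx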