def run(config_file, subject_list_file, p_name=None):

    # Import packages
    import time

    # take date+time stamp for run identification purposes
    unique_pipeline_id = strftime("%Y%m%d%H%M%S")
    pipeline_start_stamp = strftime("%Y-%m-%d_%H:%M:%S")

    try:
        if not os.path.exists(config_file):
            raise IOError
        else:
            c = Configuration(yaml.load(open(os.path.realpath(config_file), 'r')))
    except IOError:
        print("config file %s doesn't exist" % config_file)
        raise
    except Exception:
        print("Error reading config file - %s" % config_file)
        raise Exception

    # do some validation
    validate(c)

    # get the pipeline name
    p_name = c.pipelineName

    try:
        sublist = yaml.load(open(os.path.realpath(subject_list_file), 'r'))
    except:
        print("Subject list is not in proper YAML format. Please check your file")
        raise Exception

    # NOTE: strategies list is only needed in cpac_pipeline prep_workflow for
    # creating symlinks
    strategies = sorted(build_strategies(c))

    print("strategies ---> ")
    print(strategies)

    sub_scan_map = {}

    print("subject list: ")
    print(sublist)

    try:
        for sub in sublist:
            if sub['unique_id']:
                s = sub['subject_id'] + "_" + sub["unique_id"]
            else:
                s = sub['subject_id']

            scan_ids = ['scan_anat']
            for id in sub['rest']:
                scan_ids.append('scan_' + str(id))
            sub_scan_map[s] = scan_ids
    except:
        print("\n\n" + "ERROR: Subject list file not in proper format - "
              "check if you loaded the correct file?" + "\n" +
              "Error name: cpac_runner_0001" + "\n\n")
        raise Exception

    create_group_log_template(sub_scan_map,
                              os.path.join(c.outputDirectory, 'logs'))

    seeds_created = []
    if not (c.seedSpecificationFile is None):
        try:
            if os.path.exists(c.seedSpecificationFile):
                seeds_created = create_seeds_(c.seedOutputLocation,
                                              c.seedSpecificationFile,
                                              c.FSLDIR)
                print('seeds created %s -> ' % seeds_created)
        except:
            raise IOError('Problem in seedSpecificationFile')

    if 1 in c.runVoxelTimeseries:
        if 'roi_voxelwise' in c.useSeedInAnalysis:
            c.maskSpecificationFile = append_seeds_to_file(
                c.workingDirectory, seeds_created, c.maskSpecificationFile)

    if 1 in c.runROITimeseries:
        if 'roi_average' in c.useSeedInAnalysis:
            c.roiSpecificationFile = append_seeds_to_file(
                c.workingDirectory, seeds_created, c.roiSpecificationFile)

    if 1 in c.runSCA:
        if 'roi_average' in c.useSeedInAnalysis:
            c.roiSpecificationFileForSCA = append_seeds_to_file(
                c.workingDirectory, seeds_created, c.roiSpecificationFileForSCA)

    if 1 in c.runNetworkCentrality:
        if 'centrality_outputs_smoothed' in c.useSeedInAnalysis:
            c.templateSpecificationFile = append_seeds_to_file(
                c.workingDirectory, seeds_created, c.templateSpecificationFile)

    pipeline_timing_info = []
    pipeline_timing_info.append(unique_pipeline_id)
    pipeline_timing_info.append(pipeline_start_stamp)
    pipeline_timing_info.append(len(sublist))

    if not c.runOnGrid:
        # Import packages
        from CPAC.pipeline.cpac_pipeline import prep_workflow

        # Init variables
        procss = [Process(target=prep_workflow,
                          args=(sub, c, strategies, 1,
                                pipeline_timing_info, p_name))
                  for sub in sublist]
        pid = open(os.path.join(c.outputDirectory, 'pid.txt'), 'w')

        # Init job queue
        jobQueue = []

        # If we're allocating more processes than are subjects, run them all
        if len(sublist) <= c.numSubjectsAtOnce:
            """
            Stream all the subjects as sublist is less than or equal to the
            number of subjects that need to run
            """
            for p in procss:
                p.start()
                print(p.pid, file=pid)

        # Otherwise manage resources to run processes incrementally
        else:
            """
            Stream the subject workflows for preprocessing.
            At any time, c.numSubjectsAtOnce subjects will be running, unless
            the number remaining is less than the value of that parameter.
            """
            idx = 0
            while idx < len(sublist):
                # If the job queue is empty and we haven't started indexing
                if len(jobQueue) == 0 and idx == 0:
                    # Init subject process index
                    idc = idx
                    # Launch processes (one for each subject)
                    for p in procss[idc: idc + c.numSubjectsAtOnce]:
                        p.start()
                        print(p.pid, file=pid)
                        jobQueue.append(p)
                        idx += 1
                # Otherwise, jobs are running - check them
                else:
                    # Check every job in the queue's status
                    for job in jobQueue:
                        # If the job is not alive
                        if not job.is_alive():
                            # Find job and delete it from queue
                            print('found dead job ', job)
                            loc = jobQueue.index(job)
                            del jobQueue[loc]
                            # ...and start the next available process (subject)
                            procss[idx].start()
                            # Append this to job queue and increment index
                            jobQueue.append(procss[idx])
                            idx += 1
                    # Add sleep so while loop isn't consuming 100% of CPU
                    time.sleep(2)

        pid.close()

    else:
        import subprocess
        import pickle

        temp_files_dir = os.path.join(os.getcwd(), 'cluster_temp_files')
        print(subprocess.getoutput("mkdir -p %s" % temp_files_dir))

        strategies_file = os.path.join(temp_files_dir, 'strategies.obj')
        # pickle requires a binary-mode file handle under Python 3
        f = open(strategies_file, 'wb')
        pickle.dump(strategies, f)
        f.close()

        if 'sge' in c.resourceManager.lower():
            run_sge_jobs(c, config_file, strategies_file,
                         subject_list_file, p_name)
        elif 'pbs' in c.resourceManager.lower():
            run_pbs_jobs(c, config_file, strategies_file,
                         subject_list_file, p_name)
        elif 'condor' in c.resourceManager.lower():
            run_condor_jobs(c, config_file, strategies_file,
                            subject_list_file, p_name)
def prep_group_analysis_workflow(c, resource, subject_infos):

    p_id, s_ids, scan_ids, s_paths = (list(tup) for tup in zip(*subject_infos))
    # print "p_id -%s, s_ids -%s, scan_ids -%s, s_paths -%s" %(p_id, s_ids, scan_ids, s_paths)

    def get_phenotypic_file(phenotypic_file, m_dict, m_list, mod_path):
        # print "phenotypic_file, m_dict", phenotypic_file, m_dict
        import csv
        reader = csv.reader(open(phenotypic_file, "rU"))
        columns = {}
        order = {}
        count = 0
        headers = reader.next()

        for h in headers:
            columns[h] = []
            order[h] = count
            count += 1

        for r in reader:
            for h, v in zip(headers, r):
                if v:
                    columns[h].append(str(v))

        if m_dict:
            for measure in m_list:
                if measure in headers:
                    # check if 'MeanFD' is present
                    if len(columns[measure]) < 1:
                        for sub in columns["sub_id"]:
                            if m_dict.get(sub):
                                if m_dict.get(sub).get(measure):
                                    columns[measure].append(m_dict[sub][measure])
                                else:
                                    raise Exception("Couldn't find %s value for subject %s"
                                                    % (measure, sub))
                            else:
                                raise Exception("Couldn't find subject %s in the parameter file"
                                                % sub)

        b = zip(*([k] + columns[k] for k in sorted(columns, key=order.get)))

        try:
            os.makedirs(mod_path)
        except:
            print "%s already exists" % (mod_path)

        new_phenotypic_file = os.path.join(mod_path, os.path.basename(phenotypic_file))
        a = csv.writer(open(new_phenotypic_file, "w"))
        for col in b:
            a.writerow(list(col))

        return new_phenotypic_file

    threshold_val = None
    measure_dict = None
    measure_list = ["MeanFD", "MeanFD_Jenkinson", "MeanDVARS"]
    model_sub_list = []

    if re.search("(?<=/_threshold_)\d+.\d+", s_paths[0]):
        threshold_val = re.search("(?<=/_threshold_)\d+.\d+", s_paths[0]).group(0)
    elif len(c.scrubbingThreshold) == 1:
        threshold_val = c.scrubbingThreshold[0]
    else:
        print "Found multiple threshold values"

    print "threshold_val -->", threshold_val

    if threshold_val:
        try:
            parameter_file = os.path.join(
                c.outputDirectory, p_id[0],
                "%s_threshold_%s_all_params.csv"
                % (scan_ids[0].strip("_"), threshold_val))

            if os.path.exists(parameter_file):
                import csv
                measure_dict = {}
                f = csv.DictReader(open(parameter_file, "r"))
                for line in f:
                    measure_map = {}
                    for m in measure_list:
                        if line.get(m):
                            measure_map[m] = line[m]
                    measure_dict[line["Subject"]] = measure_map
            else:
                print "No file named %s found" % parameter_file
        except Exception:
            print "Exception while extracting parameters from movement file - %s" % (parameter_file)

    for config in c.modelConfigs:
        import yaml
        try:
            conf = Configuration(yaml.load(open(os.path.realpath(config), "r")))
        except:
            raise Exception("Error in reading %s configuration file" % config)

        subject_list = [line.rstrip("\r\n") for line in open(conf.subjectListFile, "r")
                        if not (line == "\n") and not line.startswith("#")]

        exist_paths = []
        for sub in subject_list:
            for path in s_paths:
                if sub in path:
                    exist_paths.append(sub)

        if len(list(set(subject_list) - set(exist_paths))) > 0:
            print "list of outputs missing for subjects %s for derivative -%s at path- %s" % (
                list(set(subject_list) - set(exist_paths)),
                resource,
                os.path.dirname(s_paths[0]).replace(s_ids[0], "*"),
            )

        mod_path = os.path.join(
            os.path.dirname(s_paths[0]).replace(
                s_ids[0], "group_analysis_results/_grp_model_%s" % (conf.modelName)),
            "model_files",
        )

        try:
            os.makedirs(mod_path)
        except:
            print "path %s already exists" % mod_path

        new_sub_file = os.path.join(mod_path, os.path.basename(conf.subjectListFile))
        f = open(new_sub_file, "w")
        for sub in exist_paths:
            print >> f, sub
        f.close()

        conf.update("subjectListFile", new_sub_file)

        if measure_dict != None:
            conf.update("phenotypicFile",
                        get_phenotypic_file(conf.phenotypicFile, measure_dict,
                                            measure_list, mod_path))

        print "model config dictionary ->", conf.__dict__

        try:
            from CPAC.utils import create_fsl_model
            create_fsl_model.run(conf, True)
        except Exception, e:
            print "Error in create_fsl_model script"
            print e

        model_sub_list.append((conf.outputModelFilesDirectory, conf.subjectListFile))

    print "model_sub_list ->", model_sub_list
def run(config_file, subject_list_file, p_name=None):

    # take date+time stamp for run identification purposes
    unique_pipeline_id = strftime("%Y%m%d%H%M%S")
    pipeline_start_stamp = strftime("%Y-%m-%d_%H:%M:%S")

    try:
        if not os.path.exists(config_file):
            raise IOError
        else:
            c = Configuration(yaml.load(open(os.path.realpath(config_file), 'r')))
    except IOError:
        print "config file %s doesn't exist" % config_file
        raise
    except Exception:
        print "Error reading config file - %s" % config_file
        raise Exception

    # do some validation
    validate(c)

    try:
        sublist = yaml.load(open(os.path.realpath(subject_list_file), 'r'))
    except:
        print "Subject list is not in proper YAML format. Please check your file"
        raise Exception

    strategies = sorted(build_strategies(c))

    print "strategies ---> "
    print strategies

    sub_scan_map = {}

    print "subject list: "
    print sublist

    try:
        for sub in sublist:
            if sub['unique_id']:
                s = sub['subject_id'] + "_" + sub["unique_id"]
            else:
                s = sub['subject_id']

            scan_ids = ['scan_anat']
            for id in sub['rest']:
                scan_ids.append('scan_' + str(id))
            sub_scan_map[s] = scan_ids
    except:
        print "\n\n" + "ERROR: Subject list file not in proper format - " \
              "check if you loaded the correct file?" + "\n" + \
              "Error name: cpac_runner_0001" + "\n\n"
        raise Exception

    create_group_log_template(sub_scan_map,
                              os.path.join(c.outputDirectory, 'logs'))

    seeds_created = []
    if not (c.seedSpecificationFile is None):
        try:
            if os.path.exists(c.seedSpecificationFile):
                seeds_created = create_seeds_(c.seedOutputLocation,
                                              c.seedSpecificationFile,
                                              c.FSLDIR)
                print 'seeds created %s -> ' % seeds_created
        except:
            raise IOError('Problem in seedSpecificationFile')

    if 1 in c.runVoxelTimeseries:
        if 'roi_voxelwise' in c.useSeedInAnalysis:
            c.maskSpecificationFile = append_seeds_to_file(
                c.workingDirectory, seeds_created, c.maskSpecificationFile)

    if 1 in c.runROITimeseries:
        if 'roi_average' in c.useSeedInAnalysis:
            c.roiSpecificationFile = append_seeds_to_file(
                c.workingDirectory, seeds_created, c.roiSpecificationFile)

    if 1 in c.runNetworkCentrality:
        if 'centrality_outputs_smoothed' in c.useSeedInAnalysis:
            c.templateSpecificationFile = append_seeds_to_file(
                c.workingDirectory, seeds_created, c.templateSpecificationFile)

    pipeline_timing_info = []
    pipeline_timing_info.append(unique_pipeline_id)
    pipeline_timing_info.append(pipeline_start_stamp)
    pipeline_timing_info.append(len(sublist))

    if not c.runOnGrid:
        from CPAC.pipeline.cpac_pipeline import prep_workflow

        procss = [Process(target=prep_workflow,
                          args=(sub, c, strategies, 1,
                                pipeline_timing_info, p_name))
                  for sub in sublist]
        pid = open(os.path.join(c.outputDirectory, 'pid.txt'), 'w')

        jobQueue = []

        if len(sublist) <= c.numSubjectsAtOnce:
            """
            Stream all the subjects as sublist is less than or equal to the
            number of subjects that need to run
            """
            for p in procss:
                p.start()
                print >>pid, p.pid
        else:
            """
            Stream the subject workflows for preprocessing.
            At Any time in the pipeline c.numSubjectsAtOnce will run, unless
            the number remaining is less than the value of the parameter
            stated above
            """
            idx = 0
            while idx < len(sublist):
                if len(jobQueue) == 0 and idx == 0:
                    idc = idx
                    for p in procss[idc: idc + c.numSubjectsAtOnce]:
                        p.start()
                        print >>pid, p.pid
                        jobQueue.append(p)
                        idx += 1
                else:
                    for job in jobQueue:
                        if not job.is_alive():
                            print 'found dead job ', job
                            loc = jobQueue.index(job)
                            del jobQueue[loc]
                            procss[idx].start()
                            jobQueue.append(procss[idx])
                            idx += 1

        pid.close()

    else:
        import commands
        import pickle

        temp_files_dir = os.path.join(os.getcwd(), 'cluster_temp_files')
        print commands.getoutput("mkdir -p %s" % temp_files_dir)

        strategies_file = os.path.join(temp_files_dir, 'strategies.obj')
        f = open(strategies_file, 'w')
        pickle.dump(strategies, f)
        f.close()

        if 'sge' in c.resourceManager.lower():
            run_sge_jobs(c, config_file, strategies_file,
                         subject_list_file, p_name)
        elif 'pbs' in c.resourceManager.lower():
            run_pbs_jobs(c, config_file, strategies_file,
                         subject_list_file, p_name)
        elif 'condor' in c.resourceManager.lower():
            run_condor_jobs(c, config_file, strategies_file,
                            subject_list_file, p_name)
def prep_group_analysis_workflow(c, resource, subject_infos):

    p_id, s_ids, scan_ids, s_paths = (list(tup) for tup in zip(*subject_infos))
    # print "p_id -%s, s_ids -%s, scan_ids -%s, s_paths -%s" %(p_id, s_ids, scan_ids, s_paths)

    def get_phenotypic_file(phenotypic_file, m_dict, m_list, mod_path, sub_id):
        # print "phenotypic_file, m_dict", phenotypic_file, m_dict
        import csv
        reader = csv.reader(open(phenotypic_file, 'rU'))
        columns = {}
        order = {}
        count = 0
        headers = reader.next()

        for h in headers:
            columns[h] = []
            order[h] = count
            count += 1

        for r in reader:
            for h, v in zip(headers, r):
                if v:
                    columns[h].append(str(v))

        if m_dict:
            for measure in m_list:
                if measure in headers:
                    # check if 'MeanFD' is present
                    if len(columns[measure]) < 1:
                        for sub in columns[sub_id]:
                            if m_dict.get(sub):
                                if m_dict.get(sub).get(measure):
                                    columns[measure].append(m_dict[sub][measure])
                                else:
                                    raise Exception("Couldn't find %s value for subject %s"
                                                    % (measure, sub))
                            else:
                                raise Exception("Couldn't find subject %s in the parameter file"
                                                % sub)

        b = zip(*([k] + columns[k] for k in sorted(columns, key=order.get)))

        try:
            os.makedirs(mod_path)
        except:
            print "%s already exists" % (mod_path)

        new_phenotypic_file = os.path.join(mod_path, os.path.basename(phenotypic_file))
        a = csv.writer(open(new_phenotypic_file, 'w'))
        for col in b:
            a.writerow(list(col))

        return new_phenotypic_file

    threshold_val = None
    measure_dict = None
    measure_list = ['MeanFD', 'MeanFD_Jenkinson', 'MeanDVARS']
    model_sub_list = []

    if c.runScrubbing == 1:
        # get scrubbing threshold
        if re.search('(?<=/_threshold_)\d+.\d+', s_paths[0]):
            threshold_val = re.search('(?<=/_threshold_)\d+.\d+', s_paths[0]).group(0)
        elif len(c.scrubbingThreshold) == 1:
            threshold_val = c.scrubbingThreshold[0]
        else:
            print "Found multiple threshold values"

        print "scrubbing threshold_val -->", threshold_val
    else:
        print "No scrubbing enabled."
        print "\n"

    # pick the right parameter file from the pipeline folder
    # create a dictionary of subject and measures in measure_list
    if c.runScrubbing == 1:
        try:
            parameter_file = os.path.join(
                c.outputDirectory, p_id[0],
                '%s_threshold_%s_all_params.csv'
                % (scan_ids[0].strip('_'), threshold_val))

            if os.path.exists(parameter_file):
                import csv
                measure_dict = {}
                f = csv.DictReader(open(parameter_file, 'r'))
                for line in f:
                    measure_map = {}
                    for m in measure_list:
                        if line.get(m):
                            measure_map[m] = line[m]
                    measure_dict[line['Subject']] = measure_map
            else:
                print "No file named %s found" % parameter_file
        except Exception:
            print "Exception while extracting parameters from movement file - %s" % (parameter_file)

    for config in c.modelConfigs:
        import yaml
        try:
            conf = Configuration(yaml.load(open(os.path.realpath(config), 'r')))
        except:
            raise Exception("Error in reading %s configuration file" % config)

        subject_list = [line.rstrip('\r\n') for line in open(conf.subjectListFile, 'r')
                        if not (line == '\n') and not line.startswith('#')]

        # list of subject paths which DO exist
        exist_paths = []

        # check for missing subject for the derivative
        for sub in subject_list:
            for path in s_paths:
                if sub in path:
                    exist_paths.append(sub)

        # check to see if any derivatives of subjects are missing
        if len(list(set(subject_list) - set(exist_paths))) > 0:
            print "-------------------------------------------"
            print "List of outputs missing for subjects:"
            print list(set(subject_list) - set(exist_paths))
            print "\n"
            print "..for derivatives:"
            print resource
            print "\n"
            print "..at paths:"
            print os.path.dirname(s_paths[0]).replace(s_ids[0], '*')
            print "-------------------------------------------"
            print '\n'
            # import warnings
            # warnings.warn(msg)

        mod_path = os.path.join(
            os.path.dirname(s_paths[0]).replace(
                s_ids[0], 'group_analysis_results/_grp_model_%s' % (conf.modelName)),
            'model_files')

        print "basename: ", os.path.basename(conf.subjectListFile)

        try:
            os.makedirs(mod_path)
            print "Creating directory:"
            print mod_path
            print "\n"
        except:
            print "Attempted to create directory, but path already exists:"
            print mod_path
            print '\n'

        new_sub_file = os.path.join(mod_path, os.path.basename(conf.subjectListFile))

        try:
            f = open(new_sub_file, 'w')
            for sub in exist_paths:
                print >>f, sub
            f.close()
        except:
            print "Error: Could not open subject list file: ", new_sub_file
            print ""
            raise Exception

        conf.update('subjectListFile', new_sub_file)

        sub_id = conf.subjectColumn

        if measure_dict != None:
            conf.update('phenotypicFile',
                        get_phenotypic_file(conf.phenotypicFile, measure_dict,
                                            measure_list, mod_path, sub_id))

        print "Model config dictionary ->"
        print conf.__dict__
        print '\n'

        # Run 'create_fsl_model' script to extract phenotypic data from
        # the phenotypic file for each of the subjects in the subject list
        try:
            from CPAC.utils import create_fsl_model
            create_fsl_model.run(conf, True)
        except Exception, e:
            print "Error in creating models in the create_fsl_model script"
            # print "Error ->", e
            raise

        model_sub_list.append((conf.outputModelFilesDirectory, conf.subjectListFile))

    print "model_sub_list ->", model_sub_list
def prep_group_analysis_workflow(c, resource, subject_infos): # # this function runs once per output file during group analysis # # p_id = a list of pipeline IDs, i.e. the name of the output folder for # the strat # s_ids = a list of all the subject IDs # scan_ids = a list of scan IDs # s_paths = a list of all of the filepaths of this particular output # file that prep_group_analysis_workflow is being called for p_id, s_ids, scan_ids, s_paths = (list(tup) for tup in zip(*subject_infos)) def get_phenotypic_file(phenotypic_file, m_dict, m_list, mod_path, sub_id): import csv reader = csv.reader(open(phenotypic_file, 'rU')) columns = {} order = {} count = 0 headers = reader.next() for h in headers: columns[h] =[] order[h] = count count+=1 for r in reader: for h, v in zip(headers, r): if v: columns[h].append(str(v)) if m_dict: for measure in m_list: print '\n\nMeasure: ', measure, '\n\n' if measure in headers: #check if 'MeanFD is present' if len(columns[measure]) < 1: print '\n\ncolumns[sub_id]: ', columns[sub_id], '\n\n' for sub in columns[sub_id]: if m_dict.get(sub): if m_dict.get(sub).get(measure): columns[measure].append(m_dict[sub][measure]) else: raise Exception("Couldn't find %s value for subject %s"%(measure,sub)) else: raise Exception("Couldn't find subject %s in the parameter file"%sub) print '\n\ncolumns[measure]: ', columns, '\n\n' b = zip(*([k] + columns[k] for k in sorted(columns, key=order.get))) try: os.makedirs(mod_path) except: print "%s already exists"%(mod_path) new_phenotypic_file = os.path.join(mod_path, os.path.basename(phenotypic_file)) a = csv.writer(open(new_phenotypic_file, 'w')) for col in b: a.writerow(list(col)) return new_phenotypic_file # END get_phenotypic_file function threshold_val = None measure_dict = None measure_list = ['MeanFD', 'MeanFD_Jenkinson', 'MeanDVARS'] model_sub_list = [] if 1 in c.runScrubbing: #get scrubbing threshold if re.search('(?<=/_threshold_)\d+.\d+',s_paths[0]): threshold_val = re.search('(?<=/_threshold_)\d+.\d+',s_paths[0]).group(0) elif len(c.scrubbingThreshold) == 1: threshold_val = c.scrubbingThreshold[0] else: print "Found Multiple threshold value " print "scrubbing threshold_val -->", threshold_val else: print "No scrubbing enabled." if len(c.scrubbingThreshold) == 1: threshold_val = c.scrubbingThreshold[0] import yaml for config in c.modelConfigs: print c.modelConfigs print config try: conf = Configuration(yaml.load(open(os.path.realpath(config), 'r'))) except: raise Exception("Error in reading %s configuration file" % config) group_sublist = open(conf.subject_list, 'r') sublist_items = group_sublist.readlines() subject_list = [line.rstrip('\n') for line in sublist_items \ if not (line == '\n') and not line.startswith('#')] # list of subject paths which DO exist exist_paths = [] print 'subject_list: ', subject_list, '\n\n' print 's_paths: ', s_paths, '\n\n' ''' begin iteration through group subject list for processing ''' for sub in subject_list: # let's check to make sure the subject list is formatted for # repeated measures properly if repeated measures is enabled and # vice versa if (c.repeatedMeasures == True) and (',' not in sub): print '\n\n' print 'Whoops! 
The group analysis subject list is not in ' \ 'the appropriate format for repeated measures.\n' print 'Please use the appropriate format as described in ' \ 'the CPAC User Guide or turn off Repeated Measures ' \ 'in the CPAC pipeline configuration editor, found ' \ 'in the \'Group Analysis Settings\' tab of the ' \ 'pipeline configuration editor.\n' print 'NOTE: CPAC generates a properly-formatted group ' \ 'analysis subject list meant for running repeated ' \ 'measures when you create your original subject ' \ 'list. Look for \'subject_list_group_analysis_' \ 'repeated_measures.txt\' in the directory where ' \ 'you created your subject list.\n\n' raise Exception elif (c.repeatedMeasures == False) and (',' in sub): print '\n\n' print '[!] CPAC says: It looks like your group analysis ' \ 'subject list is formatted for running repeated ' \ 'measures, but \'Run Repeated Measures\' is not ' \ 'enabled in the pipeline configuration, found in ' \ 'the \'Group Analysis Settings\' tab of the ' \ 'pipeline configuration editor.\n' print 'Double-check your pipeline configuration?\n\n' raise Exception ''' process subject ids for repeated measures, if it is on ''' # if repeated measures is being run and the subject list # is a list of subject IDs and scan IDs concatenated if (c.repeatedMeasures == True): # sub.count(',') equals 1 when there is either multiple scans # or multiple sessions but not both, for repeated measures # sub.count(',') equals 2 when there are multiple sessions # AND scans, for repeated measures if sub.count(',') == 1: sub_id = sub.split(',',1)[0] other_id = sub.split(',',1)[1] elif sub.count(',') == 2: sub_id = sub.split(',',2)[0] scan_id = sub.split(',',2)[1] session_id = sub.split(',',2)[2] ''' drop subjects from the group subject list ''' # check the path files in path_files_here folder in the subject's # output folder - and drop any subjects from the group analysis # subject list which do not exist in the paths to the output files for path in s_paths: if (c.repeatedMeasures == True): if sub.count(',') == 1: if (sub_id in path) and (other_id in path): exist_paths.append(sub) elif sub.count(',') == 2: if (sub_id in path) and (scan_id in path) and \ (session_id in path): exist_paths.append(sub) else: if sub in path: exist_paths.append(sub) # check to see if any derivatives of subjects are missing if len(list(set(subject_list) - set(exist_paths))) >0: print "List of outputs missing for subjects:" print list(set(subject_list) - set(exist_paths)) print "..for derivatives:" print resource print "..at paths:" print os.path.dirname(s_paths[0]).replace(s_ids[0], '*') mod_path = os.path.join(os.path.dirname(s_paths[0]).replace(s_ids[0], 'group_analysis_results/_grp_model_%s'%(conf.model_name)), 'model_files') print "basename: ", os.path.basename(conf.subject_list) ''' try: os.makedirs(mod_path) print "Creating directory:" print mod_path except: print "Attempted to create directory, but path already exists:" print mod_path ''' if not os.path.isdir(mod_path): os.makedirs(mod_path) ''' write the new subject list ''' new_sub_file = os.path.join(mod_path, os.path.basename(conf.subject_list)) try: f = open(new_sub_file, 'w') for sub in exist_paths: print >>f, sub f.close() except: print "Error: Could not open subject list file: ", new_sub_file raise Exception conf.update('subjectListFile',new_sub_file) sub_id = conf.subject_id_label if measure_dict != None: conf.update('phenotypicFile',get_phenotypic_file(conf.pheno_file, measure_dict, measure_list, mod_path, sub_id)) print 'conf updated 
pheno: ', conf.pheno_file, '\n\n' print "Model config dictionary ->" print conf.__dict__ # Run 'create_fsl_model' script to extract phenotypic data from # the phenotypic file for each of the subjects in the subject list ''' get the motion statistics parameter file, if present ''' # get the parameter file so it can be passed to create_fsl_model.py # so MeanFD or other measures can be included in the design matrix parameter_file = os.path.join(c.outputDirectory, p_id[0], '%s_threshold_%s_all_params.csv'%(scan_ids[0].strip('_'),threshold_val)) if 1 in c.runGenerateMotionStatistics: if not os.path.exists(parameter_file): print '\n\n[!] CPAC says: Could not open the parameter file. ' \ 'If Generate Motion Statistics is enabled, this can ' \ 'usually be found in the output directory of your ' \ 'individual-level analysis runs.\n' print 'Path not found: ', parameter_file, '\n\n' raise Exception elif (1 not in c.runGenerateMotionStatistics) and (os.path.exists(parameter_file)): if not os.path.exists(parameter_file): print '\n\n[!] CPAC says: Could not open the parameter file. ' \ 'If Generate Motion Statistics is enabled, this can ' \ 'usually be found in the output directory of your ' \ 'individual-level analysis runs.\n' print 'Path not found: ', parameter_file, '\n\n' raise Exception else: def no_measures_error(measure): print '\n\n[!] CPAC says: The measure %s was included in ' \ 'your group analysis design matrix formula, but ' \ 'Generate Motion Statistics was not run during ' \ 'individual-level analysis.\n' % measure print 'Please run Generate Motion Statistics if you wish ' \ 'to include this measure in your model.\n' print 'If you HAVE completed a run with this option ' \ 'enabled, then you are seeing this error because ' \ 'the motion parameter file normally created by this ' \ 'option is missing.\n\n' raise Exception for measure in measure_list: if (measure in conf.design_formula): no_measures_error(measure) parameter_file = None ''' run create_fsl_model.py to generate the group analysis models ''' # path to the pipeline folder to be passed to create_fsl_model.py # so that certain files like output_means.csv can be accessed pipeline_path = os.path.join(c.outputDirectory, p_id[0]) # the current output that cpac_group_analysis_pipeline.py and # create_fsl_model.py is currently being run for current_output = s_paths[0].replace(pipeline_path, '').split('/')[2] try: from CPAC.utils import create_fsl_model create_fsl_model.run(conf, c.fTest, parameter_file, pipeline_path, current_output, True) #print >>diag, "> Runs create_fsl_model." #print >>diag, "" except Exception, e: print "FSL Group Analysis model not successfully created - error in create_fsl_model script" #print "Error ->", e raise model_sub_list.append((conf.output_dir, conf.subject_list))
def prep_group_analysis_workflow(c, group_config_file, resource, subject_infos, threshold_val): # # this function runs once per output file during group analysis # import yaml import commands # p_id = a list of pipeline IDs, i.e. the name of the output folder for # the strat # s_ids = a list of all the subject IDs # scan_ids = a list of scan IDs # s_paths = a list of all of the filepaths of this particular output # file that prep_group_analysis_workflow is being called for p_id, s_ids, scan_ids, s_paths = (list(tup) for tup in zip(*subject_infos)) try: group_conf = Configuration(yaml.load(open(os.path.realpath(group_config_file), 'r'))) except Exception as e: err_string = "\n\n[!] CPAC says: Could not read group model " \ "configuration YML file. Ensure you have read access " \ "for the file and that it is formatted properly.\n\n" \ "Configuration file: %s\n\nError details: %s" \ % (group_config_file, e) raise Exception(err_string) group_sublist_file = open(group_conf.subject_list, 'r') group_sublist_items = group_sublist_file.readlines() group_sublist = [line.rstrip('\n') for line in group_sublist_items \ if not (line == '\n') and not line.startswith('#')] # list of subjects for which paths which DO exist exist_paths = [] # paths to the actual derivatives for those subjects derivative_paths = [] z_threshold = float(group_conf.z_threshold[0]) p_threshold = float(group_conf.p_threshold[0]) custom_confile = group_conf.custom_contrasts if ((custom_confile == None) or (custom_confile == '') or \ ("None" in custom_confile)): if (len(group_conf.f_tests) == 0) or (group_conf.f_tests == None): fTest = False else: fTest = True else: if not os.path.exists(custom_confile): errmsg = "\n[!] CPAC says: You've specified a custom contrasts " \ ".CSV file for your group model, but this file cannot " \ "be found. Please double-check the filepath you have " \ "entered.\n\nFilepath: %s\n\n" % custom_confile raise Exception(errmsg) evs = open(custom_confile, 'r').readline() evs = evs.rstrip('\r\n').split(',') count_ftests = 0 fTest = False for ev in evs: if "f_test" in ev: count_ftests += 1 if count_ftests > 0: fTest = True ''' begin iteration through group subject list for processing ''' print "Sorting through subject list to check for missing outputs " \ "for %s..\n" % resource for ga_sub in group_sublist: # Strip out carriage-return character if it is there if ga_sub.endswith('\r'): ga_sub = ga_sub.rstrip('\r') # ga_sub = subject ID taken off the group analysis subject list # let's check to make sure the subject list is formatted for # repeated measures properly if repeated measures is enabled # and vice versa if (group_conf.repeated_measures == True) and (',' not in ga_sub): print '\n\n' print '[!] CPAC says: The group analysis subject list ' \ 'is not in the appropriate format for repeated ' \ 'measures.\n' print 'Please use the appropriate format as described in ' \ 'the CPAC User Guide or turn off Repeated Measures ' \ 'in the CPAC pipeline configuration editor, found ' \ 'in the \'Group Analysis Settings\' tab of the ' \ 'pipeline configuration editor.\n' print 'NOTE: CPAC generates a properly-formatted group ' \ 'analysis subject list meant for running repeated ' \ 'measures when you create your original subject ' \ 'list. Look for \'subject_list_group_analysis_' \ 'repeated_measures.txt\' in the directory where ' \ 'you created your subject list.\n\n' raise Exception elif (group_conf.repeated_measures == False) and (',' in ga_sub): print '\n\n' print '[!] 
CPAC says: It looks like your group analysis ' \ 'subject list is formatted for running repeated ' \ 'measures, but \'Run Repeated Measures\' is not ' \ 'enabled in the pipeline configuration, found in ' \ 'the \'Group Analysis Settings\' tab of the ' \ 'pipeline configuration editor.\n' print 'Double-check your pipeline configuration?\n\n' raise Exception ''' process subject ids for repeated measures, if it is on ''' # if repeated measures is being run and the subject list # is a list of subject IDs and scan IDs concatenated if (group_conf.repeated_measures == True): # sub.count(',') equals 1 when there is either multiple scans # or multiple sessions but not both, for repeated measures # sub.count(',') equals 2 when there are multiple sessions # AND scans, for repeated measures if ga_sub.count(',') == 1: sub_id = ga_sub.split(',',1)[0] other_id = ga_sub.split(',',1)[1] elif ga_sub.count(',') == 2: sub_id = ga_sub.split(',',2)[0] scan_id = ga_sub.split(',',2)[1] session_id = ga_sub.split(',',2)[2] ''' drop subjects from the group subject list ''' # check the path files in path_files_here folder in the # subject's output folder - and drop any subjects from the # group analysis subject list which do not exist in the paths # to the output files ''' REVISIT THIS LATER to establish a potentially better way to pull output paths (instead of path_files_here) ''' for path in s_paths: if (group_conf.repeated_measures == True): if ga_sub.count(',') == 1: if (sub_id in path) and (other_id in path): exist_paths.append(ga_sub) derivative_paths.append(path) elif ga_sub.count(',') == 2: if (sub_id in path) and (scan_id in path) and \ (session_id in path): exist_paths.append(ga_sub) derivative_paths.append(path) else: if ga_sub in path: exist_paths.append(ga_sub) derivative_paths.append(path) # END subject-dropping! if len(derivative_paths) == 0: print '\n\n\n[!] CPAC says: None of the subjects listed in the ' \ 'group analysis subject list were found to have outputs ' \ 'produced by individual-level analysis.\n\nEnsure that ' \ 'the subjects listed in your group analysis subject list ' \ 'are the same as the ones included in the individual-' \ 'level analysis you are running group-level analysis for.' 
\ '\n\n\n' raise Exception ''' END subject list iteration ''' # check to see if any derivatives of subjects are missing if len(list(set(group_sublist) - set(exist_paths))) >0: print "List of outputs missing for subjects:" print list(set(group_sublist) - set(exist_paths)) print "..for derivatives:" print resource print "..at paths:" print os.path.dirname(s_paths[0]).replace(s_ids[0], '*') # create the path string for the group analysis output out_dir = os.path.dirname(s_paths[0]).split(p_id[0] + '/') out_dir = os.path.join(group_conf.output_dir, out_dir[1]) out_dir = out_dir.replace(s_ids[0], 'group_analysis_results_%s/_grp_model_%s'%(p_id[0],group_conf.model_name)) model_out_dir = os.path.join(group_conf.output_dir, 'group_analysis_results_%s/_grp_model_%s'%(p_id[0],group_conf.model_name)) mod_path = os.path.join(out_dir, 'model_files') if not os.path.isdir(mod_path): os.makedirs(mod_path) ''' write the new subject list ''' new_sub_file = os.path.join(mod_path, os.path.basename(group_conf.subject_list)) try: f = open(new_sub_file, 'w') for sub in exist_paths: print >>f, sub f.close() except: print "Error: Could not open subject list file: ", new_sub_file raise Exception group_conf.update('subject_list',new_sub_file) sub_id_label = group_conf.subject_id_label # Run 'create_fsl_model' script to extract phenotypic data from # the phenotypic file for each of the subjects in the subject list ''' get the motion statistics parameter file, if present ''' # get the parameter file so it can be passed to create_fsl_model.py # so MeanFD or other measures can be included in the design matrix measure_list = ['MeanFD', 'MeanFD_Jenkinson', 'MeanDVARS'] for measure in measure_list: if (measure in group_conf.design_formula): parameter_file = os.path.join(c.outputDirectory, p_id[0], '%s%s_all_params.csv'%(scan_ids[0].strip('_'),threshold_val)) if 1 in c.runGenerateMotionStatistics: if not os.path.exists(parameter_file): print '\n\n[!] CPAC says: Could not find or open the motion ' \ 'parameter file. This is necessary if you have included ' \ 'any of the MeanFD measures in your group model.\n\n' \ 'If Generate Motion Statistics is enabled, this file can ' \ 'usually be found in the output directory of your ' \ 'individual-level analysis runs. If it is not there, ' \ 'double-check to see if individual-level analysis had ' \ 'completed successfully.\n' print 'Path not found: ', parameter_file, '\n\n' raise Exception else: def no_measures_error(measure): print '\n\n[!] 
CPAC says: The measure %s was included in ' \ 'your group analysis design matrix formula, but ' \ 'Generate Motion Statistics was not run during ' \ 'individual-level analysis.\n' % measure print 'Please run Generate Motion Statistics if you wish ' \ 'to include this measure in your model.\n' print 'If you HAVE completed a run with this option ' \ 'enabled, then you are seeing this error because ' \ 'the motion parameter file normally created by this ' \ 'option is missing.\n\n' raise Exception for measure in measure_list: if (measure in group_conf.design_formula): no_measures_error(measure) parameter_file = None break else: parameter_file = None # path to the pipeline folder to be passed to create_fsl_model.py # so that certain files like output_means.csv can be accessed pipeline_path = os.path.join(c.outputDirectory, p_id[0]) # the current output that cpac_group_analysis_pipeline.py and # create_fsl_model.py is currently being run for current_output = resource #s_paths[0].replace(pipeline_path, '').split('/')[2] # generate working directory for this output's group analysis run workDir = '%s/group_analysis/%s/%s_%s' % (c.workingDirectory, group_conf.model_name, resource, scan_ids[0]) # s_paths is a list of paths to each subject's derivative (of the current # derivative gpa is being run on) - s_paths_dirList is a list of each directory # in this path separated into list elements # this makes strgy_path basically the directory path of the folders after # the scan ID folder level strgy_path = os.path.dirname(s_paths[0]).split(scan_ids[0])[1] # get rid of periods in the path for ch in ['.']: if ch in strgy_path: strgy_path = strgy_path.replace(ch, "") # create nipype-workflow-name-friendly strgy_path # (remove special characters) strgy_path_name = strgy_path.replace('/', "_") workDir = workDir + '/' + strgy_path_name ''' merge the remaining subjects for this current output ''' # then, take the group mask, and iterate over the list of subjects # remaining to extract the mean of each subject using the group # mask merge_input = " " merge_output_dir = workDir + "/merged_files" if not os.path.exists(merge_output_dir): os.makedirs(merge_output_dir) merge_output = merge_output_dir + "/" + current_output + "_merged.nii.gz" merge_mask_output = merge_output_dir + "/" + current_output + "_merged_mask.nii.gz" # create a string per derivative filled with every subject's path to the # derivative output file for derivative_path in derivative_paths: merge_input = merge_input + " " + derivative_path merge_string = "fslmerge -t %s %s" % (merge_output, merge_input) # MERGE the remaining outputs try: commands.getoutput(merge_string) except Exception as e: print "[!] CPAC says: FSL Merge failed for output: %s" % current_output print "Error details: %s\n\n" % e raise merge_mask_string = "fslmaths %s -abs -Tmin -bin %s" % (merge_output, merge_mask_output) # CREATE A MASK of the merged file try: commands.getoutput(merge_mask_string) except Exception as e: print "[!] 
CPAC says: FSL Mask failed for output: %s" % current_output print "Error details: %s\n\n" % e raise derivative_means_dict = {} roi_means_dict = {} # CALCULATE THE MEANS of each remaining output using the group mask for derivative_path in derivative_paths: try: if "Group Mask" in group_conf.mean_mask: maskave_output = commands.getoutput("3dmaskave -mask %s %s" % (merge_mask_output, derivative_path)) elif "Individual Mask" in group_conf.mean_mask: maskave_output = commands.getoutput("3dmaskave -mask %s %s" % (derivative_path, derivative_path)) except Exception as e: print "[!] CPAC says: AFNI 3dmaskave failed for output: %s\n" \ "(Measure Mean calculation)" % current_output print "Error details: %s\n\n" % e raise # get the subject ID of the current derivative path reliably derivative_path_subID = derivative_path.replace(pipeline_path,"").strip("/").split("/")[0] # this crazy-looking command simply extracts the mean from the # verbose AFNI 3dmaskave output string derivative_means_dict[derivative_path_subID] = maskave_output.split("\n")[-1].split(" ")[0] # derivative_means_dict is now something like this: # { 'sub001': 0.3124, 'sub002': 0.2981, .. } # if custom ROI means are included in the model, do the same for those if "Custom_ROI_Mean" in group_conf.design_formula: try: if "centrality" in derivative_path: # resample custom roi mask to 3mm, then use that resampled_roi_mask = merge_output_dir + "/" + current_output + "_resampled_roi_mask.nii.gz" commands.getoutput("flirt -in %s -ref %s -o %s -applyxfm -init %s -interp nearestneighbour" % (group_conf.custom_roi_mask, derivative_path, resampled_roi_mask, c.identityMatrix)) ROIstats_output = commands.getoutput("3dROIstats -mask %s %s" % (resampled_roi_mask, derivative_path)) else: ROIstats_output = commands.getoutput("3dROIstats -mask %s %s" % (group_conf.custom_roi_mask, derivative_path)) except Exception as e: print "[!] CPAC says: AFNI 3dROIstats failed for output: %s" \ "\n(Custom ROI Mean calculation)" % current_output print "Error details: %s\n\n" % e raise ROIstats_list = ROIstats_output.split("\t") # calculate the number of ROIs - 3dROIstats output can be split # into a list, and the actual ROI means begin at a certain point num_rois = (len(ROIstats_list)-3)/2 roi_means = [] # create a list of the ROI means - each derivative of each subject # will have N number of ROIs depending on how many ROIs were # specified in the custom ROI mask for num in range(num_rois+3,len(ROIstats_list)): roi_means.append(ROIstats_list[num]) roi_means_dict[derivative_path_subID] = roi_means else: roi_means_dict = None if len(derivative_means_dict.keys()) == 0: err_string = "[!] 
CPAC says: Something went wrong with the " \ "calculation of the output means via the group mask.\n\n" raise Exception(err_string) ''' run create_fsl_model.py to generate the group analysis models ''' from CPAC.utils import create_fsl_model create_fsl_model.run(group_conf, fTest, parameter_file, derivative_means_dict, pipeline_path, current_output, model_out_dir, roi_means_dict, True) ''' begin GA workflow setup ''' if not os.path.exists(new_sub_file): raise Exception("path to input subject list %s is invalid" % new_sub_file) #if c.mixedScanAnalysis == True: # wf = pe.Workflow(name = 'group_analysis/%s/grp_model_%s'%(resource, os.path.basename(model))) #else: wf = pe.Workflow(name = resource) wf.base_dir = workDir wf.config['execution'] = {'hash_method': 'timestamp', 'crashdump_dir': os.path.abspath(c.crashLogDirectory)} log_dir = os.path.join(group_conf.output_dir, 'logs', 'group_analysis', resource, 'model_%s' % (group_conf.model_name)) if not os.path.exists(log_dir): os.makedirs(log_dir) else: pass # gp_flow # Extracts the model files (.con, .grp, .mat, .fts) from the model # directory and sends them to the create_group_analysis workflow gpa_wf gp_flow = create_grp_analysis_dataflow("gp_dataflow_%s" % resource) gp_flow.inputs.inputspec.grp_model = os.path.join(model_out_dir, "model_files", current_output) gp_flow.inputs.inputspec.model_name = group_conf.model_name gp_flow.inputs.inputspec.ftest = fTest # gpa_wf # Creates the actual group analysis workflow gpa_wf = create_group_analysis(fTest, "gp_analysis_%s" % resource) gpa_wf.inputs.inputspec.merged_file = merge_output gpa_wf.inputs.inputspec.merge_mask = merge_mask_output gpa_wf.inputs.inputspec.z_threshold = z_threshold gpa_wf.inputs.inputspec.p_threshold = p_threshold gpa_wf.inputs.inputspec.parameters = (c.FSLDIR, 'MNI152') wf.connect(gp_flow, 'outputspec.mat', gpa_wf, 'inputspec.mat_file') wf.connect(gp_flow, 'outputspec.con', gpa_wf, 'inputspec.con_file') wf.connect(gp_flow, 'outputspec.grp', gpa_wf, 'inputspec.grp_file') if fTest: wf.connect(gp_flow, 'outputspec.fts', gpa_wf, 'inputspec.fts_file') # ds # Creates the datasink node for group analysis ds = pe.Node(nio.DataSink(), name='gpa_sink') if 'sca_roi' in resource: out_dir = os.path.join(out_dir, \ re.search('sca_roi_(\d)+',os.path.splitext(os.path.splitext(os.path.basename(s_paths[0]))[0])[0]).group(0)) if 'dr_tempreg_maps_zstat_files_to_standard_smooth' in resource: out_dir = os.path.join(out_dir, \ re.search('temp_reg_map_z_(\d)+',os.path.splitext(os.path.splitext(os.path.basename(s_paths[0]))[0])[0]).group(0)) if 'centrality' in resource: names = ['degree_centrality_binarize', 'degree_centrality_weighted', \ 'eigenvector_centrality_binarize', 'eigenvector_centrality_weighted', \ 'lfcd_binarize', 'lfcd_weighted'] for name in names: if name in os.path.basename(s_paths[0]): out_dir = os.path.join(out_dir, name) break if 'tempreg_maps' in resource: out_dir = os.path.join(out_dir, \ re.search('\w*[#]*\d+', os.path.splitext(os.path.splitext(os.path.basename(s_paths[0]))[0])[0]).group(0)) # if c.mixedScanAnalysis == True: # out_dir = re.sub(r'(\w)*scan_(\w)*(\d)*(\w)*[/]', '', out_dir) ds.inputs.base_directory = out_dir ds.inputs.container = '' ds.inputs.regexp_substitutions = [(r'(?<=rendered)(.)*[/]','/'), (r'(?<=model_files)(.)*[/]','/'), (r'(?<=merged)(.)*[/]','/'), (r'(?<=stats/clusterMap)(.)*[/]','/'), (r'(?<=stats/unthreshold)(.)*[/]','/'), (r'(?<=stats/threshold)(.)*[/]','/'), (r'_cluster(.)*[/]',''), (r'_slicer(.)*[/]',''), (r'_overlay(.)*[/]','')] ''' if 1 in 
c.runSymbolicLinks: link_node = pe.MapNode(interface=util.Function( input_names=['in_file', 'resource'], output_names=[], function=prepare_gp_links), name='link_gp_', iterfield=['in_file']) link_node.inputs.resource = resource wf.connect(ds, 'out_file', link_node, 'in_file') ''' ########datasink connections######### if fTest: wf.connect(gp_flow, 'outputspec.fts', ds, 'model_files.@0') wf.connect(gp_flow, 'outputspec.mat', ds, 'model_files.@1' ) wf.connect(gp_flow, 'outputspec.con', ds, 'model_files.@2') wf.connect(gp_flow, 'outputspec.grp', ds, 'model_files.@3') wf.connect(gpa_wf, 'outputspec.merged', ds, 'merged') wf.connect(gpa_wf, 'outputspec.zstats', ds, 'stats.unthreshold') wf.connect(gpa_wf, 'outputspec.zfstats', ds,'stats.unthreshold.@01') wf.connect(gpa_wf, 'outputspec.fstats', ds,'stats.unthreshold.@02') wf.connect(gpa_wf, 'outputspec.cluster_threshold_zf', ds, 'stats.threshold') wf.connect(gpa_wf, 'outputspec.cluster_index_zf', ds,'stats.clusterMap') wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt_zf', ds, 'stats.clusterMap.@01') wf.connect(gpa_wf, 'outputspec.overlay_threshold_zf', ds, 'rendered') wf.connect(gpa_wf, 'outputspec.rendered_image_zf', ds, 'rendered.@01') wf.connect(gpa_wf, 'outputspec.cluster_threshold', ds, 'stats.threshold.@01') wf.connect(gpa_wf, 'outputspec.cluster_index', ds, 'stats.clusterMap.@02') wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt', ds, 'stats.clusterMap.@03') wf.connect(gpa_wf, 'outputspec.overlay_threshold', ds, 'rendered.@02') wf.connect(gpa_wf, 'outputspec.rendered_image', ds, 'rendered.@03') ###################################### # Run the actual group analysis workflow wf.run() ''' except: print "Error: Group analysis workflow run command did not complete successfully." print "subcount: ", subcount print "pathcount: ", pathcount print "sublist: ", sublist_items print "input subject list: " print "conf: ", conf.subjectListFile raise Exception ''' print "**Workflow finished for model %s and resource %s"%(os.path.basename(group_conf.output_dir), resource)
def testConfig(self, event): ''' This function runs when the user clicks the "Test Configuration" button in the pipeline configuration window. It prompts the user for a sample subject list (i.e. one that they will be using with the config they are building). Then it builds the pipeline but does not run it. It then reports whether or not the config will run or not depending on if the pipeline gets built successfully. ''' import os import yaml from CPAC.utils import Configuration from CPAC.pipeline.cpac_pipeline import prep_workflow from CPAC.pipeline.cpac_runner import build_strategies def display(win, msg, changeBg=True): wx.MessageBox(msg, "Error") if changeBg: win.SetBackgroundColour("pink") win.SetFocus() win.Refresh() # Collect a sample subject list and parse it in testDlg0 = wx.MessageDialog( self, 'This tool will run a quick check on the current pipeline configuration.' \ ' Click OK to provide a subject list you will be using with this setup.', 'Subject List', wx.OK | wx.ICON_INFORMATION) testDlg0.ShowModal() testDlg0.Destroy() dlg = wx.FileDialog(self, message="Choose the CPAC Subject list file", defaultDir=os.getcwd(), defaultFile="CPAC_subject_list.yml", wildcard="YAML files(*.yaml, *.yml)|*.yaml;*.yml", style=wx.OPEN | wx.CHANGE_DIR) if dlg.ShowModal() == wx.ID_OK: subListPath = dlg.GetPath() sublist = yaml.load(open(os.path.realpath(subListPath), 'r')) # Check to ensure the user is providing an actual subject # list and not some other kind of file try: subInfo = sublist[0] except: errDlg4 = wx.MessageDialog( self, 'ERROR: Subject list file not in proper format - check if you' \ ' loaded the correct file? \n\n' \ 'Error name: config_window_0001', 'Subject List Error', wx.OK | wx.ICON_ERROR) errDlg4.ShowModal() errDlg4.Destroy() raise Exception # Another check to ensure the actual subject list was generated # properly and that it will work if 'subject_id' not in subInfo: errDlg3 = wx.MessageDialog( self, 'ERROR: Subject list file not in proper format - check if you' \ ' loaded the correct file? \n\n' \ 'Error name: config_window_0002', 'Subject List Error', wx.OK | wx.ICON_ERROR) errDlg3.ShowModal() errDlg3.Destroy() raise Exception # Following code reads in the parameters and selections from the # pipeline configuration window and populate the config_list config_list = [] wf_counter = [] for page in self.nb.get_page_list(): switch = page.page.get_switch() ctrl_list = page.page.get_ctrl_list() validate = False if switch: switch_val = str(switch.get_selection()).lower() if switch_val == 'on' or switch_val == 'true' or switch_val == '1': validate = True wf_counter.append(page.get_counter()) for ctrl in ctrl_list: # option_name will be the selection name as it is written # as the dictionary key of the config.yml dictionary option_name = ctrl.get_name() #validating if (switch == None or validate) and ctrl.get_validation() \ and (option_name != 'derivativeList') and (option_name != 'modelConfigs'): win = ctrl.get_ctrl() if isinstance(ctrl.get_selection(), list): value = ctrl.get_selection() if not value: display( win, "%s field is empty or the items are not checked!" % ctrl.get_name(), False) return else: value = str(ctrl.get_selection()) if len(value) == 0: display(win, "%s field is empty!" % ctrl.get_name()) return if '/' in value and '$' not in value and not isinstance( value, list): if not os.path.exists( ctrl.get_selection()) and value != 'On/Off': display( win, "%s field contains incorrect path. Please update the path!" 
% ctrl.get_name()) return config_list.append(ctrl) # Get the user's CPAC output directory for use in this script for config in config_list: if config.get_name() == 'outputDirectory': outDir = config.get_selection() # Write out a pipeline_config file, read it in and then delete it # (Will revise the data structure of the config files later so this # can just pass the data structure instead of doing it this way) try: self.write(outDir + 'testConfig.yml', config_list) c = Configuration( yaml.load( open(os.path.realpath(outDir + 'testConfig.yml'), 'r'))) os.remove(outDir + 'testConfig.yml') except: errDlg2 = wx.MessageDialog( self, 'A problem occurred with preparing the pipeline test run. \n\n' \ 'Please ensure you have rights access to the directories you' \ ' have chosen for the CPAC working, crash, and output folders.', 'Test Configuration Error', wx.OK | wx.ICON_ERROR) errDlg2.ShowModal() errDlg2.Destroy() if (1 in c.runNuisance) or (c.Corrections != None): strategies = sorted(build_strategies(c)) else: strategies = None # Run the actual pipeline building prep and see if it works or not testDlg1 = wx.MessageDialog( self, 'Click OK to run the test. This should take only a few seconds.', 'Running Test', wx.OK | wx.ICON_INFORMATION) testDlg1.ShowModal() # Check file paths first # Just getting proper names of config file parameters try: params_file = open( p.resource_filename('CPAC', 'GUI/resources/config_parameters.txt'), "r") except: print "Error: Could not open configuration parameter file.", "\n" raise Exception paramInfo = params_file.read().split('\n') paramList = [] for param in paramInfo: if param != '': paramList.append(param.split(',')) # function for file path checking def testFile(filepath, paramName): try: if filepath != None: fileTest = open(filepath) fileTest.close() except: testDlg1.Destroy() for param in paramList: if param[0] == paramName: paramTitle = param[1] paramGroup = param[2] break errDlgFileTest = wx.MessageDialog( self, 'Error reading file - either it does not exist or you' \ ' do not have read access. 
\n\n' \ 'Parameter: %s \n' \ 'In tab: %s \n\n' \ 'Path: %s' % (paramTitle, paramGroup, filepath), 'Pipeline Not Ready', wx.OK | wx.ICON_ERROR) errDlgFileTest.ShowModal() errDlgFileTest.Destroy() testFile(c.template_brain_only_for_anat, 'template_brain_only_for_anat') testFile(c.template_skull_for_anat, 'template_skull_for_anat') testFile(c.PRIORS_WHITE, 'PRIORS_WHITE') testFile(c.PRIORS_GRAY, 'PRIORS_GRAY') testFile(c.PRIORS_CSF, 'PRIORS_CSF') testFile(c.template_brain_only_for_func, 'template_brain_only_for_func') testFile(c.template_skull_for_func, 'template_skull_for_func') testFile(c.identityMatrix, 'identityMatrix') testFile(c.boundaryBasedRegistrationSchedule, 'boundaryBasedRegistrationSchedule') testFile(c.lateral_ventricles_mask, 'lateral_ventricles_mask') testFile(c.seedSpecificationFile, 'seedSpecificationFile') testFile(c.roiSpecificationFile, 'roiSpecificationFile') testFile(c.roiSpecificationFileForSCA, 'roiSpecificationFileForSCA') testFile(c.maskSpecificationFile, 'maskSpecificationFile') testFile(c.maskSpecificationFileForSCA, 'maskSpecificationFileForSCA') testFile(c.spatialPatternMaps, 'spatialPatternMaps') testFile(c.template_symmetric_brain_only, 'template_symmetric_brain_only') testFile(c.template_symmetric_skull, 'template_symmetric_skull') testFile(c.dilated_symmetric_brain_mask, 'dilated_symmetric_brain_mask') testFile(c.configFileTwomm, 'configFileTwomm') testFile(c.templateSpecificationFile, 'templateSpecificationFile') testFile(c.bascAffinityThresholdFile, 'bascAffinityThresholdFile') testFile(c.cwasROIFile, 'cwasROIFile') testFile(c.cwasRegressorFile, 'cwasRegressorFile') try: # Run the pipeline building prep_workflow(sublist[0], c, strategies, 0) except: testDlg1.Destroy() errDlg1 = wx.MessageDialog( self, 'There are issues with the current configuration which need to be' \ ' resolved - please check to make sure the options you are running' \ ' have the proper pre-requisites selected.', 'Pipeline Not Ready', wx.OK | wx.ICON_ERROR) errDlg1.ShowModal() errDlg1.Destroy() else: testDlg1.Destroy() okDlg1 = wx.MessageDialog( self, 'The current configuration will run successfully. You can safely' \ ' save and run this setup!', 'Pipeline Ready', wx.OK | wx.ICON_INFORMATION) okDlg1.ShowModal() okDlg1.Destroy()
def run(config_file, subject_list_file, p_name = None): # take date+time stamp for run identification purposes unique_pipeline_id = strftime("%Y%m%d%H%M%S") pipeline_start_stamp = strftime("%Y-%m-%d_%H:%M:%S") try: if not os.path.exists(config_file): raise IOError else: c = Configuration(yaml.load(open(os.path.realpath(config_file), 'r'))) except IOError: print "config file %s doesn't exist" % config_file raise except Exception: print "Error reading config file - %s" % config_file raise Exception #do some validation validate(c) try: sublist = yaml.load(open(os.path.realpath(subject_list_file), 'r')) except: print "Subject list is not in proper YAML format. Please check your file" raise Exception # NOTE: strategies list is only needed in cpac_pipeline prep_workflow for # creating symlinks strategies = sorted(build_strategies(c)) print "strategies ---> " print strategies sub_scan_map ={} print "subject list: " print sublist try: for sub in sublist: if sub['unique_id']: s = sub['subject_id']+"_" + sub["unique_id"] else: s = sub['subject_id'] scan_ids = ['scan_anat'] for id in sub['rest']: scan_ids.append('scan_'+ str(id)) sub_scan_map[s] = scan_ids except: print "\n\n" + "ERROR: Subject list file not in proper format - check if you loaded the correct file?" + "\n" + \ "Error name: cpac_runner_0001" + "\n\n" raise Exception create_group_log_template(sub_scan_map, os.path.join(c.outputDirectory, 'logs')) seeds_created = [] if not (c.seedSpecificationFile is None): try: if os.path.exists(c.seedSpecificationFile): seeds_created = create_seeds_(c.seedOutputLocation, c.seedSpecificationFile, c.FSLDIR) print 'seeds created %s -> ' % seeds_created except: raise IOError('Problem in seedSpecificationFile') if 1 in c.runVoxelTimeseries: if 'roi_voxelwise' in c.useSeedInAnalysis: c.maskSpecificationFile = append_seeds_to_file(c.workingDirectory, seeds_created, c.maskSpecificationFile) if 1 in c.runROITimeseries: if 'roi_average' in c.useSeedInAnalysis: c.roiSpecificationFile = append_seeds_to_file(c.workingDirectory, seeds_created, c.roiSpecificationFile) if 1 in c.runNetworkCentrality: if 'centrality_outputs_smoothed' in c.useSeedInAnalysis: c.templateSpecificationFile = append_seeds_to_file(c.workingDirectory, seeds_created, c.templateSpecificationFile) pipeline_timing_info = [] pipeline_timing_info.append(unique_pipeline_id) pipeline_timing_info.append(pipeline_start_stamp) pipeline_timing_info.append(len(sublist)) if not c.runOnGrid: from CPAC.pipeline.cpac_pipeline import prep_workflow procss = [Process(target=prep_workflow, args=(sub, c, strategies, 1, pipeline_timing_info, p_name)) for sub in sublist] pid = open(os.path.join(c.outputDirectory, 'pid.txt'), 'w') jobQueue = [] if len(sublist) <= c.numSubjectsAtOnce: """ Stream all the subjects as sublist is less than or equal to the number of subjects that need to run """ for p in procss: p.start() print >>pid,p.pid else: """ Stream the subject workflows for preprocessing. 
At Any time in the pipeline c.numSubjectsAtOnce will run, unless the number remaining is less than the value of the parameter stated above """ idx = 0 while(idx < len(sublist)): if len(jobQueue) == 0 and idx == 0: idc = idx for p in procss[idc: idc + c.numSubjectsAtOnce]: p.start() print >>pid,p.pid jobQueue.append(p) idx += 1 else: for job in jobQueue: if not job.is_alive(): print 'found dead job ', job loc = jobQueue.index(job) del jobQueue[loc] procss[idx].start() jobQueue.append(procss[idx]) idx += 1 pid.close() else: import commands import pickle temp_files_dir = os.path.join(os.getcwd(), 'cluster_temp_files') print commands.getoutput("mkdir -p %s" % temp_files_dir) strategies_file = os.path.join(temp_files_dir, 'strategies.obj') f = open(strategies_file, 'w') pickle.dump(strategies, f) f.close() if 'sge' in c.resourceManager.lower(): run_sge_jobs(c, config_file, strategies_file, subject_list_file, p_name) elif 'pbs' in c.resourceManager.lower(): run_pbs_jobs(c, config_file, strategies_file, subject_list_file, p_name) elif 'condor' in c.resourceManager.lower(): run_condor_jobs(c, config_file, strategies_file, subject_list_file, p_name)
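# ---------------------------------------------------------------------------
# Minimal sketch (not C-PAC code) of the scheduling pattern used by run()
# above when not running on a grid: start at most `max_at_once` subject
# workflows, and whenever one finishes, start the next one from the queue.
# fake_workflow() is a hypothetical stand-in for prep_workflow().
# ---------------------------------------------------------------------------
import time
from multiprocessing import Process

def fake_workflow(subject_id):
    # stand-in for prep_workflow(sub, c, strategies, ...)
    time.sleep(1)

def run_throttled(subjects, max_at_once=2):
    procs = [Process(target=fake_workflow, args=(s,)) for s in subjects]
    running = []
    idx = 0
    while idx < len(procs) or running:
        # start new processes while there is spare capacity
        while idx < len(procs) and len(running) < max_at_once:
            procs[idx].start()
            running.append(procs[idx])
            idx += 1
        # drop finished processes from the queue
        for p in running[:]:
            if not p.is_alive():
                running.remove(p)
        # short sleep so the polling loop does not consume 100% CPU
        time.sleep(0.2)

if __name__ == '__main__':
    run_throttled(['sub-01', 'sub-02', 'sub-03', 'sub-04'], max_at_once=2)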
def AddConfig(self, event): ''' docstring ''' # Gets called when you click 'Load' for pipeline config in the GUI dlg = wx.FileDialog(self, message="Choose the CPAC Configuration file", defaultDir=os.getcwd(), defaultFile="", wildcard="YAML files(*.yaml, *.yml)|*.yaml;*.yml", style=wx.OPEN | wx.CHANGE_DIR) # User clicks "OK" if dlg.ShowModal() == wx.ID_OK: # Load config file into memory and verify its not a subject list path = dlg.GetPath() # Check for path existence if os.path.exists(path): path = os.path.realpath(path) try: f_cfg = yaml.load(open(path, 'r')) except Exception as e: print '\n\nUnable to load the specified file: %s' % path print "The YAML file may not be formatted properly." print 'Error:\n%s\n\n' % e raise Exception if type(f_cfg) == dict: if not f_cfg.has_key('pipelineName'): err_msg = 'File is not a pipeline configuration '\ 'file. It might be a data configuration file.' raise Exception(err_msg) else: err_msg = 'File is not a pipeline configuration '\ 'file. It might be a subject list file.' raise Exception(err_msg) # Otherwise, report error else: err_msg = 'File %s does not exist. Check and try again.' % path raise Exception(err_msg) # If config file is ok, proceed to load if self.check_config(path) > 0: while True: try: c = Configuration(f_cfg) except Exception as e: if "object has no attribute" in e: err = "%s\n\nIs this attribute linked (using " \ "'${}') in any of your configuration " \ "parameters? (Standard template paths, " \ "for example). If this is a pipeline " \ "configuration file from an older version "\ "of CPAC, this parameter may be obsolete. "\ "Double-check your selections.\n\n" % e print err else: print '\n\nERROR: Configuration file could not ' \ 'be loaded properly - the file might be '\ 'access-protected or you might have ' \ 'chosen the wrong file.\n' print 'Error name: main_window_0001\n\n' print 'Exception: %s' % e # Valid pipeline name if c.pipelineName != None: if self.pipeline_map.get(c.pipelineName) == None: # this runs if you click 'Load' on the main # CPAC window, enter a path, and the pipeline # name attribute of the pipeline config file # you are loading does NOT already exist in # the listbox, i.e., the proper condition self.pipeline_map[str(c.pipelineName)] = path self.listbox.Append(str(c.pipelineName)) dlg.Destroy() break else: # this runs if you click 'Load' on the main # CPAC window, enter a path, and the pipeline # name attribute of the pipeline config file # you are loading DOES already exist in # the listbox, which is a conflict dlg3 = wx.MessageDialog(self, 'The \'' \ 'Pipeline Name\' attribute of the ' \ 'configuration file you are loading' \ ' already exists in one of the' \ ' configuration files listed under' \ ' \'Pipelines\'.\n\nPlease change' \ ' the pipeline name attribute (not' \ ' the filename) from within the' \ ' pipeline editor (under the' \ ' \'Output Settings\' tab in' \ ' \'Environment Setup\'), or load a' \ ' new configuration file.\n\n' \ 'Pipeline configuration with' \ ' conflicting name:\n%s' \ % c.pipelineName, 'Conflicting Pipeline Names', wx.OK | wx.ICON_ERROR) dlg3.ShowModal() dlg3.Destroy() break # Pipeline name is None else: dlg4 = wx.MessageDialog(self, 'Warning: Pipeline name is blank.\n\nPlease edit' \ ' the pipeline_config.yml file in a text editor and' \ ' restore the pipelineName field.', 'Warning', wx.OK | wx.ICON_ERROR) dlg4.ShowModal() dlg4.Destroy() dlg.Destroy break
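# ---------------------------------------------------------------------------
# Sketch (not C-PAC code) of the check AddConfig() performs above: a loaded
# YAML document is only treated as a pipeline configuration if it is a
# dictionary containing a 'pipelineName' key; a list is assumed to be a
# subject list. yaml.safe_load is used here as a hardened alternative to
# yaml.load.
# ---------------------------------------------------------------------------
import yaml

def classify_yaml(path):
    with open(path, 'r') as f:
        data = yaml.safe_load(f)
    if isinstance(data, dict) and 'pipelineName' in data:
        return 'pipeline_config'
    if isinstance(data, list):
        return 'subject_list'
    return 'unknown'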
def testConfig(self, event): ''' This function runs when the user clicks the "Test Configuration" button in the pipeline configuration window. It prompts the user for a sample subject list (i.e. one that they will be using with the config they are building). Then it builds the pipeline but does not run it. It then reports whether or not the config will run or not depending on if the pipeline gets built successfully. ''' # Import packages import os import yaml from CPAC.utils import Configuration from CPAC.pipeline.cpac_pipeline import prep_workflow from CPAC.pipeline.cpac_runner import build_strategies def display(win, msg, changeBg=True): wx.MessageBox(msg, "Error") if changeBg: win.SetBackgroundColour("pink") win.SetFocus() win.Refresh() # Collect a sample subject list and parse it in testDlg0 = wx.MessageDialog( self, 'This tool will run a quick check on the current pipeline '\ 'configuration. Click OK to provide a subject list you ' \ 'will be using with this setup.', 'Subject List', wx.OK | wx.ICON_INFORMATION) testDlg0.ShowModal() testDlg0.Destroy() dlg = wx.FileDialog(self, message="Choose the CPAC Subject list file", defaultDir=os.getcwd(), defaultFile="CPAC_subject_list.yml", wildcard="YAML files(*.yaml, *.yml)|*.yaml;*.yml", style=wx.OPEN | wx.CHANGE_DIR) if dlg.ShowModal() == wx.ID_OK: subListPath = dlg.GetPath() # Load and test the subject list print 'Checking subject list: %s...' % subListPath sublist = yaml.load(open(os.path.realpath(subListPath), 'r')) sub_flg = self.test_sublist(sublist) if not sub_flg: raise Exception print 'Subject list looks good!' # Following code reads in the parameters and selections from the # pipeline configuration window and populate the config_list config_list = [] wf_counter = [] for page in self.nb.get_page_list(): switch = page.page.get_switch() ctrl_list = page.page.get_ctrl_list() validate = False if switch: switch_val = str(switch.get_selection()).lower() if switch_val == 'on' or switch_val == 'true' or \ switch_val == '1': validate = True wf_counter.append(page.get_counter()) for ctrl in ctrl_list: # option_name will be the selection name as it is written # as the dictionary key of the config.yml dictionary option_name = ctrl.get_name() #validating if (switch == None or validate) and ctrl.get_validation() \ and (option_name != 'derivativeList') and \ (option_name != 'modelConfigs'): win = ctrl.get_ctrl() if isinstance(ctrl.get_selection(), list): value = ctrl.get_selection() if not value: display( win, "%s field is empty or the items are " \ "not checked!" % ctrl.get_name(), False) return elif (option_name == "tsa_roi_paths") or \ (option_name == "sca_roi_paths"): # fires if the control is the checkbox grid for # multiple paths assigned to multiple options # (i.e. timeseries analysis) config_list.append(ctrl) continue else: value = str(ctrl.get_selection()) if len(value) == 0: display(win, "%s field is empty!" % ctrl.get_name()) return if '/' in value and '$' not in value and not \ isinstance(value, list): if not os.path.exists(ctrl.get_selection()) and \ value != 'On/Off': display( win, "%s field contains incorrect path. " \ "Please update the path!" 
% ctrl.get_name()) return config_list.append(ctrl) # Write out a pipeline_config file, read it in and then delete it # (Will revise the data structure of the config files later so this # can just pass the data structure instead of doing it this way) try: test_cfg_yml = '/tmp/test_config.yml' self.write(test_cfg_yml, config_list) c = Configuration( yaml.load(open(os.path.realpath(test_cfg_yml), 'r'))) os.remove(test_cfg_yml) except: errDlg2 = wx.MessageDialog( self, 'A problem occurred with preparing the pipeline test run. \n\n' \ 'Please ensure you have rights access to the directories you' \ ' have chosen for the CPAC working, crash, and output folders.', 'Test Configuration Error', wx.OK | wx.ICON_ERROR) errDlg2.ShowModal() errDlg2.Destroy() if (1 in c.runNuisance) or (c.Regressors != None): strategies = sorted(build_strategies(c)) else: strategies = None # Run the actual pipeline building prep and see if it works or not testDlg1 = wx.MessageDialog( self, 'Click OK to run the test. This should take only a few seconds.', 'Running Test', wx.OK | wx.ICON_INFORMATION) testDlg1.ShowModal() # Check file paths first # Just getting proper names of config file parameters try: params_file = open( p.resource_filename('CPAC', 'GUI/resources/config_parameters.txt'), "r") except: print "Error: Could not open configuration parameter file.", "\n" raise Exception paramInfo = params_file.read().split('\n') paramList = [] for param in paramInfo: if param != '': paramList.append(param.split(',')) # function for file path checking def testFile(filepath, paramName, switch): try: if (1 in switch) and (filepath != None): fileTest = open(filepath) fileTest.close() except: testDlg1.Destroy() for param in paramList: if param[0] == paramName: paramTitle = param[1] paramGroup = param[2] break errDlgFileTest = wx.MessageDialog( self, 'Error reading file - either it does not exist or '\ 'you do not have read access. \n\n' \ 'Parameter: %s \n' \ 'In tab: %s \n\n' \ 'Path: %s' % (paramTitle, paramGroup, filepath), 'Pipeline Not Ready', wx.OK | wx.ICON_ERROR) errDlgFileTest.ShowModal() errDlgFileTest.Destroy() # Check S3 output bucket access if writing to S3 output_dir = c.outputDirectory s3_str = 's3://' if output_dir.lower().startswith(s3_str): output_dir_sp = output_dir.split('/') output_dir_sp[0] = output_dir_sp[0].lower() output_dir = '/'.join(output_dir_sp) if type(output_dir) is str and output_dir.lower().startswith(s3_str): from indi_aws import fetch_creds creds_path = c.awsOutputBucketCredentials bucket_name = output_dir.split(s3_str)[1].split('/')[0] try: bucket = fetch_creds.return_bucket(creds_path, bucket_name) print 'Connection with output bucket "%s" successful!' % bucket_name except Exception as exc: err_msg = 'Unable to access output S3 bucket: "%s" with '\ 'credentials in: "%s". 
Check bucket name '\ 'and credentials file and try again'\ % (bucket_name, creds_path) testDlg1.Destroy() errDlg1 = wx.MessageDialog(self, err_msg, 'Pipeline Not Ready', wx.OK | wx.ICON_ERROR) errDlg1.ShowModal() errDlg1.Destroy() return testFile(c.template_brain_only_for_anat, \ 'template_brain_only_for_anat',[1]) testFile(c.template_skull_for_anat, 'template_skull_for_anat', [1]) testFile(c.PRIORS_WHITE, 'PRIORS_WHITE', c.runSegmentationPreprocessing) testFile(c.PRIORS_GRAY, 'PRIORS_GRAY', c.runSegmentationPreprocessing) testFile(c.PRIORS_CSF, 'PRIORS_CSF', c.runSegmentationPreprocessing) testFile(c.template_brain_only_for_func, \ 'template_brain_only_for_func',c.runRegisterFuncToMNI) testFile(c.template_skull_for_func,'template_skull_for_func', \ c.runRegisterFuncToMNI) testFile(c.identityMatrix, 'identityMatrix', c.runRegisterFuncToMNI) testFile(c.boundaryBasedRegistrationSchedule, \ 'boundaryBasedRegistrationSchedule', \ c.runRegisterFuncToAnat) testFile(c.lateral_ventricles_mask,'lateral_ventricles_mask', \ c.runNuisance) testFile(c.template_symmetric_brain_only, \ 'template_symmetric_brain_only',c.runVMHC) testFile(c.template_symmetric_skull,'template_symmetric_skull', \ c.runVMHC) testFile(c.dilated_symmetric_brain_mask, \ 'dilated_symmetric_brain_mask',c.runVMHC) testFile(c.configFileTwomm, 'configFileTwomm', c.runVMHC) testFile(c.templateSpecificationFile,'templateSpecificationFile', \ c.runNetworkCentrality) if c.tsa_roi_paths and type(c.tsa_roi_paths[0]) == dict: for roi_path in c.tsa_roi_paths[0].keys(): testFile(roi_path, "tsa_roi_paths", c.runROITimeseries) if c.sca_roi_paths and type(c.sca_roi_paths[0]) == dict: for roi_path in c.sca_roi_paths[0].keys(): testFile(roi_path, "sca_roi_paths", c.runSCA) try: # Run the pipeline building prep_workflow(sublist[0], c, strategies, 0) except Exception as xxx: print xxx print "an exception occured" testDlg1.Destroy() errDlg1 = wx.MessageDialog( self, 'There are issues with the current configuration ' \ 'which need to be resolved - please check to make ' \ 'sure the options you are running have the proper ' \ 'pre-requisites selected.\n\nIssue Info:\n%s' % xxx, 'Pipeline Not Ready', wx.OK | wx.ICON_ERROR) errDlg1.ShowModal() errDlg1.Destroy() else: testDlg1.Destroy() okDlg1 = wx.MessageDialog( self, 'The current configuration will run successfully. You '\ 'can safely save and run this setup!', 'Pipeline Ready', wx.OK | wx.ICON_INFORMATION) okDlg1.ShowModal() okDlg1.Destroy()
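# ---------------------------------------------------------------------------
# Sketch (not C-PAC code) of the pre-flight path check testFile() performs
# above: every template path tied to an enabled option (its switch list
# contains 1) is opened once so that missing or unreadable files are reported
# before the pipeline is built. The paths below are hypothetical.
# ---------------------------------------------------------------------------
def check_config_paths(path_switch_pairs):
    problems = []
    for filepath, switch in path_switch_pairs:
        if (1 in switch) and (filepath is not None):
            try:
                with open(filepath):
                    pass
            except IOError as err:
                problems.append((filepath, str(err)))
    return problems

if __name__ == '__main__':
    pairs = [('/usr/share/fsl/data/standard/MNI152_T1_2mm.nii.gz', [1]),
             ('/path/that/does/not/exist.nii.gz', [1])]
    for path, err in check_config_paths(pairs):
        print('Unreadable config path: %s (%s)' % (path, err))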
def prep_group_analysis_workflow(c, resource, subject_infos): # # this function runs once per output file during group analysis # # p_id = a list of pipeline IDs, i.e. the name of the output folder for # the strat # s_ids = a list of all the subject IDs # scan_ids = a list of scan IDs # s_paths = a list of all of the filepaths of this particular output # file that prep_group_analysis_workflow is being called for p_id, s_ids, scan_ids, s_paths = (list(tup) for tup in zip(*subject_infos)) # set this to False for now fTest = False def get_phenotypic_file(phenotypic_file, m_dict, m_list, mod_path, sub_id): import csv reader = csv.reader(open(phenotypic_file, 'rU')) columns = {} order = {} count = 0 headers = next(reader) for h in headers: columns[h] =[] order[h] = count count+=1 for r in reader: for h, v in zip(headers, r): if v: columns[h].append(str(v)) if m_dict: for measure in m_list: print('\n\nMeasure: ', measure, '\n\n') if measure in headers: #check if 'MeanFD is present' if len(columns[measure]) < 1: print('\n\ncolumns[sub_id]: ', columns[sub_id], '\n\n') for sub in columns[sub_id]: if m_dict.get(sub): if m_dict.get(sub).get(measure): columns[measure].append(m_dict[sub][measure]) else: raise Exception("Couldn't find %s value for subject %s"%(measure,sub)) else: raise Exception("Couldn't find subject %s in the parameter file"%sub) print('\n\ncolumns[measure]: ', columns, '\n\n') b = list(zip(*([k] + columns[k] for k in sorted(columns, key=order.get)))) try: os.makedirs(mod_path) except: print("%s already exists"%(mod_path)) new_phenotypic_file = os.path.join(mod_path, os.path.basename(phenotypic_file)) a = csv.writer(open(new_phenotypic_file, 'w')) for col in b: a.writerow(list(col)) return new_phenotypic_file # END get_phenotypic_file function threshold_val = None measure_dict = None measure_list = ['MeanFD', 'MeanFD_Jenkinson', 'MeanDVARS'] model_sub_list = [] if 1 in c.runScrubbing: #get scrubbing threshold if re.search('(?<=/_threshold_)\d+.\d+',s_paths[0]): threshold_val = re.search('(?<=/_threshold_)\d+.\d+',s_paths[0]).group(0) elif len(c.scrubbingThreshold) == 1: threshold_val = c.scrubbingThreshold[0] else: print("Found Multiple threshold value ") print("scrubbing threshold_val -->", threshold_val) else: print("No scrubbing enabled.") if len(c.scrubbingThreshold) == 1: threshold_val = c.scrubbingThreshold[0] import yaml for config in c.modelConfigs: print(c.modelConfigs) print(config) try: conf = Configuration(yaml.load(open(os.path.realpath(config), 'r'))) except: raise Exception("Error in reading %s configuration file" % config) group_sublist = open(conf.subject_list, 'r') sublist_items = group_sublist.readlines() subject_list = [line.rstrip('\n') for line in sublist_items \ if not (line == '\n') and not line.startswith('#')] # list of subject paths which DO exist exist_paths = [] ''' begin iteration through group subject list for processing ''' for sub in subject_list: # let's check to make sure the subject list is formatted for # repeated measures properly if repeated measures is enabled and # vice versa if (c.repeatedMeasures == True) and (',' not in sub): print('\n\n') print('[!] 
CPAC says: The group analysis subject list is ' \ 'not inthe appropriate format for repeated ' \ 'measures.\n') print('Please use the appropriate format as described in ' \ 'the CPAC User Guide or turn off Repeated Measures ' \ 'in the CPAC pipeline configuration editor, found ' \ 'in the \'Group Analysis Settings\' tab of the ' \ 'pipeline configuration editor.\n') print('NOTE: CPAC generates a properly-formatted group ' \ 'analysis subject list meant for running repeated ' \ 'measures when you create your original subject ' \ 'list. Look for \'subject_list_group_analysis_' \ 'repeated_measures.txt\' in the directory where ' \ 'you created your subject list.\n\n') raise Exception elif (c.repeatedMeasures == False) and (',' in sub): print('\n\n') print('[!] CPAC says: It looks like your group analysis ' \ 'subject list is formatted for running repeated ' \ 'measures, but \'Run Repeated Measures\' is not ' \ 'enabled in the pipeline configuration, found in ' \ 'the \'Group Analysis Settings\' tab of the ' \ 'pipeline configuration editor.\n') print('Double-check your pipeline configuration?\n\n') raise Exception ''' process subject ids for repeated measures, if it is on ''' # if repeated measures is being run and the subject list # is a list of subject IDs and scan IDs concatenated if (c.repeatedMeasures == True): # sub.count(',') equals 1 when there is either multiple scans # or multiple sessions but not both, for repeated measures # sub.count(',') equals 2 when there are multiple sessions # AND scans, for repeated measures if sub.count(',') == 1: sub_id = sub.split(',',1)[0] other_id = sub.split(',',1)[1] elif sub.count(',') == 2: sub_id = sub.split(',',2)[0] scan_id = sub.split(',',2)[1] session_id = sub.split(',',2)[2] ''' drop subjects from the group subject list ''' # check the path files in path_files_here folder in the subject's # output folder - and drop any subjects from the group analysis # subject list which do not exist in the paths to the output files for path in s_paths: if (c.repeatedMeasures == True): if sub.count(',') == 1: if (sub_id in path) and (other_id in path): exist_paths.append(sub) elif sub.count(',') == 2: if (sub_id in path) and (scan_id in path) and \ (session_id in path): exist_paths.append(sub) else: if sub in path: exist_paths.append(sub) # check to see if any derivatives of subjects are missing if len(list(set(subject_list) - set(exist_paths))) >0: print("List of outputs missing for subjects:") print(list(set(subject_list) - set(exist_paths))) print("..for derivatives:") print(resource) print("..at paths:") print(os.path.dirname(s_paths[0]).replace(s_ids[0], '*')) # create the path string for the group analysis output out_dir = os.path.dirname(s_paths[0]).split(p_id[0] + '/') out_dir = os.path.join(conf.output_dir, out_dir[1]) out_dir = out_dir.replace(s_ids[0], 'group_analysis_results_%s/_grp_model_%s'%(p_id[0],conf.model_name)) mod_path = os.path.join(out_dir, 'model_files') if not os.path.isdir(mod_path): os.makedirs(mod_path) ''' write the new subject list ''' new_sub_file = os.path.join(mod_path, os.path.basename(conf.subject_list)) try: f = open(new_sub_file, 'w') for sub in exist_paths: print(sub, file=f) f.close() except: print("Error: Could not open subject list file: ", new_sub_file) raise Exception conf.update('subject_list',new_sub_file) sub_id = conf.subject_id_label if measure_dict != None: conf.update('pheno_file',get_phenotypic_file(conf.pheno_file, measure_dict, measure_list, mod_path, sub_id)) print('conf updated pheno: ', conf.pheno_file, 
'\n\n') print("Model config dictionary ->") print(conf.__dict__) # Run 'create_fsl_model' script to extract phenotypic data from # the phenotypic file for each of the subjects in the subject list ''' get the motion statistics parameter file, if present ''' # get the parameter file so it can be passed to create_fsl_model.py # so MeanFD or other measures can be included in the design matrix parameter_file = os.path.join(c.outputDirectory, p_id[0], '%s_threshold_%s_all_params.csv'%(scan_ids[0].strip('_'),threshold_val)) if 1 in c.runGenerateMotionStatistics: if not os.path.exists(parameter_file): print('\n\n[!] CPAC says: Could not open the parameter file. ' \ 'If Generate Motion Statistics is enabled, this can ' \ 'usually be found in the output directory of your ' \ 'individual-level analysis runs.\n') print('Path not found: ', parameter_file, '\n\n') raise Exception elif (1 not in c.runGenerateMotionStatistics) and (os.path.exists(parameter_file)): if not os.path.exists(parameter_file): print('\n\n[!] CPAC says: Could not open the parameter file. ' \ 'If Generate Motion Statistics is enabled, this can ' \ 'usually be found in the output directory of your ' \ 'individual-level analysis runs.\n') print('Path not found: ', parameter_file, '\n\n') raise Exception else: def no_measures_error(measure): print('\n\n[!] CPAC says: The measure %s was included in ' \ 'your group analysis design matrix formula, but ' \ 'Generate Motion Statistics was not run during ' \ 'individual-level analysis.\n' % measure) print('Please run Generate Motion Statistics if you wish ' \ 'to include this measure in your model.\n') print('If you HAVE completed a run with this option ' \ 'enabled, then you are seeing this error because ' \ 'the motion parameter file normally created by this ' \ 'option is missing.\n\n') raise Exception for measure in measure_list: if (measure in conf.design_formula): no_measures_error(measure) parameter_file = None ''' run create_fsl_model.py to generate the group analysis models ''' # path to the pipeline folder to be passed to create_fsl_model.py # so that certain files like output_means.csv can be accessed pipeline_path = os.path.join(c.outputDirectory, p_id[0]) # the current output that cpac_group_analysis_pipeline.py and # create_fsl_model.py is currently being run for current_output = s_paths[0].replace(pipeline_path, '').split('/')[2] try: from CPAC.utils import create_fsl_model create_fsl_model.run(conf, fTest, parameter_file, pipeline_path, current_output, True) #print >>diag, "> Runs create_fsl_model." 
#print >>diag, "" except Exception as e: print("FSL Group Analysis model not successfully created - error in create_fsl_model script") #print "Error ->", e raise model_sub_list.append((conf.output_dir, conf.subject_list)) if len(model_sub_list) == 0: raise Exception("no model found") ''' start group analysis ''' print('\n\nPreparing the group analysis workflow..\n\n') for model_sub in model_sub_list: #print >>diag, "Current model_sub: ", model_sub #print >>diag, "" model, subject_list = model_sub if not os.path.exists(model): raise Exception("path to model %s doesn't exist"%model) if not os.path.exists(subject_list): raise Exception("path to input subject list %s is invalid" % subject_list) #if c.mixedScanAnalysis == True: # wf = pe.Workflow(name = 'group_analysis/%s/grp_model_%s'%(resource, os.path.basename(model))) #else: # s_paths is a list of paths to each subject's derivative (of the current # derivative gpa is being run on) - s_paths_dirList is a list of each directory # in this path separated into list elements s_paths_dirList = s_paths[0].split('/') currentDerivativeFile = s_paths_dirList[-1] currentDerivative = currentDerivativeFile.split('.')[0] currentDerivative = currentDerivative.replace('#', '_') strgy_path = os.path.dirname(s_paths[0]).split(scan_ids[0])[1] for ch in ['.']: if ch in strgy_path: strgy_path = strgy_path.replace(ch, '_') # create nipype-workflow-name-friendly strgy_path # (remove special characters) strgy_path_name = strgy_path.replace('/', '__') wf = pe.Workflow(name = currentDerivative) workDir = c.workingDirectory + '/group_analysis__%s__grp_model_%s__%s' % (resource, conf.model_name, scan_ids[0]) workDir = workDir + '/' + strgy_path_name wf.base_dir = workDir wf.config['execution'] = {'hash_method': 'timestamp', 'crashdump_dir': os.path.abspath(c.crashLogDirectory)} log_dir = os.path.join(conf.output_dir, 'logs', 'group_analysis', resource, 'model_%s' % (conf.model_name)) if not os.path.exists(log_dir): os.makedirs(log_dir) else: print("log_dir already exist") # enable logging from nipype import config from nipype import logging config.update_config({'logging': {'log_directory': log_dir, 'log_to_file': True}}) # Temporarily disable until solved #logging.update_logging(config) iflogger = logging.getLogger('interface') ''' create the list of paths to all output files to go to model ''' # create the 'ordered_paths' list, which is a list of all of the # output paths of the output files being included in the current # group-level analysis model # 'ordered_paths' is later connected to the 'zmap_files' input # of the group analysis workflow - the files listed in this list # are merged into the merged 4D file that goes into group analysis group_sublist = open(subject_list, 'r') sublist_items = group_sublist.readlines() input_subject_list = [line.rstrip('\n') for line in sublist_items \ if not (line == '\n') and not line.startswith('#')] ordered_paths = [] pathcount = 0 subcount = 0 for sub in input_subject_list: subcount += 1 if (c.repeatedMeasures == True): # sub.count(',') equals 1 when there is either multiple scans # or multiple sessions but not both, for repeated measures # sub.count(',') equals 2 when there are multiple sessions # AND scans, for repeated measures if sub.count(',') == 1: sub_id = sub.split(',',1)[0] other_id = sub.split(',',1)[1] elif sub.count(',') == 2: sub_id = sub.split(',',2)[0] scan_id = sub.split(',',2)[1] session_id = sub.split(',',2)[2] for path in s_paths: if (c.repeatedMeasures == True): # if repeated measures is enabled, make sure 
all of the # relevant indicators are in the path before adding it # to 'ordered_paths', i.e. the session and/or scan IDs if sub.count(',') == 1: if (sub_id in path) and (other_id in path): pathcount += 1 ordered_paths.append(path) elif sub.count(',') == 2: if (sub_id in path) and (scan_id in path) and \ (session_id in path): pathcount += 1 ordered_paths.append(path) else: if sub in path: pathcount += 1 ordered_paths.append(path) print('S_paths length: ', len(s_paths)) print("Ordered paths length (number of subjects): ", len(ordered_paths)) print("input_subject_list -> %s" % input_subject_list) print("strgy_path: ", strgy_path) if len(ordered_paths) == 0: print('\n\n\n[!] CPAC says: None of the subjects listed in the ' \ 'group analysis subject list were found to have outputs ' \ 'produced by individual-level analysis.\n\nEnsure that ' \ 'the subjects listed in your group analysis subject list ' \ 'are the same as the ones included in the individual-' \ 'level analysis you are running group-level analysis for.' \ '\n\n\n') raise Exception # gp_flow # Extracts the model files (.con, .grp, .mat, .fts) from the model # directory and sends them to the create_group_analysis workflow gpa_wf gp_flow = create_grp_analysis_dataflow("gp_dataflow_%s" % currentDerivative) gp_flow.inputs.inputspec.grp_model = model gp_flow.inputs.inputspec.fTest = fTest # gpa_wf # Creates the actual group analysis workflow gpa_wf = create_group_analysis(fTest, "gp_analysis_%s" % currentDerivative) gpa_wf.inputs.inputspec.zmap_files = ordered_paths gpa_wf.inputs.inputspec.z_threshold = c.zThreshold gpa_wf.inputs.inputspec.p_threshold = c.pThreshold gpa_wf.inputs.inputspec.parameters = (c.FSLDIR, 'MNI152') print("group model: ", model) print("f test: ", fTest) print("z threshold: ", c.zThreshold) print("p threshold: ", c.pThreshold) print("parameters: ", (c.FSLDIR, 'MNI152')) wf.connect(gp_flow, 'outputspec.mat', gpa_wf, 'inputspec.mat_file') wf.connect(gp_flow, 'outputspec.con', gpa_wf, 'inputspec.con_file') wf.connect(gp_flow, 'outputspec.grp', gpa_wf, 'inputspec.grp_file') if fTest: wf.connect(gp_flow, 'outputspec.fts', gpa_wf, 'inputspec.fts_file') # ds # Creates the datasink node for group analysis ds = pe.Node(nio.DataSink(), name='gpa_sink') if 'sca_roi' in resource: out_dir = os.path.join(out_dir, \ re.search('ROI_number_(\d)+',os.path.splitext(os.path.splitext(os.path.basename(s_paths[0]))[0])[0]).group(0)) if 'centrality' in resource: names = ['degree_centrality_binarize', 'degree_centrality_weighted', \ 'eigenvector_centrality_binarize', 'eigenvector_centrality_weighted', \ 'lfcd_binarize', 'lfcd_weighted'] for name in names: if name in os.path.basename(s_paths[0]): out_dir = os.path.join(out_dir, name) break if 'tempreg_maps_z_files' in resource: out_dir = os.path.join(out_dir, \ re.search('\w*[#]*\d+', os.path.splitext(os.path.splitext(os.path.basename(s_paths[0]))[0])[0]).group(0)) # if c.mixedScanAnalysis == True: # out_dir = re.sub(r'(\w)*scan_(\w)*(\d)*(\w)*[/]', '', out_dir) ds.inputs.base_directory = out_dir ds.inputs.container = '' ds.inputs.regexp_substitutions = [(r'(?<=rendered)(.)*[/]','/'), (r'(?<=model_files)(.)*[/]','/'), (r'(?<=merged)(.)*[/]','/'), (r'(?<=stats/clusterMap)(.)*[/]','/'), (r'(?<=stats/unthreshold)(.)*[/]','/'), (r'(?<=stats/threshold)(.)*[/]','/'), (r'_cluster(.)*[/]',''), (r'_slicer(.)*[/]',''), (r'_overlay(.)*[/]','')] ''' if 1 in c.runSymbolicLinks: link_node = pe.MapNode(interface=util.Function( input_names=['in_file', 'resource'], output_names=[], 
function=prepare_gp_links), name='link_gp_', iterfield=['in_file']) link_node.inputs.resource = resource wf.connect(ds, 'out_file', link_node, 'in_file') ''' ########datasink connections######### if fTest: wf.connect(gp_flow, 'outputspec.fts', ds, 'model_files.@0') wf.connect(gp_flow, 'outputspec.mat', ds, 'model_files.@1' ) wf.connect(gp_flow, 'outputspec.con', ds, 'model_files.@2') wf.connect(gp_flow, 'outputspec.grp', ds, 'model_files.@3') wf.connect(gpa_wf, 'outputspec.merged', ds, 'merged') wf.connect(gpa_wf, 'outputspec.zstats', ds, 'stats.unthreshold') wf.connect(gpa_wf, 'outputspec.zfstats', ds,'stats.unthreshold.@01') wf.connect(gpa_wf, 'outputspec.fstats', ds,'stats.unthreshold.@02') wf.connect(gpa_wf, 'outputspec.cluster_threshold_zf', ds, 'stats.threshold') wf.connect(gpa_wf, 'outputspec.cluster_index_zf', ds,'stats.clusterMap') wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt_zf', ds, 'stats.clusterMap.@01') wf.connect(gpa_wf, 'outputspec.overlay_threshold_zf', ds, 'rendered') wf.connect(gpa_wf, 'outputspec.rendered_image_zf', ds, 'rendered.@01') wf.connect(gpa_wf, 'outputspec.cluster_threshold', ds, 'stats.threshold.@01') wf.connect(gpa_wf, 'outputspec.cluster_index', ds, 'stats.clusterMap.@02') wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt', ds, 'stats.clusterMap.@03') wf.connect(gpa_wf, 'outputspec.overlay_threshold', ds, 'rendered.@02') wf.connect(gpa_wf, 'outputspec.rendered_image', ds, 'rendered.@03') ###################################### # Run the actual group analysis workflow wf.run() ''' except: print "Error: Group analysis workflow run command did not complete successfully." print "subcount: ", subcount print "pathcount: ", pathcount print "sublist: ", sublist_items print "input subject list: " print "conf: ", conf.subjectListFile raise Exception ''' print("**Workflow finished for model %s and resource %s"%(os.path.basename(model), resource))
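# ---------------------------------------------------------------------------
# Sketch (not C-PAC code) of the ordering step used above when building
# 'ordered_paths': the per-subject output files are re-ordered to follow the
# group subject list exactly, because the volumes of the merged 4D file must
# line up with the rows of the FSL design matrix. Paths are hypothetical.
# ---------------------------------------------------------------------------
def order_paths_by_subjects(subject_ids, output_paths):
    ordered = []
    for sub in subject_ids:
        for path in output_paths:
            if sub in path:
                ordered.append(path)
    return ordered

if __name__ == '__main__':
    subs = ['sub-02', 'sub-01']
    paths = ['/out/sub-01/reho/reho.nii.gz',
             '/out/sub-02/reho/reho.nii.gz']
    assert order_paths_by_subjects(subs, paths) == [
        '/out/sub-02/reho/reho.nii.gz',
        '/out/sub-01/reho/reho.nii.gz']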
class ListBox(wx.Frame): def __init__(self, parent, id, title): wx.Frame.__init__(self, parent, id, title, size=(700, 650), style=wx.SYSTEM_MENU | wx.CAPTION | wx.CLOSE_BOX) # Import packages import CPAC self.CreateStatusBar() self.SetStatusText("The Configurable Pipeline for the Analysis of " "Connectomes (C-PAC) v" + CPAC.__version__) self.pipeline_map = {} self.sublist_map = {} self.pids = [] mainPanel = wx.Panel(self) mainPanel.SetBackgroundColour('#E9E3DB') mainSizer = wx.BoxSizer(wx.VERTICAL) outerPanel1 = wx.Panel(mainPanel) outerSizer1 = wx.BoxSizer(wx.HORIZONTAL) outerPanel2 = wx.Panel(mainPanel) outerSizer2 = wx.BoxSizer(wx.HORIZONTAL) outerPanel3 = wx.Panel(mainPanel) outerSizer3 = wx.BoxSizer(wx.HORIZONTAL) innerPanel1 = wx.Panel(outerPanel1) innerSizer1 = wx.BoxSizer(wx.HORIZONTAL) innerPanel2 = wx.Panel(outerPanel1, ) innerSizer2 = wx.BoxSizer(wx.HORIZONTAL) lboxPanel1 = wx.Panel(innerPanel1) lboxSizer1 = wx.BoxSizer(wx.VERTICAL) btnPanel1 = wx.Panel(innerPanel1, -1) btnSizer1 = wx.BoxSizer(wx.VERTICAL) label = wx.StaticText(lboxPanel1, -1, "Pipelines") if 'linux' in sys.platform: label.SetFont(wx.Font(12, wx.SWISS, wx.NORMAL, wx.BOLD)) else: label.SetFont(wx.Font(16, wx.SWISS, wx.NORMAL, wx.BOLD)) self.listbox = wx.CheckListBox(lboxPanel1, -1, size=(160, 400)) lboxSizer1.Add(label, 0, wx.ALIGN_CENTER) lboxSizer1.Add(self.listbox, 1, wx.EXPAND | wx.ALL, 10) lboxPanel1.SetSizer(lboxSizer1) lboxPanel1.SetBackgroundColour('#E9E3DB') new = wx.Button(btnPanel1, ID_NEW, 'New', size=(90, 30)) group = wx.Button(btnPanel1, ID_GROUP, 'New Group', size=(90, 30)) ren = wx.Button(btnPanel1, ID_RENAME, 'Rename', size=(90, 30)) dlt = wx.Button(btnPanel1, ID_DELETE, 'Delete', size=(90, 30)) load = wx.Button(btnPanel1, ID_LOAD, 'Load', size=(90, 30)) edit = wx.Button(btnPanel1, ID_EDIT, 'Edit', size=(90, 30)) shw = wx.Button(btnPanel1, ID_DISPLAY, 'View', size=(90, 30)) clr = wx.Button(btnPanel1, ID_CLEAR, 'Clear', size=(90, 30)) self.Bind(wx.EVT_BUTTON, self.NewItem, id=ID_NEW) self.Bind(wx.EVT_BUTTON, self.NewGroup, id=ID_GROUP) self.Bind(wx.EVT_BUTTON, self.OnRename, id=ID_RENAME) self.Bind(wx.EVT_BUTTON, self.OnDelete, id=ID_DELETE) self.Bind(wx.EVT_BUTTON, self.AddConfig, id=ID_LOAD) self.Bind(wx.EVT_BUTTON, self.OnEdit, id=ID_EDIT) self.Bind(wx.EVT_BUTTON, self.OnDisplay, id=ID_DISPLAY) self.Bind(wx.EVT_BUTTON, lambda event: self.OnClear(event, 1), id=ID_CLEAR) self.Bind(wx.EVT_LISTBOX_DCLICK, self.OnDisplay) if 'linux' in sys.platform: btnSizer1.Add((-1, 30)) else: btnSizer1.Add((-1, 27)) btnSizer1.Add(new, 0, wx.TOP) btnSizer1.Add(group, 0, wx.TOP) btnSizer1.Add(load, 0, wx.TOP) btnSizer1.Add(edit, 0, wx.TOP) btnSizer1.Add(shw, 0, wx.TOP) btnSizer1.Add(ren, 0, wx.TOP) btnSizer1.Add(dlt, 0, wx.TOP) btnSizer1.Add(clr, 0, wx.TOP) btnPanel1.SetSizer(btnSizer1) btnPanel1.SetBackgroundColour('#E9E3DB') innerSizer1.Add(lboxPanel1, 1, wx.EXPAND | wx.ALL) if 'linux' in sys.platform: innerSizer1.Add(btnPanel1, 1, wx.EXPAND | wx.ALL, 5) else: innerSizer1.Add(btnPanel1, 1, wx.EXPAND | wx.ALL) innerPanel1.SetSizer(innerSizer1) innerPanel1.SetBackgroundColour('#E9E3DB') lboxPanel2 = wx.Panel(innerPanel2) lboxSizer2 = wx.BoxSizer(wx.VERTICAL) btnPanel2 = wx.Panel(innerPanel2, -1) btnSizer2 = wx.BoxSizer(wx.VERTICAL) label2 = wx.StaticText(lboxPanel2, -1, "Data Configurations") if 'linux' in sys.platform: label2.SetFont(wx.Font(12, wx.SWISS, wx.NORMAL, wx.BOLD)) else: label2.SetFont(wx.Font(16, wx.SWISS, wx.NORMAL, wx.BOLD)) self.listbox2 = wx.CheckListBox(lboxPanel2, -1, size=(160, 400)) 
self.listbox2.Bind(wx.EVT_LISTBOX_DCLICK, self.OnShow) lboxSizer2.Add(label2, 0, wx.ALIGN_CENTER) lboxSizer2.Add(self.listbox2, 1, wx.EXPAND | wx.ALL, 10) lboxPanel2.SetSizer(lboxSizer2) lboxPanel2.SetBackgroundColour('#E9E3DB') create = wx.Button(btnPanel2, ID_CREATE, 'New', size=(90, 30)) add = wx.Button(btnPanel2, ID_ADD, 'Load', size=(90, 30)) show = wx.Button(btnPanel2, ID_SHOW, 'View', size=(90, 30)) clr2 = wx.Button(btnPanel2, ID_CLEARALL, 'Clear', size=(90, 30)) self.Bind(wx.EVT_BUTTON, self.CreateItem, id=ID_CREATE) self.Bind(wx.EVT_BUTTON, self.AddItem, id=ID_ADD) self.Bind(wx.EVT_BUTTON, self.OnShow, id=ID_SHOW) self.Bind(wx.EVT_BUTTON, lambda event: self.OnClear(event, 2), id=ID_CLEARALL) if 'linux' in sys.platform: btnSizer2.Add((-1, 30)) else: btnSizer2.Add((-1, 27)) # Add buttons to button sizer btnSizer2.Add(create, 0, wx.TOP) btnSizer2.Add(add, 0, wx.TOP) btnSizer2.Add(show, 0, wx.TOP) btnSizer2.Add(clr2, 0, wx.TOP) btnPanel2.SetSizer(btnSizer2) btnPanel2.SetBackgroundColour('#E9E3DB') innerSizer2.Add(lboxPanel2, 1, wx.EXPAND | wx.ALL) if 'linux' in sys.platform: innerSizer2.Add(btnPanel2, 1, wx.EXPAND | wx.ALL, 5) else: innerSizer2.Add(btnPanel2, 1, wx.EXPAND | wx.ALL) innerPanel2.SetSizer(innerSizer2) innerPanel2.SetBackgroundColour('#E9E3DB') outerSizer1.Add(innerPanel2, 1, wx.EXPAND | wx.ALL) outerSizer1.Add(innerPanel1, 1, wx.EXPAND | wx.ALL) outerPanel1.SetSizer(outerSizer1) outerPanel1.SetBackgroundColour('#E9E3DB') self.runCPAC1 = wx.Button(outerPanel2, -1, 'Run Individual Level Analysis') self.runCPAC1.Bind(wx.EVT_BUTTON, self.runIndividualAnalysis) self.runCPAC2 = wx.Button(outerPanel2, -1, 'Run Group Level Analysis') self.runCPAC2.Bind(wx.EVT_BUTTON, self.runGroupLevelAnalysis) self.openPresets = wx.Button(outerPanel3, -1, 'Generate FSL-FEAT Presets') self.openPresets.Bind(wx.EVT_BUTTON, self.openFSLPresets) self.buildModels = wx.Button(outerPanel3, -1, 'Build FSL-FEAT Models') self.buildModels.Bind(wx.EVT_BUTTON, self.buildFSLModels) outerSizer2.Add(self.runCPAC1, 1, wx.RIGHT, 12) outerSizer2.Add(self.runCPAC2, 1, wx.LEFT, 12) outerSizer3.Add(self.openPresets, 1, wx.RIGHT, 12) outerSizer3.Add(self.buildModels, 1, wx.LEFT, 12) #outerSizer3.Add(self.stopCPAC1, 1, wx.RIGHT, 20) #outerSizer3.Add(self.stopCPAC2, 1, wx.LEFT, 20) outerPanel2.SetSizer(outerSizer2) outerPanel2.SetBackgroundColour('#E9E3DB') outerPanel3.SetSizer(outerSizer3) outerPanel3.SetBackgroundColour('#E9E3DB') hbox = wx.BoxSizer(wx.HORIZONTAL) text1 = wx.StaticText(mainPanel, -1, "Configure CPAC") if 'linux' in sys.platform: text1.SetFont(wx.Font(14, wx.SWISS, wx.NORMAL, wx.BOLD)) else: text1.SetFont(wx.Font(18, wx.SWISS, wx.NORMAL, wx.BOLD)) img = wx.Image( p.resource_filename('CPAC', 'GUI/resources/images/cpac_new_logo.png'), wx.BITMAP_TYPE_PNG).ConvertToBitmap() logo = wx.StaticBitmap(mainPanel, -1, img) hbox.Add(text1, 1, wx.TOP | wx.EXPAND, 15) hbox.Add(logo, 0, wx.ALIGN_RIGHT | wx.RIGHT) text2 = wx.StaticText(mainPanel, -1, "Run CPAC") if 'linux' in sys.platform: text2.SetFont(wx.Font(14, wx.SWISS, wx.NORMAL, wx.BOLD)) else: text2.SetFont(wx.Font(18, wx.SWISS, wx.NORMAL, wx.BOLD)) mainSizer.Add(hbox, 0, wx.EXPAND | wx.ALL, 10) mainSizer.Add(outerPanel1, 1, wx.EXPAND | wx.ALL, 20) mainSizer.Add(wx.StaticLine(mainPanel), 0, wx.EXPAND | wx.TOP | wx.BOTTOM, 10) mainSizer.Add(text2, 0, wx.EXPAND | wx.ALL, 5) mainSizer.Add(outerPanel2, 0, wx.EXPAND | wx.ALL, 5) mainSizer.Add(outerPanel3, 0, wx.EXPAND | wx.ALL, 5) mainPanel.SetSizer(mainSizer) self.Centre() self.Show(True) def runAnalysis1(self, 
pipeline, sublist, p):
        try:
            import CPAC
            from CPAC.utils import Configuration
        except ImportError as e:
            wx.MessageBox("Error importing CPAC. %s" % e, "Error")
            print "Error importing CPAC"
            print e

        c = Configuration(yaml.load(open(os.path.realpath(pipeline), 'r')))

        plugin_args = {
            'n_procs': c.maxCoresPerParticipant,
            'memory_gb': c.maximumMemoryPerParticipant
        }

        # TODO: guard against launching a second run while one is in progress
        if self.pids:
            #print "THERE'S SOMETHING RUNNING!"
            pass

        CPAC.pipeline.cpac_runner.run(pipeline, sublist, p,
                                      plugin='MultiProc',
                                      plugin_args=plugin_args)
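# ---------------------------------------------------------------------------
# Sketch (not C-PAC code) of one way the GUI could launch the pipeline run in
# a background process and remember its PID (which appears to be what the
# self.pids list above is intended for), so the window stays responsive.
# launch_run() and its arguments are hypothetical.
# ---------------------------------------------------------------------------
from multiprocessing import Process

def launch_run(run_callable, args, pid_registry):
    proc = Process(target=run_callable, args=args)
    proc.start()
    pid_registry.append(proc.pid)    # e.g. self.pids in the GUI frame
    return proc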
def prep_group_analysis_workflow(model_df, pipeline_config_obj, \ model_name, group_config_obj, resource_id, preproc_strat, \ series_or_repeated_label): # # this function runs once per derivative type and preproc strat combo # during group analysis # import os import nipype.pipeline.engine as pe import nipype.interfaces.utility as util import nipype.interfaces.io as nio pipeline_ID = pipeline_config_obj.pipeline_name # get thresholds z_threshold = float(group_config_obj.z_threshold[0]) p_threshold = float(group_config_obj.p_threshold[0]) sub_id_label = group_config_obj.subject_id_label # determine if f-tests are included or not custom_confile = group_config_obj.custom_contrasts if ((custom_confile == None) or (custom_confile == '') or \ ("None" in custom_confile) or ("none" in custom_confile)): if (len(group_config_obj.f_tests) == 0) or \ (group_config_obj.f_tests == None): fTest = False else: fTest = True else: if not os.path.exists(custom_confile): errmsg = "\n[!] CPAC says: You've specified a custom contrasts " \ ".CSV file for your group model, but this file cannot " \ "be found. Please double-check the filepath you have " \ "entered.\n\nFilepath: %s\n\n" % custom_confile raise Exception(errmsg) with open(custom_confile,"r") as f: evs = f.readline() evs = evs.rstrip('\r\n').split(',') count_ftests = 0 fTest = False for ev in evs: if "f_test" in ev: count_ftests += 1 if count_ftests > 0: fTest = True # create path for output directory out_dir = os.path.join(group_config_obj.output_dir, \ "group_analysis_results_%s" % pipeline_ID, \ "group_model_%s" % model_name, resource_id, \ series_or_repeated_label, preproc_strat) model_path = os.path.join(out_dir, 'model_files') # generate working directory for this output's group analysis run work_dir = os.path.join(c.workingDirectory, "group_analysis", model_name,\ resource_id, series_or_repeated_label, preproc_strat) log_dir = os.path.join(out_dir, 'logs', resource_id, \ 'model_%s' % model_name) # create the actual directories if not os.path.isdir(model_path): try: os.makedirs(model_path) except Exception as e: err = "\n\n[!] Could not create the group analysis output " \ "directories.\n\nAttempted directory creation: %s\n\n" \ "Error details: %s\n\n" % (model_path, e) raise Exception(err) if not os.path.isdir(work_dir): try: os.makedirs(work_dir) except Exception as e: err = "\n\n[!] Could not create the group analysis working " \ "directories.\n\nAttempted directory creation: %s\n\n" \ "Error details: %s\n\n" % (model_path, e) raise Exception(err) if not os.path.isdir(log_dir): try: os.makedirs(log_dir) except Exception as e: err = "\n\n[!] 
Could not create the group analysis logfile " \ "directories.\n\nAttempted directory creation: %s\n\n" \ "Error details: %s\n\n" % (model_path, e) raise Exception(err) # create new subject list based on which subjects are left after checking # for missing outputs new_participant_list = [] for part in list(model_df["Participant"]): # do this instead of using "set" just in case, to preserve order # only reason there may be duplicates is because of multiple-series # repeated measures runs if part not in new_participant_list: new_participant_list.append(part) new_sub_file = write_new_sub_file(model_path, \ group_config_obj.participant_list, \ new_participant_list) group_conf.update('participant_list',new_sub_file) # start processing the dataframe further design_formula = group_config_obj.design_formula # demean the motion params if ("MeanFD" in design_formula) or ("MeanDVARS" in design_formula): params = ["MeanFD_Power", "MeanFD_Jenkinson", "MeanDVARS"] for param in params: model_df[param] = model_df[param].astype(float) model_df[param] = model_df[param].sub(model_df[param].mean()) # create 4D merged copefile, in the correct order, identical to design # matrix merge_outfile = model_name + "_" + resource_id + "_merged.nii.gz" merge_outfile = os.path.join(model_path, merge_outfile) merge_file = create_merged_copefile(list(model_df["Filepath"]), \ merge_outfile) # create merged group mask if group_config_obj.mean_mask[0] == "Group Mask": merge_mask_outfile = os.path.basename(merge_file) + "_mask.nii.gz" merge_mask = create_merged_mask(merge_file, merge_mask_outfile) # calculate measure means, and demean if "Measure_Mean" in design_formula: model_df = calculate_measure_mean_in_df(model_df, merge_mask) # calculate custom ROIs, and demean (in workflow?) if "Custom_ROI_Mean" in design_formula: custom_roi_mask = group_config_obj.custom_roi_mask if (custom_roi_mask == None) or (custom_roi_mask == "None") or \ (custom_roi_mask == "none") or (custom_roi_mask == ""): err = "\n\n[!] You included 'Custom_ROI_Mean' in your design " \ "formula, but you didn't supply a custom ROI mask file." 
\ "\n\nDesign formula: %s\n\n" % design_formula raise Exception(err) # make sure the custom ROI mask file is the same resolution as the # output files - if not, resample and warn the user roi_mask = check_mask_file_resolution(list(model_df["Raw_Filepath"])[0], \ custom_roi_mask, model_path, \ resource_id) # if using group merged mask, trim the custom ROI mask to be within # its constraints if merge_mask: output_mask = os.path.join(model_path, "group_masked_%s" \ % os.path.basename(input_mask)) roi_mask = trim_mask(roi_mask, merge_mask, output_mask) # calculate model_df = calculate_custom_roi_mean_in_df(model_df, roi_mask) # modeling group variances separately # add repeated measures 1's matrices # patsify model DF, drop columns not in design formula # process contrasts wf = pe.Workflow(name=resource_id) wf.base_dir = work_dir crash_dir = os.path.join(pipeline_config_obj.crashLogDirectory, \ "group_analysis", model_name) wf.config['execution'] = {'hash_method': 'timestamp', \ 'crashdump_dir': crash_dir} if "Measure_Mean" in design_formula: measure_mean = pe.Node(util.Function(input_names=['model_df', 'merge_mask'], output_names=['model_df'], function=calculate_measure_mean_in_df), name='measure_mean') measure_mean.inputs.model_df = model_df wf.connect(merge_mask, "out_file", measure_mean, "merge_mask") if "Custom_ROI_Mean" in design_formula: roi_mean = pe.Node(util.Function()) group_config_obj.custom_roi_mask #---------------- import yaml import pandas as pd # load group analysis model configuration file try: with open(os.path.realpath(group_config_file),"r") as f: group_conf = Configuration(yaml.load(f)) except Exception as e: err_string = "\n\n[!] CPAC says: Could not read group model " \ "configuration YML file. Ensure you have read access " \ "for the file and that it is formatted properly.\n\n" \ "Configuration file: %s\n\nError details: %s" \ % (group_config_file, e) raise Exception(err_string) # gather all of the information # - lists of all the participant unique IDs (participant_site_session) and # of all of the series IDs present in output_file_list # - also returns the pipeline ID new_participant_list, all_series_names, pipeline_ID = \ gather_new_participant_list(output_path_file, output_file_list) # create the path string for the group analysis output # replicate the directory path of one of the participant's output # folder path to the derivative's file, but replace the participant ID # with the group model name # this is to ensure nothing gets overwritten between strategies # or thresholds, etc. out_dir = os.path.dirname(output_file_list[0]).split(pipeline_ID + '/') out_dir = out_dir[1].split(out_dir[1].split("/")[-1])[0] out_dir = os.path.join(group_conf.output_dir, out_dir) out_dir = out_dir.replace(new_participant_list[0], \ 'group_analysis_results_%s/_grp_model_%s' \ % (pipeline_ID, group_conf.model_name)) # !!!!!!!!!! 
if (group_conf.repeated_measures == True) and (series_ids[0] != None): out_dir = out_dir.replace(series_ids[0] + "/", "multiple_series") # create model file output directories model_out_dir = os.path.join(group_conf.output_dir, \ 'group_analysis_results_%s/_grp_model_%s' \ %(pipeline_ID, group_conf.model_name)) mod_path = os.path.join(model_out_dir, 'model_files') if not os.path.isdir(mod_path): os.makedirs(mod_path) # current_mod_path = folder under # "/gpa_output/_grp_model_{model name}/model_files/{current derivative}" current_mod_path = os.path.join(mod_path, resource) if not os.path.isdir(current_mod_path): os.makedirs(current_mod_path) # create new subject list based on which subjects are left after checking # for missing outputs new_sub_file = write_new_sub_file(current_mod_path, \ group_conf.subject_list, new_participant_list) group_conf.update('subject_list',new_sub_file) # create new design matrix with only the subjects that are left # Run 'create_fsl_model' script to extract phenotypic data from # the phenotypic file for each of the subjects in the subject list # get the motion statistics parameter file, if present # get the parameter file so it can be passed to create_fsl_model.py # so MeanFD or other measures can be included in the design matrix ''' okay, here we go... how are we handling series? because here it needs to take in ''' ''' the appropriate series to get the appropriate parameter file ! ! ! ''' ''' MAY HAVE TO GO BACK ON THIS, and just have one series sent in per this function...''' power_params_files = {} measure_list = ['MeanFD_Power', 'MeanFD_Jenkinson', 'MeanDVARS'] for measure in measure_list: if measure in group_conf.design_formula: for series_id in all_series_names: parameter_file = os.path.join(c.outputDirectory, \ pipeline_ID, \ '%s%s_all_params.csv' % \ (series_id.strip('_'), \ threshold_val)) if not os.path.exists(parameter_file): err = "\n\n[!] CPAC says: Could not find or open the motion "\ "parameter file. This is necessary if you have " \ "included any of the MeanFD measures in your group " \ "model.\n\nThis file can usually be found in the " \ "output directory of your individual-level analysis " \ "runs. 
If it is not there, double-check to see if " \ "individual-level analysis had completed successfully."\ "\n\nPath not found: %s\n\n" % parameter_file raise Exception(err) power_params_files[series_id] = parameter_file break else: power_params_files = None # path to the pipeline folder to be passed to create_fsl_model.py # so that certain files like output_means.csv can be accessed pipeline_path = os.path.join(c.outputDirectory, pipeline_ID) # generate working directory for this output's group analysis run workDir = '%s/group_analysis/%s/%s' % (c.workingDirectory, \ group_conf.model_name, \ resource) # this makes strgy_path basically the directory path of the folders after # the resource/derivative folder level strgy_path = os.path.dirname(output_file_list[0]).split(resource)[1] # get rid of periods in the path for ch in ['.']: if ch in strgy_path: strgy_path = strgy_path.replace(ch, "") # create nipype-workflow-name-friendly strgy_path # (remove special characters) strgy_path_name = strgy_path.replace('/', "_") workDir = workDir + '/' + strgy_path_name # merge the subjects for this current output # then, take the group mask, and iterate over the list of subjects # to extract the mean of each subject using the group mask merge_output, merge_mask_output, merge_output_dir = \ create_merged_files(workDir, resource, output_file_list) # CALCULATE THE MEANS of each output using the group mask derivative_means_dict, roi_means_dict = \ calculate_output_means(resource, output_file_list, \ group_conf.mean_mask, \ group_conf.design_formula, \ group_conf.custom_roi_mask, pipeline_path, \ merge_output_dir, c.identityMatrix) measure_dict = {} # extract motion measures from CPAC-generated power params file if power_params_files != None: for param_file in power_params_files.values(): new_measure_dict = get_measure_dict(param_file) measure_dict.update(new_measure_dict) # combine the motion measures dictionary with the measure_mean # dictionary (if it exists) if derivative_means_dict: measure_dict["Measure_Mean"] = derivative_means_dict # run create_fsl_model.py to generate the group analysis models from CPAC.utils import create_fsl_model, kill_me create_fsl_model.run(group_conf, resource, parameter_file, \ derivative_means_dict, roi_means_dict, \ current_mod_path, True) # begin GA workflow setup if not os.path.exists(new_sub_file): raise Exception("path to input subject list %s is invalid" % new_sub_file) #if c.mixedScanAnalysis == True: # wf = pe.Workflow(name = 'group_analysis/%s/grp_model_%s'%(resource, os.path.basename(model))) #else: wf = pe.Workflow(name = resource) wf.base_dir = workDir wf.config['execution'] = {'hash_method': 'timestamp', 'crashdump_dir': os.path.abspath(c.crashLogDirectory)} log_dir = os.path.join(group_conf.output_dir, 'logs', 'group_analysis', resource, 'model_%s' % (group_conf.model_name)) if not os.path.exists(log_dir): os.makedirs(log_dir) else: pass # gp_flow # Extracts the model files (.con, .grp, .mat, .fts) from the model # directory and sends them to the create_group_analysis workflow gpa_wf gp_flow = create_grp_analysis_dataflow("gp_dataflow_%s" % resource) gp_flow.inputs.inputspec.grp_model = os.path.join(mod_path, resource) gp_flow.inputs.inputspec.model_name = group_conf.model_name gp_flow.inputs.inputspec.ftest = fTest # gpa_wf # Creates the actual group analysis workflow gpa_wf = create_group_analysis(fTest, "gp_analysis_%s" % resource) gpa_wf.inputs.inputspec.merged_file = merge_output gpa_wf.inputs.inputspec.merge_mask = merge_mask_output 
gpa_wf.inputs.inputspec.z_threshold = z_threshold gpa_wf.inputs.inputspec.p_threshold = p_threshold gpa_wf.inputs.inputspec.parameters = (c.FSLDIR, 'MNI152') wf.connect(gp_flow, 'outputspec.mat', gpa_wf, 'inputspec.mat_file') wf.connect(gp_flow, 'outputspec.con', gpa_wf, 'inputspec.con_file') wf.connect(gp_flow, 'outputspec.grp', gpa_wf, 'inputspec.grp_file') if fTest: wf.connect(gp_flow, 'outputspec.fts', gpa_wf, 'inputspec.fts_file') # ds # Creates the datasink node for group analysis ds = pe.Node(nio.DataSink(), name='gpa_sink') if 'sca_roi' in resource: out_dir = os.path.join(out_dir, \ re.search('sca_roi_(\d)+',os.path.splitext(os.path.splitext(os.path.basename(output_file_list[0]))[0])[0]).group(0)) if 'dr_tempreg_maps_zstat_files_to_standard_smooth' in resource: out_dir = os.path.join(out_dir, \ re.search('temp_reg_map_z_(\d)+',os.path.splitext(os.path.splitext(os.path.basename(output_file_list[0]))[0])[0]).group(0)) if 'centrality' in resource: names = ['degree_centrality_binarize', 'degree_centrality_weighted', \ 'eigenvector_centrality_binarize', 'eigenvector_centrality_weighted', \ 'lfcd_binarize', 'lfcd_weighted'] for name in names: if name in os.path.basename(output_file_list[0]): out_dir = os.path.join(out_dir, name) break if 'tempreg_maps' in resource: out_dir = os.path.join(out_dir, \ re.search('\w*[#]*\d+', os.path.splitext(os.path.splitext(os.path.basename(output_file_list[0]))[0])[0]).group(0)) # if c.mixedScanAnalysis == True: # out_dir = re.sub(r'(\w)*scan_(\w)*(\d)*(\w)*[/]', '', out_dir) ds.inputs.base_directory = out_dir ds.inputs.container = '' ds.inputs.regexp_substitutions = [(r'(?<=rendered)(.)*[/]','/'), (r'(?<=model_files)(.)*[/]','/'), (r'(?<=merged)(.)*[/]','/'), (r'(?<=stats/clusterMap)(.)*[/]','/'), (r'(?<=stats/unthreshold)(.)*[/]','/'), (r'(?<=stats/threshold)(.)*[/]','/'), (r'_cluster(.)*[/]',''), (r'_slicer(.)*[/]',''), (r'_overlay(.)*[/]','')] ########datasink connections######### if fTest: wf.connect(gp_flow, 'outputspec.fts', ds, 'model_files.@0') wf.connect(gp_flow, 'outputspec.mat', ds, 'model_files.@1' ) wf.connect(gp_flow, 'outputspec.con', ds, 'model_files.@2') wf.connect(gp_flow, 'outputspec.grp', ds, 'model_files.@3') wf.connect(gpa_wf, 'outputspec.merged', ds, 'merged') wf.connect(gpa_wf, 'outputspec.zstats', ds, 'stats.unthreshold') wf.connect(gpa_wf, 'outputspec.zfstats', ds,'stats.unthreshold.@01') wf.connect(gpa_wf, 'outputspec.fstats', ds,'stats.unthreshold.@02') wf.connect(gpa_wf, 'outputspec.cluster_threshold_zf', ds, 'stats.threshold') wf.connect(gpa_wf, 'outputspec.cluster_index_zf', ds,'stats.clusterMap') wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt_zf', ds, 'stats.clusterMap.@01') wf.connect(gpa_wf, 'outputspec.overlay_threshold_zf', ds, 'rendered') wf.connect(gpa_wf, 'outputspec.rendered_image_zf', ds, 'rendered.@01') wf.connect(gpa_wf, 'outputspec.cluster_threshold', ds, 'stats.threshold.@01') wf.connect(gpa_wf, 'outputspec.cluster_index', ds, 'stats.clusterMap.@02') wf.connect(gpa_wf, 'outputspec.cluster_localmax_txt', ds, 'stats.clusterMap.@03') wf.connect(gpa_wf, 'outputspec.overlay_threshold', ds, 'rendered.@02') wf.connect(gpa_wf, 'outputspec.rendered_image', ds, 'rendered.@03') ###################################### # Run the actual group analysis workflow wf.run() print "\n\nWorkflow finished for model %s and resource %s\n\n" \ % (os.path.basename(group_conf.output_dir), resource)
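# ---------------------------------------------------------------------------
# Sketch (not C-PAC code) of the covariate demeaning done earlier in this
# function: motion measures such as MeanFD are mean-centred column by column
# before entering the group-level design matrix. Values are hypothetical.
# ---------------------------------------------------------------------------
import pandas as pd

def demean_columns(model_df, columns):
    for col in columns:
        if col in model_df:
            model_df[col] = model_df[col].astype(float)
            model_df[col] = model_df[col] - model_df[col].mean()
    return model_df

if __name__ == '__main__':
    df = pd.DataFrame({'Participant': ['s1', 's2'],
                       'MeanFD_Power': ['0.10', '0.30']})
    demeaned = demean_columns(df, ['MeanFD_Power'])
    # approximately [-0.1, 0.1]
    print(demeaned['MeanFD_Power'].tolist())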
def run(config_file, subject_list_file, p_name = None): try: if not os.path.exists(config_file): raise IOError else: c = Configuration(yaml.load(open(os.path.realpath(config_file), 'r'))) except IOError: print "config file %s doesn't exist" %config_file raise except Exception: raise Exception("Error reading config file - %s"%config_file) #do some validation validate(c) try: sublist = yaml.load(open(os.path.realpath(subject_list_file), 'r')) except: raise Exception ("Subject list is not in proper YAML format. Please check your file") strategies = sorted(build_strategies(c)) print "strategies ---> ", strategies sub_ids =[] for sub in sublist: if sub['unique_id']: sub_ids.append(sub['subject_id']+"_" + sub["unique_id"]) else: sub_ids.append(sub['subject_id']) create_group_log_template(sub_ids, os.path.join(c.outputDirectory, 'logs')) seeds_created = [] if not (c.seedSpecificationFile is None): try: if os.path.exists(c.seedSpecificationFile): seeds_created = create_seeds_(c.seedOutputLocation, c.seedSpecificationFile, c.FSLDIR) print 'seeds created %s -> ' % seeds_created except: raise IOError('Problem in seedSpecificationFile') if 1 in c.runVoxelTimeseries: if 2 in c.useSeedInAnalysis: c.maskSpecificationFile = append_seeds_to_file(c.workingDirectory, seeds_created, c.maskSpecificationFile) if 1 in c.runROITimeseries: if 1 in c.useSeedInAnalysis: c.roiSpecificationFile = append_seeds_to_file(c.workingDirectory, seeds_created, c.roiSpecificationFile) if 1 in c.runNetworkCentrality: if 3 in c.useSeedInAnalysis: c.templateSpecificationFile = append_seeds_to_file(c.workingDirectory, seeds_created, c.templateSpecificationFile) if not c.runOnGrid: from CPAC.pipeline.cpac_pipeline import prep_workflow procss = [Process(target=prep_workflow, args=(sub, c, strategies, p_name)) for sub in sublist] pid = open(os.path.join(c.outputDirectory, 'pid.txt'), 'w') import subprocess jobQueue = [] if len(sublist) <= c.numSubjectsAtOnce: """ Stream all the subjects as sublist is less than or equal to the number of subjects that need to run """ for p in procss: p.start() print >>pid,p.pid else: """ Stream the subject worlflows for preprocessing. At Any time in the pipeline c.numSubjectsAtOnce will run, unless the number remaining is less than the value of the parameter stated above """ idx = 0 while(idx < len(sublist)): if len(jobQueue) == 0 and idx == 0: idc = idx for p in procss[idc: idc + c.numSubjectsAtOnce]: p.start() print >>pid,p.pid jobQueue.append(p) idx += 1 else: for job in jobQueue: if not job.is_alive(): print 'found dead job ', job loc = jobQueue.index(job) del jobQueue[loc] procss[idx].start() jobQueue.append(procss[idx]) idx += 1 pid.close() else: import commands import pickle from time import strftime temp_files_dir = os.path.join(os.getcwd(), 'cluster_temp_files') print commands.getoutput("mkdir -p %s" % temp_files_dir) strategies_file = os.path.join(temp_files_dir, 'strategies.obj') f = open(strategies_file, 'w') pickle.dump(strategies, f) f.close() if 'sge' in c.resourceManager.lower(): run_sge_jobs(c, config_file, strategies_file, subject_list_file, p_name) elif 'pbs' in c.resourceManager.lower(): run_pbs_jobs(c, config_file, strategies_file, subject_list_file, p_name) elif 'condor' in c.resourceManager.lower(): run_condor_jobs(c, config_file, strategies_file, subject_list_file, p_name) return 1
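# NOTE (editor): hedged usage sketch for this older run() variant, assuming the module is
# importable as CPAC.pipeline.cpac_runner; the YAML file names below are placeholders,
# not files shipped with C-PAC.
#
#     from CPAC.pipeline.cpac_runner import run
#     run('/path/to/pipeline_config.yml',
#         '/path/to/CPAC_subject_list.yml',
#         p_name='my_pipeline')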
def run_cpac_on_cluster(config_file, subject_list_file, strategies_file, cluster_files_dir): ''' Function to build a SLURM batch job submission script and submit it to the scheduler via 'sbatch' ''' # Import packages import commands import getpass import re from time import strftime from CPAC.utils import Configuration from indi_schedulers import cluster_templates # Load in pipeline config try: pipeline_dict = yaml.load(open(os.path.realpath(config_file), 'r')) pipeline_config = Configuration(pipeline_dict) except: raise Exception('Pipeline config is not in proper YAML format. '\ 'Please check your file') # Load in the subject list try: sublist = yaml.load(open(os.path.realpath(subject_list_file), 'r')) except: raise Exception('Subject list is not in proper YAML format. '\ 'Please check your file') # Init variables timestamp = str(strftime("%Y_%m_%d_%H_%M_%S")) job_scheduler = pipeline_config.resourceManager.lower() # For SLURM time limit constraints only, hh:mm:ss hrs_limit = 8 * len(sublist) time_limit = '%d:00:00' % hrs_limit # Batch file variables shell = commands.getoutput('echo $SHELL') user_account = getpass.getuser() num_subs = len(sublist) # Run CPAC via python -c command python_cpac_str = 'python -c "from CPAC.pipeline.cpac_pipeline import run; '\ 'run(\'%(config_file)s\', \'%(subject_list_file)s\', '\ '%(env_arr_idx)s, \'%(strategies_file)s\', '\ '\'%(pipeline_name)s\', plugin=\'MultiProc\', '\ 'plugin_args=%(plugin_args)s)"' # Init plugin arguments plugin_args = { 'n_procs': pipeline_config.numCoresPerSubject, 'memory_gb': pipeline_config.memoryAllocatedPerSubject } # Set up run command dictionary run_cmd_dict = { 'config_file': config_file, 'subject_list_file': subject_list_file, 'strategies_file': strategies_file, 'pipeline_name': pipeline_config.pipelineName, 'plugin_args': plugin_args } # Set up config dictionary config_dict = { 'timestamp': timestamp, 'shell': shell, 'job_name': 'CPAC_' + pipeline_config.pipelineName, 'num_tasks': num_subs, 'queue': pipeline_config.queue, 'par_env': pipeline_config.parallelEnvironment, 'cores_per_task': pipeline_config.numCoresPerSubject, 'user': user_account, 'work_dir': cluster_files_dir, 'time_limit': time_limit } # Get string template for job scheduler if job_scheduler == 'pbs': env_arr_idx = '$PBS_ARRAYID' batch_file_contents = cluster_templates.pbs_template confirm_str = '(?<=Your job-array )\d+' exec_cmd = 'qsub' elif job_scheduler == 'sge': env_arr_idx = '$SGE_TASK_ID' batch_file_contents = cluster_templates.sge_template confirm_str = '(?<=Your job-array )\d+' exec_cmd = 'qsub' elif job_scheduler == 'slurm': env_arr_idx = '$SLURM_ARRAY_TASK_ID' batch_file_contents = cluster_templates.slurm_template confirm_str = '(?<=Submitted batch job )\d+' exec_cmd = 'sbatch' # Populate rest of dictionary config_dict['env_arr_idx'] = env_arr_idx run_cmd_dict['env_arr_idx'] = env_arr_idx config_dict['run_cmd'] = python_cpac_str % run_cmd_dict # Populate string from config dict values batch_file_contents = batch_file_contents % config_dict # Write file batch_filepath = os.path.join(cluster_files_dir, 'cpac_submit_%s.%s' \ % (timestamp, job_scheduler)) with open(batch_filepath, 'w') as f: f.write(batch_file_contents) # Get output response from job submission out = commands.getoutput('%s %s' % (exec_cmd, batch_filepath)) # Check for successful qsub submission if re.search(confirm_str, out) == None: err_msg = 'Error submitting C-PAC pipeline run to %s queue' \ % job_scheduler raise Exception(err_msg) # Get pid and send to pid file pid = 
re.search(confirm_str, out).group(0) pid_file = os.path.join(cluster_files_dir, 'pid.txt') with open(pid_file, 'w') as f: f.write(pid)
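# NOTE (editor): small illustration of how the scheduler-specific confirm_str patterns
# above pull the job/array ID out of the submission output; the sample stdout strings
# are made up for demonstration.
def _example_job_id_extraction():
    import re
    slurm_out = 'Submitted batch job 123456'
    sge_out = 'Your job-array 98765.1-3:1 ("CPAC_run") has been submitted'
    slurm_id = re.search(r'(?<=Submitted batch job )\d+', slurm_out).group(0)
    sge_id = re.search(r'(?<=Your job-array )\d+', sge_out).group(0)
    return slurm_id, sge_id   # ('123456', '98765')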
def prep_group_analysis_workflow(c, resource, subject_infos): p_id, s_ids, scan_ids, s_paths = (list(tup) for tup in zip(*subject_infos)) #print "p_id -%s, s_ids -%s, scan_ids -%s, s_paths -%s" %(p_id, s_ids, scan_ids, s_paths) ''' #diag = open(os.path.join('/home/data/Projects/CPAC_Regression_Test/2013-08-19-20_v0-3-1/fsl-model/2013-09-03', 'group_analysis_diagnostic.txt'), 'wt') #for tup in subject_infos: # print >>diag, list(tup) #print >>diag, "" #for tup in zip(*subject_infos): # print >>diag, list(tup) #print >>diag, "" print >>diag, "Working variables passed from cpac_group_runner: " print >>diag, "" print >>diag, "Pipeline ID (p_id): ", p_id print >>diag, "Subject IDs (s_ids): ", s_ids print >>diag, "Scan IDs (scan_ids): ", scan_ids print >>diag, "(s_paths): ", s_paths print >>diag, "" ''' def get_phenotypic_file(phenotypic_file, m_dict, m_list, mod_path, sub_id): #print "phenotypic_file, m_dict", phenotypic_file, m_dict import csv reader = csv.reader(open(phenotypic_file, 'rU')) columns = {} order = {} count = 0 headers = reader.next() for h in headers: columns[h] =[] order[h] = count count+=1 for r in reader: for h, v in zip(headers, r): if v: columns[h].append(str(v)) if m_dict: for measure in m_list: if measure in headers: #check if 'MeanFD is present' if len(columns[measure]) < 1: for sub in columns[sub_id]: if m_dict.get(sub): if m_dict.get(sub).get(measure): columns[measure].append(m_dict[sub][measure]) else: raise Exception("Couldn't find %s value for subject %s"%(measure,sub)) else: raise Exception("Couldn't find subject %s in the parameter file"%sub) b = zip(*([k] + columns[k] for k in sorted(columns, key=order.get))) try: os.makedirs(mod_path) except: print "%s already exists"%(mod_path) new_phenotypic_file = os.path.join(mod_path, os.path.basename(phenotypic_file)) a = csv.writer(open(new_phenotypic_file, 'w')) for col in b: a.writerow(list(col)) return new_phenotypic_file threshold_val = None measure_dict = None measure_list = ['MeanFD', 'MeanFD_Jenkinson', 'MeanDVARS'] model_sub_list = [] if c.runScrubbing == 1: #get scrubbing threshold if re.search('(?<=/_threshold_)\d+.\d+',s_paths[0]): threshold_val = re.search('(?<=/_threshold_)\d+.\d+',s_paths[0]).group(0) elif len(c.scrubbingThreshold) == 1: threshold_val = c.scrubbingThreshold[0] else: print "Found Multiple threshold value " print "scrubbing threshold_val -->", threshold_val else: print "No scrubbing enabled." 
#pick the right parameter file from the pipeline folder #create a dictionary of subject and measures in measure_list if c.runScrubbing == 1: try: parameter_file = os.path.join(c.outputDirectory, p_id[0], '%s_threshold_%s_all_params.csv'%(scan_ids[0].strip('_'),threshold_val)) if os.path.exists(parameter_file): import csv measure_dict = {} f = csv.DictReader(open(parameter_file,'r')) for line in f: measure_map = {} for m in measure_list: if line.get(m): measure_map[m] = line[m] measure_dict[line['Subject']] = measure_map else: print "No file name %s found"%parameter_file except Exception: print "Exception while extracting parameters from movement file - %s"%(parameter_file) #print >>diag, "Begins to iterate over each config file listed here: ", c.modelConfigs #print >>diag, "" for config in c.modelConfigs: import yaml try: conf = Configuration(yaml.load(open(os.path.realpath(config), 'r'))) except: raise Exception("Error in reading %s configuration file" % config) #print >>diag, "Starting iteration for config: ", config #print >>diag, "" group_sublist = open(conf.subjectListFile, 'r') sublist_items = group_sublist.readlines() subject_list = [line.rstrip('\n') for line in sublist_items \ if not (line == '\n') and not line.startswith('#')] #print >>diag, "Subject list run-through #1: ", subject_list #print >>diag, "" #subject_list = [line.rstrip('\r\n') for line in open(conf.subjectListFile, 'r') \ # if not (line == '\n') and not line.startswith('#')] # list of subject paths which DO exist exist_paths = [] # check for missing subject for the derivative #print >>diag, "> Iterates over subject_list - for each subject in the subject list, it iterates over the paths in s_paths." #print >>diag, "> For each path, it checks if the current subject exists in this path, and then appends this subject to 'exist_paths' list." 
#print >>diag, "" for sub in subject_list : for path in s_paths: if sub in path: exist_paths.append(sub) #print >>diag, "Current status of exist_paths list: " #print >>diag, exist_paths #print >>diag, "" # check to see if any derivatives of subjects are missing if len(list(set(subject_list) - set(exist_paths))) >0: print "List of outputs missing for subjects:" print list(set(subject_list) - set(exist_paths)) print "..for derivatives:" print resource print "..at paths:" print os.path.dirname(s_paths[0]).replace(s_ids[0], '*') #import warnings #warnings.warn(msg) mod_path = os.path.join(os.path.dirname(s_paths[0]).replace(s_ids[0], 'group_analysis_results/_grp_model_%s'%(conf.modelName)), 'model_files') #print >>diag, "> Created mod_path variable: ", mod_path #print >>diag, "" print "basename: ", os.path.basename(conf.subjectListFile) try: os.makedirs(mod_path) print "Creating directory:" print mod_path except: print "Attempted to create directory, but path already exists:" print mod_path new_sub_file = os.path.join(mod_path, os.path.basename(conf.subjectListFile)) try: f = open(new_sub_file, 'w') for sub in exist_paths: print >>f, sub f.close() #print >>diag, "> Created new subject list file: ", new_sub_file #print >>diag, "" #print >>diag, "> ..which is filled with the subjects from exist_paths" #print >>diag, "" except: print "Error: Could not open subject list file: ", new_sub_file raise Exception #print >>diag, "> Updates the FSL model config's subject list file parameter from: ", conf.subjectListFile conf.update('subjectListFile',new_sub_file) #print >>diag, "> ..to new subject list file: ", conf.subjectListFile #print >>diag, "" sub_id = conf.subjectColumn ''' print >>diag, "> If measure_dict is not empty, it updates the phenotypic file with these parameters: " print >>diag, "" print >>diag, "measure_dict: ", measure_dict print >>diag, "measure_list: ", measure_list print >>diag, "mod_path: ", mod_path print >>diag, "sub_id: ", sub_id print >>diag, "" ''' if measure_dict != None: conf.update('phenotypicFile',get_phenotypic_file(conf.phenotypicFile, measure_dict, measure_list, mod_path, sub_id)) print "Model config dictionary ->" print conf.__dict__ # Run 'create_fsl_model' script to extract phenotypic data from # the phenotypic file for each of the subjects in the subject list try: from CPAC.utils import create_fsl_model create_fsl_model.run(conf, c.fTest, True) #print >>diag, "> Runs create_fsl_model." #print >>diag, "" except Exception, e: print "FSL Group Analysis model not successfully created - error in create_fsl_model script" #print "Error ->", e raise model_sub_list.append((conf.outputModelFilesDirectory, conf.subjectListFile)) print "model_sub_list ->", model_sub_list '''
def run(config_file, subject_list_file, p_name=None, plugin=None, plugin_args=None): ''' ''' # Import packages import commands import os import pickle import time from CPAC.pipeline.cpac_pipeline import prep_workflow # Init variables config_file = os.path.realpath(config_file) subject_list_file = os.path.realpath(subject_list_file) # take date+time stamp for run identification purposes unique_pipeline_id = strftime("%Y%m%d%H%M%S") pipeline_start_stamp = strftime("%Y-%m-%d_%H:%M:%S") # Load in pipeline config file try: if not os.path.exists(config_file): raise IOError else: c = Configuration(yaml.load(open(config_file, 'r'))) except IOError: print "config file %s doesn't exist" % config_file raise except Exception: print "Error reading config file - %s" % config_file raise Exception # Do some validation validate(c) # Get the pipeline name p_name = c.pipelineName # Load in subject list try: sublist = yaml.load(open(subject_list_file, 'r')) except: print "Subject list is not in proper YAML format. Please check your file" raise Exception # NOTE: strategies list is only needed in cpac_pipeline prep_workflow for # creating symlinks strategies = sorted(build_strategies(c)) # Populate subject scan map sub_scan_map = {} try: for sub in sublist: if sub['unique_id']: s = sub['subject_id'] + "_" + sub["unique_id"] else: s = sub['subject_id'] scan_ids = ['scan_anat'] try: for id in sub['func']: scan_ids.append('scan_' + str(id)) except KeyError: for id in sub['rest']: scan_ids.append('scan_' + str(id)) sub_scan_map[s] = scan_ids except: print "\n\n" + "ERROR: Subject list file not in proper format - " \ "check if you loaded the correct file?" + "\n" + \ "Error name: cpac_runner_0001" + "\n\n" raise Exception create_group_log_template(sub_scan_map, c.logDirectory) pipeline_timing_info = [] pipeline_timing_info.append(unique_pipeline_id) pipeline_timing_info.append(pipeline_start_stamp) pipeline_timing_info.append(len(sublist)) # If we're running on cluster, execute job scheduler if c.runOnGrid: # Create cluster log dir cluster_files_dir = os.path.join(c.logDirectory, 'cluster_files') if not os.path.exists(cluster_files_dir): os.makedirs(cluster_files_dir) # Create strategies file strategies_file = os.path.join(cluster_files_dir, 'strategies.obj') with open(strategies_file, 'w') as f: pickle.dump(strategies, f) # Check if its a condor job, and run that if 'condor' in c.resourceManager.lower(): run_condor_jobs(c, config_file, strategies_file, subject_list_file, p_name) # All other schedulers are supported else: run_cpac_on_cluster(config_file, subject_list_file, strategies_file, cluster_files_dir) # Run on one computer else: # Init variables procss = [Process(target=prep_workflow, args=(sub, c, strategies, 1, pipeline_timing_info, p_name, plugin, plugin_args)) \ for sub in sublist] if not os.path.exists(c.workingDirectory): try: os.makedirs(c.workingDirectory) except: err = "\n\n[!] 
CPAC says: Could not create the working " \ "directory: %s\n\nMake sure you have permissions " \ "to write to this directory.\n\n" % c.workingDirectory raise Exception(err) pid = open(os.path.join(c.workingDirectory, 'pid.txt'), 'w') # Init job queue jobQueue = [] # If we're allocating more processes than are subjects, run them all if len(sublist) <= c.numParticipantsAtOnce: for p in procss: p.start() print >> pid, p.pid # Otherwise manage resources to run processes incrementally else: idx = 0 while (idx < len(sublist)): # If the job queue is empty and we haven't started indexing if len(jobQueue) == 0 and idx == 0: # Init subject process index idc = idx # Launch processes (one for each subject) for p in procss[idc:idc + c.numParticipantsAtOnce]: p.start() print >> pid, p.pid jobQueue.append(p) idx += 1 # Otherwise, jobs are running - check them else: # Check every job in the queue's status for job in jobQueue: # If the job is not alive if not job.is_alive(): # Find job and delete it from queue print 'found dead job ', job loc = jobQueue.index(job) del jobQueue[loc] # ...and start the next available process (subject) procss[idx].start() # Append this to job queue and increment index jobQueue.append(procss[idx]) idx += 1 # Add sleep so while loop isn't consuming 100% of CPU time.sleep(2) # Close PID txt file to indicate finish pid.close()
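# NOTE (editor): a minimal, self-contained sketch of the throttling pattern used above --
# start numParticipantsAtOnce workers, then backfill the queue as jobs finish. All names
# below are illustrative stand-ins; none of them exist in C-PAC.
def _example_fake_subject_workflow(sub_id):
    import time
    time.sleep(0.1)   # stand-in for prep_workflow(...)

def _example_throttled_launch(n_subjects=10, max_at_once=3):
    import time
    from multiprocessing import Process
    procs = [Process(target=_example_fake_subject_workflow, args=(i,))
             for i in range(n_subjects)]
    queue, idx = [], 0
    # prime the queue with the first max_at_once jobs
    while idx < len(procs) and len(queue) < max_at_once:
        procs[idx].start()
        queue.append(procs[idx])
        idx += 1
    # backfill: whenever a job dies, remove it and start the next one
    while queue:
        for job in queue[:]:
            if not job.is_alive():
                queue.remove(job)
                if idx < len(procs):
                    procs[idx].start()
                    queue.append(procs[idx])
                    idx += 1
        time.sleep(0.2)   # poll politely instead of spinning at 100% CPU
# usage (hypothetical): _example_throttled_launch(n_subjects=10, max_at_once=3)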
def run(config_file, subject_list_file, p_name=None, plugin=None, plugin_args=None, tracking=True, num_subs_at_once=None, debug=False): ''' ''' # Import packages import commands import os import pickle import time from CPAC.pipeline.cpac_pipeline import prep_workflow # Init variables config_file = os.path.realpath(config_file) subject_list_file = os.path.realpath(subject_list_file) # take date+time stamp for run identification purposes unique_pipeline_id = strftime("%Y%m%d%H%M%S") pipeline_start_stamp = strftime("%Y-%m-%d_%H:%M:%S") # Load in pipeline config file try: if not os.path.exists(config_file): raise IOError else: c = Configuration(yaml.load(open(config_file, 'r'))) except IOError: print "config file %s doesn't exist" % config_file raise except Exception as e: raise Exception("Error reading config file - {0}\n\nError details:" "\n{1}\n\n".format(config_file, e)) c.logDirectory = os.path.abspath(c.logDirectory) c.workingDirectory = os.path.abspath(c.workingDirectory) if 's3://' not in c.outputDirectory: c.outputDirectory = os.path.abspath(c.outputDirectory) c.crashLogDirectory = os.path.abspath(c.crashLogDirectory) if debug: c.write_debugging_outputs = "[1]" if num_subs_at_once: if not str(num_subs_at_once).isdigit(): raise Exception('[!] Value entered for --num_cores not a digit.') c.numParticipantsAtOnce = int(num_subs_at_once) # Do some validation validate(c) # Get the pipeline name p_name = p_name or c.pipelineName # Load in subject list try: with open(subject_list_file, 'r') as sf: sublist = yaml.load(sf) except: print "Subject list is not in proper YAML format. Please check " \ "your file" raise Exception # Populate subject scan map sub_scan_map = {} try: for sub in sublist: if sub['unique_id']: s = sub['subject_id'] + "_" + sub["unique_id"] else: s = sub['subject_id'] scan_ids = ['scan_anat'] if 'func' in sub: for id in sub['func']: scan_ids.append('scan_'+ str(id)) if 'rest' in sub: for id in sub['rest']: scan_ids.append('scan_'+ str(id)) sub_scan_map[s] = scan_ids except: print "\n\n" + "ERROR: Subject list file not in proper format - " \ "check if you loaded the correct file?" + "\n" + \ "Error name: cpac_runner_0001" + "\n\n" raise Exception pipeline_timing_info = [] pipeline_timing_info.append(unique_pipeline_id) pipeline_timing_info.append(pipeline_start_stamp) pipeline_timing_info.append(len(sublist)) if tracking: track_run(level='participant', participants=len(sublist)) # If we're running on cluster, execute job scheduler if c.runOnGrid: # Create cluster log dir cluster_files_dir = os.path.join(c.logDirectory, 'cluster_files') if not os.path.exists(cluster_files_dir): os.makedirs(cluster_files_dir) # Check if its a condor job, and run that if 'condor' in c.resourceManager.lower(): run_condor_jobs(c, config_file, subject_list_file, p_name) # All other schedulers are supported else: run_cpac_on_cluster(config_file, subject_list_file, cluster_files_dir) # Run on one computer else: if not os.path.exists(c.workingDirectory): try: os.makedirs(c.workingDirectory) except: err = "\n\n[!] 
CPAC says: Could not create the working " \ "directory: %s\n\nMake sure you have permissions " \ "to write to this directory.\n\n" % c.workingDirectory raise Exception(err) # If it only allows one, run it linearly if c.numParticipantsAtOnce == 1: for sub in sublist: prep_workflow(sub, c, True, pipeline_timing_info, p_name, plugin, plugin_args) return pid = open(os.path.join(c.workingDirectory, 'pid.txt'), 'w') # Init job queue job_queue = [] # Allocate processes processes = [Process(target=prep_workflow, args=(sub, c, True, pipeline_timing_info, p_name, plugin, plugin_args)) for sub in sublist] # If we're allocating more processes than are subjects, run them all if len(sublist) <= c.numParticipantsAtOnce: for p in processes: p.start() print >>pid, p.pid # Otherwise manage resources to run processes incrementally else: idx = 0 while idx < len(sublist): # If the job queue is empty and we haven't started indexing if len(job_queue) == 0 and idx == 0: # Init subject process index idc = idx # Launch processes (one for each subject) for p in processes[idc: idc+c.numParticipantsAtOnce]: p.start() print >>pid, p.pid job_queue.append(p) idx += 1 # Otherwise, jobs are running - check them else: # Check every job in the queue's status for job in job_queue: # If the job is not alive if not job.is_alive(): # Find job and delete it from queue print 'found dead job ', job loc = job_queue.index(job) del job_queue[loc] # ...and start the next available process # (subject) processes[idx].start() # Append this to job queue and increment index job_queue.append(processes[idx]) idx += 1 # Add sleep so while loop isn't consuming 100% of CPU time.sleep(2) # Close PID txt file to indicate finish pid.close()
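# NOTE (editor): hedged usage sketch for this newer run() signature, assuming the module
# is importable as CPAC.pipeline.cpac_runner; the paths and plugin settings below are
# placeholders, not recommended values.
#
#     from CPAC.pipeline.cpac_runner import run
#     run('/path/to/pipeline_config.yml',
#         '/path/to/participant_list.yml',
#         plugin='MultiProc',
#         plugin_args={'n_procs': 4, 'memory_gb': 8},
#         tracking=False, num_subs_at_once=2, debug=False)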
def configuration_strategy_mock(method='FSL'): # mock the config dictionary c = Configuration({ "num_ants_threads": 4, "workingDirectory": "/scratch/pipeline_tests", "crashLogDirectory": "/scratch", "outputDirectory": "/output/output/pipeline_analysis_nuisance/sub-M10978008_ses-NFB3", "resolution_for_func_preproc": "3mm", "resolution_for_func_derivative": "3mm", "template_for_resample": "/usr/share/fsl/5.0/data/standard/MNI152_T1_1mm_brain.nii.gz", "template_brain_only_for_func": "/usr/share/fsl/5.0/data/standard/MNI152_T1_${resolution_for_func_preproc}_brain.nii.gz", "template_skull_for_func": "/usr/share/fsl/5.0/data/standard/MNI152_T1_${resolution_for_func_preproc}.nii.gz", "identityMatrix": "/usr/share/fsl/5.0/etc/flirtsch/ident.mat", "funcRegFSLinterpolation": "sinc", "funcRegANTSinterpolation": "LanczosWindowedSinc" }) if method == 'ANTS': c.update('regOption', 'ANTS') else: c.update('regOption', 'FSL') # mock the strategy strat = Strategy() resource_dict = { "mean_functional": os.path.join( c.outputDirectory, "mean_functional/sub-M10978008_ses-NFB3_task-test_bold_calc_tshift_resample_volreg_calc_tstat.nii.gz" ), "motion_correct": os.path.join( c.outputDirectory, "motion_correct/_scan_test/sub-M10978008_ses-NFB3_task-test_bold_calc_tshift_resample_volreg.nii.gz" ), "anatomical_brain": os.path.join( c.outputDirectory, "anatomical_brain/sub-M10978008_ses-NFB3_acq-ao_brain_resample.nii.gz" ), "ants_initial_xfm": os.path.join( c.outputDirectory, "ants_initial_xfm/transform0DerivedInitialMovingTranslation.mat"), "ants_affine_xfm": os.path.join(c.outputDirectory, "ants_affine_xfm/transform2Affine.mat"), "ants_rigid_xfm": os.path.join(c.outputDirectory, "ants_rigid_xfm/transform1Rigid.mat"), "anatomical_to_mni_linear_xfm": os.path.join( c.outputDirectory, "anatomical_to_mni_linear_xfm/sub-M10978008_ses-NFB3_T1w_resample_calc_flirt.mat" ), "functional_to_anat_linear_xfm": os.path.join( c.outputDirectory, "functional_to_anat_linear_xfm/_scan_test/sub-M10978008_ses-NFB3_task-test_bold_calc_tshift_resample_volreg_calc_tstat_flirt.mat" ), 'ants_symm_warp_field': os.path.join( c.outputDirectory, "anatomical_to_symmetric_mni_nonlinear_xfm/transform3Warp.nii.gz"), 'ants_symm_affine_xfm': os.path.join(c.outputDirectory, "ants_symmetric_affine_xfm/transform2Affine.mat"), 'ants_symm_rigid_xfm': os.path.join(c.outputDirectory, "ants_symmetric_rigid_xfm/transform1Rigid.mat"), 'ants_symm_initial_xfm': os.path.join( c.outputDirectory, "ants_symmetric_initial_xfm/transform0DerivedInitialMovingTranslation.mat" ), "dr_tempreg_maps_files": [ os.path.join( '/scratch', 'resting_preproc_sub-M10978008_ses-NFB3_cpac105', 'temporal_dual_regression_0/_scan_test/_selector_CSF-2mmE-M_aC-WM-2mmE-DPC5_G-M_M-SDB_P-2/_spatial_map_PNAS_Smith09_rsn10_spatial_map_file_..cpac_templates..PNAS_Smith09_rsn10.nii.gz/split_raw_volumes/temp_reg_map_000{0}.nii.gz' .format(n)) for n in range(10) ] } if method == 'ANTS': resource_dict["anatomical_to_mni_nonlinear_xfm"] = os.path.join( c.outputDirectory, "anatomical_to_mni_nonlinear_xfm/transform3Warp.nii.gz") else: resource_dict["anatomical_to_mni_nonlinear_xfm"] = os.path.join( c.outputDirectory, "anatomical_to_mni_nonlinear_xfm/sub-M10978008_ses-NFB3_T1w_resample_fieldwarp.nii.gz" ) file_node_num = 0 for resource, filepath in resource_dict.items(): strat.update_resource_pool( {resource: file_node(filepath, file_node_num)}) strat.append_name(resource + '_0') file_node_num += 1 templates_for_resampling = [ (c.resolution_for_func_preproc, c.template_brain_only_for_func, 
'template_brain_for_func_preproc', 'resolution_for_func_preproc'), (c.resolution_for_func_preproc, c.template_skull_for_func, 'template_skull_for_func_preproc', 'resolution_for_func_preproc') ] for resolution, template, template_name, tag in templates_for_resampling: resampled_template = pe.Node(Function( input_names=['resolution', 'template', 'template_name', 'tag'], output_names=['resampled_template'], function=resolve_resolution, as_module=True), name='resampled_' + template_name) resampled_template.inputs.resolution = resolution resampled_template.inputs.template = template resampled_template.inputs.template_name = template_name resampled_template.inputs.tag = tag strat.update_resource_pool( {template_name: (resampled_template, 'resampled_template')}) strat.append_name('resampled_template_0') return c, strat
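# NOTE (editor): hedged usage sketch of how a unit test might consume this mock.
# get_resource_pool() is assumed to be part of the Strategy API used above, and
# 'anatomical_brain' is one of the keys registered in resource_dict.
#
#     c, strat = configuration_strategy_mock(method='ANTS')
#     node, out_file = strat.get_resource_pool()['anatomical_brain']
#     # node/out_file can then be wired into the workflow under test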