def run(config_file, output_path_file):
    """Run group-level analysis and write a per-resource timing report.

    Parameters
    ----------
    config_file : str
        Path to the pipeline configuration YAML file.
    output_path_file : str
        Glob pattern matching the text file(s) that list one
        individual-level output path per line.

    Side effects
    ------------
    Creates 'group_analysis_timing_<pipelineName>_<timestamp>.txt' inside
    ``c.outputDirectory`` and runs (or submits) the BASC, CWAS and group
    analysis workflows selected in the configuration.
    """
    import yaml
    from collections import defaultdict

    # NOTE(review): yaml.load() can construct arbitrary Python objects. It is
    # kept because the configuration is presumably user-authored and may rely
    # on that; switch to yaml.safe_load() if configs can be untrusted.
    with open(os.path.realpath(config_file), 'r') as config_handle:
        c = Configuration(yaml.load(config_handle))

    # Collect the listed output paths; a set removes duplicates, and blank
    # lines are skipped because they cannot name an output file.
    unique_paths = set()
    for path_file in glob.glob(os.path.abspath(output_path_file)):
        with open(path_file, 'r') as path_handle:
            for line in path_handle:
                stripped = line.rstrip('\r\n')
                if stripped:
                    unique_paths.add(stripped)
    subject_paths = list(unique_paths)

    base_path = c.outputDirectory

    analysis_map = defaultdict(list)
    analysis_map_gp = defaultdict(list)

    for subject_path in subject_paths:
        # Drop the output-directory prefix so the first four folders are
        # pipeline / subject / resource / scan.
        rs_path = subject_path.replace(base_path, "", 1).lstrip('/')
        folders = split_folders(rs_path)
        pipeline_id = folders[0]
        subject_id = folders[1]
        resource_id = folders[2]
        scan_id = folders[3]

        # Same path with the subject replaced by a wildcard: all subjects
        # sharing an output end up under one key.
        key = subject_path.replace(subject_id, '*')
        entry = (pipeline_id, subject_id, scan_id, subject_path)
        analysis_map[(resource_id, key)].append(entry)
        analysis_map_gp[(resource_id, key)].append(entry)

    def submit_to_grid(resource, entries):
        # Dispatch to the scheduler named in the configuration.
        manager = c.resourceManager.lower()
        if 'sge' in manager:
            run_sge_jobs(c, config_file, resource, entries)
        elif 'pbs' in manager:
            run_pbs_jobs(c, config_file, resource, entries)

    def log_workflow(timing, resource, started):
        # Write the per-workflow timing lines and return elapsed minutes.
        elapsed = (time.time() - started) / 60
        timing.write("Group analysis workflow completed for resource:  %s\n"
                     % (resource,))
        timing.write("Elapsed run time (minutes):  %s\n" % (elapsed,))
        timing.write("\n")
        return elapsed

    # Resources that get a summary section, in report order.
    timed_resources = [
        'sca_roi_Z_to_standard_smooth',
        'sca_seed_Z_to_standard_smooth',
        'sca_tempreg_maps_z_files_smooth',
        'dr_tempreg_maps_z_files_smooth',
        'vmhc_z_score_stat_map',
        'alff_Z_to_standard_smooth',
        'falff_Z_to_standard_smooth',
        'reho_Z_to_standard_smooth',
        'centrality_outputs_smoothed',
    ]
    run_counts = dict.fromkeys(timed_resources, 0)
    run_minutes = dict.fromkeys(timed_resources, 0)

    gpa_starttime_string = strftime("%Y-%m-%d %H:%M:%S")
    gpa_starttime_string = gpa_starttime_string.replace(' ', '_')
    gpa_starttime_string = gpa_starttime_string.replace(':', '-')
    timing_path = os.path.join(
        c.outputDirectory,
        'group_analysis_timing_%s_%s.txt' % (c.pipelineName,
                                             gpa_starttime_string))

    # 'with' guarantees the report is flushed and closed even when a
    # workflow raises part-way through.
    with open(timing_path, 'wt') as timing:
        gpa_start_time = time.time()

        for (resource, _), entries in analysis_map.items():
            if resource != 'functional_mni':
                continue

            wf_start_time = time.time()

            if 1 in c.runBASC:
                if not c.runOnGrid:
                    from CPAC.pipeline.cpac_basc_pipeline import prep_basc_workflow
                    prep_basc_workflow(c, entries)
                else:
                    submit_to_grid(resource, entries)

            if 1 in c.runCWAS:
                if not c.runOnGrid:
                    from CPAC.pipeline.cpac_cwas_pipeline import prep_cwas_workflow
                    prep_cwas_workflow(c, entries)
                else:
                    submit_to_grid(resource, entries)

            log_workflow(timing, resource, wf_start_time)

        for (resource, _), entries in analysis_map_gp.items():
            if resource not in c.derivativeList:
                continue

            wf_start_time = time.time()

            if 1 in c.runGroupAnalysis:
                # Gather the motion parameters across subjects. Best effort:
                # a failure is reported but does not stop group analysis.
                try:
                    from CPAC.utils import extract_parameters
                    extract_parameters.run(c.outputDirectory)
                except Exception:
                    print("Extract parameters script did not run correctly")

                if not c.runOnGrid:
                    from CPAC.pipeline.cpac_group_analysis_pipeline import prep_group_analysis_workflow
                    prep_group_analysis_workflow(c, resource, entries)
                else:
                    submit_to_grid(resource, entries)

            elapsed = log_workflow(timing, resource, wf_start_time)
            if resource in run_counts:
                run_counts[resource] += 1
                run_minutes[resource] += elapsed

        timing.write("Entire group analysis run complete.\n")
        timing.write("Elapsed run time (minutes):  %s\n"
                     % ((time.time() - gpa_start_time) / 60,))
        timing.write("\n")

        for name in timed_resources:
            timing.write("%s\n" % (name,))
            timing.write("Number of runs:  %s\n" % (run_counts[name],))
            timing.write("Total run time (minutes):  %s\n"
                         % (run_minutes[name],))
            timing.write("\n")
def run(config_file, output_path_file):
    """Run group-level analysis using a Python-module configuration.

    Parameters
    ----------
    config_file : str
        Path to a Python file that is imported as the configuration
        module ``c``; its directory is appended to ``sys.path``.
    output_path_file : str
        Glob pattern matching the text file(s) that list one
        individual-level output path per line.

    Only subjects named in the file at ``c.groupAnalysisSubjectList`` are
    passed on to group analysis; BASC and CWAS receive every subject.
    """
    import os
    import glob
    from collections import defaultdict

    # Import the configuration by module name from its own directory.
    path, fname = os.path.split(os.path.realpath(config_file))
    sys.path.append(path)
    c = __import__(fname.split('.')[0])

    # A set gives constant-time membership tests in the loop below.
    with open(c.groupAnalysisSubjectList, 'r') as subject_list_handle:
        input_subject_ids = set(line.rstrip('\r\n')
                                for line in subject_list_handle)

    # Collect the listed output paths; a set removes duplicates, and blank
    # lines are skipped because they cannot name an output file.
    unique_paths = set()
    for path_file in glob.glob(os.path.abspath(output_path_file)):
        with open(path_file, 'r') as path_handle:
            for line in path_handle:
                stripped = line.rstrip('\r\n')
                if stripped:
                    unique_paths.add(stripped)
    subject_paths = list(unique_paths)

    base_path = c.sinkDirectory

    analysis_map = defaultdict(list)
    analysis_map_gp = defaultdict(list)

    for subject_path in subject_paths:
        # Drop the sink-directory prefix so the first four folders are
        # pipeline / subject / resource / scan.
        rs_path = subject_path.replace(base_path, "", 1).lstrip('/')
        folders = split_folders(rs_path)
        pipeline_id = folders[0]
        subject_id = folders[1]
        resource_id = folders[2]
        scan_id = folders[3]

        entry = (pipeline_id, subject_id, scan_id, subject_path)

        key = subject_path.replace(subject_id, '*')
        analysis_map[(resource_id, key)].append(entry)

        # Group analysis may pool scans: wildcard the scan folder as well.
        # The explicit comparison is kept so only True/1 enables pooling.
        if c.mixedScanAnalysis == True:
            key = key.replace(scan_id, '*')

        if subject_id in input_subject_ids:
            analysis_map_gp[(resource_id, key)].append(entry)

    def submit_to_grid(resource, entries):
        # Dispatch to the scheduler named in the configuration. The config
        # path comes from this function's argument; the previous reference
        # to 'args.config' relied on a name that is not defined here.
        manager = c.resourceManager.lower()
        if 'sge' in manager:
            run_sge_jobs(c, config_file, resource, entries)
        elif 'pbs' in manager:
            run_pbs_jobs(c, config_file, resource, entries)

    for (resource, _), entries in analysis_map.items():
        if resource != 'functional_mni':
            continue

        if 1 in c.runBASC:
            if not c.runOnGrid:
                from CPAC.pipeline.cpac_basc_pipeline import prep_basc_workflow
                prep_basc_workflow(c, entries)
            else:
                submit_to_grid(resource, entries)

        if 1 in c.runCWAS:
            if not c.runOnGrid:
                from CPAC.pipeline.cpac_cwas_pipeline import prep_cwas_workflow
                prep_cwas_workflow(c, entries)
            else:
                submit_to_grid(resource, entries)

    for (resource, _), entries in analysis_map_gp.items():
        if resource not in c.derivativeList:
            continue
        if 1 not in c.runGroupAnalysis:
            continue

        if not c.runOnGrid:
            from CPAC.pipeline.cpac_group_analysis_pipeline import prep_group_analysis_workflow
            prep_group_analysis_workflow(c, resource, entries)
        else:
            submit_to_grid(resource, entries)
def run(config_file, subject_list_file, output_path_file):
    """Run group-level analysis, one worker process per output/model key.

    Parameters
    ----------
    config_file : str
        Path to the pipeline configuration YAML file.
    subject_list_file : str
        Path to the YAML subject list; each entry must provide a
        'subject_id' used to recognise the subject in output paths.
    output_path_file : str
        Glob pattern matching the 'Derivative Path File'(s), which list
        one individual-level output path per line.

    Raises
    ------
    Exception
        If the subject list cannot be parsed, no output paths are found,
        no derivatives or models are selected, an output path matches no
        subject in the subject list, or parameter extraction fails. The
        message is also printed to stdout.

    Side effects
    ------------
    Writes the PID of every started worker to 'pid_group.txt' in
    ``c.outputDirectory``. Workers are started but not joined.
    """
    import time
    import yaml
    from collections import defaultdict

    def fail(message):
        # Echo the message for interactive users and carry the same text
        # in the exception for programmatic callers.
        print(message)
        raise Exception(message)

    # NOTE(review): yaml.load() can construct arbitrary Python objects. It is
    # kept because these files are presumably user-authored; switch to
    # yaml.safe_load() if they can be untrusted.
    with open(os.path.realpath(config_file), 'r') as config_handle:
        c = Configuration(yaml.load(config_handle))

    # The subject list supplies the subject IDs looked for in each path.
    try:
        with open(os.path.realpath(subject_list_file), 'r') as sublist_handle:
            sublist = yaml.load(sublist_handle)
    except Exception:
        fail("Subject list is not in proper YAML format. "
             "Please check your file")

    # Collect the listed output paths; a set removes duplicates, and blank
    # lines are skipped because they cannot name an output file.
    unique_paths = set()
    for path_file in glob.glob(os.path.abspath(output_path_file)):
        with open(path_file, 'r') as path_handle:
            for line in path_handle:
                stripped = line.rstrip('\r\n')
                if stripped:
                    unique_paths.add(stripped)

    if len(unique_paths) == 0:
        fail('[!] CPAC says: No individual-level analysis outputs were '
             'found given the path file you provided.\n\nDerivative '
             'Path File provided:  %s \n\nEither make '
             'sure your Derivative Path File is correctly formatted, or '
             'that individual-level analysis completed successfully and '
             'generated the \'path_files_here\' folder found in the '
             'output directory, then try again.\n\n' % (output_path_file,))

    if len(c.derivativeList) == 0:
        fail('[!] CPAC says: You do not have any derivatives selected '
             'to run for group-level analysis. Return to your pipeline '
             'configuration file and select at least one.\n\n')

    if len(c.modelConfigs) == 0:
        fail('[!] CPAC says: You do not have any models selected '
             'to run for group-level analysis. Return to your pipeline '
             'configuration file and create or select at least one.\n\n')

    subject_paths = list(unique_paths)
    base_path = c.outputDirectory

    def fail_unmatched_subject():
        # Raised when a path does not contain any subject from the list.
        fail('\n'.join([
            '\n\n[!] CPAC says: The derivative path file you '
            'provided does not contain the output directory '
            'given in the pipeline configuration file.\n',
            'Derivative path file:  %s \n' % (output_path_file,),
            'Output directory:  %s \n' % (c.outputDirectory,),
            'Please correct this and try again.\n\n\n',
        ]))

    analysis_map = defaultdict(list)
    analysis_map_gp = defaultdict(list)

    for subject_path in subject_paths:
        # Drop the output-directory prefix so the first four folders are
        # pipeline / subject(+session) / resource / scan.
        rs_path = subject_path.replace(base_path, "", 1).lstrip('/')
        folders = split_folders(rs_path)
        pipeline_id = folders[0]
        subject_unique_id = folders[1]
        resource_id = folders[2]
        scan_id = folders[3]

        # Reset for every path so an unmatched path can never inherit the
        # subject found for the previous one. The last matching entry wins.
        subject_id = None
        for sub in sublist:
            if sub['subject_id'] in subject_unique_id:
                subject_id = sub['subject_id']

        if c.repeatedMeasures == True:
            # All scans and sessions are pooled into one model.
            key = subject_path.replace(subject_unique_id, '*')
            key = key.replace(scan_id, '*')
        else:
            # Each session keeps its own model: wildcard the subject only.
            if subject_id is None:
                fail_unmatched_subject()
            key = subject_path.replace(subject_id, '*')

        if resource_id in c.derivativeList:
            if subject_id is None:
                fail_unmatched_subject()
            entry = (pipeline_id, subject_id, scan_id, subject_path)
            analysis_map[(resource_id, key)].append(entry)
            analysis_map_gp[(resource_id, key)].append(entry)

    def submit_to_grid(resource, entries):
        # Dispatch to the scheduler named in the configuration.
        manager = c.resourceManager.lower()
        if 'sge' in manager:
            run_sge_jobs(c, config_file, resource, entries)
        elif 'pbs' in manager:
            run_pbs_jobs(c, config_file, resource, entries)

    for (resource, _), entries in analysis_map.items():
        if resource != 'functional_mni':
            continue

        if 1 in c.runBASC:
            if not c.runOnGrid:
                from CPAC.pipeline.cpac_basc_pipeline import prep_basc_workflow
                prep_basc_workflow(c, entries)
            else:
                submit_to_grid(resource, entries)

        if 1 in c.runCWAS:
            if not c.runOnGrid:
                from CPAC.pipeline.cpac_cwas_pipeline import prep_cwas_workflow
                prep_cwas_workflow(c, entries)
            else:
                submit_to_grid(resource, entries)

    procss = []
    for (resource, _), entries in analysis_map_gp.items():
        if resource not in c.derivativeList:
            continue

        # Gather the motion parameters across subjects.
        # NOTE(review): this runs once per output key; it looks
        # loop-invariant and could probably be hoisted - confirm first.
        try:
            from CPAC.utils import extract_parameters
            extract_parameters.run(c.outputDirectory, c.runScrubbing)
        except Exception:
            fail('\n\n [!] CPAC says: Extract parameters script did '
                 'not run correctly.\n\n')

        if not c.runOnGrid:
            from CPAC.pipeline.cpac_group_analysis_pipeline import prep_group_analysis_workflow
            procss.append(Process(target=prep_group_analysis_workflow,
                                  args=(c, resource, entries)))

    # Start the workers with at most 'numGPAModelsAtOnce' alive at a time.
    # A non-positive limit is treated as 1 so the loop always progresses.
    max_jobs = max(1, c.numGPAModelsAtOnce)
    pending = list(procss)
    running = []

    with open(os.path.join(c.outputDirectory, 'pid_group.txt'), 'w') as pid:
        while pending:
            # Rebuild the running list instead of deleting while iterating.
            still_running = []
            for job in running:
                if job.is_alive():
                    still_running.append(job)
                else:
                    print('found dead job  %s' % (job,))
            running = still_running

            while pending and len(running) < max_jobs:
                job = pending.pop(0)
                job.start()
                # Flush so the PID list is usable while jobs still run.
                pid.write('%s\n' % (job.pid,))
                pid.flush()
                running.append(job)

            if pending:
                # Poll at a modest rate instead of spinning a CPU core.
                time.sleep(1)
def run(config_file, subject_list_file, output_path_file):
    """Run group-level analysis over a pipeline output directory.

    Parameters
    ----------
    config_file : str
        Path to the pipeline configuration YAML file.
    subject_list_file : str
        Path to the YAML subject list; each entry must provide a
        'subject_id' used to recognise the subject in output paths.
    output_path_file : str
        Pipeline output directory. It is walked for files ending in
        'nii.gz' whose derivative folder (two levels below this
        directory) is one of the known individual-level outputs.

    Raises
    ------
    Exception
        If the subject list or a group-model configuration cannot be read,
        no outputs are found, no models are selected, or a group model
        selects no derivatives. Most messages are also printed to stdout.

    Side effects
    ------------
    Writes the PID of every started worker to 'pid_group.txt' in
    ``c.outputDirectory``. Workers are started but not joined. Output
    paths that match no subject in the subject list are skipped.
    """
    import time
    import yaml
    from collections import defaultdict

    def fail(message):
        # Echo the message for interactive users and carry the same text
        # in the exception for programmatic callers.
        print(message)
        raise Exception(message)

    # NOTE(review): yaml.load() can construct arbitrary Python objects. It is
    # kept because these files are presumably user-authored; switch to
    # yaml.safe_load() if they can be untrusted.
    with open(os.path.realpath(config_file), 'r') as config_handle:
        c = Configuration(yaml.load(config_handle))

    # The subject list supplies the subject IDs looked for in each path.
    try:
        with open(os.path.realpath(subject_list_file), 'r') as sublist_handle:
            sublist = yaml.load(sublist_handle)
    except Exception:
        fail("Subject list is not in proper YAML format. "
             "Please check your file")

    # Derivative folder names that hold individual-level outputs.
    ind_outputs = frozenset([
        'alff_to_standard_zstd',
        'alff_to_standard_smooth_zstd',
        'falff_to_standard_zstd',
        'falff_to_standard_smooth_zstd',
        'reho_to_standard_zstd',
        'reho_to_standard_smooth_zstd',
        'sca_roi_files_to_standard_fisher_zstd',
        'sca_roi_files_to_standard_smooth_fisher_zstd',
        'sca_seed_to_standard_fisher_zstd',
        'sca_seed_to_standard_smooth_fisher_zstd',
        'sca_tempreg_maps_zstat_files_smooth',
        'vmhc_fisher_zstd',
        'vmhc_fisher_zstd_zstat_map',
        'centrality_outputs_zstd',
        'centrality_outputs_smoothed_zstd',
        'dr_tempreg_maps_files_to_standard',
        'dr_tempreg_maps_files_to_standard_smooth',
        'dr_tempreg_maps_zstat_files_to_standard',
        'dr_tempreg_maps_zstat_files_to_standard_smooth',
        'alff_to_standard',
        'alff_to_standard_smooth',
        'falff_to_standard',
        'falff_to_standard_smooth',
        'reho_to_standard',
        'reho_to_standard_smooth',
        'sca_roi_files_to_standard',
        'sca_roi_files_to_standard_smooth',
        'sca_seed_to_standard',
        'sca_seed_to_standard_smooth',
        'sca_tempreg_maps_files',
        'sca_tempreg_maps_files_smooth',
        'sca_tempreg_maps_zstat_files',
        'vmhc_raw_score',
        'centrality_outputs',
        'centrality_outputs_smoothed',
    ])

    # Number of '/'-separated components in the output directory. Trailing
    # slashes are ignored so they cannot shift the indices used below.
    trimmed_output_dir = output_path_file.rstrip('/') or output_path_file
    base_depth = len(trimmed_output_dir.split("/"))

    # Collect every matching output file; the set removes duplicates.
    unique_paths = set()
    for root, _, files in os.walk(output_path_file):
        for filename in files:
            if not filename.endswith("nii.gz"):
                continue
            fullpath = os.path.join(root, filename)
            parts = fullpath.split("/")
            # Files too shallow to have a derivative folder are ignored.
            if len(parts) > base_depth + 1 and \
                    parts[base_depth + 1] in ind_outputs:
                unique_paths.add(fullpath)

    if len(unique_paths) == 0:
        fail('[!] CPAC says: No individual-level analysis outputs were '
             'found given the path file you provided.\n\nPipeline Output '
             'Directory provided:  %s \n\nEither make '
             'sure your Output Directory path is correct, or that '
             'individual-level analysis completed successfully.\n\n'
             % (output_path_file,))

    if len(c.modelConfigs) == 0:
        fail('[!] CPAC says: You do not have any models selected '
             'to run for group-level analysis. Return to your pipeline '
             'configuration file and create or select at least one.\n\n')

    subject_paths = list(unique_paths)

    # Each group-model file is parsed once, on first use, not once per path.
    group_configs = {}

    def load_group_config(group_config_file):
        if group_config_file not in group_configs:
            try:
                with open(os.path.realpath(group_config_file), 'r') as handle:
                    loaded = Configuration(yaml.load(handle))
            except Exception:
                raise Exception(
                    "\n\nError in reading %s configuration file\n\n"
                    % group_config_file)

            if len(loaded.derivative_list) == 0:
                fail('[!] CPAC says: You do not have any derivatives selected '
                     'to run for group-level analysis. Return to your group-analysis '
                     'configuration file and select at least one.'
                     '\n'
                     'Group analysis configuration file: %s\n\n'
                     % group_config_file)

            group_configs[group_config_file] = loaded
        return group_configs[group_config_file]

    analysis_map = defaultdict(list)
    analysis_map_gp = defaultdict(list)

    print("Parsing through output paths. This may take a little while "
          "depending on how many subjects, group analysis models, or "
          "selected derivatives you have..\n")

    count = 0

    for subject_path in subject_paths:
        # Components relative to the output directory:
        # pipeline / subject(+session) / resource / scan / ...
        split_fullpath = subject_path.split("/")
        pipeline_id = split_fullpath[base_depth - 1]
        subject_unique_id = split_fullpath[base_depth]
        resource_id = split_fullpath[base_depth + 1]
        scan_id = split_fullpath[base_depth + 2]

        # Some derivatives produce one file per mask or spatial map; pick
        # the folder marker whose name is appended to the resource name.
        if ("_mask_" in subject_path) and \
                (("sca_roi" in subject_path) or
                 ("sca_tempreg" in subject_path)):
            marker = "_mask_"
        elif ("_spatial_map_" in subject_path) and \
                ("dr_tempreg" in subject_path):
            marker = "_spatial_map_"
        elif ("_mask_" in subject_path) and ("centrality" in subject_path):
            marker = "_mask_"
        else:
            marker = None

        if marker is None:
            resource_name = resource_id
        else:
            # The last path component containing the marker wins.
            for dirname in split_fullpath:
                if marker in dirname:
                    marker_name = dirname

            filename = split_fullpath[-1]
            if ".nii.gz" in filename:
                filename = filename.replace(".nii.gz", "")
            elif ".nii" in filename:
                filename = filename.replace(".nii", "")

            resource_name = resource_id + "_%s_%s" % (marker_name, filename)

        # Find the subject this path belongs to; the last match wins.
        subject_id = None
        for sub in sublist:
            if sub['subject_id'] in subject_unique_id:
                subject_id = sub['subject_id']

        # Paths for subjects that are not in the subject list are skipped.
        if not subject_id:
            continue

        entry = (pipeline_id, subject_id, scan_id, subject_path)

        for group_config_file in c.modelConfigs:
            ga_config = load_group_config(group_config_file)

            if resource_id not in ga_config.derivative_list:
                continue

            if ga_config.repeated_measures == True:
                # All scans and sessions are pooled into one model.
                key = subject_path.replace(subject_unique_id, '*')
                key = key.replace(scan_id, '*')
            else:
                # Each session keeps its own model: wildcard the subject
                # only.
                key = subject_path.replace(subject_id, '*')

            map_key = (resource_name, group_config_file, key)
            analysis_map[map_key].append(entry)
            analysis_map_gp[map_key].append(entry)

        count += 1

        if count == int(len(subject_paths) * 0.7):
            print("Almost finished parsing output paths..")

    print("Finished parsing through output paths!\n")

    def submit_to_grid(resource, entries):
        # Dispatch to the scheduler named in the configuration.
        manager = c.resourceManager.lower()
        if 'sge' in manager:
            run_sge_jobs(c, config_file, resource, entries)
        elif 'pbs' in manager:
            run_pbs_jobs(c, config_file, resource, entries)

    for (resource, _, _), entries in analysis_map.items():
        if resource != 'functional_mni':
            continue

        if 1 in c.runBASC:
            if not c.runOnGrid:
                from CPAC.pipeline.cpac_basc_pipeline import prep_basc_workflow
                prep_basc_workflow(c, entries)
            else:
                submit_to_grid(resource, entries)

        if 1 in c.runCWAS:
            if not c.runOnGrid:
                from CPAC.pipeline.cpac_cwas_pipeline import prep_cwas_workflow
                prep_cwas_workflow(c, entries)
            else:
                submit_to_grid(resource, entries)

    procss = []

    for (resource, group_model, _), entries in analysis_map_gp.items():
        # Gather the motion parameters across subjects.
        # NOTE(review): this runs once per output/model key; it looks
        # loop-invariant and could probably be hoisted - confirm first.
        print("Pulling motion parameters for all subjects..\n")

        from CPAC.utils import extract_parameters
        scrub_threshold = extract_parameters.run(c.outputDirectory,
                                                 c.runScrubbing)

        if not c.runOnGrid:
            print("Starting group analysis pipeline setup..\n")

            from CPAC.pipeline.cpac_ga_model_generator import prep_group_analysis_workflow
            procss.append(
                Process(target=prep_group_analysis_workflow,
                        args=(c, group_model, resource, entries,
                              scrub_threshold)))
        else:
            print("\n\n[!] CPAC says: Group-level analysis has not yet "
                  "been implemented to handle runs on a cluster or grid.\n\n"
                  "Please turn off 'Run CPAC On A Cluster/Grid' in order "
                  "to continue with group-level analysis. This will submit "
                  "the job to only one node, however.\n\nWe will update "
                  "users on when this feature will be available through "
                  "release note announcements.\n\n")

    # Start the workers with at most 'numGPAModelsAtOnce' alive at a time.
    # A non-positive limit is treated as 1 so the loop always progresses.
    max_jobs = max(1, c.numGPAModelsAtOnce)
    pending = list(procss)
    running = []

    with open(os.path.join(c.outputDirectory, 'pid_group.txt'), 'w') as pid:
        while pending:
            # Rebuild the running list instead of deleting while iterating.
            still_running = []
            for job in running:
                if job.is_alive():
                    still_running.append(job)
                else:
                    print('found dead job  %s' % (job,))
            running = still_running

            while pending and len(running) < max_jobs:
                job = pending.pop(0)
                job.start()
                # Flush so the PID list is usable while jobs still run.
                pid.write('%s\n' % (job.pid,))
                pid.flush()
                running.append(job)

            if pending:
                # Poll at a modest rate instead of spinning a CPU core.
                time.sleep(1)
def run(config_file, output_path_file):
    """Run group-level analysis and write a timing report.

    Reads the pipeline configuration, gathers the individual-level output
    paths listed in the path file(s), groups them per (resource, wildcarded
    path) and runs BASC/CWAS and group analysis for each group, either
    locally or through the configured SGE/PBS resource manager.  Elapsed
    times are written to
    ``group_analysis_timing_<pipelineName>_<start time>.txt`` inside
    ``c.outputDirectory``.

    Parameters
    ----------
    config_file : str
        Path to the pipeline configuration YAML file.
    output_path_file : str
        Path (glob patterns allowed) of the text file(s) listing one
        individual-level output path per line.
    """
    import yaml
    from collections import defaultdict

    # NOTE(review): yaml.load can build arbitrary Python objects; switch to
    # yaml.safe_load if configuration files may come from untrusted sources.
    with open(os.path.realpath(config_file), 'r') as config_handle:
        c = Configuration(yaml.load(config_handle))

    # A set drops duplicate paths; empty lines are skipped because they
    # cannot be split into pipeline/subject/resource/scan below.
    subject_paths = set()
    for path_file in glob.glob(os.path.abspath(output_path_file)):
        with open(path_file, 'r') as handle:
            for line in handle:
                path = line.rstrip('\r\n')
                if path:
                    subject_paths.add(path)

    base_path = c.outputDirectory

    analysis_map = defaultdict(list)
    analysis_map_gp = defaultdict(list)

    for subject_path in subject_paths:
        # Remove the base path offset so the first folder is the pipeline ID
        rs_path = subject_path.replace(base_path, "", 1).lstrip('/')
        folders = split_folders(rs_path)

        pipeline_id = folders[0]
        subject_id = folders[1]
        resource_id = folders[2]
        scan_id = folders[3]

        # Same output for every subject: subject ID replaced by a wildcard
        key = subject_path.replace(subject_id, '*')
        entry = (pipeline_id, subject_id, scan_id, subject_path)

        analysis_map[(resource_id, key)].append(entry)
        # separate map for group analysis
        analysis_map_gp[(resource_id, key)].append(entry)

    def _submit_to_grid(resource, entries):
        # Hand the job to whichever resource manager is configured.
        manager = c.resourceManager.lower()
        if 'sge' in manager:
            run_sge_jobs(c, config_file, resource, entries)
        elif 'pbs' in manager:
            run_pbs_jobs(c, config_file, resource, entries)

    gpa_starttime_string = strftime("%Y-%m-%d %H:%M:%S")
    gpa_starttime_string = gpa_starttime_string.replace(' ', '_')
    gpa_starttime_string = gpa_starttime_string.replace(':', '-')

    timing_path = os.path.join(
        c.outputDirectory,
        'group_analysis_timing_%s_%s.txt' % (c.pipelineName,
                                             gpa_starttime_string))

    # Resources whose run count and total time are summarised at the end of
    # the report, in report order.
    tracked_resources = (
        'sca_roi_Z_to_standard_smooth',
        'sca_seed_Z_to_standard_smooth',
        'sca_tempreg_maps_z_files_smooth',
        'dr_tempreg_maps_z_files_smooth',
        'vmhc_z_score_stat_map',
        'alff_Z_to_standard_smooth',
        'falff_Z_to_standard_smooth',
        'reho_Z_to_standard_smooth',
        'centrality_outputs_smoothed',
    )
    run_counts = dict.fromkeys(tracked_resources, 0)
    run_minutes = dict.fromkeys(tracked_resources, 0)

    # The context manager closes the report even when a workflow raises.
    with open(timing_path, 'wt') as timing:

        def _log(*parts):
            # One report line; parts are joined by a single space, which is
            # what the former "print >>timing, a, b" statements produced.
            timing.write(' '.join('%s' % (part,) for part in parts) + '\n')

        # Start timing here
        gpa_start_time = time.time()

        for (resource, glob_key), entries in analysis_map.items():
            if resource != 'functional_mni':
                continue

            wf_start_time = time.time()

            if 1 in c.runBASC:
                if not c.runOnGrid:
                    from CPAC.pipeline.cpac_basc_pipeline import \
                        prep_basc_workflow
                    prep_basc_workflow(c, entries)
                else:
                    _submit_to_grid(resource, entries)

            if 1 in c.runCWAS:
                if not c.runOnGrid:
                    from CPAC.pipeline.cpac_cwas_pipeline import \
                        prep_cwas_workflow
                    prep_cwas_workflow(c, entries)
                else:
                    _submit_to_grid(resource, entries)

            _log("Group analysis workflow completed for resource: ",
                 resource)
            _log("Elapsed run time (minutes): ",
                 (time.time() - wf_start_time) / 60)
            _log("")

        for (resource, glob_key), entries in analysis_map_gp.items():
            if resource not in c.derivativeList:
                continue

            wf_start_time = time.time()

            if 1 in c.runGroupAnalysis:
                # Get all the motion parameters across subjects.  This is
                # best effort: a failure is reported but does not stop the
                # group analysis.
                try:
                    from CPAC.utils import extract_parameters
                    extract_parameters.run(c.outputDirectory)
                except Exception:
                    print("Extract parameters script did not run correctly")

                if not c.runOnGrid:
                    from CPAC.pipeline.cpac_group_analysis_pipeline import \
                        prep_group_analysis_workflow
                    prep_group_analysis_workflow(c, resource, entries)
                else:
                    _submit_to_grid(resource, entries)

            # NOTE(review): this report section is placed at the same level
            # as the wf_start_time assignment (as in the loop above); confirm
            # it was not meant to sit inside "if 1 in c.runGroupAnalysis".
            elapsed = (time.time() - wf_start_time) / 60
            _log("Group analysis workflow completed for resource: ",
                 resource)
            _log("Elapsed run time (minutes): ", elapsed)
            _log("")

            if resource in run_counts:
                run_counts[resource] += 1
                run_minutes[resource] += elapsed

        _log("Entire group analysis run complete.")
        _log("Elapsed run time (minutes): ",
             (time.time() - gpa_start_time) / 60)
        _log("")

        for name in tracked_resources:
            _log(name)
            _log("Number of runs: ", run_counts[name])
            _log("Total run time (minutes): ", run_minutes[name])
            _log("")
def run(config_file, subject_list_file, output_path_file):
    """Collect individual-level outputs and launch group-level analysis.

    Walks ``output_path_file`` for derivative NIfTI files, groups them by
    (derivative name, group model config file, wildcarded path) and creates
    one ``prep_group_analysis_workflow`` process per group.  At most
    ``c.numGPAModelsAtOnce`` processes run at the same time; the pid of every
    started process is written to ``pid_group.txt`` in ``c.outputDirectory``.

    Parameters
    ----------
    config_file : str
        Path to the pipeline configuration YAML file.
    subject_list_file : str
        Path to the subject list YAML file; every entry must provide a
        'subject_id' key.
    output_path_file : str
        Directory that is walked for outputs.  Its last path component is
        taken as the pipeline ID and the levels below it are read as
        ``<subject>/<derivative>/<scan>/...``.

    Raises
    ------
    Exception
        If the subject list cannot be loaded, no derivative outputs are
        found, no group models are configured, or a group model
        configuration cannot be read or selects no derivatives.
    """
    import time
    import yaml
    from collections import defaultdict

    def _say(*parts):
        # Parts joined by a single space, like the former Python 2
        # "print a, b" statements, but valid on Python 2 and 3.
        print(' '.join('%s' % (part,) for part in parts))

    # NOTE(review): yaml.load can build arbitrary Python objects; switch to
    # yaml.safe_load if these files may come from untrusted sources.
    with open(os.path.realpath(config_file), 'r') as config_handle:
        c = Configuration(yaml.load(config_handle))

    # Load the subject list (the one from the main GUI window, not the group
    # analysis one) so that subject IDs can be matched against the paths.
    try:
        with open(os.path.realpath(subject_list_file), 'r') as sub_handle:
            sublist = yaml.load(sub_handle)
    except Exception:
        message = "Subject list is not in proper YAML format. " \
                  "Please check your file"
        print(message)
        raise Exception(message)

    # Names of the derivative folders that group analysis can be run on.
    ind_outputs = set([
        'alff_to_standard_zstd',
        'alff_to_standard_smooth_zstd',
        'falff_to_standard_zstd',
        'falff_to_standard_smooth_zstd',
        'reho_to_standard_zstd',
        'reho_to_standard_smooth_zstd',
        'sca_roi_files_to_standard_fisher_zstd',
        'sca_roi_files_to_standard_smooth_fisher_zstd',
        'sca_seed_to_standard_fisher_zstd',
        'sca_seed_to_standard_smooth_fisher_zstd',
        'sca_tempreg_maps_zstat_files_smooth',
        'vmhc_fisher_zstd',
        'vmhc_fisher_zstd_zstat_map',
        'centrality_outputs_zstd',
        'centrality_outputs_smoothed_zstd',
        'dr_tempreg_maps_files_to_standard',
        'dr_tempreg_maps_files_to_standard_smooth',
        'dr_tempreg_maps_zstat_files_to_standard',
        'dr_tempreg_maps_zstat_files_to_standard_smooth',
        'alff_to_standard',
        'alff_to_standard_smooth',
        'falff_to_standard',
        'falff_to_standard_smooth',
        'reho_to_standard',
        'reho_to_standard_smooth',
        'sca_roi_files_to_standard',
        'sca_roi_files_to_standard_smooth',
        'sca_seed_to_standard',
        'sca_seed_to_standard_smooth',
        'sca_tempreg_maps_files',
        'sca_tempreg_maps_files_smooth',
        'sca_tempreg_maps_zstat_files',
        'vmhc_raw_score',
        'centrality_outputs',
        'centrality_outputs_smoothed',
    ])

    # A trailing slash would add an empty component to the split and shift
    # every index below by one, so strip it (a bare "/" is kept as is).
    output_dir = output_path_file.rstrip('/') or output_path_file
    base_depth = len(output_dir.split("/"))

    # Collect all of the output paths; the set enforces no duplicates.
    subject_paths = set()
    for root, _dirnames, filenames in os.walk(output_dir):
        for filename in filenames:
            if not filename.endswith("nii.gz"):
                continue
            fullpath = os.path.join(root, filename)
            parts = fullpath.split("/")
            # Files too close to the output root have no derivative folder.
            if len(parts) > base_depth + 1 and \
                    parts[base_depth + 1] in ind_outputs:
                subject_paths.add(fullpath)
    subject_paths = list(subject_paths)

    if len(subject_paths) == 0:
        message = ' '.join([
            '[!] CPAC says: No individual-level analysis outputs were '
            'found given the path file you provided.\n\nPipeline Output '
            'Directory provided: ',
            '%s' % (output_path_file,),
            '\n\nEither make '
            'sure your Output Directory path is correct, or that '
            'individual-level analysis completed successfully.\n\n'])
        print(message)
        raise Exception(message)

    if len(c.modelConfigs) == 0:
        message = '[!] CPAC says: You do not have any models selected ' \
                  'to run for group-level analysis. Return to your ' \
                  'pipeline configuration file and create or select at ' \
                  'least one.\n\n'
        print(message)
        raise Exception(message)

    group_configs = {}

    def _load_group_config(group_config_file):
        # Read and validate a group model config, parsing each file only
        # once instead of once per output file.
        if group_config_file not in group_configs:
            try:
                with open(os.path.realpath(group_config_file), 'r') as handle:
                    ga_config = Configuration(yaml.load(handle))
            except Exception:
                raise Exception("\n\nError in reading %s configuration "
                                "file\n\n" % group_config_file)

            if len(ga_config.derivative_list) == 0:
                message = '[!] CPAC says: You do not have any derivatives ' \
                          'selected to run for group-level analysis. ' \
                          'Return to your group-analysis configuration ' \
                          'file and select at least one.'
                print(message)
                print('Group analysis configuration file: %s\n\n'
                      % group_config_file)
                raise Exception(message)

            group_configs[group_config_file] = ga_config
        return group_configs[group_config_file]

    def _resource_name(resource_id, subject_path, parts):
        # Append the mask / spatial-map folder name and the bare file name
        # to the resource ID for outputs that exist once per mask or map.
        if ("_mask_" in subject_path) and \
                (("sca_roi" in subject_path) or
                 ("sca_tempreg" in subject_path)):
            tag = "_mask_"
        elif ("_spatial_map_" in subject_path) and \
                ("dr_tempreg" in subject_path):
            tag = "_spatial_map_"
        elif ("_mask_" in subject_path) and ("centrality" in subject_path):
            tag = "_mask_"
        else:
            return resource_id

        # When several path components carry the tag, the last one is used.
        tagged_dir = [part for part in parts if tag in part][-1]

        filename = parts[-1]
        if ".nii.gz" in filename:
            filename = filename.replace(".nii.gz", "")
        elif ".nii" in filename:
            filename = filename.replace(".nii", "")

        return resource_id + "_%s_%s" % (tagged_dir, filename)

    analysis_map = defaultdict(list)
    analysis_map_gp = defaultdict(list)

    print("Parsing through output paths. This may take a little while "
          "depending on how many subjects, group analysis models, or "
          "selected derivatives you have..\n")

    subject_ids = [sub['subject_id'] for sub in sublist]
    count = 0

    for subject_path in subject_paths:
        # each 'subject_path' is a full filepath to one of the output files
        parts = subject_path.split("/")

        pipeline_id = parts[base_depth - 1]
        subject_unique_id = parts[base_depth]
        resource_id = parts[base_depth + 1]
        scan_id = parts[base_depth + 2]

        resource_name = _resource_name(resource_id, subject_path, parts)

        # Find the subject whose ID is contained in the unique (session)
        # folder name; when several match, the last one in the list wins.
        subject_id = None
        for candidate in subject_ids:
            if candidate in subject_unique_id:
                subject_id = candidate

        # Outputs of subjects that are not in the subject list are skipped.
        if not subject_id:
            continue

        for group_config_file in c.modelConfigs:
            ga_config = _load_group_config(group_config_file)

            if resource_id not in ga_config.derivative_list:
                continue

            if ga_config.repeated_measures == True:
                # include all of the scans and sessions in one model
                key = subject_path.replace(subject_unique_id, '*')
                key = key.replace(scan_id, '*')
            else:
                # each group of subjects from each session goes into its
                # own separate model
                key = subject_path.replace(subject_id, '*')

            map_key = (resource_name, group_config_file, key)
            entry = (pipeline_id, subject_id, scan_id, subject_path)
            analysis_map[map_key].append(entry)
            analysis_map_gp[map_key].append(entry)

        count += 1

        if count == int(len(subject_paths) * 0.7):
            print("Almost finished parsing output paths..")

    # 'analysis_map_gp' now has one key per (resource, group model,
    # wildcarded output path); each entry is a list with one tuple per
    # subject, holding the full path of that output for that subject.
    print("Finished parsing through output paths!\n")

    def _submit_to_grid(resource, entries):
        # Hand the job to whichever resource manager is configured.
        manager = c.resourceManager.lower()
        if 'sge' in manager:
            run_sge_jobs(c, config_file, resource, entries)
        elif 'pbs' in manager:
            run_pbs_jobs(c, config_file, resource, entries)

    # NOTE(review): only folders listed in 'ind_outputs' are collected above
    # and 'functional_mni' is not one of them, so this loop currently never
    # runs BASC or CWAS.
    for (resource, group_model, glob_key), entries in analysis_map.items():
        if resource != 'functional_mni':
            continue

        if 1 in c.runBASC:
            if not c.runOnGrid:
                from CPAC.pipeline.cpac_basc_pipeline import \
                    prep_basc_workflow
                prep_basc_workflow(c, entries)
            else:
                _submit_to_grid(resource, entries)

        if 1 in c.runCWAS:
            if not c.runOnGrid:
                from CPAC.pipeline.cpac_cwas_pipeline import \
                    prep_cwas_workflow
                prep_cwas_workflow(c, entries)
            else:
                _submit_to_grid(resource, entries)

    procss = []

    for (resource, group_model, glob_key), entries in \
            analysis_map_gp.items():
        # get all the motion parameters across subjects
        print("Pulling motion parameters for all subjects..\n")

        from CPAC.utils import extract_parameters
        scrub_threshold = extract_parameters.run(c.outputDirectory,
                                                 c.runScrubbing)

        if not c.runOnGrid:
            print("Starting group analysis pipeline setup..\n")

            from CPAC.pipeline.cpac_ga_model_generator import \
                prep_group_analysis_workflow
            procss.append(Process(target=prep_group_analysis_workflow,
                                  args=(c, group_model, resource, entries,
                                        scrub_threshold)))
        else:
            print("\n\n[!] CPAC says: Group-level analysis has not yet "
                  "been implemented to handle runs on a cluster or grid.\n\n"
                  "Please turn off 'Run CPAC On A Cluster/Grid' in order "
                  "to continue with group-level analysis. This will submit "
                  "the job to only one node, however.\n\nWe will update "
                  "users on when this feature will be available through "
                  "release note announcements.\n\n")

    pid_path = os.path.join(c.outputDirectory, 'pid_group.txt')

    with open(pid_path, 'w') as pid_file:

        def _start(proc):
            # Start a model process and record its pid.
            proc.start()
            pid_file.write('%s\n' % proc.pid)

        if len(procss) <= c.numGPAModelsAtOnce:
            # Few enough models to run all of them at the same time.
            for proc in procss:
                _start(proc)
        else:
            # Keep up to 'numGPAModelsAtOnce' models running; a value below
            # one is treated as one so the queue always makes progress.
            max_running = max(1, c.numGPAModelsAtOnce)

            job_queue = []
            idx = 0
            for proc in procss[:max_running]:
                _start(proc)
                job_queue.append(proc)
                idx += 1

            while idx < len(procss):
                # Iterate over a copy because finished jobs are removed
                # from the queue inside the loop.
                for job in list(job_queue):
                    if idx >= len(procss):
                        break
                    if not job.is_alive():
                        _say('found dead job ', job)
                        job_queue.remove(job)
                        _start(procss[idx])
                        job_queue.append(procss[idx])
                        idx += 1
                # Poll once per second instead of spinning at full speed.
                time.sleep(1)
def run(config_file, output_path_file):
    """Run group-level analysis for the outputs listed in the path file(s).

    Reads the pipeline configuration, gathers the individual-level output
    paths, groups them per (resource, wildcarded path) and runs BASC/CWAS
    (for 'functional_mni') and group analysis (for resources in
    ``c.derivativeList``), either locally or through the configured SGE/PBS
    resource manager.

    Parameters
    ----------
    config_file : str
        Path to the pipeline configuration YAML file.
    output_path_file : str
        Path (glob patterns allowed) of the text file(s) listing one
        individual-level output path per line.
    """
    import os
    import glob
    import yaml
    from collections import defaultdict

    # Load the config file into 'c'.
    # NOTE(review): yaml.load can build arbitrary Python objects; switch to
    # yaml.safe_load if configuration files may come from untrusted sources.
    with open(os.path.realpath(config_file), 'r') as config_handle:
        c = Configuration(yaml.load(config_handle))

    # A set drops duplicate paths; empty lines are skipped because they
    # cannot be split into pipeline/subject/resource/scan below.
    subject_paths = set()
    for path_file in glob.glob(os.path.abspath(output_path_file)):
        with open(path_file, 'r') as handle:
            for line in handle:
                path = line.rstrip('\r\n')
                if path:
                    subject_paths.add(path)

    base_path = c.outputDirectory

    analysis_map = defaultdict(list)
    analysis_map_gp = defaultdict(list)

    for subject_path in subject_paths:
        # Remove the base path offset so the first folder is the pipeline ID
        rs_path = subject_path.replace(base_path, "", 1).lstrip('/')
        folders = split_folders(rs_path)

        pipeline_id = folders[0]
        subject_id = folders[1]
        resource_id = folders[2]
        scan_id = folders[3]

        # Same output for every subject: subject ID replaced by a wildcard
        key = subject_path.replace(subject_id, '*')
        entry = (pipeline_id, subject_id, scan_id, subject_path)

        analysis_map[(resource_id, key)].append(entry)
        # separate map for group analysis
        analysis_map_gp[(resource_id, key)].append(entry)

    def _submit_to_grid(resource, entries):
        # Hand the job to whichever resource manager is configured.
        manager = c.resourceManager.lower()
        if 'sge' in manager:
            run_sge_jobs(c, config_file, resource, entries)
        elif 'pbs' in manager:
            run_pbs_jobs(c, config_file, resource, entries)

    for (resource, glob_key), entries in analysis_map.items():
        if resource != 'functional_mni':
            continue

        if 1 in c.runBASC:
            if not c.runOnGrid:
                from CPAC.pipeline.cpac_basc_pipeline import \
                    prep_basc_workflow
                prep_basc_workflow(c, entries)
            else:
                _submit_to_grid(resource, entries)

        if 1 in c.runCWAS:
            if not c.runOnGrid:
                from CPAC.pipeline.cpac_cwas_pipeline import \
                    prep_cwas_workflow
                prep_cwas_workflow(c, entries)
            else:
                _submit_to_grid(resource, entries)

    for (resource, glob_key), entries in analysis_map_gp.items():
        if resource not in c.derivativeList or 1 not in c.runGroupAnalysis:
            continue

        # Get all the motion parameters across subjects.  This is best
        # effort: a failure is reported but does not stop group analysis.
        try:
            from CPAC.utils import extract_parameters
            extract_parameters.run(c.outputDirectory)
        except Exception:
            print("Extract parameters script did not run correctly")

        if not c.runOnGrid:
            from CPAC.pipeline.cpac_group_analysis_pipeline import \
                prep_group_analysis_workflow
            prep_group_analysis_workflow(c, resource, entries)
        else:
            _submit_to_grid(resource, entries)
def run(config_file, output_path_file):
    """Run group-level analysis and write ``group_analysis_timing.txt``.

    Reads the pipeline configuration, gathers the individual-level output
    paths listed in the path file(s), groups them per (resource, wildcarded
    path) and runs BASC/CWAS and group analysis for each group, either
    locally or through the configured SGE/PBS resource manager.  Elapsed
    times are written to ``group_analysis_timing.txt`` inside
    ``c.outputDirectory``.

    Parameters
    ----------
    config_file : str
        Path to the pipeline configuration YAML file.
    output_path_file : str
        Path (glob patterns allowed) of the text file(s) listing one
        individual-level output path per line.
    """
    import os
    import glob
    import yaml
    from collections import defaultdict

    # Load the config file into 'c'.
    # NOTE(review): yaml.load can build arbitrary Python objects; switch to
    # yaml.safe_load if configuration files may come from untrusted sources.
    with open(os.path.realpath(config_file), 'r') as config_handle:
        c = Configuration(yaml.load(config_handle))

    # A set drops duplicate paths; empty lines are skipped because they
    # cannot be split into pipeline/subject/resource/scan below.
    subject_paths = set()
    for path_file in glob.glob(os.path.abspath(output_path_file)):
        with open(path_file, 'r') as handle:
            for line in handle:
                path = line.rstrip('\r\n')
                if path:
                    subject_paths.add(path)

    base_path = c.outputDirectory

    analysis_map = defaultdict(list)
    analysis_map_gp = defaultdict(list)

    for subject_path in subject_paths:
        # Remove the base path offset so the first folder is the pipeline ID
        rs_path = subject_path.replace(base_path, "", 1).lstrip('/')
        folders = split_folders(rs_path)

        pipeline_id = folders[0]
        subject_id = folders[1]
        resource_id = folders[2]
        scan_id = folders[3]

        # Same output for every subject: subject ID replaced by a wildcard
        key = subject_path.replace(subject_id, '*')
        entry = (pipeline_id, subject_id, scan_id, subject_path)

        analysis_map[(resource_id, key)].append(entry)
        # separate map for group analysis
        analysis_map_gp[(resource_id, key)].append(entry)

    def _submit_to_grid(resource, entries):
        # Hand the job to whichever resource manager is configured.
        manager = c.resourceManager.lower()
        if 'sge' in manager:
            run_sge_jobs(c, config_file, resource, entries)
        elif 'pbs' in manager:
            run_pbs_jobs(c, config_file, resource, entries)

    timing_path = os.path.join(c.outputDirectory, 'group_analysis_timing.txt')

    # Resources whose run count and total time are summarised at the end of
    # the report, in report order.
    tracked_resources = (
        'sca_roi_Z_to_standard_smooth',
        'sca_seed_Z_to_standard_smooth',
        'sca_tempreg_maps_z_files_smooth',
        'dr_tempreg_maps_z_files_smooth',
        'vmhc_z_score_stat_map',
        'alff_Z_to_standard_smooth',
        'falff_Z_to_standard_smooth',
        'reho_Z_to_standard_smooth',
        'centrality_outputs_smoothed',
    )
    run_counts = dict.fromkeys(tracked_resources, 0)
    run_minutes = dict.fromkeys(tracked_resources, 0)

    # The context manager closes the report even when a workflow raises.
    with open(timing_path, 'wt') as timing:

        def _log(*parts):
            # One report line; parts are joined by a single space, which is
            # what the former "print >>timing, a, b" statements produced.
            timing.write(' '.join('%s' % (part,) for part in parts) + '\n')

        # Start timing here
        gpa_start_time = time.time()

        for (resource, glob_key), entries in analysis_map.items():
            if resource != 'functional_mni':
                continue

            wf_start_time = time.time()

            if 1 in c.runBASC:
                if not c.runOnGrid:
                    from CPAC.pipeline.cpac_basc_pipeline import \
                        prep_basc_workflow
                    prep_basc_workflow(c, entries)
                else:
                    _submit_to_grid(resource, entries)

            if 1 in c.runCWAS:
                if not c.runOnGrid:
                    from CPAC.pipeline.cpac_cwas_pipeline import \
                        prep_cwas_workflow
                    prep_cwas_workflow(c, entries)
                else:
                    _submit_to_grid(resource, entries)

            _log("Group analysis workflow completed for resource: ",
                 resource)
            _log("Elapsed run time (minutes): ",
                 (time.time() - wf_start_time) / 60)
            _log("")

        for (resource, glob_key), entries in analysis_map_gp.items():
            if resource not in c.derivativeList:
                continue

            wf_start_time = time.time()

            if 1 in c.runGroupAnalysis:
                # Get all the motion parameters across subjects.  This is
                # best effort: a failure is reported but does not stop the
                # group analysis.
                try:
                    from CPAC.utils import extract_parameters
                    extract_parameters.run(c.outputDirectory)
                except Exception:
                    print("Extract parameters script did not run correctly")

                if not c.runOnGrid:
                    from CPAC.pipeline.cpac_group_analysis_pipeline import \
                        prep_group_analysis_workflow
                    prep_group_analysis_workflow(c, resource, entries)
                else:
                    _submit_to_grid(resource, entries)

            # NOTE(review): this report section is placed at the same level
            # as the wf_start_time assignment (as in the loop above); confirm
            # it was not meant to sit inside "if 1 in c.runGroupAnalysis".
            elapsed = (time.time() - wf_start_time) / 60
            _log("Group analysis workflow completed for resource: ",
                 resource)
            _log("Elapsed run time (minutes): ", elapsed)
            _log("")

            if resource in run_counts:
                run_counts[resource] += 1
                run_minutes[resource] += elapsed

        _log("Entire group analysis run complete.")
        _log("Elapsed run time (minutes): ",
             (time.time() - gpa_start_time) / 60)
        _log("")

        for name in tracked_resources:
            _log(name)
            _log("Number of runs: ", run_counts[name])
            _log("Total run time (minutes): ", run_minutes[name])
            _log("")
def run(config_file, output_path_file):
    """Run BASC, CWAS and group analysis over collected subject outputs.

    The subject output paths are grouped by resource and by the path with
    the subject id replaced by ``*``; each group is then either processed
    locally or submitted to a cluster scheduler, depending on the
    configuration.

    Parameters
    ----------
    config_file : str
        Path to the YAML pipeline configuration; its parsed content is
        wrapped in ``Configuration``.
    output_path_file : str
        Glob pattern (made absolute before expansion) matching text files
        that list one subject output path per line.

    Notes
    -----
    After ``c.outputDirectory`` is stripped from a path, the first four
    components returned by ``split_folders`` are read as pipeline id,
    subject id, resource id and scan id, in that order.
    """
    import os
    import glob
    from collections import defaultdict

    import yaml

    # NOTE(review): yaml.load can construct arbitrary Python objects. That is
    # only acceptable while the config file is trusted; consider
    # yaml.safe_load if it may come from an untrusted source.
    with open(os.path.realpath(config_file), 'r') as config_handle:
        c = Configuration(yaml.load(config_handle))

    # Collect the distinct subject paths from every matching list file.
    unique_paths = set()
    for path_file in glob.glob(os.path.abspath(output_path_file)):
        with open(path_file, 'r') as path_handle:
            for line in path_handle:
                subject_path = line.rstrip('\r\n')
                # Blank lines carry no path, so they are dropped instead of
                # being fed into the folder parsing below.
                if subject_path:
                    unique_paths.add(subject_path)

    base_path = c.outputDirectory

    analysis_map = defaultdict(list)
    # Separate map for group analysis. It currently receives exactly the same
    # entries as analysis_map; it is kept distinct so that the group-analysis
    # key can diverge (e.g. for mixed-scan analysis) without touching
    # BASC/CWAS.
    analysis_map_gp = defaultdict(list)

    for subject_path in unique_paths:
        # Remove the base path offset so the leading folders line up.
        rs_path = subject_path.replace(base_path, "", 1).lstrip('/')
        folders = split_folders(rs_path)

        pipeline_id = folders[0]
        subject_id = folders[1]
        resource_id = folders[2]
        scan_id = folders[3]

        # Paths that differ only by subject id share one key.
        key = subject_path.replace(subject_id, '*')
        entry = (pipeline_id, subject_id, scan_id, subject_path)

        analysis_map[(resource_id, key)].append(entry)
        analysis_map_gp[(resource_id, key)].append(entry)

    def _submit_grid_jobs(resource, subjects):
        # Hand the group to the configured scheduler; resource managers that
        # are neither SGE nor PBS are ignored, as before.
        manager = c.resourceManager.lower()
        if 'sge' in manager:
            run_sge_jobs(c, config_file, resource, subjects)
        elif 'pbs' in manager:
            run_pbs_jobs(c, config_file, resource, subjects)

    for (resource, _), subjects in analysis_map.items():
        if resource != 'functional_mni':
            continue

        if 1 in c.runBASC:
            if not c.runOnGrid:
                from CPAC.pipeline.cpac_basc_pipeline import prep_basc_workflow
                prep_basc_workflow(c, subjects)
            else:
                _submit_grid_jobs(resource, subjects)

        if 1 in c.runCWAS:
            if not c.runOnGrid:
                from CPAC.pipeline.cpac_cwas_pipeline import prep_cwas_workflow
                prep_cwas_workflow(c, subjects)
            else:
                _submit_grid_jobs(resource, subjects)

    for (resource, _), subjects in analysis_map_gp.items():
        if resource not in c.derivativeList or 1 not in c.runGroupAnalysis:
            continue

        # Get all the motion parameters across subjects. This is best-effort:
        # a failure is reported and group analysis still proceeds.
        try:
            from CPAC.utils import extract_parameters
            extract_parameters.run(c.outputDirectory)
        except Exception:
            # Single-argument call form prints the same text on Python 2.
            print("Extract parameters script did not run correctly")

        if not c.runOnGrid:
            from CPAC.pipeline.cpac_group_analysis_pipeline import prep_group_analysis_workflow
            prep_group_analysis_workflow(c, resource, subjects)
        else:
            _submit_grid_jobs(resource, subjects)