Code Example #1
def run(config_file, output_path_file):
    
    # Runs group analysis

    import os
    import glob
    import time
    import yaml
    from time import strftime
    # NOTE: Configuration, split_folders, run_sge_jobs and run_pbs_jobs are
    # assumed to be imported at module level in cpac_group_runner.py.

    # Load the config file into 'c'
    c = Configuration(yaml.load(open(os.path.realpath(config_file), 'r')))

    subject_paths = []

    for file in glob.glob(os.path.abspath(output_path_file)):
        path_list = open(file, 'r').readlines()
        subject_paths.extend([s.rstrip('\r\n') for s in path_list])


    set_subject_paths = set(subject_paths)
    subject_paths = list(set_subject_paths)
    #base_path = os.path.dirname(os.path.commonprefix(subject_paths))
    base_path = c.outputDirectory

    from collections import defaultdict
    analysis_map = defaultdict(list)
    analysis_map_gp = defaultdict(list)

    for subject_path in subject_paths:
        # Remove the base path offset

        rs_path = subject_path.replace(base_path, "", 1)

        rs_path = rs_path.lstrip('/')

        folders = split_folders(rs_path)
        
        pipeline_id = folders[0]
        subject_id = folders[1]
        resource_id = folders[2]
        scan_id = folders[3]


        #if scan_id == '_scan_rest_1_rest':

        key = subject_path.replace(subject_id, '*')
        analysis_map[(resource_id, key)].append((pipeline_id, subject_id, scan_id, subject_path))

        # separate map for group analysis
        #if c.mixedScanAnalysis == True:
        #    key = key.replace(scan_id, '*')

        analysis_map_gp[(resource_id, key)].append((pipeline_id, subject_id, scan_id, subject_path))


    gpa_start_datetime = strftime("%Y-%m-%d %H:%M:%S")
    gpa_starttime_string = gpa_start_datetime.replace(' ','_')
    gpa_starttime_string = gpa_starttime_string.replace(':','-')

    timing = open(os.path.join(c.outputDirectory, 'group_analysis_timing_%s_%s.txt' % (c.pipelineName, gpa_starttime_string)), 'wt')


    sca_roi_runs = 0
    sca_roi_time = 0
    sca_seed_runs = 0
    sca_seed_time = 0
    sca_tempreg_runs = 0
    sca_tempreg_time = 0
    dr_tempreg_runs = 0
    dr_tempreg_time = 0
    vmhc_z_runs = 0
    vmhc_z_time = 0
    alff_Z_runs = 0
    alff_Z_time = 0
    falff_Z_runs = 0
    falff_Z_time = 0
    reho_Z_runs = 0
    reho_Z_time = 0
    centrality_outputs_runs = 0
    centrality_outputs_time = 0

    # Start timing here
    gpa_start_time = time.time()


    for resource, glob_key in analysis_map.keys():
        if resource == 'functional_mni':

            wf_start_time = time.time()

            if 1 in c.runBASC:

                if not c.runOnGrid:
                    from CPAC.pipeline.cpac_basc_pipeline import prep_basc_workflow
                    prep_basc_workflow(c, analysis_map[(resource, glob_key)])
                else:
                    if 'sge' in c.resourceManager.lower():
                        run_sge_jobs(c, config_file, resource, analysis_map[(resource, glob_key)])

                    elif 'pbs' in c.resourceManager.lower():
                        run_pbs_jobs(c, config_file, resource, analysis_map[(resource, glob_key)])


            if 1 in c.runCWAS:

                if not c.runOnGrid:

                    from CPAC.pipeline.cpac_cwas_pipeline import prep_cwas_workflow
                    prep_cwas_workflow(c, analysis_map[(resource, glob_key)])

                else:
                    if 'sge' in c.resourceManager.lower():
                        run_sge_jobs(c, config_file, resource, analysis_map[(resource, glob_key)])

                    elif 'pbs' in c.resourceManager.lower():
                        run_pbs_jobs(c, config_file, resource, analysis_map[(resource, glob_key)])

            print >>timing, "Group analysis workflow completed for resource: ", resource
            print >>timing, "Elapsed run time (minutes): ", ((time.time() - wf_start_time)/60)
            print >>timing, ""



    
    for resource, glob_key in analysis_map_gp.keys():

        if resource in c.derivativeList:

            wf_start_time = time.time()


            if 1 in c.runGroupAnalysis:
              
                #get all the motion parameters across subjects

                try:

                    from CPAC.utils import extract_parameters
                    extract_parameters.run(c.outputDirectory)

                except Exception:

                    print "Extract parameters script did not run correctly"

                
                if not c.runOnGrid:

                    from CPAC.pipeline.cpac_group_analysis_pipeline import prep_group_analysis_workflow
                    
                    #procss = Process(target=prep_group_analysis_workflow, args=(c, resource, analysis_map_gp[(resource, glob_key)]))
                    
                    #print c, "   ", resource, "   ", analysis_map_gp[(resource, glob_key)], "   ", glob_key
                    prep_group_analysis_workflow(c, resource, analysis_map_gp[(resource, glob_key)])
                    
                

                if c.runOnGrid:

                    if 'sge' in c.resourceManager.lower():
                        
                        run_sge_jobs(c, config_file, resource, analysis_map_gp[(resource, glob_key)])
                       
                    elif 'pbs' in c.resourceManager.lower():
                     
                        run_pbs_jobs(c, config_file, resource, analysis_map_gp[(resource, glob_key)])


                print >>timing, "Group analysis workflow completed for resource: ", resource
                print >>timing, "Elapsed run time (minutes): ", ((time.time() - wf_start_time)/60)
                print >>timing, ""
        
                # This can be implemented more sleekly using a dictionary, have to do this at some point
                if resource == 'sca_roi_Z_to_standard_smooth':
                    sca_roi_runs += 1
                    sca_roi_time = sca_roi_time + ((time.time() - wf_start_time)/60)
                elif resource == 'sca_seed_Z_to_standard_smooth':
                    sca_seed_runs += 1
                    sca_seed_time = sca_seed_time + ((time.time() - wf_start_time)/60)
                elif resource == 'sca_tempreg_maps_z_files_smooth':
                    sca_tempreg_runs += 1
                    sca_tempreg_time = sca_tempreg_time + ((time.time() - wf_start_time)/60)
                elif resource == 'dr_tempreg_maps_z_files_smooth':
                    dr_tempreg_runs += 1
                    dr_tempreg_time = dr_tempreg_time + ((time.time() - wf_start_time)/60)
                elif resource == 'vmhc_z_score_stat_map':
                    vmhc_z_runs += 1
                    vmhc_z_time = vmhc_z_time + ((time.time() - wf_start_time)/60)
                elif resource == 'alff_Z_to_standard_smooth':
                    alff_Z_runs += 1
                    alff_Z_time = alff_Z_time + ((time.time() - wf_start_time)/60)
                elif resource == 'falff_Z_to_standard_smooth':
                    falff_Z_runs += 1
                    falff_Z_time = falff_Z_time + ((time.time() - wf_start_time)/60)
                elif resource == 'reho_Z_to_standard_smooth':
                    reho_Z_runs += 1
                    reho_Z_time = reho_Z_time + ((time.time() - wf_start_time)/60)
                elif resource == 'centrality_outputs_smoothed':
                    centrality_outputs_runs += 1
                    centrality_outputs_time = centrality_outputs_time + ((time.time() - wf_start_time)/60)
    '''

            
    procss = []
        
    for resource, glob_key in analysis_map_gp.keys():
        
        if resource in c.derivativeList:
            
            if 1 in c.runGroupAnalysis:
            
                #get all the motion parameters across subjects
                try:

                    from CPAC.utils import extract_parameters
                    extract_parameters.run(c.outputDirectory)

                except Exception:
                    print "Extract parameters script did not run correctly"

                if not c.runOnGrid:
                    
                    from CPAC.pipeline.cpac_group_analysis_pipeline import prep_group_analysis_workflow
                    procss.append(Process(target=prep_group_analysis_workflow, args=(c, resource, analysis_map_gp[(resource, glob_key)])))
          
          
          
    pid = open(os.path.join(c.outputDirectory, 'pid_group.txt'), 'w')
                        
    jobQueue = []
    if len(c.derivativeList) <= c.numSubjectsAtOnce:
        """
        Stream all the subjects as sublist is
        less than or equal to the number of 
        subjects that need to run
        """
        for p in procss:
            p.start()
            print >>pid,p.pid
                
    else:
        """
        Stream the subject workflows for preprocessing.
        At Any time in the pipeline c.numSubjectsAtOnce
        will run, unless the number remaining is less than
        the value of the parameter stated above
        """
        idx = 0
        while(idx < len(c.derivativeList)):
                
            if len(jobQueue) == 0 and idx == 0:
                
                idc = idx
                    
                for p in procss[idc: idc + c.numSubjectsAtOnce]:
                
                    p.start()
                    print >>pid,p.pid
                    jobQueue.append(p)
                    idx += 1
                
            else:
                
                for job in jobQueue:
                
                    if not job.is_alive():
                        print 'found dead job ', job
                        loc = jobQueue.index(job)
                        del jobQueue[loc]
                        procss[idx].start()
                
                        jobQueue.append(procss[idx])
                        idx += 1
                
    pid.close()
    '''
            
            
            
    print >>timing, "Entire group analysis run complete."
    print >>timing, "Elapsed run time (minutes): ", ((time.time() - gpa_start_time)/60)
    print >>timing, ""

    print >>timing, "sca_roi_Z_to_standard_smooth"
    print >>timing, "Number of runs: ", sca_roi_runs
    print >>timing, "Total run time (minutes): ", sca_roi_time
    print >>timing, ""

    print >>timing, "sca_seed_Z_to_standard_smooth"
    print >>timing, "Number of runs: ", sca_seed_runs
    print >>timing, "Total run time (minutes): ", sca_seed_time
    print >>timing, ""

    print >>timing, "sca_tempreg_maps_z_files_smooth"
    print >>timing, "Number of runs: ", sca_tempreg_runs
    print >>timing, "Total run time (minutes): ", sca_tempreg_time
    print >>timing, ""

    print >>timing, "dr_tempreg_maps_z_files_smooth"
    print >>timing, "Number of runs: ", dr_tempreg_runs
    print >>timing, "Total run time (minutes): ", dr_tempreg_time
    print >>timing, ""

    print >>timing, "vmhc_z_score_stat_map"
    print >>timing, "Number of runs: ", vmhc_z_runs
    print >>timing, "Total run time (minutes): ", vmhc_z_time
    print >>timing, ""

    print >>timing, "alff_Z_to_standard_smooth"
    print >>timing, "Number of runs: ", alff_Z_runs
    print >>timing, "Total run time (minutes): ", alff_Z_time
    print >>timing, ""

    print >>timing, "falff_Z_to_standard_smooth"
    print >>timing, "Number of runs: ", falff_Z_runs
    print >>timing, "Total run time (minutes): ", falff_Z_time
    print >>timing, ""

    print >>timing, "reho_Z_to_standard_smooth"
    print >>timing, "Number of runs: ", reho_Z_runs
    print >>timing, "Total run time (minutes): ", reho_Z_time
    print >>timing, ""

    print >>timing, "centrality_outputs_smoothed"
    print >>timing, "Number of runs: ", centrality_outputs_runs
    print >>timing, "Total run time (minutes): ", centrality_outputs_time
    print >>timing, ""



    timing.close()
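
A quick sketch of the grouping idea at the heart of this example: each output path gets its subject ID replaced with a wildcard (*), so the same derivative from different subjects collapses onto a single analysis_map key. The helper name and paths below are made up for illustration:

from collections import defaultdict

def group_by_wildcard_key(subject_paths):
    # Mirrors the analysis_map construction above: key on
    # (resource_id, path-with-the-subject-ID-wildcarded).
    analysis_map = defaultdict(list)
    for path in subject_paths:
        pipeline_id, subject_id, resource_id, scan_id = \
            path.strip('/').split('/')[:4]
        key = path.replace(subject_id, '*')
        analysis_map[(resource_id, key)].append(
            (pipeline_id, subject_id, scan_id, path))
    return analysis_map

# Two hypothetical subjects' ReHo outputs collapse onto one key:
paths = ['/pipe_A/sub001/reho_Z_to_standard_smooth/scan_1/reho.nii.gz',
         '/pipe_A/sub002/reho_Z_to_standard_smooth/scan_1/reho.nii.gz']
assert len(group_by_wildcard_key(paths)) == 1
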
Code Example #2
def run(config_file, subject_list_file, output_path_file):
    
    # Runs group analysis

    import os
    import glob
    import yaml
    from multiprocessing import Process
    # NOTE: Configuration, split_folders, run_sge_jobs and run_pbs_jobs are
    # assumed to be imported at module level in cpac_group_runner.py.

    # Load the config file into 'c'
    c = Configuration(yaml.load(open(os.path.realpath(config_file), 'r')))


    # load the subject list (in the main GUI window, not the group analysis
    # one), and parse the yaml so that the subIDs and session IDs can be
    # accessed for below
    try:
        sublist = yaml.load(open(os.path.realpath(subject_list_file), 'r'))
    except:
        print "Subject list is not in proper YAML format. Please check your file"
        raise Exception


    subject_paths = []


    # 'output_path_file' is the wildcard-filled path to the 'Derivative Path
    # File' provided in the dialog box when group analysis is first run
    for file in glob.glob(os.path.abspath(output_path_file)):
        path_list = open(file, 'r').readlines()
        subject_paths.extend([s.rstrip('\r\n') for s in path_list])


    if len(subject_paths) == 0:
        print '[!] CPAC says: No individual-level analysis outputs were ' \
              'found given the path file you provided.\n\nDerivative ' \
              'Path File provided: ', output_path_file, '\n\nEither make ' \
              'sure your Derivative Path File is correctly formatted, or ' \
              'that individual-level analysis completed successfully and ' \
              'generated the \'path_files_here\' folder found in the ' \
              'output directory, then try again.\n\n'
        raise Exception


    if len(c.derivativeList) == 0:
        print '[!] CPAC says: You do not have any derivatives selected ' \
              'to run for group-level analysis. Return to your pipeline ' \
              'configuration file and select at least one.\n\n'
        raise Exception


    if len(c.modelConfigs) == 0:
        print '[!] CPAC says: You do not have any models selected ' \
              'to run for group-level analysis. Return to your pipeline ' \
              'configuration file and create or select at least one.\n\n'
        raise Exception



    # 'subject_paths' is a list of every output from every subject included
    # in the output folder of the run

    # converts the subject_paths list into a set to enforce no duplicates
    set_subject_paths = set(subject_paths)

    # converts the set back into a list
    subject_paths = list(set_subject_paths)


    #base_path = os.path.dirname(os.path.commonprefix(subject_paths))
    base_path = c.outputDirectory


    from collections import defaultdict
    analysis_map = defaultdict(list)
    analysis_map_gp = defaultdict(list)


    for subject_path in subject_paths:

        # each 'subject_path' is a full filepath to one of the output files

        # Remove the base path offset
        rs_path = subject_path.replace(base_path, "", 1)
        rs_path = rs_path.lstrip('/')

        # rs_path is now the path to the output file, except everything before
        # the pipeline folder (named with the pipeline ID) is stripped from
        # the path

        folders = split_folders(rs_path)
 
        pipeline_id = folders[0]
        subject_unique_id = folders[1]
        resource_id = folders[2]
        scan_id = folders[3]


        # get list of all unique IDs (session IDs)
        # loop through them and check subject_path for existence of any of the
        # session IDs
        # if it exists, load it into unique_id

        # init subject_id to None so a value left over from a previous
        # loop iteration is never reused when no match is found
        subject_id = None
        for sub in sublist:
            if sub['subject_id'] in subject_unique_id:
                subject_id = sub['subject_id']
              

        # include all of the scans and sessions in one model if True
        if c.repeatedMeasures == True:
            key = subject_path.replace(subject_unique_id, '*')
            key = key.replace(scan_id, '*')
        else:
            # each group of subjects from each session goes into its own
            # separate model, instead of combining all sessions into one
            try:
                key = subject_path.replace(subject_id, '*')
            except:
                # this fires if 'subject_id' was never given a value basically
                print '\n\n[!] CPAC says: The derivative path file you ' \
                      'provided does not contain the output directory ' \
                      'given in the pipeline configuration file.\n'
                print 'Derivative path file: ', output_path_file, '\n'
                print 'Output directory: ', c.outputDirectory, '\n'
                print 'Please correct this and try again.\n\n\n'
                raise Exception


        # 'resource_id' is each type of output
        # 'key' is a path to each and every individual output file,
        # except with the subject ID replaced with a wildcard (*)

        if resource_id in c.derivativeList:

            analysis_map[(resource_id, key)].append((pipeline_id, subject_id, scan_id, subject_path))

            analysis_map_gp[(resource_id, key)].append((pipeline_id, subject_id, scan_id, subject_path))


        # with this loop, 'analysis_map_gp' is a dictionary with a key for
        # each individual output file - and each entry is a list of tuples,
        # one tuple for each subject in the subject list, containing
        # 'subject_path', which is a full path to that output file for that
        # one particular subject



    for resource, glob_key in analysis_map.keys():
        if resource == 'functional_mni':


            if 1 in c.runBASC:

                if not c.runOnGrid:
                    from CPAC.pipeline.cpac_basc_pipeline import prep_basc_workflow
                    prep_basc_workflow(c, analysis_map[(resource, glob_key)])
                else:
                    if 'sge' in c.resourceManager.lower():
                        run_sge_jobs(c, config_file, resource, analysis_map[(resource, glob_key)])

                    elif 'pbs' in c.resourceManager.lower():
                        run_pbs_jobs(c, config_file, resource, analysis_map[(resource, glob_key)])


            if 1 in c.runCWAS:

                if not c.runOnGrid:

                    from CPAC.pipeline.cpac_cwas_pipeline import prep_cwas_workflow
                    prep_cwas_workflow(c, analysis_map[(resource, glob_key)])

                else:
                    if 'sge' in c.resourceManager.lower():
                        run_sge_jobs(c, config_file, resource, analysis_map[(resource, glob_key)])

                    elif 'pbs' in c.resourceManager.lower():
                        run_pbs_jobs(c, config_file, resource, analysis_map[(resource, glob_key)])



    procss = []
    

    for resource, glob_key in analysis_map_gp.keys():

        # 'resource' is each type of output
        # 'glob_key' is a path to each and every individual output file,
        # except with the subject ID replaced with a wildcard (*)
        
        if resource in c.derivativeList:
                 
            #get all the motion parameters across subjects
            try:

                from CPAC.utils import extract_parameters
                extract_parameters.run(c.outputDirectory, c.runScrubbing)

            except:
                print '\n\n [!] CPAC says: Extract parameters script did ' \
                      'not run correctly.\n\n'
                raise Exception

            if not c.runOnGrid:
                    
                from CPAC.pipeline.cpac_group_analysis_pipeline import prep_group_analysis_workflow
                procss.append(Process(target=prep_group_analysis_workflow, args=(c, resource, analysis_map_gp[(resource, glob_key)])))

       
          
    
    
          
    pid = open(os.path.join(c.outputDirectory, 'pid_group.txt'), 'w')
                        
    jobQueue = []
    if len(procss) <= c.numGPAModelsAtOnce:
        """
        Stream all the subjects as sublist is
        less than or equal to the number of 
        subjects that need to run
        """
        for p in procss:
            p.start()
            print >>pid,p.pid
                
    else:
        """
        Stream the subject workflows for preprocessing.
        At Any time in the pipeline c.numSubjectsAtOnce
        will run, unless the number remaining is less than
        the value of the parameter stated above
        """
        idx = 0
        while(idx < len(procss)):
                
            if len(jobQueue) == 0 and idx == 0:
                
                idc = idx
                    
                for p in procss[idc: idc + c.numGPAModelsAtOnce]:
                
                    p.start()
                    print >>pid,p.pid
                    jobQueue.append(p)
                    idx += 1
                
            else:
                
                # iterate over a copy, since finished jobs are removed
                # from jobQueue inside the loop
                for job in jobQueue[:]:

                    if not job.is_alive():
                        print 'found dead job ', job
                        loc = jobQueue.index(job)
                        del jobQueue[loc]
                        # guard against indexing past the end of procss
                        # when several jobs finish in the same pass
                        if idx < len(procss):
                            procss[idx].start()
                            jobQueue.append(procss[idx])
                            idx += 1
                
    pid.close()
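
The tail of this example is a hand-rolled bounded process pool: start up to c.numGPAModelsAtOnce model processes, then start another each time a running one dies. A stripped-down sketch of the same pattern, with a polling sleep added (the original loop busy-waits); run_throttled and _work are hypothetical names:

import time
from multiprocessing import Process

def run_throttled(procss, max_at_once):
    # Start the first max_at_once processes, then top the running
    # set back up whenever one of them finishes.
    job_queue = []
    idx = 0
    while idx < len(procss) or job_queue:
        job_queue = [j for j in job_queue if j.is_alive()]
        while idx < len(procss) and len(job_queue) < max_at_once:
            procss[idx].start()
            job_queue.append(procss[idx])
            idx += 1
        time.sleep(1)  # poll instead of busy-waiting

def _work():
    time.sleep(0.2)

if __name__ == '__main__':
    run_throttled([Process(target=_work) for _ in range(8)], 3)
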
Code Example #3
def run(config_file, subject_list_file, output_path_file):

    # Runs group analysis

    import os
    import yaml
    from multiprocessing import Process
    # NOTE: Configuration, run_sge_jobs and run_pbs_jobs are assumed to be
    # imported at module level in cpac_group_runner.py.

    # Load the config file into 'c'
    c = Configuration(yaml.load(open(os.path.realpath(config_file), 'r')))

    # load the subject list (in the main GUI window, not the group analysis
    # one), and parse the yaml so that the subIDs and session IDs can be
    # accessed for below
    try:
        sublist = yaml.load(open(os.path.realpath(subject_list_file), 'r'))
    except:
        print "Subject list is not in proper YAML format. Please check your file"
        raise Exception

    subject_paths = []

    # 'output_path_file' is the wildcard-filled path to the 'Derivative Path
    # File' provided in the dialog box when group analysis is first run
    #for file in glob.glob(os.path.abspath(output_path_file)):
    #    path_list = open(file, 'r').readlines()
    #    subject_paths.extend([s.rstrip('\r\n') for s in path_list])

    ind_outputs = [
        'alff_to_standard_zstd', 'alff_to_standard_smooth_zstd',
        'falff_to_standard_zstd', 'falff_to_standard_smooth_zstd',
        'reho_to_standard_zstd', 'reho_to_standard_smooth_zstd',
        'sca_roi_files_to_standard_fisher_zstd',
        'sca_roi_files_to_standard_smooth_fisher_zstd',
        'sca_seed_to_standard_fisher_zstd',
        'sca_seed_to_standard_smooth_fisher_zstd',
        'sca_tempreg_maps_zstat_files_smooth', 'vmhc_fisher_zstd',
        'vmhc_fisher_zstd_zstat_map', 'centrality_outputs_zstd',
        'centrality_outputs_smoothed_zstd',
        'dr_tempreg_maps_files_to_standard',
        'dr_tempreg_maps_files_to_standard_smooth',
        'dr_tempreg_maps_zstat_files_to_standard',
        'dr_tempreg_maps_zstat_files_to_standard_smooth', 'alff_to_standard',
        'alff_to_standard_smooth', 'falff_to_standard',
        'falff_to_standard_smooth', 'reho_to_standard',
        'reho_to_standard_smooth', 'sca_roi_files_to_standard',
        'sca_roi_files_to_standard_smooth', 'sca_seed_to_standard',
        'sca_seed_to_standard_smooth', 'sca_tempreg_maps_files',
        'sca_tempreg_maps_files_smooth', 'sca_tempreg_maps_zstat_files',
        'sca_tempreg_maps_zstat_files_smooth', 'vmhc_raw_score',
        'centrality_outputs', 'centrality_outputs_smoothed',
        'dr_tempreg_maps_files_to_standard',
        'dr_tempreg_maps_files_to_standard_smooth',
        'dr_tempreg_maps_zstat_files_to_standard',
        'dr_tempreg_maps_zstat_files_to_standard_smooth'
    ]

    # collect all of the output paths

    for root, folders, files in os.walk(output_path_file):

        split_output_dir_path = output_path_file.split("/")

        for filename in files:

            if filename.endswith("nii.gz"):

                fullpath = os.path.join(root, filename)

                split_fullpath = fullpath.split("/")

                #subID = split_fullpath[len(split_output_dir_path)]
                deriv_folder_name = split_fullpath[len(split_output_dir_path) +
                                                   1]

                #second_half_filepath = fullpath.split(subID)

                for output_name in ind_outputs:

                    if output_name == deriv_folder_name:

                        subject_paths.append(fullpath)

    if len(subject_paths) == 0:
        print '[!] CPAC says: No individual-level analysis outputs were ' \
              'found given the path file you provided.\n\nPipeline Output ' \
              'Directory provided: ', output_path_file, '\n\nEither make ' \
              'sure your Output Directory path is correct, or that ' \
              'individual-level analysis completed successfully.\n\n'
        raise Exception

    if len(c.modelConfigs) == 0:
        print '[!] CPAC says: You do not have any models selected ' \
              'to run for group-level analysis. Return to your pipeline ' \
              'configuration file and create or select at least one.\n\n'
        raise Exception

    # 'subject_paths' is a list of every output from every subject included
    # in the output folder of the run

    # converts the subject_paths list into a set to enforce no duplicates
    set_subject_paths = set(subject_paths)

    # converts the set back into a list
    subject_paths = list(set_subject_paths)

    #base_path = os.path.dirname(os.path.commonprefix(subject_paths))
    base_path = c.outputDirectory

    from collections import defaultdict
    analysis_map = defaultdict(list)
    analysis_map_gp = defaultdict(list)


    print "Parsing through output paths. This may take a little while " \
          "depending on how many subjects, group analysis models, or " \
          "selected derivatives you have..\n"

    count = 0

    for subject_path in subject_paths:

        # each 'subject_path' is a full filepath to one of the output files

        # Remove the base path offset
        #rs_path = subject_path.replace(base_path, "", 1)
        #rs_path = rs_path.lstrip('/')

        # rs_path is now the path to the output file, except everything before
        # the pipeline folder (named with the pipeline ID) is stripped from
        # the path

        #folders = split_folders(rs_path)

        #pipeline_id = folders[0]
        #subject_unique_id = folders[1]
        #resource_id = folders[2]
        #scan_id = folders[3]

        split_output_dir_path = output_path_file.split("/")
        split_fullpath = subject_path.split("/")

        pipeline_id = split_fullpath[len(split_output_dir_path) - 1]
        subject_unique_id = split_fullpath[len(split_output_dir_path)]
        resource_id = split_fullpath[len(split_output_dir_path) + 1]
        scan_id = split_fullpath[len(split_output_dir_path) + 2]

        # add auxiliary stuff to resource_id if applicable

        if ("_mask_" in subject_path) and (("sca_roi" in subject_path) or \
            ("sca_tempreg" in subject_path)):

            for dirname in split_fullpath:
                if "_mask_" in dirname:
                    maskname = dirname

            filename = split_fullpath[-1]

            if ".nii.gz" in filename:
                filename = filename.replace(".nii.gz", "")
            elif ".nii" in filename:
                filename = filename.replace(".nii", "")

            resource_name = resource_id + "_%s_%s" % (maskname, filename)


        elif ("_spatial_map_" in subject_path) and \
            ("dr_tempreg" in subject_path):

            for dirname in split_fullpath:
                if "_spatial_map_" in dirname:
                    mapname = dirname

            filename = split_fullpath[-1]

            if ".nii.gz" in filename:
                filename = filename.replace(".nii.gz", "")
            elif ".nii" in filename:
                filename = filename.replace(".nii", "")

            resource_name = resource_id + "_%s_%s" % (mapname, filename)

        elif ("_mask_" in subject_path) and ("centrality" in subject_path):

            for dirname in split_fullpath:
                if "_mask_" in dirname:
                    maskname = dirname

            filename = split_fullpath[-1]

            if ".nii.gz" in filename:
                filename = filename.replace(".nii.gz", "")
            elif ".nii" in filename:
                filename = filename.replace(".nii", "")

            resource_name = resource_id + "_%s_%s" % (maskname, filename)

        else:

            resource_name = resource_id

        # get list of all unique IDs (session IDs)
        # loop through them and check subject_path for existence of any of the
        # session IDs
        # if it exists, load it into unique_id

        # init subject_id to None
        subject_id = None
        for sub in sublist:
            if sub['subject_id'] in subject_unique_id:
                subject_id = sub['subject_id']

        # If subject_id never gets set for this specific subject, move on to next subject
        if not subject_id:
            continue

        # 'resource_id' is each type of output
        # 'key' is a path to each and every individual output file,
        # except with the subject ID replaced with a wildcard (*)

        # loop here to replace the one below it:
        #     go through model configs, make a list of all ders included
        #     enumerate list of selected derivatives and the models they are in
        #     like: (resource_id, group_model, key)
        for group_config_file in c.modelConfigs:

            try:
                ga_config = Configuration(
                    yaml.load(open(os.path.realpath(group_config_file), 'r')))
            except:
                raise Exception(
                    "\n\nError in reading %s configuration file\n\n" %
                    group_config_file)

            if len(ga_config.derivative_list) == 0:
                print '[!] CPAC says: You do not have any derivatives selected ' \
                      'to run for group-level analysis. Return to your group-analysis ' \
                      'configuration file and select at least one.'
                print 'Group analysis configuration file: %s\n\n' % group_config_file
                raise Exception

            if resource_id in ga_config.derivative_list:

                # include all of the scans and sessions in one model if True
                if ga_config.repeated_measures == True:
                    key = subject_path.replace(subject_unique_id, '*')
                    key = key.replace(scan_id, '*')
                else:
                    # each group of subjects from each session goes into
                    # its own separate model, instead of combining all
                    # sessions into one
                    try:
                        key = subject_path.replace(subject_id, '*')
                    except:
                        # this fires if 'subject_id' was never given a value basically
                        print '\n\n[!] CPAC says: Either the derivative path file ' \
                              'you provided does not contain the output directory ' \
                              'given in the pipeline configuration file.\n'
                        print 'Derivative path file: ', output_path_file, '\n'
                        print 'Output directory: ', c.outputDirectory, '\n'
                        print '- OR -\n'
                        print 'Your subject list does not contain all of the ' \
                              'subjects you wish to run group-level analysis on.\n'
                        print 'Please correct this and try again.\n\n\n'
                        raise Exception

                analysis_map[(resource_name, group_config_file, key)].append(
                    (pipeline_id, subject_id, scan_id, subject_path))

                analysis_map_gp[(resource_name, group_config_file,
                                 key)].append((pipeline_id, subject_id,
                                               scan_id, subject_path))

        count += 1

        if count == int(len(subject_paths) * 0.7):
            print "Almost finished parsing output paths.."

        # with this loop, 'analysis_map_gp' is a dictionary with a key for
        # each individual output file - and each entry is a list of tuples,
        # one tuple for each subject in the subject list, containing
        # 'subject_path', which is a full path to that output file for that
        # one particular subject

    print "Finished parsing through output paths!\n"

    for resource, group_model, glob_key in analysis_map.keys():
        if resource == 'functional_mni':

            if 1 in c.runBASC:

                if not c.runOnGrid:
                    from CPAC.pipeline.cpac_basc_pipeline import prep_basc_workflow
                    prep_basc_workflow(
                        c, analysis_map[(resource, group_model, glob_key)])
                else:
                    if 'sge' in c.resourceManager.lower():
                        run_sge_jobs(
                            c, config_file, resource,
                            analysis_map[(resource, group_model, glob_key)])

                    elif 'pbs' in c.resourceManager.lower():
                        run_pbs_jobs(
                            c, config_file, resource,
                            analysis_map[(resource, group_model, glob_key)])

            if 1 in c.runCWAS:

                if not c.runOnGrid:

                    from CPAC.pipeline.cpac_cwas_pipeline import prep_cwas_workflow
                    prep_cwas_workflow(
                        c, analysis_map[(resource, group_model, glob_key)])

                else:
                    if 'sge' in c.resourceManager.lower():
                        run_sge_jobs(
                            c, config_file, resource,
                            analysis_map[(resource, group_model, glob_key)])

                    elif 'pbs' in c.resourceManager.lower():
                        run_pbs_jobs(
                            c, config_file, resource,
                            analysis_map[(resource, group_model, glob_key)])

    procss = []

    for resource, group_model, glob_key in analysis_map_gp.keys():

        # 'resource' is each type of output
        # 'glob_key' is a path to each and every individual output file,
        # except with the subject ID replaced with a wildcard (*)

        #get all the motion parameters across subjects

        print "Pulling motion parameters for all subjects..\n"

        from CPAC.utils import extract_parameters
        scrub_threshold = extract_parameters.run(c.outputDirectory,
                                                 c.runScrubbing)

        if not c.runOnGrid:

            print "Starting group analysis pipeline setup..\n"

            from CPAC.pipeline.cpac_ga_model_generator import prep_group_analysis_workflow
            procss.append(
                Process(target=prep_group_analysis_workflow,
                        args=(c, group_model, resource,
                              analysis_map_gp[(resource, group_model,
                                               glob_key)], scrub_threshold)))

        else:

            print "\n\n[!] CPAC says: Group-level analysis has not yet " \
                  "been implemented to handle runs on a cluster or grid.\n\n"\
                  "Please turn off 'Run CPAC On A Cluster/Grid' in order " \
                  "to continue with group-level analysis. This will submit " \
                  "the job to only one node, however.\n\nWe will update " \
                  "users on when this feature will be available through " \
                  "release note announcements.\n\n"

    pid = open(os.path.join(c.outputDirectory, 'pid_group.txt'), 'w')

    jobQueue = []
    if len(procss) <= c.numGPAModelsAtOnce:
        """
        Stream all the subjects as sublist is
        less than or equal to the number of 
        subjects that need to run
        """
        for p in procss:
            p.start()
            print >> pid, p.pid

    else:
        """
        Stream the subject workflows for preprocessing.
        At Any time in the pipeline c.numSubjectsAtOnce
        will run, unless the number remaining is less than
        the value of the parameter stated above
        """
        idx = 0
        while (idx < len(procss)):

            if len(jobQueue) == 0 and idx == 0:

                idc = idx

                for p in procss[idc:idc + c.numGPAModelsAtOnce]:

                    p.start()
                    print >> pid, p.pid
                    jobQueue.append(p)
                    idx += 1

            else:

                # iterate over a copy, since finished jobs are removed
                # from jobQueue inside the loop
                for job in jobQueue[:]:

                    if not job.is_alive():
                        print 'found dead job ', job
                        loc = jobQueue.index(job)
                        del jobQueue[loc]
                        # guard against indexing past the end of procss
                        # when several jobs finish in the same pass
                        if idx < len(procss):
                            procss[idx].start()
                            jobQueue.append(procss[idx])
                            idx += 1

    pid.close()
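
Instead of reading a derivative path file, this example walks the output directory and recovers the pipeline, subject, derivative, and scan components purely from each path's position relative to the directory handed to os.walk. A small sketch of that positional indexing; parse_output_path and the paths are hypothetical:

def parse_output_path(output_dir, fullpath):
    # Mirrors the split_fullpath indexing above: the last component of
    # the walked directory is the pipeline folder, and the subject,
    # derivative (resource) and scan folders follow in order.
    n = len(output_dir.split('/'))
    parts = fullpath.split('/')
    return {'pipeline_id': parts[n - 1],
            'subject_unique_id': parts[n],
            'resource_id': parts[n + 1],
            'scan_id': parts[n + 2]}

info = parse_output_path(
    '/data/out/pipeline_A',
    '/data/out/pipeline_A/sub001_session_1/reho_to_standard/'
    '_scan_rest_1/reho.nii.gz')
assert info['pipeline_id'] == 'pipeline_A'
assert info['resource_id'] == 'reho_to_standard'
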
Code Example #4
File: cpac_group_runner.py  Project: krsna6/C-PAC
def run(config_file, output_path_file):

    # Runs group analysis

    import os
    import glob
    import time
    import yaml
    from time import strftime
    # NOTE: Configuration, split_folders, run_sge_jobs and run_pbs_jobs are
    # assumed to be imported at module level in cpac_group_runner.py.

    # Load the config file into 'c'
    c = Configuration(yaml.load(open(os.path.realpath(config_file), 'r')))

    subject_paths = []

    for file in glob.glob(os.path.abspath(output_path_file)):
        path_list = open(file, 'r').readlines()
        subject_paths.extend([s.rstrip('\r\n') for s in path_list])

    set_subject_paths = set(subject_paths)
    subject_paths = list(set_subject_paths)
    #base_path = os.path.dirname(os.path.commonprefix(subject_paths))
    base_path = c.outputDirectory

    from collections import defaultdict
    analysis_map = defaultdict(list)
    analysis_map_gp = defaultdict(list)

    for subject_path in subject_paths:
        # Remove the base path offset

        rs_path = subject_path.replace(base_path, "", 1)

        rs_path = rs_path.lstrip('/')

        folders = split_folders(rs_path)

        pipeline_id = folders[0]
        subject_id = folders[1]
        resource_id = folders[2]
        scan_id = folders[3]

        #if scan_id == '_scan_rest_1_rest':

        key = subject_path.replace(subject_id, '*')
        analysis_map[(resource_id, key)].append(
            (pipeline_id, subject_id, scan_id, subject_path))

        # separate map for group analysis
        #if c.mixedScanAnalysis == True:
        #    key = key.replace(scan_id, '*')

        analysis_map_gp[(resource_id, key)].append(
            (pipeline_id, subject_id, scan_id, subject_path))

    gpa_start_datetime = strftime("%Y-%m-%d %H:%M:%S")
    gpa_starttime_string = gpa_start_datetime.replace(' ', '_')
    gpa_starttime_string = gpa_starttime_string.replace(':', '-')

    timing = open(
        os.path.join(
            c.outputDirectory, 'group_analysis_timing_%s_%s.txt' %
            (c.pipelineName, gpa_starttime_string)), 'wt')

    sca_roi_runs = 0
    sca_roi_time = 0
    sca_seed_runs = 0
    sca_seed_time = 0
    sca_tempreg_runs = 0
    sca_tempreg_time = 0
    dr_tempreg_runs = 0
    dr_tempreg_time = 0
    vmhc_z_runs = 0
    vmhc_z_time = 0
    alff_Z_runs = 0
    alff_Z_time = 0
    falff_Z_runs = 0
    falff_Z_time = 0
    reho_Z_runs = 0
    reho_Z_time = 0
    centrality_outputs_runs = 0
    centrality_outputs_time = 0

    # Start timing here
    gpa_start_time = time.time()

    for resource, glob_key in analysis_map.keys():
        if resource == 'functional_mni':

            wf_start_time = time.time()

            if 1 in c.runBASC:

                if not c.runOnGrid:
                    from CPAC.pipeline.cpac_basc_pipeline import prep_basc_workflow
                    prep_basc_workflow(c, analysis_map[(resource, glob_key)])
                else:
                    if 'sge' in c.resourceManager.lower():
                        run_sge_jobs(c, config_file, resource,
                                     analysis_map[(resource, glob_key)])

                    elif 'pbs' in c.resourceManager.lower():
                        run_pbs_jobs(c, config_file, resource,
                                     analysis_map[(resource, glob_key)])

            if 1 in c.runCWAS:

                if not c.runOnGrid:

                    from CPAC.pipeline.cpac_cwas_pipeline import prep_cwas_workflow
                    prep_cwas_workflow(c, analysis_map[(resource, glob_key)])

                else:
                    if 'sge' in c.resourceManager.lower():
                        run_sge_jobs(c, config_file, resource,
                                     analysis_map[(resource, glob_key)])

                    elif 'pbs' in c.resourceManager.lower():
                        run_pbs_jobs(c, config_file, resource,
                                     analysis_map[(resource, glob_key)])

            print >> timing, "Group analysis workflow completed for resource: ", resource
            print >> timing, "Elapsed run time (minutes): ", (
                (time.time() - wf_start_time) / 60)
            print >> timing, ""

    for resource, glob_key in analysis_map_gp.keys():

        if resource in c.derivativeList:

            wf_start_time = time.time()

            if 1 in c.runGroupAnalysis:

                #get all the motion parameters across subjects

                try:

                    from CPAC.utils import extract_parameters
                    extract_parameters.run(c.outputDirectory)

                except Exception:

                    print "Extract parameters script did not run correctly"

                if not c.runOnGrid:

                    from CPAC.pipeline.cpac_group_analysis_pipeline import prep_group_analysis_workflow

                    #procss = Process(target=prep_group_analysis_workflow, args=(c, resource, analysis_map_gp[(resource, glob_key)]))

                    #print c, "   ", resource, "   ", analysis_map_gp[(resource, glob_key)], "   ", glob_key
                    prep_group_analysis_workflow(
                        c, resource, analysis_map_gp[(resource, glob_key)])

                if c.runOnGrid:

                    if 'sge' in c.resourceManager.lower():

                        run_sge_jobs(c, config_file, resource,
                                     analysis_map_gp[(resource, glob_key)])

                    elif 'pbs' in c.resourceManager.lower():

                        run_pbs_jobs(c, config_file, resource,
                                     analysis_map_gp[(resource, glob_key)])

                print >> timing, "Group analysis workflow completed for resource: ", resource
                print >> timing, "Elapsed run time (minutes): ", (
                    (time.time() - wf_start_time) / 60)
                print >> timing, ""

                # This can be implemented more sleekly using a dictionary, have to do this at some point
                if resource == 'sca_roi_Z_to_standard_smooth':
                    sca_roi_runs += 1
                    sca_roi_time = sca_roi_time + (
                        (time.time() - wf_start_time) / 60)
                elif resource == 'sca_seed_Z_to_standard_smooth':
                    sca_seed_runs += 1
                    sca_seed_time = sca_seed_time + (
                        (time.time() - wf_start_time) / 60)
                elif resource == 'sca_tempreg_maps_z_files_smooth':
                    sca_tempreg_runs += 1
                    sca_tempreg_time = sca_tempreg_time + (
                        (time.time() - wf_start_time) / 60)
                elif resource == 'dr_tempreg_maps_z_files_smooth':
                    dr_tempreg_runs += 1
                    dr_tempreg_time = dr_tempreg_time + (
                        (time.time() - wf_start_time) / 60)
                elif resource == 'vmhc_z_score_stat_map':
                    vmhc_z_runs += 1
                    vmhc_z_time = vmhc_z_time + (
                        (time.time() - wf_start_time) / 60)
                elif resource == 'alff_Z_to_standard_smooth':
                    alff_Z_runs += 1
                    alff_Z_time = alff_Z_time + (
                        (time.time() - wf_start_time) / 60)
                elif resource == 'falff_Z_to_standard_smooth':
                    falff_Z_runs += 1
                    falff_Z_time = falff_Z_time + (
                        (time.time() - wf_start_time) / 60)
                elif resource == 'reho_Z_to_standard_smooth':
                    reho_Z_runs += 1
                    reho_Z_time = reho_Z_time + (
                        (time.time() - wf_start_time) / 60)
                elif resource == 'centrality_outputs_smoothed':
                    centrality_outputs_runs += 1
                    centrality_outputs_time = centrality_outputs_time + (
                        (time.time() - wf_start_time) / 60)
    '''

            
    procss = []
        
    for resource, glob_key in analysis_map_gp.keys():
        
        if resource in c.derivativeList:
            
            if 1 in c.runGroupAnalysis:
            
                #get all the motion parameters across subjects
                try:

                    from CPAC.utils import extract_parameters
                    extract_parameters.run(c.outputDirectory)

                except Exception:
                    print "Extract parameters script did not run correctly"

                if not c.runOnGrid:
                    
                    from CPAC.pipeline.cpac_group_analysis_pipeline import prep_group_analysis_workflow
                    procss.append(Process(target=prep_group_analysis_workflow, args=(c, resource, analysis_map_gp[(resource, glob_key)])))
          
          
          
    pid = open(os.path.join(c.outputDirectory, 'pid_group.txt'), 'w')
                        
    jobQueue = []
    if len(c.derivativeList) <= c.numSubjectsAtOnce:
        """
        Stream all the subjects as sublist is
        less than or equal to the number of 
        subjects that need to run
        """
        for p in procss:
            p.start()
            print >>pid,p.pid
                
    else:
        """
        Stream the subject workflows for preprocessing.
        At Any time in the pipeline c.numSubjectsAtOnce
        will run, unless the number remaining is less than
        the value of the parameter stated above
        """
        idx = 0
        while(idx < len(c.derivativeList)):
                
            if len(jobQueue) == 0 and idx == 0:
                
                idc = idx
                    
                for p in procss[idc: idc + c.numSubjectsAtOnce]:
                
                    p.start()
                    print >>pid,p.pid
                    jobQueue.append(p)
                    idx += 1
                
            else:
                
                for job in jobQueue:
                
                    if not job.is_alive():
                        print 'found dead job ', job
                        loc = jobQueue.index(job)
                        del jobQueue[loc]
                        procss[idx].start()
                
                        jobQueue.append(procss[idx])
                        idx += 1
                
    pid.close()
    '''

    print >> timing, "Entire group analysis run complete."
    print >> timing, "Elapsed run time (minutes): ", (
        (time.time() - gpa_start_time) / 60)
    print >> timing, ""

    print >> timing, "sca_roi_Z_to_standard_smooth"
    print >> timing, "Number of runs: ", sca_roi_runs
    print >> timing, "Total run time (minutes): ", sca_roi_time
    print >> timing, ""

    print >> timing, "sca_seed_Z_to_standard_smooth"
    print >> timing, "Number of runs: ", sca_seed_runs
    print >> timing, "Total run time (minutes): ", sca_seed_time
    print >> timing, ""

    print >> timing, "sca_tempreg_maps_z_files_smooth"
    print >> timing, "Number of runs: ", sca_tempreg_runs
    print >> timing, "Total run time (minutes): ", sca_tempreg_time
    print >> timing, ""

    print >> timing, "dr_tempreg_maps_z_files_smooth"
    print >> timing, "Number of runs: ", dr_tempreg_runs
    print >> timing, "Total run time (minutes): ", dr_tempreg_time
    print >> timing, ""

    print >> timing, "vmhc_z_score_stat_map"
    print >> timing, "Number of runs: ", vmhc_z_runs
    print >> timing, "Total run time (minutes): ", vmhc_z_time
    print >> timing, ""

    print >> timing, "alff_Z_to_standard_smooth"
    print >> timing, "Number of runs: ", alff_Z_runs
    print >> timing, "Total run time (minutes): ", alff_Z_time
    print >> timing, ""

    print >> timing, "falff_Z_to_standard_smooth"
    print >> timing, "Number of runs: ", falff_Z_runs
    print >> timing, "Total run time (minutes): ", falff_Z_time
    print >> timing, ""

    print >> timing, "reho_Z_to_standard_smooth"
    print >> timing, "Number of runs: ", reho_Z_runs
    print >> timing, "Total run time (minutes): ", reho_Z_time
    print >> timing, ""

    print >> timing, "centrality_outputs_smoothed"
    print >> timing, "Number of runs: ", centrality_outputs_runs
    print >> timing, "Total run time (minutes): ", centrality_outputs_time
    print >> timing, ""

    timing.close()
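
As in example #1, the timing log's filename embeds a timestamp that strftime produces with spaces and colons and that two replace calls then sanitize. The same filename-safe string can come straight out of strftime; 'pipeline_name' below stands in for c.pipelineName:

from time import strftime

# Equivalent to strftime("%Y-%m-%d %H:%M:%S") followed by
# .replace(' ', '_').replace(':', '-') as done above.
gpa_starttime_string = strftime("%Y-%m-%d_%H-%M-%S")
log_name = 'group_analysis_timing_%s_%s.txt' % ('pipeline_name',
                                                gpa_starttime_string)
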
Code Example #5
File: cpac_group_runner.py  Project: danlurie/C-PAC
def run(config_file, subject_list_file, output_path_file):
    
    # Runs group analysis

    import os
    import yaml
    # NOTE: Configuration is assumed to be imported at module level in
    # cpac_group_runner.py.

    # Load the config file into 'c'
    c = Configuration(yaml.load(open(os.path.realpath(config_file), 'r')))


    # load the subject list (in the main GUI window, not the group analysis
    # one), and parse the yaml so that the subIDs and session IDs can be
    # accessed for below
    try:
        sublist = yaml.load(open(os.path.realpath(subject_list_file), 'r'))
    except:
        print "Subject list is not in proper YAML format. Please check your file"
        raise Exception


    subject_paths = []


    # 'output_path_file' is the wildcard-filled path to the 'Derivative Path
    # File' provided in the dialog box when group analysis is first run
    #for file in glob.glob(os.path.abspath(output_path_file)):
    #    path_list = open(file, 'r').readlines()
    #    subject_paths.extend([s.rstrip('\r\n') for s in path_list])
        
           
    ind_outputs = [
        'alff_to_standard_zstd', 'alff_to_standard_smooth_zstd',
        'falff_to_standard_zstd', 'falff_to_standard_smooth_zstd',
        'reho_to_standard_zstd', 'reho_to_standard_smooth_zstd',
        'sca_roi_files_to_standard_fisher_zstd',
        'sca_roi_files_to_standard_smooth_fisher_zstd',
        'sca_seed_to_standard_fisher_zstd',
        'sca_seed_to_standard_smooth_fisher_zstd',
        'sca_tempreg_maps_zstat_files_smooth', 'vmhc_fisher_zstd',
        'vmhc_fisher_zstd_zstat_map', 'centrality_outputs_zstd',
        'centrality_outputs_smoothed_zstd',
        'dr_tempreg_maps_files_to_standard',
        'dr_tempreg_maps_files_to_standard_smooth',
        'dr_tempreg_maps_zstat_files_to_standard',
        'dr_tempreg_maps_zstat_files_to_standard_smooth', 'alff_to_standard',
        'alff_to_standard_smooth', 'falff_to_standard',
        'falff_to_standard_smooth', 'reho_to_standard',
        'reho_to_standard_smooth', 'sca_roi_files_to_standard',
        'sca_roi_files_to_standard_smooth', 'sca_seed_to_standard',
        'sca_seed_to_standard_smooth', 'sca_tempreg_maps_files',
        'sca_tempreg_maps_files_smooth', 'sca_tempreg_maps_zstat_files',
        'sca_tempreg_maps_zstat_files_smooth', 'vmhc_raw_score',
        'centrality_outputs', 'centrality_outputs_smoothed',
        'dr_tempreg_maps_files_to_standard',
        'dr_tempreg_maps_files_to_standard_smooth',
        'dr_tempreg_maps_zstat_files_to_standard',
        'dr_tempreg_maps_zstat_files_to_standard_smooth'
    ]
            
            
    
    # collect all of the output paths
    
    for root, folders, files in os.walk(output_path_file):
    
        split_output_dir_path = output_path_file.split("/")
    
        for filename in files:
        
            if filename.endswith("nii.gz"):
    
                fullpath = os.path.join(root, filename)
            
                split_fullpath = fullpath.split("/")
                
                #subID = split_fullpath[len(split_output_dir_path)]
                deriv_folder_name = split_fullpath[len(split_output_dir_path)+1]
            
                #second_half_filepath = fullpath.split(subID)
            
                for output_name in ind_outputs:
            
                    if output_name == deriv_folder_name:
        
                        subject_paths.append(fullpath)  
        


    if len(subject_paths) == 0:
        print '[!] CPAC says: No individual-level analysis outputs were ' \
              'found given the path file you provided.\n\nPipeline Output ' \
              'Directory provided: ', output_path_file, '\n\nEither make ' \
              'sure your Output Directory path is correct, or that ' \
              'individual-level analysis completed successfully.\n\n'
        raise Exception


    if len(c.modelConfigs) == 0:
        print '[!] CPAC says: You do not have any models selected ' \
              'to run for group-level analysis. Return to your pipeline ' \
              'configuration file and create or select at least one.\n\n'
        raise Exception



    # 'subject_paths' is a list of every output from every subject included
    # in the output folder of the run

    # converts the subject_paths list into a set to enforce no duplicates
    set_subject_paths = set(subject_paths)

    # converts the set back into a list
    subject_paths = list(set_subject_paths)


    #base_path = os.path.dirname(os.path.commonprefix(subject_paths))
    base_path = c.outputDirectory


    from collections import defaultdict
    analysis_map = defaultdict(list)
    analysis_map_gp = defaultdict(list)


    print "Parsing through output paths. This may take a little while " \
          "depending on how many subjects, group analysis models, or " \
          "selected derivatives you have..\n"

    count = 0

    for subject_path in subject_paths:

        # each 'subject_path' is a full filepath to one of the output files

        # Remove the base path offset
        #rs_path = subject_path.replace(base_path, "", 1)
        #rs_path = rs_path.lstrip('/')

        # rs_path is now the path to the output file, except everything before
        # the pipeline folder (named with the pipeline ID) is stripped from
        # the path

        #folders = split_folders(rs_path)
 
        #pipeline_id = folders[0]
        #subject_unique_id = folders[1]
        #resource_id = folders[2]
        #scan_id = folders[3]


        split_output_dir_path = output_path_file.split("/")
        split_fullpath = subject_path.split("/")

        pipeline_id = split_fullpath[len(split_output_dir_path)-1]
        subject_unique_id = split_fullpath[len(split_output_dir_path)]
        resource_id = split_fullpath[len(split_output_dir_path)+1]
        scan_id = split_fullpath[len(split_output_dir_path)+2]

        
        # add auxiliary stuff to resource_id if applicable
        
        if ("_mask_" in subject_path) and (("sca_roi" in subject_path) or \
            ("sca_tempreg" in subject_path)):
            
            for dirname in split_fullpath:
                if "_mask_" in dirname:
                    maskname = dirname
                    
            filename = split_fullpath[-1]
            
            if ".nii.gz" in filename:
                filename = filename.replace(".nii.gz","")
            elif ".nii" in filename:
                filename = filename.replace(".nii","")
            
            resource_name = resource_id + "_%s_%s" % (maskname, filename)

            
        elif ("_spatial_map_" in subject_path) and \
            ("dr_tempreg" in subject_path):
            
            for dirname in split_fullpath:
                if "_spatial_map_" in dirname:
                    mapname = dirname
                    
            filename = split_fullpath[-1]
            
            if ".nii.gz" in filename:
                filename = filename.replace(".nii.gz","")
            elif ".nii" in filename:
                filename = filename.replace(".nii","")
            
            resource_name = resource_id + "_%s_%s" % (mapname, filename)
            
            
        elif ("_mask_" in subject_path) and ("centrality" in subject_path):
            
            for dirname in split_fullpath:
                if "_mask_" in dirname:
                    maskname = dirname
                    
            filename = split_fullpath[-1]
            
            if ".nii.gz" in filename:
                filename = filename.replace(".nii.gz","")
            elif ".nii" in filename:
                filename = filename.replace(".nii","")
            
            resource_name = resource_id + "_%s_%s" % (maskname, filename)
            
            
        else:
        
            resource_name = resource_id


        # loop through the subject list and check whether any subject ID from
        # it appears in this path's unique ID (subject + session); if one
        # does, record it as subject_id

        # init subject_id to None
        subject_id = None
        for sub in sublist:
            if sub['subject_id'] in subject_unique_id:
                subject_id = sub['subject_id']

        # if no subject ID from the subject list matched this path, skip it
        if not subject_id:
            continue

        # 'resource_id' is each type of output
        # 'key' is a path to each and every individual output file,
        # except with the subject ID replaced with a wildcard (*)

        # go through each group model config; for every model that includes
        # this derivative, register a (resource_name, group_model, key) entry
        for group_config_file in c.modelConfigs:

            try:
                ga_config = Configuration(yaml.load(open(os.path.realpath(group_config_file), 'r')))
            except Exception:
                raise Exception("\n\nError reading %s configuration file\n\n" % group_config_file)

            if len(ga_config.derivative_list) == 0:
                print '[!] CPAC says: You do not have any derivatives selected ' \
                      'to run for group-level analysis. Return to your group-analysis ' \
                      'configuration file and select at least one.'
                print 'Group analysis configuration file: %s\n\n' % group_config_file
                raise Exception


            if resource_id in ga_config.derivative_list:

                # include all of the scans and sessions in one model if True
                if ga_config.repeated_measures == True:
                    key = subject_path.replace(subject_unique_id, '*')
                    key = key.replace(scan_id, '*')
                else:
                    # each group of subjects from each session go into their own
                    # separate model, instead of combining all sessions into one
                    try:
                        key = subject_path.replace(subject_id, '*')
                    except:
                        # this fires if 'subject_id' was never assigned a value
                        print '\n\n[!] CPAC says: Either the derivative path file ' \
                              'you provided does not contain the output directory ' \
                              'given in the pipeline configuration file.\n'
                        print 'Derivative path file: ', output_path_file, '\n'
                        print 'Output directory: ', c.outputDirectory, '\n'
                        print '- OR -\n'
                        print 'Your subject list does not contain all of the ' \
                              'subjects you wish to run group-level analysis on.\n'
                        print 'Please correct this and try again.\n\n\n'
                        raise Exception


                analysis_map[(resource_name, group_config_file, key)].append((pipeline_id, subject_id, scan_id, subject_path))

                analysis_map_gp[(resource_name, group_config_file, key)].append((pipeline_id, subject_id, scan_id, subject_path))

        count += 1

        if count == int(len(subject_paths) * 0.7):
            print "Almost finished parsing output paths..."

        # after this loop, 'analysis_map_gp' is a dictionary with one key per
        # unique output file pattern - and each entry is a list of tuples,
        # one tuple per subject in the subject list, where 'subject_path' is
        # the full path to that output file for that particular subject
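
        # e.g. one hypothetical entry:
        #   analysis_map_gp[('alff_Z_to_standard_smooth', 'model_a.yml',
        #                    '/out/pipe_A/*/alff_Z_to_standard_smooth/scan_1/alff.nii.gz')]
        #   == [('pipe_A', 'sub001', 'scan_1', <full path for sub001>),
        #       ('pipe_A', 'sub002', 'scan_1', <full path for sub002>)]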


    print "Finished parsing through output paths!\n"



    for resource, group_model, glob_key in analysis_map.keys():
        if resource == 'functional_mni':


            if 1 in c.runBASC:

                if not c.runOnGrid:
                    from CPAC.pipeline.cpac_basc_pipeline import prep_basc_workflow
                    prep_basc_workflow(c, analysis_map[(resource, group_model, glob_key)])
                else:
                    if 'sge' in c.resourceManager.lower():
                        run_sge_jobs(c, config_file, resource, analysis_map[(resource, group_model, glob_key)])

                    elif 'pbs' in c.resourceManager.lower():
                        run_pbs_jobs(c, config_file, resource, analysis_map[(resource, group_model, glob_key)])


            if 1 in c.runCWAS:

                if not c.runOnGrid:

                    from CPAC.pipeline.cpac_cwas_pipeline import prep_cwas_workflow
                    prep_cwas_workflow(c, analysis_map[(resource, group_model, glob_key)])

                else:
                    if 'sge' in c.resourceManager.lower():
                        run_sge_jobs(c, config_file, resource, analysis_map[(resource, group_model, glob_key)])

                    elif 'pbs' in c.resourceManager.lower():
                        run_pbs_jobs(c, config_file, resource, analysis_map[(resource, group_model, glob_key)])



    procss = []  # one Process per group model to run
    

    for resource, group_model, glob_key in analysis_map_gp.keys():

        # 'resource' is each type of output
        # 'glob_key' is a path to each and every individual output file,
        # except with the subject ID replaced with a wildcard (*)
                      
        # get all the motion parameters across subjects (note: this re-runs
        # for every key and could be hoisted above the loop)

        print "Pulling motion parameters for all subjects...\n"

        from CPAC.utils import extract_parameters
        scrub_threshold = extract_parameters.run(c.outputDirectory, c.runScrubbing)

        if not c.runOnGrid:
                    
            print "Starting group analysis pipeline setup..\n"

            from CPAC.pipeline.cpac_ga_model_generator import prep_group_analysis_workflow
            procss.append(Process(target=prep_group_analysis_workflow, args=(c, group_model, resource, analysis_map_gp[(resource, group_model, glob_key)], scrub_threshold)))
            
        else:
        
            print "\n\n[!] CPAC says: Group-level analysis has not yet " \
                  "been implemented to handle runs on a cluster or grid.\n\n"\
                  "Please turn off 'Run CPAC On A Cluster/Grid' in order " \
                  "to continue with group-level analysis. This will submit " \
                  "the job to only one node, however.\n\nWe will update " \
                  "users on when this feature will be available through " \
                  "release note announcements.\n\n"

    
          
    pid = open(os.path.join(c.outputDirectory, 'pid_group.txt'), 'w')
                        
    jobQueue = []
    if len(procss) <= c.numGPAModelsAtOnce:
        # fewer models than the concurrency limit - just start them all
        for p in procss:
            p.start()
            print >>pid, p.pid

    else:
        # stream the group model workflows: at any time up to
        # c.numGPAModelsAtOnce processes run, unless fewer than that
        # many models remain
        import time

        idx = 0
        while idx < len(procss):

            if len(jobQueue) == 0 and idx == 0:
                # fill the queue up to the concurrency limit
                for p in procss[idx: idx + c.numGPAModelsAtOnce]:
                    p.start()
                    print >>pid, p.pid
                    jobQueue.append(p)
                    idx += 1

            else:
                # swap each finished process for the next pending one;
                # iterate over a copy so removal does not skip entries
                for job in jobQueue[:]:
                    if not job.is_alive() and idx < len(procss):
                        print 'found dead job ', job
                        jobQueue.remove(job)
                        procss[idx].start()
                        jobQueue.append(procss[idx])
                        idx += 1

                # avoid busy-waiting while jobs are still running
                time.sleep(2)

    pid.close()
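
A note on the scheduling above: the hand-rolled queue can also be expressed
with multiprocessing.Pool. A minimal sketch, assuming the worker call and its
arguments are picklable ('job_args' below is a hypothetical list built from
the same tuples that went into 'procss', and '_run_model' would need to live
at module level for pickling):

    from multiprocessing import Pool

    def _run_model(args):
        # hypothetical wrapper so Pool.map can pass a single argument
        conf, group_model, resource, entries, scrub_threshold = args
        prep_group_analysis_workflow(conf, group_model, resource, entries,
                                     scrub_threshold)

    pool = Pool(processes=c.numGPAModelsAtOnce)  # same concurrency limit
    pool.map(_run_model, job_args)
    pool.close()
    pool.join()

One caveat: Pool workers are daemonic, so this only fits if the workflow
itself does not need to spawn child processes.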
Code example #6
def run(config_file, output_path_file):

    # Runs group analysis

    import re
    import os
    import glob
    import yaml

    # Load the config file into 'c'
    c = Configuration(yaml.load(open(os.path.realpath(config_file), 'r')))

    subject_paths = []

    for file in glob.glob(os.path.abspath(output_path_file)):
        path_list = open(file, 'r').readlines()
        subject_paths.extend([s.rstrip('\r\n') for s in path_list])

    set_subject_paths = set(subject_paths)
    subject_paths = list(set_subject_paths)
    #base_path = os.path.dirname(os.path.commonprefix(subject_paths))
    base_path = c.outputDirectory

    from collections import defaultdict
    analysis_map = defaultdict(list)
    analysis_map_gp = defaultdict(list)

    for subject_path in subject_paths:
        # Remove the base path offset

        rs_path = subject_path.replace(base_path, "", 1)

        rs_path = rs_path.lstrip('/')

        folders = split_folders(rs_path)
        pipeline_id = folders[0]
        subject_id = folders[1]
        resource_id = folders[2]
        scan_id = folders[3]

        key = subject_path.replace(subject_id, '*')
        analysis_map[(resource_id, key)].append(
            (pipeline_id, subject_id, scan_id, subject_path))

        # separate map for group analysis
        # if c.mixedScanAnalysis == True:
        #     key = key.replace(scan_id, '*')

        analysis_map_gp[(resource_id, key)].append(
            (pipeline_id, subject_id, scan_id, subject_path))

    for resource, glob_key in analysis_map.keys():
        if resource == 'functional_mni':
            if 1 in c.runBASC:

                if not c.runOnGrid:
                    from CPAC.pipeline.cpac_basc_pipeline import prep_basc_workflow
                    prep_basc_workflow(c, analysis_map[(resource, glob_key)])
                else:
                    if 'sge' in c.resourceManager.lower():
                        run_sge_jobs(c, config_file, resource,
                                     analysis_map[(resource, glob_key)])

                    elif 'pbs' in c.resourceManager.lower():
                        run_pbs_jobs(c, config_file, resource,
                                     analysis_map[(resource, glob_key)])

            if 1 in c.runCWAS:

                if not c.runOnGrid:

                    from CPAC.pipeline.cpac_cwas_pipeline import prep_cwas_workflow
                    prep_cwas_workflow(c, analysis_map[(resource, glob_key)])

                else:
                    if 'sge' in c.resourceManager.lower():
                        run_sge_jobs(c, config_file, resource,
                                     analysis_map[(resource, glob_key)])

                    elif 'pbs' in c.resourceManager.lower():
                        run_pbs_jobs(c, config_file, resource,
                                     analysis_map[(resource, glob_key)])

    for resource, glob_key in analysis_map_gp.keys():

        if resource in c.derivativeList:

            if 1 in c.runGroupAnalysis:

                #get all the motion parameters across subjects

                try:

                    from CPAC.utils import extract_parameters
                    extract_parameters.run(c.outputDirectory)

                except Exception:

                    print "Extract parameters script did not run correctly"

                if not c.runOnGrid:

                    from CPAC.pipeline.cpac_group_analysis_pipeline import prep_group_analysis_workflow

                    #print c, "   ", resource, "   ", analysis_map_gp[(resource, glob_key)], "   ", glob_key
                    prep_group_analysis_workflow(
                        c, resource, analysis_map_gp[(resource, glob_key)])

                else:

                    if 'sge' in c.resourceManager.lower():

                        run_sge_jobs(c, config_file, resource,
                                     analysis_map_gp[(resource, glob_key)])

                    elif 'pbs' in c.resourceManager.lower():

                        run_pbs_jobs(c, config_file, resource,
                                     analysis_map_gp[(resource, glob_key)])
Code example #7
File: cpac_group_runner.py  Project: czarrar/C-PAC
def run(config_file, output_path_file):
    
    # Runs group analysis

    import re
    import os
    import glob
    import yaml

    # Load the config file into 'c'
    c = Configuration(yaml.load(open(os.path.realpath(config_file), 'r')))


    subject_paths = []

    for file in glob.glob(os.path.abspath(output_path_file)):
        path_list = open(file, 'r').readlines()
        subject_paths.extend([s.rstrip('\r\n') for s in path_list])

    #print >>diag, "Subject paths list size: "
    #print >>diag, len(subject_paths)
    #print >>diag, ""

    #print >>diag, "First subject path: "
    #print >>diag, subject_paths[0]
    #print >>diag, ""


    set_subject_paths = set(subject_paths)
    subject_paths = list(set_subject_paths)
    #base_path = os.path.dirname(os.path.commonprefix(subject_paths))
    base_path = c.outputDirectory

    from collections import defaultdict
    analysis_map = defaultdict(list)
    analysis_map_gp = defaultdict(list)

    for subject_path in subject_paths:
        # Remove the base path offset

        rs_path = subject_path.replace(base_path, "", 1)

        rs_path = rs_path.lstrip('/')

        folders = split_folders(rs_path)
        
        pipeline_id = folders[0]
        subject_id = folders[1]
        resource_id = folders[2]
        scan_id = folders[3]


        #if scan_id == '_scan_rest_1_rest':

        key = subject_path.replace(subject_id, '*')
        analysis_map[(resource_id, key)].append((pipeline_id, subject_id, scan_id, subject_path))

        # separate map for group analysis
        #if c.mixedScanAnalysis == True:
        #    key = key.replace(scan_id, '*')

        analysis_map_gp[(resource_id, key)].append((pipeline_id, subject_id, scan_id, subject_path))


    #print >>diag, ""
    #print >>diag, "Analysis_map_gp dictionary size: "
    #print >>diag, len(analysis_map_gp)
    #print >>diag, ""


    #print >>diag, "Derivative list: "
    #print >>diag, c.derivativeList
    #print >>diag, ""


    timing = open(os.path.join(c.outputDirectory, 'group_analysis_timing.txt'), 'wt')

    sca_roi_runs = 0
    sca_roi_time = 0
    sca_seed_runs = 0
    sca_seed_time = 0
    sca_tempreg_runs = 0
    sca_tempreg_time = 0
    dr_tempreg_runs = 0
    dr_tempreg_time = 0
    vmhc_z_runs = 0
    vmhc_z_time = 0
    alff_Z_runs = 0
    alff_Z_time = 0
    falff_Z_runs = 0
    falff_Z_time = 0
    reho_Z_runs = 0
    reho_Z_time = 0
    centrality_outputs_runs = 0
    centrality_outputs_time = 0

    # Start timing here
    gpa_start_time = time.time()


    for resource, glob_key in analysis_map.keys():
        if resource == 'functional_mni':

            wf_start_time = time.time()

            if 1 in c.runBASC:

                if not c.runOnGrid:
                    from CPAC.pipeline.cpac_basc_pipeline import prep_basc_workflow
                    prep_basc_workflow(c, analysis_map[(resource, glob_key)])
                else:
                    if 'sge' in c.resourceManager.lower():
                        run_sge_jobs(c, config_file, resource, analysis_map[(resource, glob_key)])

                    elif 'pbs' in c.resourceManager.lower():
                        run_pbs_jobs(c, config_file, resource, analysis_map[(resource, glob_key)])


            if 1 in c.runCWAS:

                if not c.runOnGrid:

                    from CPAC.pipeline.cpac_cwas_pipeline import prep_cwas_workflow
                    prep_cwas_workflow(c, analysis_map[(resource, glob_key)])

                else:
                    if 'sge' in c.resourceManager.lower():
                        run_sge_jobs(c, config_file, resource, analysis_map[(resource, glob_key)])

                    elif 'pbs' in c.resourceManager.lower():
                        run_pbs_jobs(c, config_file, resource, analysis_map[(resource, glob_key)])

            print >>timing, "Group analysis workflow completed for resource: ", resource
            print >>timing, "Elapsed run time (minutes): ", ((time.time() - wf_start_time)/60)
            print >>timing, ""




    for resource, glob_key in analysis_map_gp.keys():

        if resource in c.derivativeList:

            wf_start_time = time.time()

            #print >>diag, "Resource: "
            #print >>diag, resource
            #print >>diag, ""

            #print >>diag, "glob key: "
            #print >>diag, glob_key
            #print >>diag, ""

            #print >>diag, "Analysis map gp entry: "
            #print >>diag, analysis_map_gp[(resource,glob_key)]
            #print >>diag, ""

            if 1 in c.runGroupAnalysis:
              
                #get all the motion parameters across subjects

                try:

                    from CPAC.utils import extract_parameters
                    extract_parameters.run(c.outputDirectory)

                except Exception:

                    print "Extract parameters script did not run correctly"


                if not c.runOnGrid:

                    from CPAC.pipeline.cpac_group_analysis_pipeline import prep_group_analysis_workflow

                    #print c, "   ", resource, "   ", analysis_map_gp[(resource, glob_key)], "   ", glob_key
                    prep_group_analysis_workflow(c, resource, analysis_map_gp[(resource, glob_key)])



                else:

                    if 'sge' in c.resourceManager.lower():
                        
                        run_sge_jobs(c, config_file, resource, analysis_map_gp[(resource, glob_key)])
                       
                    elif 'pbs' in c.resourceManager.lower():
                     
                        run_pbs_jobs(c, config_file, resource, analysis_map_gp[(resource, glob_key)])


            print >>timing, "Group analysis workflow completed for resource: ", resource
            print >>timing, "Elapsed run time (minutes): ", ((time.time() - wf_start_time)/60)
            print >>timing, ""

            # this could be implemented more cleanly with a dictionary keyed
            # on resource name (see the sketch after this example)
            if resource == 'sca_roi_Z_to_standard_smooth':
                sca_roi_runs += 1
                sca_roi_time = sca_roi_time + ((time.time() - wf_start_time)/60)
            elif resource == 'sca_seed_Z_to_standard_smooth':
                sca_seed_runs += 1
                sca_seed_time = sca_seed_time + ((time.time() - wf_start_time)/60)
            elif resource == 'sca_tempreg_maps_z_files_smooth':
                sca_tempreg_runs += 1
                sca_tempreg_time = sca_tempreg_time + ((time.time() - wf_start_time)/60)
            elif resource == 'dr_tempreg_maps_z_files_smooth':
                dr_tempreg_runs += 1
                dr_tempreg_time = dr_tempreg_time + ((time.time() - wf_start_time)/60)
            elif resource == 'vmhc_z_score_stat_map':
                vmhc_z_runs += 1
                vmhc_z_time = vmhc_z_time + ((time.time() - wf_start_time)/60)
            elif resource == 'alff_Z_to_standard_smooth':
                alff_Z_runs += 1
                alff_Z_time = alff_Z_time + ((time.time() - wf_start_time)/60)
            elif resource == 'falff_Z_to_standard_smooth':
                falff_Z_runs += 1
                falff_Z_time = falff_Z_time + ((time.time() - wf_start_time)/60)
            elif resource == 'reho_Z_to_standard_smooth':
                reho_Z_runs += 1
                reho_Z_time = reho_Z_time + ((time.time() - wf_start_time)/60)
            elif resource == 'centrality_outputs_smoothed':
                centrality_outputs_runs += 1
                centrality_outputs_time = centrality_outputs_time + ((time.time() - wf_start_time)/60)
            
    print >>timing, "Entire group analysis run complete."
    print >>timing, "Elapsed run time (minutes): ", ((time.time() - gpa_start_time)/60)
    print >>timing, ""

    print >>timing, "sca_roi_Z_to_standard_smooth"
    print >>timing, "Number of runs: ", sca_roi_runs
    print >>timing, "Total run time (minutes): ", sca_roi_time
    print >>timing, ""

    print >>timing, "sca_seed_Z_to_standard_smooth"
    print >>timing, "Number of runs: ", sca_seed_runs
    print >>timing, "Total run time (minutes): ", sca_seed_time
    print >>timing, ""

    print >>timing, "sca_tempreg_maps_z_files_smooth"
    print >>timing, "Number of runs: ", sca_tempreg_runs
    print >>timing, "Total run time (minutes): ", sca_tempreg_time
    print >>timing, ""

    print >>timing, "dr_tempreg_maps_z_files_smooth"
    print >>timing, "Number of runs: ", dr_tempreg_runs
    print >>timing, "Total run time (minutes): ", dr_tempreg_time
    print >>timing, ""

    print >>timing, "vmhc_z_score_stat_map"
    print >>timing, "Number of runs: ", vmhc_z_runs
    print >>timing, "Total run time (minutes): ", vmhc_z_time
    print >>timing, ""

    print >>timing, "alff_Z_to_standard_smooth"
    print >>timing, "Number of runs: ", alff_Z_runs
    print >>timing, "Total run time (minutes): ", alff_Z_time
    print >>timing, ""

    print >>timing, "falff_Z_to_standard_smooth"
    print >>timing, "Number of runs: ", falff_Z_runs
    print >>timing, "Total run time (minutes): ", falff_Z_time
    print >>timing, ""

    print >>timing, "reho_Z_to_standard_smooth"
    print >>timing, "Number of runs: ", reho_Z_runs
    print >>timing, "Total run time (minutes): ", reho_Z_time
    print >>timing, ""

    print >>timing, "centrality_outputs_smoothed"
    print >>timing, "Number of runs: ", centrality_outputs_runs
    print >>timing, "Total run time (minutes): ", centrality_outputs_time
    print >>timing, ""



    timing.close()
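
As the comment in the resource loop notes, the per-derivative counters could
collapse into a single dictionary. A hypothetical, behavior-equivalent sketch
of that refactor:

    from collections import defaultdict

    # one [runs, minutes] accumulator per resource name instead of
    # eighteen separate counter variables
    timing_stats = defaultdict(lambda: [0, 0.0])

    # inside the resource loop, after each workflow finishes:
    #     timing_stats[resource][0] += 1
    #     timing_stats[resource][1] += (time.time() - wf_start_time) / 60

    # the summary block then becomes:
    for name, (runs, minutes) in timing_stats.items():
        print >>timing, name
        print >>timing, "Number of runs: ", runs
        print >>timing, "Total run time (minutes): ", minutes
        print >>timing, ""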
Code example #8
File: cpac_group_runner.py  Project: RanjitK/C-PAC
def run(config_file, output_path_file):
    
    import re
    import os
    import glob
    import yaml

    c = Configuration(yaml.load(open(os.path.realpath(config_file), 'r')))

    subject_paths = []

    for file in glob.glob(os.path.abspath(output_path_file)):
        path_list = open(file, 'r').readlines()
        subject_paths.extend([s.rstrip('\r\n') for s in path_list])

    set_subject_paths = set(subject_paths)
    subject_paths = list(set_subject_paths)
    #base_path = os.path.dirname(os.path.commonprefix(subject_paths))
    base_path = c.outputDirectory

    from collections import defaultdict
    analysis_map = defaultdict(list)
    analysis_map_gp = defaultdict(list)

    for subject_path in subject_paths:
        # Remove the base path offset

        rs_path = subject_path.replace(base_path, "", 1)

        rs_path = rs_path.lstrip('/')

        folders = split_folders(rs_path)
        pipeline_id = folders[0]
        subject_id = folders[1]
        resource_id = folders[2]
        scan_id = folders[3]

        key = subject_path.replace(subject_id, '*')
        analysis_map[(resource_id, key)].append((pipeline_id, subject_id, scan_id, subject_path))

        # separate map for group analysis
        # if c.mixedScanAnalysis == True:
        #     key = key.replace(scan_id, '*')

        analysis_map_gp[(resource_id, key)].append((pipeline_id, subject_id, scan_id, subject_path))


    for resource, glob_key in analysis_map.keys():
        if resource == 'functional_mni':
            if 1 in c.runBASC:

                if not c.runOnGrid:
                    from CPAC.pipeline.cpac_basc_pipeline import prep_basc_workflow
                    prep_basc_workflow(c, analysis_map[(resource, glob_key)])
                else:
                    if 'sge' in c.resourceManager.lower():
                        run_sge_jobs(c, config_file, resource, analysis_map[(resource, glob_key)])

                    elif 'pbs' in c.resourceManager.lower():
                        run_pbs_jobs(c, config_file, resource, analysis_map[(resource, glob_key)])


            if 1 in c.runCWAS:

                if not c.runOnGrid:

                    from CPAC.pipeline.cpac_cwas_pipeline import prep_cwas_workflow
                    prep_cwas_workflow(c, analysis_map[(resource, glob_key)])

                else:
                    if 'sge' in c.resourceManager.lower():
                        run_sge_jobs(c, config_file, resource, analysis_map[(resource, glob_key)])

                    elif 'pbs' in c.resourceManager.lower():
                        run_pbs_jobs(c, config_file, resource, analysis_map[(resource, glob_key)])


    for resource, glob_key in analysis_map_gp.keys():

        if resource in c.derivativeList:

            if 1 in c.runGroupAnalysis:
                #get all the motion parameters across subjects
                try:
                    from CPAC.utils import extract_parameters
                    extract_parameters.run(c.outputDirectory)
                except Exception:
                    print "Extract parameters script did not run correctly"


                if not c.runOnGrid:
                    from CPAC.pipeline.cpac_group_analysis_pipeline import prep_group_analysis_workflow
                    prep_group_analysis_workflow(c, resource, analysis_map_gp[(resource, glob_key)])

                else:
                    if 'sge' in c.resourceManager.lower():
                        run_sge_jobs(c, config_file, resource, analysis_map_gp[(resource, glob_key)])

                    elif 'pbs' in c.resourceManager.lower():
                        run_pbs_jobs(c, config_file, resource, analysis_map_gp[(resource, glob_key)])
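
All of the run() variants above hinge on the same grouping trick: replace the
subject ID in each output path with a wildcard to form a key, then let a
defaultdict(list) collect every subject's copy of the same derivative under
that key. A self-contained sketch with hypothetical paths:

    from collections import defaultdict

    # hypothetical output paths for two subjects, one derivative each
    paths = [
        "/out/pipe_A/sub001/alff_Z/scan_1/alff.nii.gz",
        "/out/pipe_A/sub002/alff_Z/scan_1/alff.nii.gz",
    ]

    analysis_map = defaultdict(list)

    for p in paths:
        pipeline_id, subject_id, resource_id, scan_id = p.split("/")[2:6]
        key = p.replace(subject_id, "*")   # one key per derivative/scan
        analysis_map[(resource_id, key)].append(
            (pipeline_id, subject_id, scan_id, p))

    # both subjects now sit under one key:
    # ('alff_Z', '/out/pipe_A/*/alff_Z/scan_1/alff.nii.gz') -> 2 entries
    for k, entries in analysis_map.items():
        print k, len(entries)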