Example #1
def run(config_file, subject_list_file, p_name=None):
    
    try:
    
        if not os.path.exists(config_file):
            raise IOError
        else:
            c = Configuration(yaml.load(open(os.path.realpath(config_file), 'r')))
    
    except IOError:
        print "config file %s doesn't exist" % config_file
        raise
    except Exception:
        raise Exception("Error reading config file - %s" % config_file)

    #do some validation
    validate(c)

    try:
        sublist = yaml.load(open(os.path.realpath(subject_list_file), 'r'))
    except:
        raise Exception("Subject list is not in proper YAML format. Please check your file")

    strategies = sorted(build_strategies(c))
    
    print "strategies ---> ", strategies
    
    sub_ids = []
    for sub in sublist:
        if sub['unique_id']:
            sub_ids.append(sub['subject_id'] + "_" + sub["unique_id"])
        else:
            sub_ids.append(sub['subject_id'])
            
    create_group_log_template(sub_ids, os.path.join(c.outputDirectory, 'logs'))

    seeds_created = []
    if c.seedSpecificationFile is not None:

        try:
            if os.path.exists(c.seedSpecificationFile):
                seeds_created = create_seeds_(c.seedOutputLocation, c.seedSpecificationFile, c.FSLDIR)
                print 'seeds created %s -> ' % seeds_created
        except:
            raise IOError('Problem in seedSpecificationFile')

    if 1 in c.runVoxelTimeseries:

        if 2 in c.useSeedInAnalysis:

            c.maskSpecificationFile = append_seeds_to_file(c.workingDirectory, seeds_created, c.maskSpecificationFile)

    if 1 in c.runROITimeseries:

        if 1 in c.useSeedInAnalysis:

            c.roiSpecificationFile = append_seeds_to_file(c.workingDirectory, seeds_created, c.roiSpecificationFile)

    if 1 in c.runNetworkCentrality:

        if 3 in c.useSeedInAnalysis:

            c.templateSpecificationFile = append_seeds_to_file(c.workingDirectory, seeds_created, c.templateSpecificationFile)



    if not c.runOnGrid:

        from CPAC.pipeline.cpac_pipeline import prep_workflow
        procss = [Process(target=prep_workflow, args=(sub, c, strategies, p_name)) for sub in sublist]
        pid = open(os.path.join(c.outputDirectory, 'pid.txt'), 'w')
        import time
        
        jobQueue = []
        if len(sublist) <= c.numSubjectsAtOnce:
            """
            Stream all the subjects as sublist is
            less than or equal to the number of 
            subjects that need to run
            """
            for p in procss:
                p.start()
                print >> pid, p.pid

        else:

            """
            Stream the subject worlflows for preprocessing.
            At Any time in the pipeline c.numSubjectsAtOnce
            will run, unless the number remaining is less than
            the value of the parameter stated above
            """
            idx = 0
            while idx < len(sublist):

                if len(jobQueue) == 0 and idx == 0:

                    idc = idx
                    for p in procss[idc: idc + c.numSubjectsAtOnce]:

                        p.start()
                        print >> pid, p.pid
                        jobQueue.append(p)
                        idx += 1

                else:

                    for job in jobQueue:

                        # Replace each finished job with the next waiting subject
                        if not job.is_alive():
                            print 'found dead job ', job
                            loc = jobQueue.index(job)
                            del jobQueue[loc]
                            procss[idx].start()

                            jobQueue.append(procss[idx])
                            idx += 1

                    # Sleep so the polling loop isn't consuming 100% of a CPU
                    time.sleep(2)


        pid.close()
    else:

        import commands
        import pickle
        from time import strftime

        temp_files_dir = os.path.join(os.getcwd(), 'cluster_temp_files')
        print commands.getoutput("mkdir -p %s" % temp_files_dir)


        strategies_file = os.path.join(temp_files_dir, 'strategies.obj')
        f = open(strategies_file, 'w')
        pickle.dump(strategies, f)
        f.close()




        if 'sge' in c.resourceManager.lower():

            run_sge_jobs(c, config_file, strategies_file, subject_list_file, p_name)


        elif 'pbs' in c.resourceManager.lower():

            run_pbs_jobs(c, config_file, strategies_file, subject_list_file, p_name)

        elif 'condor' in c.resourceManager.lower():

            run_condor_jobs(c, config_file, strategies_file, subject_list_file, p_name)

    return 1
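
Note: every variant below hand-rolls the same throttle with multiprocessing.Process: start up to c.numSubjectsAtOnce workers, then poll the job queue and replace dead jobs until every subject has run. A minimal sketch of the same behavior with concurrent.futures (assuming prep_workflow, c, strategies, sublist and p_name as in Example #1; the pid.txt bookkeeping is dropped):

from concurrent.futures import ProcessPoolExecutor
from CPAC.pipeline.cpac_pipeline import prep_workflow

def run_all_subjects(sublist, c, strategies, p_name=None):
    # max_workers caps concurrency exactly like the manual jobQueue loop:
    # a new subject starts as soon as a running one finishes
    with ProcessPoolExecutor(max_workers=c.numSubjectsAtOnce) as pool:
        futures = [pool.submit(prep_workflow, sub, c, strategies, p_name)
                   for sub in sublist]
        for future in futures:
            future.result()  # re-raises any per-subject exception
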
Example #2
def run(config_file, subject_list_file, p_name=None, plugin=None,
        plugin_args=None):
    '''
    '''

    # Import packages
    import os
    import pickle
    import time

    from CPAC.pipeline.cpac_pipeline import prep_workflow

    # Init variables
    config_file = os.path.realpath(config_file)
    subject_list_file = os.path.realpath(subject_list_file)

    # take date+time stamp for run identification purposes
    unique_pipeline_id = time.strftime("%Y%m%d%H%M%S")
    pipeline_start_stamp = time.strftime("%Y-%m-%d_%H:%M:%S")

    # Load in pipeline config file
    try:
        if not os.path.exists(config_file):
            raise IOError
        else:
            c = Configuration(yaml.load(open(config_file, 'r')))
    except IOError:
        print "config file %s doesn't exist" % config_file
        raise
    except Exception:
        print "Error reading config file - %s" % config_file
        raise Exception

    # Do some validation
    validate(c)

    # Get the pipeline name
    p_name = p_name or c.pipelineName

    # Load in subject list
    try:
        sublist = yaml.load(open(subject_list_file, 'r'))
    except:
        print "Subject list is not in proper YAML format. Please check " \
              "your file"
        raise Exception

    # NOTE: strategies list is only needed in cpac_pipeline prep_workflow for
    # creating symlinks
    strategies = sorted(build_strategies(c))

    # Populate subject scan map
    sub_scan_map = {}
    try:
        for sub in sublist:
            if sub['unique_id']:
                s = sub['subject_id'] + "_" + sub["unique_id"]
            else:
                s = sub['subject_id']
            scan_ids = ['scan_anat']
            try:
                for id in sub['func']:
                    scan_ids.append('scan_' + str(id))
            except KeyError:
                for id in sub['rest']:
                    scan_ids.append('scan_' + str(id))
            sub_scan_map[s] = scan_ids
    except:
        print "\n\n" + "ERROR: Subject list file not in proper format - " \
              "check if you loaded the correct file?" + "\n" + \
              "Error name: cpac_runner_0001" + "\n\n"
        raise Exception

    create_group_log_template(sub_scan_map, c.logDirectory)

    pipeline_timing_info = []
    pipeline_timing_info.append(unique_pipeline_id)
    pipeline_timing_info.append(pipeline_start_stamp)
    pipeline_timing_info.append(len(sublist))

    # If we're running on cluster, execute job scheduler
    if c.runOnGrid:
        # Create cluster log dir
        cluster_files_dir = os.path.join(c.logDirectory, 'cluster_files')
        if not os.path.exists(cluster_files_dir):
            os.makedirs(cluster_files_dir)

        # Create strategies file
        strategies_file = os.path.join(cluster_files_dir, 'strategies.obj')
        with open(strategies_file, 'w') as f:
            pickle.dump(strategies, f)

        # Check if it's a condor job, and run that
        if 'condor' in c.resourceManager.lower():
            run_condor_jobs(c, config_file, strategies_file,
                            subject_list_file, p_name)
        # All other schedulers go through run_cpac_on_cluster
        else:
            run_cpac_on_cluster(config_file, subject_list_file,
                                strategies_file, cluster_files_dir)

    # Run on one computer
    else:
        # Init variables
        procss = [Process(target=prep_workflow,
                          args=(sub, c, strategies, 1, pipeline_timing_info,
                                p_name, plugin, plugin_args))
                  for sub in sublist]

        if not os.path.exists(c.workingDirectory):
            try:
                os.makedirs(c.workingDirectory)
            except:
                err = "\n\n[!] CPAC says: Could not create the working " \
                      "directory: %s\n\nMake sure you have permissions " \
                      "to write to this directory.\n\n" % c.workingDirectory
                raise Exception(err)
                
        pid = open(os.path.join(c.workingDirectory, 'pid.txt'), 'w')
        # Init job queue
        jobQueue = []

        # If we're allocating more processes than are subjects, run them all
        if len(sublist) <= c.numParticipantsAtOnce:
            for p in procss:
                p.start()
                print >> pid, p.pid
        # Otherwise manage resources to run processes incrementally
        else:
            idx = 0
            while idx < len(sublist):
                # If the job queue is empty and we haven't started indexing
                if len(jobQueue) == 0 and idx == 0:
                    # Init subject process index
                    idc = idx
                    # Launch processes (one for each subject)
                    for p in procss[idc: idc+c.numParticipantsAtOnce]:
                        p.start()
                        print >> pid, p.pid
                        jobQueue.append(p)
                        idx += 1
                # Otherwise, jobs are running - check them
                else:
                    # Check every job in the queue's status
                    for job in jobQueue:
                        # If the job is not alive
                        if not job.is_alive():
                            # Find job and delete it from queue
                            print 'found dead job ', job
                            loc = jobQueue.index(job)
                            del jobQueue[loc]
                            # ...and start the next available process
                            # (subject)
                            procss[idx].start()
                            # Append this to job queue and increment index
                            jobQueue.append(procss[idx])
                            idx += 1
                    # Add sleep so while loop isn't consuming 100% of CPU
                    time.sleep(2)
        # Close PID txt file to indicate finish
        pid.close()
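
Example #2's scan-map loop accepts subject lists keyed by either 'func' or 'rest' via try/except KeyError. The same fallback can be written without exception handling using dict.get; a sketch over the same sublist entries:

sub_scan_map = {}
for sub in sublist:
    if sub['unique_id']:
        s = sub['subject_id'] + "_" + sub["unique_id"]
    else:
        s = sub['subject_id']
    # prefer 'func' scans, fall back to 'rest' for older subject lists
    func_ids = sub.get('func', sub.get('rest', []))
    sub_scan_map[s] = ['scan_anat'] + ['scan_' + str(i) for i in func_ids]
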
Example #3
def run(config_file, subject_list_file, p_name=None):
    
    # Import packages
    import time

    # take date+time stamp for run identification purposes
    unique_pipeline_id = time.strftime("%Y%m%d%H%M%S")
    pipeline_start_stamp = time.strftime("%Y-%m-%d_%H:%M:%S")

    try:
        if not os.path.exists(config_file):
            raise IOError
        else:
            c = Configuration(yaml.load(open(os.path.realpath(config_file), 'r')))
    
    except IOError:
        print("config file %s doesn't exist" % config_file)
        raise
    except Exception:
        print("Error reading config file - %s" % config_file)
        raise Exception

    #do some validation
    validate(c)

    # get the pipeline name
    p_name = p_name or c.pipelineName


    try:
        sublist = yaml.load(open(os.path.realpath(subject_list_file), 'r'))
    except:
        print("Subject list is not in proper YAML format. Please check your file")
        raise Exception


    # NOTE: strategies list is only needed in cpac_pipeline prep_workflow for
    # creating symlinks
    strategies = sorted(build_strategies(c))

    
    print("strategies ---> ")
    print(strategies)
    
    sub_scan_map = {}

    print("subject list: ")
    print(sublist)
    
    try:
    
        for sub in sublist:
            if sub['unique_id']:
                s = sub['subject_id'] + "_" + sub["unique_id"]
            else:
                s = sub['subject_id']

            scan_ids = ['scan_anat']
            for id in sub['rest']:
                scan_ids.append('scan_' + str(id))
            sub_scan_map[s] = scan_ids
            
    except:
        
        print("\n\n" + "ERROR: Subject list file not in proper format - check if you loaded the correct file?" + "\n" + \
              "Error name: cpac_runner_0001" + "\n\n")
        raise Exception

        
        
    create_group_log_template(sub_scan_map, os.path.join(c.outputDirectory, 'logs'))
 

    seeds_created = []
    if c.seedSpecificationFile is not None:

        try:
            if os.path.exists(c.seedSpecificationFile):
                seeds_created = create_seeds_(c.seedOutputLocation, c.seedSpecificationFile, c.FSLDIR)
                print('seeds created %s -> ' % seeds_created)
        except:
            raise IOError('Problem in seedSpecificationFile')

    if 1 in c.runVoxelTimeseries:

        if 'roi_voxelwise' in c.useSeedInAnalysis:

            c.maskSpecificationFile = append_seeds_to_file(c.workingDirectory, seeds_created, c.maskSpecificationFile)

    if 1 in c.runROITimeseries:

        if 'roi_average' in c.useSeedInAnalysis:

            c.roiSpecificationFile = append_seeds_to_file(c.workingDirectory, seeds_created, c.roiSpecificationFile)

    if 1 in c.runSCA:

        if 'roi_average' in c.useSeedInAnalysis:

            c.roiSpecificationFileForSCA = append_seeds_to_file(c.workingDirectory, seeds_created, c.roiSpecificationFileForSCA)

    if 1 in c.runNetworkCentrality:

        if 'centrality_outputs_smoothed' in c.useSeedInAnalysis:

            c.templateSpecificationFile = append_seeds_to_file(c.workingDirectory, seeds_created, c.templateSpecificationFile)


    pipeline_timing_info = []
    pipeline_timing_info.append(unique_pipeline_id)
    pipeline_timing_info.append(pipeline_start_stamp)
    pipeline_timing_info.append(len(sublist))


    if not c.runOnGrid:

        # Import packages
        from CPAC.pipeline.cpac_pipeline import prep_workflow

        # Init variables
        procss = [Process(target=prep_workflow,
                          args=(sub, c, strategies, 1,
                                pipeline_timing_info, p_name)) \
                  for sub in sublist]
        pid = open(os.path.join(c.outputDirectory, 'pid.txt'), 'w')
        # Init job queue
        jobQueue = []

        # If we're allocating more processes than are subjects, run them all
        if len(sublist) <= c.numSubjectsAtOnce:
            """
            Stream all the subjects as sublist is
            less than or equal to the number of 
            subjects that need to run
            """
            for p in procss:
                p.start()
                print(p.pid, file=pid)
        # Otherwise manage resources to run processes incrementally
        else:
            """
            Stream the subject workflows for preprocessing.
            At Any time in the pipeline c.numSubjectsAtOnce
            will run, unless the number remaining is less than
            the value of the parameter stated above
            """
            idx = 0
            while idx < len(sublist):
                # If the job queue is empty and we haven't started indexing
                if len(jobQueue) == 0 and idx == 0:
                    # Init subject process index
                    idc = idx
                    # Launch processes (one for each subject)
                    for p in procss[idc: idc + c.numSubjectsAtOnce]:
                        p.start()
                        print(p.pid, file=pid)
                        jobQueue.append(p)
                        idx += 1
                # Otherwise, jobs are running - check them
                else:
                    # Check every job in the queue's status
                    for job in jobQueue:
                        # If the job is not alive
                        if not job.is_alive():
                            # Find job and delete it from queue
                            print('found dead job ', job)
                            loc = jobQueue.index(job)
                            del jobQueue[loc]
                            # ...and start the next available process (subject)
                            procss[idx].start()
                            # Append this to job queue and increment index
                            jobQueue.append(procss[idx])
                            idx += 1

                    # Add sleep so while loop isn't consuming 100% of CPU
                    time.sleep(2)
        pid.close()
        
        
    else:

        import subprocess
        import pickle

        temp_files_dir = os.path.join(os.getcwd(), 'cluster_temp_files')
        print(subprocess.getoutput("mkdir -p %s" % temp_files_dir))


        strategies_file = os.path.join(temp_files_dir, 'strategies.obj')
        # pickle needs a binary-mode file handle on Python 3
        with open(strategies_file, 'wb') as f:
            pickle.dump(strategies, f)




        if 'sge' in c.resourceManager.lower():

            run_sge_jobs(c, config_file, strategies_file, subject_list_file, p_name)


        elif 'pbs' in c.resourceManager.lower():

            run_pbs_jobs(c, config_file, strategies_file, subject_list_file, p_name)

        elif 'condor' in c.resourceManager.lower():

            run_condor_jobs(c, config_file, strategies_file, subject_list_file, p_name)
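
Example #3 is the same runner part-way through a Python 3 port (print functions, subprocess.getoutput, print(..., file=pid)). One porting hazard worth noting: on Python 3, pickle requires binary file modes, so the strategies file must be opened with 'wb'/'rb'. A minimal round-trip sketch, assuming the strategies list and strategies_file path from the examples:

import pickle

with open(strategies_file, 'wb') as f:  # binary mode is mandatory on Python 3
    pickle.dump(strategies, f)

with open(strategies_file, 'rb') as f:
    strategies = pickle.load(f)
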
Example #4
def run(config_file, subject_list_file, p_name=None):
    
    # Import packages
    import time

    # take date+time stamp for run identification purposes
    unique_pipeline_id = time.strftime("%Y%m%d%H%M%S")
    pipeline_start_stamp = time.strftime("%Y-%m-%d_%H:%M:%S")

    try:
        if not os.path.exists(config_file):
            raise IOError
        else:
            c = Configuration(yaml.load(open(os.path.realpath(config_file), 'r')))
    
    except IOError:
        print "config file %s doesn't exist" % config_file
        raise
    except Exception:
        print "Error reading config file - %s" % config_file
        raise Exception

    #do some validation
    validate(c)


    try:
        sublist = yaml.load(open(os.path.realpath(subject_list_file), 'r'))
    except:
        print "Subject list is not in proper YAML format. Please check your file"
        raise Exception


    strategies = sorted(build_strategies(c))

    
    print "strategies ---> "
    print strategies
    
    sub_scan_map = {}

    print "subject list: "
    print sublist
    
    try:
    
        for sub in sublist:
            if sub['unique_id']:
                s = sub['subject_id'] + "_" + sub["unique_id"]
            else:
                s = sub['subject_id']

            scan_ids = ['scan_anat']
            for id in sub['rest']:
                scan_ids.append('scan_' + str(id))
            sub_scan_map[s] = scan_ids
            
    except:
        
        print "\n\n" + "ERROR: Subject list file not in proper format - check if you loaded the correct file?" + "\n" + \
              "Error name: cpac_runner_0001" + "\n\n"
        raise Exception

        
        
    create_group_log_template(sub_scan_map, os.path.join(c.outputDirectory, 'logs'))
 

    seeds_created = []
    if c.seedSpecificationFile is not None:

        try:
            if os.path.exists(c.seedSpecificationFile):
                seeds_created = create_seeds_(c.seedOutputLocation, c.seedSpecificationFile, c.FSLDIR)
                print 'seeds created %s -> ' % seeds_created
        except:
            raise IOError('Problem in seedSpecificationFile')

    if 1 in c.runVoxelTimeseries:

        if 'roi_voxelwise' in c.useSeedInAnalysis:

            c.maskSpecificationFile = append_seeds_to_file(c.workingDirectory, seeds_created, c.maskSpecificationFile)

    if 1 in c.runROITimeseries:

        if 'roi_average' in c.useSeedInAnalysis:

            c.roiSpecificationFile = append_seeds_to_file(c.workingDirectory, seeds_created, c.roiSpecificationFile)

    if 1 in c.runNetworkCentrality:

        if 'centrality_outputs_smoothed' in c.useSeedInAnalysis:

            c.templateSpecificationFile = append_seeds_to_file(c.workingDirectory, seeds_created, c.templateSpecificationFile)


    pipeline_timing_info = []
    pipeline_timing_info.append(unique_pipeline_id)
    pipeline_timing_info.append(pipeline_start_stamp)
    pipeline_timing_info.append(len(sublist))


    if not c.runOnGrid:

        from CPAC.pipeline.cpac_pipeline import prep_workflow
        procss = [Process(target=prep_workflow, args=(sub, c, strategies, 1, pipeline_timing_info, p_name)) for sub in sublist]
        pid = open(os.path.join(c.outputDirectory, 'pid.txt'), 'w')
        
        jobQueue = []
        if len(sublist) <= c.numSubjectsAtOnce:
            """
            Stream all the subjects as sublist is
            less than or equal to the number of 
            subjects that need to run
            """
            for p in procss:
                p.start()
                print >> pid, p.pid

        else:

            """
            Stream the subject workflows for preprocessing.
            At Any time in the pipeline c.numSubjectsAtOnce
            will run, unless the number remaining is less than
            the value of the parameter stated above
            """
            idx = 0
            while idx < len(sublist):

                if len(jobQueue) == 0 and idx == 0:

                    idc = idx
                    for p in procss[idc: idc + c.numSubjectsAtOnce]:

                        p.start()
                        print >> pid, p.pid
                        jobQueue.append(p)
                        idx += 1

                else:

                    for job in jobQueue:

                        # Replace each finished job with the next waiting subject
                        if not job.is_alive():
                            print 'found dead job ', job
                            loc = jobQueue.index(job)
                            del jobQueue[loc]
                            procss[idx].start()

                            jobQueue.append(procss[idx])
                            idx += 1

                    # Sleep so the polling loop isn't consuming 100% of a CPU
                    time.sleep(2)

        pid.close()
        
        
    else:

        import commands
        import pickle

        temp_files_dir = os.path.join(os.getcwd(), 'cluster_temp_files')
        print commands.getoutput("mkdir -p %s" % temp_files_dir)


        strategies_file = os.path.join(temp_files_dir, 'strategies.obj')
        f = open(strategies_file, 'w')
        pickle.dump(strategies, f)
        f.close()




        if 'sge' in c.resourceManager.lower():

            run_sge_jobs(c, config_file, strategies_file, subject_list_file, p_name)


        elif 'pbs' in c.resourceManager.lower():

            run_pbs_jobs(c, config_file, strategies_file, subject_list_file, p_name)

        elif 'condor' in c.resourceManager.lower():

            run_condor_jobs(c, config_file, strategies_file, subject_list_file, p_name)
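
Every local-run branch writes one child PID per line to pid.txt, which is enough to check on a run from outside the process. A sketch, assuming a POSIX system and the pid.txt written above:

import os

with open(os.path.join(c.outputDirectory, 'pid.txt')) as f:
    pids = [int(line) for line in f if line.strip()]

for p in pids:
    try:
        os.kill(p, 0)  # signal 0 only checks existence; nothing is sent
        print('%d still running' % p)
    except OSError:
        print('%d finished (or never started)' % p)
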
Example #5
def run(config_file, subject_list_file, p_name=None):

    # Import packages
    import time

    # take date+time stamp for run identification purposes
    unique_pipeline_id = time.strftime("%Y%m%d%H%M%S")
    pipeline_start_stamp = time.strftime("%Y-%m-%d_%H:%M:%S")

    try:
        if not os.path.exists(config_file):
            raise IOError
        else:
            c = Configuration(
                yaml.load(open(os.path.realpath(config_file), 'r')))

    except IOError:
        print "config file %s doesn't exist" % config_file
        raise
    except Exception:
        print "Error reading config file - %s" % config_file
        raise Exception

    #do some validation
    validate(c)

    # get the pipeline name
    p_name = p_name or c.pipelineName

    try:
        sublist = yaml.load(open(os.path.realpath(subject_list_file), 'r'))
    except:
        print "Subject list is not in proper YAML format. Please check your file"
        raise Exception

    # NOTE: strategies list is only needed in cpac_pipeline prep_workflow for
    # creating symlinks
    strategies = sorted(build_strategies(c))

    print "strategies ---> "
    print strategies

    sub_scan_map = {}

    print "subject list: "
    print sublist

    try:

        for sub in sublist:
            if sub['unique_id']:
                s = sub['subject_id'] + "_" + sub["unique_id"]
            else:
                s = sub['subject_id']

            scan_ids = ['scan_anat']
            for id in sub['rest']:
                scan_ids.append('scan_' + str(id))
            sub_scan_map[s] = scan_ids

    except:

        print "\n\n" + "ERROR: Subject list file not in proper format - check if you loaded the correct file?" + "\n" + \
              "Error name: cpac_runner_0001" + "\n\n"
        raise Exception

    create_group_log_template(sub_scan_map,
                              os.path.join(c.outputDirectory, 'logs'))

    seeds_created = []
    if c.seedSpecificationFile is not None:

        try:
            if os.path.exists(c.seedSpecificationFile):
                seeds_created = create_seeds_(c.seedOutputLocation,
                                              c.seedSpecificationFile,
                                              c.FSLDIR)
                print 'seeds created %s -> ' % seeds_created
        except:
            raise IOError('Problem in seedSpecificationFile')

    if 1 in c.runVoxelTimeseries:

        if 'roi_voxelwise' in c.useSeedInAnalysis:

            c.maskSpecificationFile = append_seeds_to_file(
                c.workingDirectory, seeds_created, c.maskSpecificationFile)

    if 1 in c.runROITimeseries:

        if 'roi_average' in c.useSeedInAnalysis:

            c.roiSpecificationFile = append_seeds_to_file(
                c.workingDirectory, seeds_created, c.roiSpecificationFile)

    if 1 in c.runSCA:

        if 'roi_average' in c.useSeedInAnalysis:

            c.roiSpecificationFileForSCA = append_seeds_to_file(
                c.workingDirectory, seeds_created,
                c.roiSpecificationFileForSCA)

    if 1 in c.runNetworkCentrality:

        if 'centrality_outputs_smoothed' in c.useSeedInAnalysis:

            c.templateSpecificationFile = append_seeds_to_file(
                c.workingDirectory, seeds_created, c.templateSpecificationFile)

    pipeline_timing_info = []
    pipeline_timing_info.append(unique_pipeline_id)
    pipeline_timing_info.append(pipeline_start_stamp)
    pipeline_timing_info.append(len(sublist))

    if not c.runOnGrid:

        # Import packages
        from CPAC.pipeline.cpac_pipeline import prep_workflow

        # Init variables
        procss = [Process(target=prep_workflow,
                          args=(sub, c, strategies, 1,
                                pipeline_timing_info, p_name)) \
                  for sub in sublist]
        pid = open(os.path.join(c.outputDirectory, 'pid.txt'), 'w')
        # Init job queue
        jobQueue = []

        # If we're allocating more processes than are subjects, run them all
        if len(sublist) <= c.numSubjectsAtOnce:
            """
            Stream all the subjects as sublist is
            less than or equal to the number of 
            subjects that need to run
            """
            for p in procss:
                p.start()
                print >> pid, p.pid
        # Otherwise manage resources to run processes incrementally
        else:
            """
            Stream the subject workflows for preprocessing.
            At Any time in the pipeline c.numSubjectsAtOnce
            will run, unless the number remaining is less than
            the value of the parameter stated above
            """
            idx = 0
            while idx < len(sublist):
                # If the job queue is empty and we haven't started indexing
                if len(jobQueue) == 0 and idx == 0:
                    # Init subject process index
                    idc = idx
                    # Launch processes (one for each subject)
                    for p in procss[idc:idc + c.numSubjectsAtOnce]:
                        p.start()
                        print >> pid, p.pid
                        jobQueue.append(p)
                        idx += 1
                # Otherwise, jobs are running - check them
                else:
                    # Check every job in the queue's status
                    for job in jobQueue:
                        # If the job is not alive
                        if not job.is_alive():
                            # Find job and delete it from queue
                            print 'found dead job ', job
                            loc = jobQueue.index(job)
                            del jobQueue[loc]
                            # ...and start the next available process (subject)
                            procss[idx].start()
                            # Append this to job queue and increment index
                            jobQueue.append(procss[idx])
                            idx += 1

                    # Add sleep so while loop isn't consuming 100% of CPU
                    time.sleep(2)
        pid.close()

    else:

        import commands
        import pickle

        temp_files_dir = os.path.join(os.getcwd(), 'cluster_temp_files')
        print commands.getoutput("mkdir -p %s" % temp_files_dir)

        strategies_file = os.path.join(temp_files_dir, 'strategies.obj')
        f = open(strategies_file, 'w')
        pickle.dump(strategies, f)
        f.close()

        if 'sge' in c.resourceManager.lower():

            run_sge_jobs(c, config_file, strategies_file, subject_list_file,
                         p_name)

        elif 'pbs' in c.resourceManager.lower():

            run_pbs_jobs(c, config_file, strategies_file, subject_list_file,
                         p_name)

        elif 'condor' in c.resourceManager.lower():

            run_condor_jobs(c, config_file, strategies_file, subject_list_file,
                            p_name)
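
All of these variants call yaml.load(open(...)) without an explicit Loader. On PyYAML >= 5.1 that emits a deprecation warning, and plain yaml.load will happily construct arbitrary Python objects from a malicious file; for plain config and subject-list files like these, yaml.safe_load is the usual fix (Configuration and config_file as in the examples):

import os
import yaml

with open(os.path.realpath(config_file), 'r') as f:
    c = Configuration(yaml.safe_load(f))
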
Example #6
def run(config_file,
        subject_list_file,
        p_name=None,
        plugin=None,
        plugin_args=None):
    '''
    '''

    # Import packages
    import os
    import pickle
    import time

    from CPAC.pipeline.cpac_pipeline import prep_workflow

    # Init variables
    config_file = os.path.realpath(config_file)
    subject_list_file = os.path.realpath(subject_list_file)

    # take date+time stamp for run identification purposes
    unique_pipeline_id = time.strftime("%Y%m%d%H%M%S")
    pipeline_start_stamp = time.strftime("%Y-%m-%d_%H:%M:%S")

    # Load in pipeline config file
    try:
        if not os.path.exists(config_file):
            raise IOError
        else:
            c = Configuration(yaml.load(open(config_file, 'r')))
    except IOError:
        print "config file %s doesn't exist" % config_file
        raise
    except Exception as e:
        raise Exception("Error reading config file - {0}\n\nError details:"
                        "\n{1}\n\n".format(config_file, e))

    c.logDirectory = os.path.abspath(c.logDirectory)
    c.workingDirectory = os.path.abspath(c.workingDirectory)
    c.outputDirectory = os.path.abspath(c.outputDirectory)
    c.crashLogDirectory = os.path.abspath(c.crashLogDirectory)

    # Do some validation
    validate(c)

    # Get the pipeline name
    p_name = p_name or c.pipelineName

    # Load in subject list
    try:
        sublist = yaml.load(open(subject_list_file, 'r'))
    except:
        print "Subject list is not in proper YAML format. Please check " \
              "your file"
        raise Exception

    # NOTE: strategies list is only needed in cpac_pipeline prep_workflow for
    # creating symlinks
    strategies = sorted(build_strategies(c))

    # Populate subject scan map
    sub_scan_map = {}
    try:
        for sub in sublist:
            if sub['unique_id']:
                s = sub['subject_id'] + "_" + sub["unique_id"]
            else:
                s = sub['subject_id']
            scan_ids = ['scan_anat']
            try:
                for id in sub['func']:
                    scan_ids.append('scan_' + str(id))
            except KeyError:
                for id in sub['rest']:
                    scan_ids.append('scan_' + str(id))
            sub_scan_map[s] = scan_ids
    except:
        print "\n\n" + "ERROR: Subject list file not in proper format - " \
              "check if you loaded the correct file?" + "\n" + \
              "Error name: cpac_runner_0001" + "\n\n"
        raise Exception

    create_group_log_template(sub_scan_map, c.logDirectory)

    pipeline_timing_info = []
    pipeline_timing_info.append(unique_pipeline_id)
    pipeline_timing_info.append(pipeline_start_stamp)
    pipeline_timing_info.append(len(sublist))

    # If we're running on cluster, execute job scheduler
    if c.runOnGrid:
        # Create cluster log dir
        cluster_files_dir = os.path.join(c.logDirectory, 'cluster_files')
        if not os.path.exists(cluster_files_dir):
            os.makedirs(cluster_files_dir)

        # Create strategies file
        strategies_file = os.path.join(cluster_files_dir, 'strategies.obj')
        with open(strategies_file, 'w') as f:
            pickle.dump(strategies, f)

        # Check if it's a condor job, and run that
        if 'condor' in c.resourceManager.lower():
            run_condor_jobs(c, config_file, strategies_file, subject_list_file,
                            p_name)
        # All other schedulers go through run_cpac_on_cluster
        else:
            run_cpac_on_cluster(config_file, subject_list_file,
                                strategies_file, cluster_files_dir)

    # Run on one computer
    else:
        # Init variables
        procss = [
            Process(target=prep_workflow,
                    args=(sub, c, strategies, 1, pipeline_timing_info, p_name,
                          plugin, plugin_args)) for sub in sublist
        ]

        if not os.path.exists(c.workingDirectory):
            try:
                os.makedirs(c.workingDirectory)
            except:
                err = "\n\n[!] CPAC says: Could not create the working " \
                      "directory: %s\n\nMake sure you have permissions " \
                      "to write to this directory.\n\n" % c.workingDirectory
                raise Exception(err)

        pid = open(os.path.join(c.workingDirectory, 'pid.txt'), 'w')
        # Init job queue
        jobQueue = []

        # If we're allocating more processes than are subjects, run them all
        if len(sublist) <= c.numParticipantsAtOnce:
            for p in procss:
                p.start()
                print >> pid, p.pid
        # Otherwise manage resources to run processes incrementally
        else:
            idx = 0
            while idx < len(sublist):
                # If the job queue is empty and we haven't started indexing
                if len(jobQueue) == 0 and idx == 0:
                    # Init subject process index
                    idc = idx
                    # Launch processes (one for each subject)
                    for p in procss[idc:idc + c.numParticipantsAtOnce]:
                        p.start()
                        print >> pid, p.pid
                        jobQueue.append(p)
                        idx += 1
                # Otherwise, jobs are running - check them
                else:
                    # Check every job in the queue's status
                    for job in jobQueue:
                        # If the job is not alive
                        if not job.is_alive():
                            # Find job and delete it from queue
                            print 'found dead job ', job
                            loc = jobQueue.index(job)
                            del jobQueue[loc]
                            # ...and start the next available process
                            # (subject)
                            procss[idx].start()
                            # Append this to job queue and increment index
                            jobQueue.append(procss[idx])
                            idx += 1
                    # Add sleep so while loop isn't consuming 100% of CPU
                    time.sleep(2)
        # Close PID txt file to indicate finish
        pid.close()
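
Examples #2 and #6 add plugin/plugin_args parameters that are forwarded untouched to prep_workflow, presumably to select and configure the underlying Nipype execution plugin. A hypothetical call, assuming Nipype's MultiProc plugin and its n_procs/memory_gb arguments:

run('pipeline_config.yml', 'subject_list.yml',
    plugin='MultiProc', plugin_args={'n_procs': 4, 'memory_gb': 8})
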
Example #7
def run(config_file, subject_list_file, p_name=None):

    try:

        if not os.path.exists(config_file):
            raise IOError
        else:
            c = Configuration(
                yaml.load(open(os.path.realpath(config_file), 'r')))

    except IOError:
        print "config file %s doesn't exist" % config_file
        raise
    except Exception:
        print "Error reading config file - %s" % config_file
        raise Exception

    #do some validation
    validate(c)

    try:
        sublist = yaml.load(open(os.path.realpath(subject_list_file), 'r'))
    except:
        print "Subject list is not in proper YAML format. Please check your file"
        raise Exception

    strategies = sorted(build_strategies(c))

    print "strategies ---> "
    print strategies

    sub_scan_map = {}

    print "subject list: "
    print sublist

    try:

        for sub in sublist:
            if sub['unique_id']:
                s = sub['subject_id'] + "_" + sub["unique_id"]
            else:
                s = sub['subject_id']

            scan_ids = ['scan_anat']
            for id in sub['rest']:
                scan_ids.append('scan_' + str(id))
            sub_scan_map[s] = scan_ids

    except:

        print "\n\n" + "ERROR: Subject list file not in proper format - check if you loaded the correct file?" + "\n" + \
              "Error name: cpac_runner_0001" + "\n\n"
        raise Exception

    create_group_log_template(sub_scan_map,
                              os.path.join(c.outputDirectory, 'logs'))

    seeds_created = []
    if c.seedSpecificationFile is not None:

        try:
            if os.path.exists(c.seedSpecificationFile):
                seeds_created = create_seeds_(c.seedOutputLocation,
                                              c.seedSpecificationFile,
                                              c.FSLDIR)
                print 'seeds created %s -> ' % seeds_created
        except:
            raise IOError('Problem in seedSpecificationFile')

    if 1 in c.runVoxelTimeseries:

        if 'roi_voxelwise' in c.useSeedInAnalysis:

            c.maskSpecificationFile = append_seeds_to_file(
                c.workingDirectory, seeds_created, c.maskSpecificationFile)

    if 1 in c.runROITimeseries:

        if 'roi_average' in c.useSeedInAnalysis:

            c.roiSpecificationFile = append_seeds_to_file(
                c.workingDirectory, seeds_created, c.roiSpecificationFile)

    if 1 in c.runNetworkCentrality:

        if 'centrality_outputs_smoothed' in c.useSeedInAnalysis:

            c.templateSpecificationFile = append_seeds_to_file(
                c.workingDirectory, seeds_created, c.templateSpecificationFile)

    if not c.runOnGrid:

        import time
        from CPAC.pipeline.cpac_pipeline import prep_workflow
        procss = [
            Process(target=prep_workflow, args=(sub, c, strategies, 1, p_name))
            for sub in sublist
        ]
        pid = open(os.path.join(c.outputDirectory, 'pid.txt'), 'w')

        jobQueue = []
        if len(sublist) <= c.numSubjectsAtOnce:
            """
            Stream all the subjects as sublist is
            less than or equal to the number of 
            subjects that need to run
            """
            for p in procss:
                p.start()
                print >> pid, p.pid

        else:
            """
            Stream the subject workflows for preprocessing.
            At Any time in the pipeline c.numSubjectsAtOnce
            will run, unless the number remaining is less than
            the value of the parameter stated above
            """
            idx = 0
            while idx < len(sublist):

                if len(jobQueue) == 0 and idx == 0:

                    idc = idx
                    for p in procss[idc:idc + c.numSubjectsAtOnce]:

                        p.start()
                        print >> pid, p.pid
                        jobQueue.append(p)
                        idx += 1

                else:

                    for job in jobQueue:

                        # Replace each finished job with the next waiting subject
                        if not job.is_alive():
                            print 'found dead job ', job
                            loc = jobQueue.index(job)
                            del jobQueue[loc]
                            procss[idx].start()

                            jobQueue.append(procss[idx])
                            idx += 1

                    # Sleep so the polling loop isn't consuming 100% of a CPU
                    time.sleep(2)

        pid.close()

    else:

        import commands
        import pickle

        temp_files_dir = os.path.join(os.getcwd(), 'cluster_temp_files')
        print commands.getoutput("mkdir -p %s" % temp_files_dir)

        strategies_file = os.path.join(temp_files_dir, 'strategies.obj')
        f = open(strategies_file, 'w')
        pickle.dump(strategies, f)
        f.close()

        if 'sge' in c.resourceManager.lower():

            run_sge_jobs(c, config_file, strategies_file, subject_list_file,
                         p_name)

        elif 'pbs' in c.resourceManager.lower():

            run_pbs_jobs(c, config_file, strategies_file, subject_list_file,
                         p_name)

        elif 'condor' in c.resourceManager.lower():

            run_condor_jobs(c, config_file, strategies_file, subject_list_file,
                            p_name)
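
A closing design note: the sge/pbs/condor if/elif chain repeated in most of these examples silently does nothing when resourceManager matches none of the three. A lookup table over the same runner functions makes the dispatch explicit and the failure loud; a sketch:

SCHEDULERS = {
    'sge': run_sge_jobs,
    'pbs': run_pbs_jobs,
    'condor': run_condor_jobs,
}

rm = c.resourceManager.lower()
for name, runner in SCHEDULERS.items():
    if name in rm:
        runner(c, config_file, strategies_file, subject_list_file, p_name)
        break
else:
    raise ValueError("Unsupported resource manager: %s" % c.resourceManager)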