Example #1
def make_sge_jobs(commands,
                  job_prefix,
                  queue,
                  jobs_dir="jobs/",
                  num_jobs=100,
                  max_hours_per_job=24):
    """prepare qsub text files.

    command: list of commands

    job_prefix: a short, descriptive name for the job.

    queue: name of the queue to submit to

    jobs_dir: path to directory where job submision scripts are written

    max_hours_per_job: the maximum expected time for each command (this will be multiplied by number of commands per job to get a 'walltime'

    ncpus: number of cpus

    nodes: number of nodes

    keep_output: keep standard error, standard out, both, or neither
                 o=std out, e=std err, oe=both, n=neither
    """

    filenames = []
    create_dir(jobs_dir)

    #calculate the number of commands to put in each job
    num_commands_per_job = int(ceil(len(commands) / float(num_jobs)))

    #calculate the walltime (time before job will be killed by scheduler if still running)
    total_time = max_hours_per_job * num_commands_per_job
    walltime = "{0}:00:00".format(total_time)

    for command_group in grouper(commands, num_commands_per_job, ''):
        job_name = get_tmp_filename(tmp_dir=jobs_dir,
                                    prefix=job_prefix + "_",
                                    suffix=".txt")
        out_fh = open(job_name, "w")

        stderr_fp = job_name + "_stderr"
        stdout_fp = job_name + "_stdout"
        out_fh.write(
            SGE_QSUB_TEXT %
            (walltime, stderr_fp, stdout_fp, "\n".join(command_group)))
        out_fh.close()
        filenames.append(job_name)
    return filenames
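
A minimal usage sketch follows. Everything in it is hypothetical (the module name, the command strings, and the queue name), and make_sge_jobs itself relies on helpers defined alongside it in its own module: ceil (from math), grouper, create_dir, get_tmp_filename, and the SGE_QSUB_TEXT template.

# Hypothetical usage sketch; the module name and commands are assumptions.
from cluster_util import make_sge_jobs   # wherever make_sge_jobs is defined

commands = ["blastall -i chunk_%d.fasta -o chunk_%d.out" % (i, i)
            for i in range(10)]
job_files = make_sge_jobs(commands,
                          job_prefix="blast",
                          queue="all.q",
                          num_jobs=4,
                          max_hours_per_job=2)
# Each returned path is a qsub script; submit them with something like:
#   for fp in job_files: os.system("qsub %s" % fp)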
Example #2
def make_sge_jobs(commands, job_prefix, queue, jobs_dir="jobs/",
                  num_jobs=100, max_hours_per_job=24):
    """Prepare qsub text files.

    commands: list of commands

    job_prefix: a short, descriptive name for the job

    queue: name of the queue to submit to

    jobs_dir: path to directory where job submission scripts are written

    num_jobs: number of job scripts to split the commands across

    max_hours_per_job: the maximum expected time for each command (this will
                       be multiplied by the number of commands per job to get
                       a 'walltime')

    Returns a list of paths to the job files that were written.
    """

    filenames = []
    create_dir(jobs_dir)

    # calculate the number of commands to put in each job
    num_commands_per_job = int(ceil(len(commands) / float(num_jobs)))

    # calculate the walltime (time before the job will be killed by the scheduler if still running)
    total_time = max_hours_per_job * num_commands_per_job
    walltime = "{0}:00:00".format(total_time)

    for command_group in grouper(commands, num_commands_per_job, ''):
        job_name = get_tmp_filename(tmp_dir=jobs_dir, prefix=job_prefix + "_",
                                    suffix=".txt")
        out_fh = open(job_name, "w")

        stderr_fp = job_name + "_stderr"
        stdout_fp = job_name + "_stdout"
        out_fh.write(SGE_QSUB_TEXT % (walltime, stderr_fp, stdout_fp,
                                      "\n".join(command_group)))
        out_fh.close()
        filenames.append(job_name)
    return filenames
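
The job-splitting and walltime arithmetic in the two examples above is easy to check in isolation; a self-contained sketch with made-up numbers:

# Stand-alone illustration of the splitting/walltime arithmetic (example numbers only).
from math import ceil

commands = ["echo %d" % i for i in range(250)]   # 250 commands
num_jobs = 100
max_hours_per_job = 24

num_commands_per_job = int(ceil(len(commands) / float(num_jobs)))   # 3 commands per job
walltime = "{0}:00:00".format(max_hours_per_job * num_commands_per_job)
print("%d commands per job, walltime %s" % (num_commands_per_job, walltime))
# -> 3 commands per job, walltime 72:00:00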
Example #3
        # point paths_to_remove at job_fps
        paths_to_remove = job_fps

    # This is messy right now as our clusters (bmf, bmf2) require us to
    # start and exit a shell for some reason which we haven't figured out.
    # Running these commands as parallel shell scripts gets screwed up by
    # this. For the time-being, I'm stripping this out here. Once the new
    # clusters are up, I'm going to move the wrapping of commands in
    # bash/exit to the cluster_jobs script. At that point this function
    # will be greatly simplified.
    ignored_subcommands = {}.fromkeys(["/bin/bash", "exit"])

    # calculate the number of commands to put in each job
    num_commands_per_job = int(ceil(len(commands) / float(num_jobs)))

    for i, command_group in enumerate(grouper(commands, num_commands_per_job, "")):
        job_fp = "%s/%s%d" % (jobs_dir, run_id, i)
        f = open(job_fp, "w")
        for command in command_group:
            f.write(
                "\n".join(
                    [subcommand for subcommand in command.split(";") if subcommand.strip() not in ignored_subcommands]
                )
            )
        f.close()
        chmod(job_fp, S_IRWXU)
        job_fps.append(job_fp)

    return job_fps, paths_to_remove
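
The comment block above explains why the '/bin/bash' and 'exit' wrappers are stripped before commands are written out; the filtering can be seen in a small stand-alone sketch (the command string is made up). In the fragment itself, chmod and S_IRWXU are presumably imported from the os and stat modules.

# Stand-alone illustration of the subcommand filtering above (hypothetical command string).
ignored_subcommands = {}.fromkeys(["/bin/bash", "exit"])

command = "/bin/bash; cd /tmp/run1; uclust --input seqs.fna; exit"
kept = [subcommand for subcommand in command.split(";")
        if subcommand.strip() not in ignored_subcommands]
print("\n".join(kept))
# prints the two real subcommands (" cd /tmp/run1" and " uclust --input seqs.fna"),
# each on its own line; the shell wrapper pieces are dropped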

Example #4
        paths_to_remove = job_fps

    # This is messy right now as our clusters (bmf, bmf2) require us to
    # start and exit a shell for some reason which we haven't figured out.
    # Running these commands as parallel shell scripts gets screwed up by
    # this. For the time-being, I'm stripping this out here. Once the new
    # clusters are up, I'm going to move the wrapping of commands in
    # bash/exit to the cluster_jobs script. At that point this function
    # will be greatly simplified.
    ignored_subcommands = {}.fromkeys(['/bin/bash', 'exit'])

    #calculate the number of commands to put in each job
    num_commands_per_job = int(ceil(len(commands) / float(num_jobs)))

    for i, command_group in enumerate(
            grouper(commands, num_commands_per_job, '')):
        job_fp = '%s/%s%d' % (jobs_dir, run_id, i)
        f = open(job_fp, 'w')
        for command in command_group:
            f.write('\n'.join([subcommand
                               for subcommand in command.split(';')
                               if subcommand.strip() not in ignored_subcommands]))
        f.close()
        chmod(job_fp, S_IRWXU)
        job_fps.append(job_fp)

    return job_fps, paths_to_remove


def run_commands(output_dir,
                 commands,
Example #5
        # point paths_to_remove at job_fps
        paths_to_remove = job_fps
    
    # This is messy right now as our clusters (bmf, bmf2) require us to
    # start and exit a shell for some reason which we haven't figured out. 
    # Running these commands as parallel shell scripts gets screwed up by 
    # this. For the time-being, I'm stripping this out here. Once the new 
    # clusters are up, I'm going to move the wrapping of commands in 
    # bash/exit to the cluster_jobs script. At that point this function
    # will be greatly simplified.
    ignored_subcommands = {}.fromkeys(['/bin/bash', 'exit'])

    # calculate the number of commands to put in each job
    num_commands_per_job = int(ceil(len(commands) / float(num_jobs)))

    for i, command_group in enumerate(grouper(commands, num_commands_per_job, '')):
        job_fp = '%s/%s%d' % (jobs_dir, run_id, i)
        f = open(job_fp, 'w')
        for command in command_group:
            f.write('\n'.join([subcommand
                               for subcommand in command.split(';')
                               if subcommand.strip() not in ignored_subcommands]))
        f.close()
        chmod(job_fp, S_IRWXU)
        job_fps.append(job_fp)
    
    return job_fps, paths_to_remove

def run_commands(output_dir, commands, run_id, submit_jobs, keep_temp, num_jobs=4):
    """
    """