def make_sge_jobs(commands, job_prefix, queue, jobs_dir="jobs/", num_jobs=100, max_hours_per_job=24): """prepare qsub text files. command: list of commands job_prefix: a short, descriptive name for the job. queue: name of the queue to submit to jobs_dir: path to directory where job submision scripts are written max_hours_per_job: the maximum expected time for each command (this will be multiplied by number of commands per job to get a 'walltime' ncpus: number of cpus nodes: number of nodes keep_output: keep standard error, standard out, both, or neither o=std out, e=std err, oe=both, n=neither """ filenames = [] create_dir(jobs_dir) #calculate the number of commands to put in each job num_commands_per_job = int(ceil(len(commands) / float(num_jobs))) #calculate the walltime (time before job will be killed by scheduler if still running) total_time = max_hours_per_job * num_commands_per_job walltime = "{0}:00:00".format(total_time) for command_group in grouper(commands, num_commands_per_job, ''): job_name = get_tmp_filename(tmp_dir=jobs_dir, prefix=job_prefix + "_", suffix=".txt") out_fh = open(job_name, "w") stderr_fp = job_name + "_stderr" stdout_fp = job_name + "_stdout" out_fh.write( SGE_QSUB_TEXT % (walltime, stderr_fp, stdout_fp, "\n".join(command_group))) out_fh.close() filenames.append(job_name) return filenames
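# The chunking above relies on a grouper() helper defined elsewhere in this
# module; the sketch below assumes it follows the standard itertools recipe
# (fixed-size chunks, with the last chunk padded by the fill value).
from itertools import zip_longest  # izip_longest on Python 2

def grouper(iterable, n, fillvalue=None):
    # Collect data into fixed-length chunks; pad the final chunk with
    # fillvalue so every group has exactly n elements.
    args = [iter(iterable)] * n
    return zip_longest(*args, fillvalue=fillvalue)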
def make_sge_jobs(commands, job_prefix, queue, jobs_dir="jobs/",num_jobs=100,max_hours_per_job=24): """prepare qsub text files. command: list of commands job_prefix: a short, descriptive name for the job. queue: name of the queue to submit to jobs_dir: path to directory where job submision scripts are written max_hours_per_job: the maximum expected time for each command (this will be multiplied by number of commands per job to get a 'walltime' ncpus: number of cpus nodes: number of nodes keep_output: keep standard error, standard out, both, or neither o=std out, e=std err, oe=both, n=neither """ filenames=[] create_dir(jobs_dir) #calculate the number of commands to put in each job num_commands_per_job=int(ceil(len(commands)/float(num_jobs))) #calculate the walltime (time before job will be killed by scheduler if still running) total_time = max_hours_per_job*num_commands_per_job walltime= "{0}:00:00".format(total_time) for command_group in grouper(commands,num_commands_per_job,''): job_name = get_tmp_filename(tmp_dir=jobs_dir, prefix=job_prefix+"_", suffix = ".txt") out_fh = open(job_name,"w") stderr_fp = job_name+"_stderr" stdout_fp = job_name+"_stdout" out_fh.write(SGE_QSUB_TEXT % (walltime, stderr_fp, stdout_fp, "\n".join(command_group))) out_fh.close() filenames.append(job_name) return filenames
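# SGE_QSUB_TEXT is defined elsewhere in this module; the code above only
# tells us it takes four %-style fields in this order: walltime, stderr path,
# stdout path, and the newline-joined commands. The constant below is a
# hypothetical template with that shape (illustrative SGE directives, not
# the project's actual value).
SGE_QSUB_TEXT_EXAMPLE = """#!/bin/bash
#$ -S /bin/bash
#$ -l h_rt=%s
#$ -e %s
#$ -o %s
%s
"""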
    # point paths_to_remove at job_fps
    paths_to_remove = job_fps

    # This is messy right now as our clusters (bmf, bmf2) require us to
    # start and exit a shell for some reason which we haven't figured out.
    # Running these commands as parallel shell scripts gets screwed up by
    # this. For the time being, I'm stripping this out here. Once the new
    # clusters are up, I'm going to move the wrapping of commands in
    # bash/exit to the cluster_jobs script. At that point this function
    # will be greatly simplified.
    ignored_subcommands = frozenset(['/bin/bash', 'exit'])

    # calculate the number of commands to put in each job
    num_commands_per_job = int(ceil(len(commands) / float(num_jobs)))

    for i, command_group in enumerate(
            grouper(commands, num_commands_per_job, '')):
        job_fp = '%s/%s%d' % (jobs_dir, run_id, i)
        f = open(job_fp, 'w')
        for command in command_group:
            # drop the '/bin/bash' and 'exit' wrappers and keep the
            # remaining subcommands
            f.write('\n'.join([subcommand
                               for subcommand in command.split(';')
                               if subcommand.strip()
                               not in ignored_subcommands]))
            # newline-terminate so successive commands don't run together
            # on one line of the job script
            f.write('\n')
        f.close()
        chmod(job_fp, S_IRWXU)
        job_fps.append(job_fp)

    return job_fps, paths_to_remove
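# Illustrative only: how the semicolon filtering above behaves on a
# hypothetical wrapped command (the command string below is made up).
def _demo_strip_shell_wrappers():
    ignored = frozenset(['/bin/bash', 'exit'])
    wrapped = '/bin/bash; my_tool -i in.txt -o out.txt; exit'
    kept = '\n'.join(sub for sub in wrapped.split(';')
                     if sub.strip() not in ignored)
    # kept == ' my_tool -i in.txt -o out.txt' (the leading space is
    # preserved, exactly as the loop above writes it to the job file)
    return kept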
def run_commands(output_dir, commands, run_id, submit_jobs, keep_temp,
                 num_jobs=4):
    """ """