# Assumed imports: clusterlib's test suite uses nose-style assertions.
from nose.tools import assert_equal, assert_raises

from clusterlib.scheduler import submit


def test_submit():
    """Test submit formatting function."""
    assert_equal(
        submit(job_command="python main.py", backend="sge"),
        'echo \'#!/bin/bash\npython main.py\' | qsub -N "job" -l '
        'h_rt=24:00:00 -l h_vmem=4000M')

    assert_equal(
        submit(job_command="python main.py", backend="slurm"),
        "echo '#!/bin/bash\npython main.py' | sbatch --job-name=job "
        "--time=24:00:00 --mem=4000")

    assert_equal(
        submit(job_command="python main.py", email="*****@*****.**",
               email_options="beas", backend="sge"),
        'echo \'#!/bin/bash\npython main.py\' | qsub -N "job" '
        '-l h_rt=24:00:00 -l h_vmem=4000M -M [email protected] -m beas')

    assert_equal(
        submit(job_command="python main.py", log_directory="/path/test",
               backend="sge"),
        'echo \'#!/bin/bash\npython main.py\' | qsub -N "job" '
        '-l h_rt=24:00:00 -l h_vmem=4000M -j y '
        '-o \'/path/test/$JOB_NAME.$JOB_ID.txt\'')

    assert_equal(
        submit(job_command="python main.py", log_directory="/path/test",
               backend="slurm"),
        "echo '#!/bin/bash\npython main.py' | sbatch --job-name=job "
        "--time=24:00:00 --mem=4000 -o /path/test/job.%j.txt")

    assert_raises(ValueError, submit, job_command="", backend="unknown")
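The assertions above pin down submit's defaults (job name "job", 24:00:00 wall time, 4000M memory, a #!/bin/bash shell script) and show that it only formats a submission one-liner. A minimal usage sketch under those assumptions; executing the returned string is always a separate step:

# Minimal usage sketch: submit() returns a shell one-liner and runs nothing.
from clusterlib.scheduler import submit

script = submit(job_command="python main.py", backend="slurm")
print(script)
# import os; os.system(script)  # uncomment to actually enqueue the job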
# Assumed imports; the private _check_job_id and _get_backend helpers are
# module-local to the test file and do not appear in this listing.
import subprocess
from getpass import getuser
from tempfile import TemporaryDirectory

from nose.tools import assert_in

from clusterlib.scheduler import queued_or_running_jobs, submit


def check_job_name_queued_or_running(job_name):
    with TemporaryDirectory() as temp_folder:
        user = getuser()

        # Launch job
        command = submit(job_command="sleep 600", job_name=job_name,
                         time="700", memory=500, log_directory=temp_folder)
        job_id = _check_job_id(command)

        # Assert that the job has been launched
        try:
            running_jobs = queued_or_running_jobs(user=user)
            assert_in(job_name, running_jobs)
        finally:
            # Make sure to clean up even if there is a failure
            if _get_backend() == "slurm":
                subprocess.call(["scancel", job_id])
            elif _get_backend() == "sge":
                subprocess.call(["qdel", job_id])
            else:
                raise NotImplementedError("backend not implemented")
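_check_job_id is not shown in this listing. A hypothetical reconstruction, assuming it runs the submission command and parses the numeric job id out of the scheduler's reply ("Submitted batch job 12345" on SLURM, "Your job 12345 ..." on SGE); the real helper may differ:

# Hypothetical sketch of the module-local _check_job_id helper;
# the actual implementation in the test module may differ.
import re
import subprocess

def _check_job_id(command):
    # Run the submission one-liner and capture the scheduler's reply.
    output = subprocess.check_output(command, shell=True).decode()
    match = re.search(r"\b(\d+)\b", output)
    if match is None:
        raise ValueError("no job id found in scheduler output: %r" % output)
    return match.group(1)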
def make_scripts(self, cmds, job_names=None):
    sys.stdout.write("Writing Scripts...\n")
    scheduled_jobs = set(queued_or_running_jobs(user=self.user))
    scripts = []
    if job_names is None:
        job_names = [None] * len(cmds)
    for index, cmd in enumerate(cmds):
        job_name = job_names[index] or self.__get_job_name(cmd)
        if job_name not in scheduled_jobs:
            script = submit(cmd, memory="%s" % self.memory,
                            job_name=job_name,
                            log_directory=self.slurm_logs,
                            backend='slurm', time=0,
                            shell_script="#!/usr/bin/env bash")
            script += " --partition=%s" % self.cluster_partition
            script += " --cpus-per-task=%s" % self.threads
            scripts.append(script)
            if self.verbose:
                sys.stdout.write(script + "\n")
    return scripts
# maxmem, partition, maxcpu, email, verbose and dry_run are module-level
# settings in the source project.
def dispatch_to_slurm(commands):
    scripts = {}
    for job_name, command in commands.items():  # iteritems() is Python 2 only
        script = submit(command, job_name=job_name, time="0",
                        memory="{}G".format(maxmem), backend="slurm",
                        shell_script="#!/usr/bin/env bash")
        script += " --partition={}".format(partition)
        script += " --ntasks=1"
        script += " --cpus-per-task={}".format(maxcpu)
        script += " --mail-type=END,FAIL"
        script += " --mail-user={}".format(email)
        scripts[job_name] = script

    scheduled_jobs = set(queued_or_running_jobs())
    for job_name, script in scripts.items():
        if job_name not in scheduled_jobs:
            if verbose:
                print(script, file=sys.stdout)
            if not dry_run:
                subprocess.call(script, shell=True)
        else:
            print("{} already running, skipping".format(job_name),
                  file=sys.stderr)
# slurm_logs is a module-level setting in the source project;
# call is subprocess.call.
def dispatchscript(cmd):
    script = submit(cmd, memory="240000", job_name="bwa_mem",
                    backend="slurm", time="0",
                    shell_script="#!/usr/bin/env bash",
                    log_directory=slurm_logs)
    script += " --partition=bigmemm"
    script += " --cpus-per-task=30"
    print("\nCalling:\n%s\n" % script)  # print statement was Python 2 syntax
    call(script, shell=True)
def get_submission_script(cmd, name, workdir, config):
    backend = config.get("brute", "env")
    assert cmd
    assert name
    script = None
    if backend == "sge":
        script = submit(cmd, job_name=name,
                        time=config.get("sge", "time"),
                        memory=config.getint("sge", "memory"),
                        backend=backend, log_directory=workdir)
    elif backend == "slurm":
        script = submit(cmd, job_name=name,
                        time=config.get("slurm", "time"),
                        memory=config.getint("slurm", "memory"),
                        backend=backend, log_directory=workdir)
        if config.has_option("slurm", "gres"):
            script += " --gres=" + config.get("slurm", "gres")
        if config.has_option("slurm", "partition"):
            script += " --partition=" + config.get("slurm", "partition")
        if config.has_option("slurm", "account"):
            script += " --account=" + config.get("slurm", "account")
        script += " --cpus-per-task=" + str(config.getint("slurm", "cpus-per-task"))
        script += " --ntasks-per-node=" + str(config.getint("slurm", "ntasks-per-node"))
    elif backend == "local":
        # Run locally instead of going through a scheduler.
        script = """#! /usr/bin/env bash
cd $(dirname $0)
CMD="%s"
$CMD &> %s""" % (cmd, os.path.join(workdir, "log.txt"))
    else:
        print("[fatal] unknown backend: " + str(backend))
        sys.exit(1)
    return script
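A hedged usage sketch for get_submission_script; the [brute] and [slurm] section and option names come from the function itself, but the concrete values and the workdir are assumptions for illustration:

# Illustrative only: exercises the "slurm" branch of get_submission_script.
from configparser import ConfigParser

config = ConfigParser()
config.read_string("""
[brute]
env = slurm

[slurm]
time = 24:00:00
memory = 16000
partition = batch
cpus-per-task = 4
ntasks-per-node = 1
""")

script = get_submission_script(cmd="python main.py", name="demo",
                               workdir="/tmp/demo", config=config)
print(script)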
# max_memory is a module-level setting in the source project.
def dispatch(commands):
    scripts = {}
    for job_name, command in commands.items():  # iteritems() is Python 2 only
        script = submit(command, job_name=job_name, time="0",
                        memory=max_memory + "G", backend="slurm",
                        shell_script="#!/usr/bin/env bash")
        script += " --partition=bigmemm"
        scripts[job_name] = script

    scheduled_jobs = set(queued_or_running_jobs())
    for job_name, script in scripts.items():
        if job_name not in scheduled_jobs:
            sys.stdout.write("\n{}\n".format(script))
            subprocess.call(script, shell=True)
        else:
            sys.stderr.write("{} running, skipping.\n".format(job_name))
# max_memory, partition, max_cpus and email are module-level settings
# in the source project.
def write_scripts(commands):
    scripts = {}
    for job_name, command in commands.items():  # iteritems() is Python 2 only
        script = submit(command, job_name=job_name, time="0",
                        memory=max_memory, backend="slurm",
                        shell_script="#!/usr/bin/env bash")
        script += " --partition={}".format(partition)
        script += " --ntasks=1"
        script += " --cpus-per-task={}".format(max_cpus)
        script += " --mail-type=END,FAIL"
        script += " --mail-user={}".format(email)
        scripts[job_name] = script
    return scripts
def dispatch(filelist, commands):
    scripts = []
    for i, command in enumerate(commands):
        script = submit(command, job_name="samtools_sort_%s" % filelist[i],
                        time="0", memory=240000, backend="slurm",
                        shell_script="#!/usr/bin/env bash")
        script += " --cpus-per-task=12"
        script += " --ntasks=1"
        script += " --partition=bigmemm"
        scripts.append(script)

    scheduled_jobs = set(queued_or_running_jobs())
    for i, script in enumerate(scripts):
        if "samtools_sort_%s" % filelist[i] not in scheduled_jobs:
            sys.stdout.write("\n%s\n" % script)
            subprocess.call(script, shell=True)
        else:
            sys.stderr.write("Job name 'samtools_sort_%s' found in queued "
                             "or running jobs list\n" % filelist[i])
# Assumed imports; _check_job_id and _get_backend are module-local helpers
# of the test file and do not appear in this listing.
import os
import os.path as op
import subprocess
from getpass import getuser
from tempfile import TemporaryDirectory
from time import sleep

from nose.tools import assert_equal

from clusterlib.scheduler import queued_or_running_jobs, submit


def test_log_output(n_trials=30):
    """Test that log output is uniform across schedulers."""
    with TemporaryDirectory() as temp_folder:
        user = getuser()
        job_completed = False

        # Launch a quick job on the default backend
        job_name = 'ok_job'
        command = submit(job_command="echo ok", job_name=job_name,
                         time="700", memory=500, log_directory=temp_folder)
        job_id = _check_job_id(command)

        try:
            for _ in range(n_trials):
                if job_name not in queued_or_running_jobs(user=user):
                    # Job has completed, let's check the output
                    job_completed = True
                    filename = "%s.%s.txt" % (job_name, job_id)
                    assert_equal(os.listdir(temp_folder), [filename])
                    with open(op.join(temp_folder, filename)) as fhandle:
                        assert_equal(fhandle.read().strip(), "ok")
                    break
                else:
                    # Let's wait a bit before retrying
                    sleep(5)
        finally:
            # Make sure to clean up even if there is a failure
            if not job_completed:
                if _get_backend('auto') == 'slurm':
                    subprocess.call(["scancel", job_id])
                else:
                    subprocess.call(["qdel", job_id])
                raise AssertionError(
                    "job %s (%s) has not completed after 5min."
                    % (job_id, job_name))
# clusterlib_launcher.py

import sys

from clusterlib.scheduler import queued_or_running_jobs
from clusterlib.scheduler import submit
from clusterlib.storage import sqlite3_loads

from clusterlib_main import NOSQL_PATH

if __name__ == "__main__":
    scheduled_jobs = set(queued_or_running_jobs())
    done_jobs = sqlite3_loads(NOSQL_PATH)

    for param in range(100):
        job_name = "job-param=%s" % param
        job_command = "%s clusterlib_main.py --param %s" % (sys.executable,
                                                            param)

        if job_name not in scheduled_jobs and job_command not in done_jobs:
            script = submit(job_command, job_name=job_name)
            print(script)

            # Uncomment those lines to launch the jobs
            # import os
            # os.system(script)
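The launcher imports NOSQL_PATH from a companion clusterlib_main.py that is not part of this listing. A minimal sketch of that module, assuming the sqlite3_dumps(dictionary, file_name) API from clusterlib.storage; the NOSQL_PATH location and the job body are illustrative, and the stored key must reproduce the exact job_command string the launcher checks:

# clusterlib_main.py -- assumed companion module (illustrative sketch).
import os
import sys

from clusterlib.storage import sqlite3_dumps

NOSQL_PATH = os.path.join(os.environ["HOME"], "job.sqlite3")

if __name__ == "__main__":
    # ... do the actual work for the given --param here ...

    # Record completion under the exact command string the launcher builds,
    # i.e. "<interpreter> clusterlib_main.py --param <n>".
    sqlite3_dumps({" ".join([sys.executable] + sys.argv): "JOB DONE"},
                  NOSQL_PATH)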
# Excerpt from inside a job-launching loop; the matching if-branch is not shown.
else:
    n_jobs_launched += 1
    cmd_parameters = " ".join("--%s %s" % (key, parameters[key])
                              for key in sorted(parameters))
    scripts_args = parse_arguments(shlex.split(cmd_parameters))
    if make_hash(scripts_args) != job_hash:
        pprint(scripts_args)
        pprint(parameters)
        raise ValueError("hashes are not equal; not all parameters were "
                         "specified.")

    cmd = submit(job_command="%s main.py %s" % (sys.executable,
                                                cmd_parameters),
                 job_name=job_hash,
                 time="100:00:00",
                 memory=24000,
                 log_directory=LOG_DIRECTORY,
                 backend="slurm")

    if not args["debug"]:
        os.system(cmd)
    elif args["verbose"]:
        print("[launched] %s" % job_hash)
        print(cmd)
        if os.path.exists(os.path.join(LOG_DIRECTORY, "%s.txt" % job_hash)):
            os.system("cat %s" % os.path.join(LOG_DIRECTORY,
                                              "%s.txt" % job_hash))
for job_hash, parameters in list(to_launch.items())[:max_n_launch]:
    cmd_parameters = " ".join("--%s %s" % (key, parameters[key])
                              for key in sorted(parameters))
    scripts_args = parse_arguments(shlex.split(cmd_parameters))
    if make_hash(scripts_args) != job_hash:
        pprint(scripts_args)
        pprint(parameters)
        raise ValueError("hashes are not equal; not all parameters were "
                         "specified.")

    cmd = submit(job_command=" ".join([sys.executable,
                                       os.path.abspath("main.py"),
                                       cmd_parameters]),
                 job_name=job_hash,
                 time="%s:00:00" % time[job_hash],
                 memory=memory[job_hash],
                 log_directory=LOG_DIRECTORY,
                 backend="slurm")
    cmd += select_queue(memory[job_hash], time[job_hash])

    if not args["debug"]:
        os.system(cmd)
        n_jobs_launched += 1
    elif args["verbose"]:
        print("[launched] %s" % (job_hash,))
        print(cmd)

print("\nSummary launched")