def main(args=None):
    """Entry point for scheduler.py."""
    parser = get_parser()
    args = parser.parse_args(args)

    jobscript = args.snakescript
    job_properties = read_job_properties(jobscript)
    shutil.copy2(jobscript, args.script_dir)
    jobscript = os.path.join(args.script_dir, os.path.basename(jobscript))

    if args.profile == 'slurm':
        jobid = '%j'
        scheduler_cmd = SbatchScheduler()
        if args.dependencies:
            scheduler_cmd.dependency = ','.join(
                ["afterok:%s" % d for d in args.dependencies])
    elif args.profile == 'qsub':
        jobid = '${JOB_ID}'
        scheduler_cmd = QsubScheduler()
        scheduler_cmd.dependency = args.dependencies

    # Prefer the mail_type given on the command line; otherwise fall back
    # to the cluster config embedded in the jobscript.
    mail_type = args.mail_type or job_properties["cluster"]["mail_type"]

    sample_config = read_sample_config(input_json=args.sample_config)
    sacct_file = os.path.join(
        args.log_dir, sample_config["analysis"]["case_id"] + ".sacct")

    balsamic_run_mode = os.getenv("BALSAMIC_STATUS", "conda")

    # if balsamic_run_mode == 'container' and 'singularity' in sample_config:
    #     sbatch_script = os.path.join(args.script_dir,
    #                                  "sbatch." + os.path.basename(jobscript))
    #     sbatch_file = singularity_param(sample_config=sample_config,
    #                                     script_dir=args.script_dir,
    #                                     jobscript=jobscript,
    #                                     sbatch_script=sbatch_script)
    #     jobscript = sbatch_script

    scheduler_cmd.account = args.account
    scheduler_cmd.mail_type = mail_type
    scheduler_cmd.error = os.path.join(
        args.log_dir, os.path.basename(jobscript) + "_" + jobid + ".err")
    scheduler_cmd.output = os.path.join(
        args.log_dir, os.path.basename(jobscript) + "_" + jobid + ".out")

    scheduler_cmd.ntasks = job_properties["cluster"]["n"]
    scheduler_cmd.time = job_properties["cluster"]["time"]
    scheduler_cmd.mail_user = args.mail_user
    scheduler_cmd.script = jobscript

    jobid = submit_job(scheduler_cmd.build_cmd(), args.profile)

    # scheduler_file = os.path.join(args.script_dir,
    #                               sample_config["analysis"]["case_id"] + ".scheduler_dump")
    # if balsamic_run_mode == 'container' and 'singularity' in sample_config:
    #     write_scheduler_dump(scheduler_file=scheduler_file,
    #                          cmd=scheduler_cmd.build_cmd())

    write_sacct_file(sacct_file=sacct_file, job_id=jobid)
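# Every submitter in this file relies on snakemake.utils.read_job_properties(),
# which parses the JSON blob that Snakemake embeds as a comment near the top
# of each generated jobscript. A minimal sketch of such a jobscript (all
# field values are illustrative only):
#
#   #!/bin/sh
#   # properties = {"rule": "align", "jobid": 3, "threads": 4,
#   #               "resources": {"mem_mb": 8000},
#   #               "wildcards": {"sample": "A"},
#   #               "cluster": {"time": "01:00:00", "n": 4}}
#   ...commands executing the rule...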
def get_job_info(self, jobpath):
    job_properties = read_job_properties(jobpath)

    # defaults when the rule declares no resources
    mem_mb = 500
    disk_gb = 0
    if 'resources' in job_properties:
        if 'mem_mb' in job_properties['resources']:
            mem_mb = job_properties['resources']['mem_mb']
        elif 'mem_gb' in job_properties['resources']:
            mem_mb = 1024 * job_properties['resources']['mem_gb']
        if 'disk_gb' in job_properties['resources']:
            disk_gb = job_properties['resources']['disk_gb']
        elif 'disk_mb' in job_properties['resources']:
            disk_gb = math.ceil(job_properties['resources']['disk_mb'] / 1024)

    jobname = "hd-{}-{}".format(job_properties['rule'],
                                job_properties['jobid'])
    return {
        'jobname': jobname,
        'mem_mb': mem_mb,
        'disk_gb': disk_gb,
        'cpus': job_properties.get('threads', 1),
        'resources': job_properties.get('resources', {}),
        'log': job_properties.get('log', []),
        'rule': job_properties.get('rule', ''),
        'wildcards': job_properties.get('wildcards', {}),
    }
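# Hypothetical usage: for a jobscript whose properties declare rule "align",
# jobid 3, threads 4 and resources {"mem_gb": 8}, the method would return
# something like:
#
#   {'jobname': 'hd-align-3', 'mem_mb': 8192, 'disk_gb': 0, 'cpus': 4, ...}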
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("jobscript")
    parser.add_argument("-e", help="Write bsub stderr here")
    parser.add_argument("-o", help="Write bsub stdout here")
    args = parser.parse_args()

    job_properties = read_job_properties(args.jobscript)

    # By default, we use 1 thread.
    threads = job_properties.get("threads", 1)
    # Default memory and runtime to 0 MB and 0 minutes, i.e. unspecified.
    mem = int(job_properties["resources"].get("mem", "0"))
    runtime = int(job_properties["resources"].get("runtime", "0"))

    # Let the user specify the queue.
    queue = job_properties["resources"].get("queue", None)
    # Otherwise, choose an appropriate queue based on required resources.
    if not queue:
        queue = get_queue(threads, mem, runtime)
    # If we fail to find a queue, exit with an error.
    if not queue:
        msg = "No valid queue! job_properties:\n"
        js = json.dumps(job_properties, indent=4, sort_keys=True)
        sys.stderr.write(msg + js)
        sys.exit(1)

    # Submit the job to the queue.
    run_bsub(queue, threads, mem, runtime, args.jobscript, args.o, args.e)
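# get_queue() is not shown above; a minimal sketch of one plausible
# implementation, with entirely hypothetical queue names and limits
# (max threads, max memory in MB, max runtime in minutes):
def get_queue(threads, mem, runtime):
    queues = [
        ("short", 4, 8000, 60),
        ("normal", 16, 64000, 720),
        ("long", 32, 256000, 10080),
    ]
    for name, max_threads, max_mem, max_runtime in queues:
        if threads <= max_threads and mem <= max_mem and runtime <= max_runtime:
            return name
    return None  # the caller treats None as "no valid queue"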
def __init__(self, jobscript, dependencies=None):
    self.jobscript = jobscript
    self.dependencies = dependencies
    job_properties = read_job_properties(jobscript)
    self.jobname = str(job_properties['rule'])
    self.threads = str(job_properties.get('threads', '1'))
    self.mem = str(job_properties['resources'].get('mem_mb', '10000'))
    self.command = self.construct_command()
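# construct_command() is defined elsewhere in the class; a minimal sketch,
# assuming an SGE-style qsub backend (all flags here are hypothetical):
def construct_command(self):
    cmd = ['qsub', '-N', self.jobname,
           '-pe', 'smp', self.threads,
           '-l', 'h_vmem={}M'.format(self.mem)]
    if self.dependencies:
        cmd += ['-hold_jid', ','.join(str(d) for d in self.dependencies)]
    return cmd + [self.jobscript]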
def __init__(self):
    self.cfg = {}
    self.partitions_info = {}
    self.submission_settings = defaultdict(lambda: None)
    self.command = ''
    self.partitions_file = ''
    self.jobscript = sys.argv[1]
    self.job_properties = read_job_properties(self.jobscript)
    self.load_slurm_config()
    self.update_partitions_info()
    self.load_partitions_info()
def __init__(self, snakebashfile, dependencies=None, config=None):
    self.scriptname = snakebashfile
    job_properties = read_job_properties(snakebashfile)
    self.rule = job_properties['rule']
    self.ifiles = job_properties['input']
    self.ofiles = job_properties['output']
    if dependencies is None or len(dependencies) < 1:
        self.dependencies = None
    else:
        # expects a Snakemake-style list of job ids
        self.dependencies = dependencies
        assert len(self.dependencies) >= 1
    self.config = config
def __init__(self,
             jobscript: PathLike,
             cluster_cmds: Optional[List[str]] = None,
             memory_units: Unit = Unit.GIGA,
             uge_config: Optional[Config] = None):
    if cluster_cmds is None:
        cluster_cmds = []
    if uge_config is None:
        uge_config = Config()

    self._jobscript = jobscript
    self._cluster_cmd = " ".join(cluster_cmds)
    self._memory_units = memory_units
    self._job_properties = read_job_properties(self._jobscript)
    self.uge_config = uge_config
    self._cluster_config = load_cluster_config("cluster.yaml")
def main():
    # Parse the command line
    parser = argparse.ArgumentParser()
    parser.add_argument("jobscript")
    args = parser.parse_args()

    # Parse the job properties
    job_properties = read_job_properties(args.jobscript)

    # By default, we use 1 thread.
    threads = job_properties.get("threads", 1)

    # Get default mem, runtime and output files from cluster.json
    mem = int(job_properties["cluster"]["mem"])
    runtime = int(job_properties["cluster"]["runtime"])
    stdout = job_properties["cluster"]["output"]
    stderr = job_properties["cluster"]["error"]
    jobname = job_properties["cluster"]["name"]

    # If the rule specifies its own resources, use those instead
    mem = int(job_properties["resources"].get("mem", mem))
    runtime = int(job_properties["resources"].get("runtime", runtime))

    # Make log file directories
    os.makedirs(os.path.dirname(stdout), exist_ok=True)
    os.makedirs(os.path.dirname(stderr), exist_ok=True)

    # Let the user specify the queue.
    queue = job_properties["resources"].get("queue", None)
    # Otherwise, choose an appropriate queue based on required resources.
    if not queue:
        queue = get_queue(threads, mem, runtime)
    # If we fail to find a queue, exit with an error.
    if not queue:
        msg = "No valid queue! job_properties:\n"
        js = json.dumps(job_properties, indent=4, sort_keys=True)
        sys.stderr.write(msg + js)
        sys.exit(1)

    # Submit the job to the queue.
    run_bsub(queue, threads, mem, runtime, args.jobscript, jobname,
             stdout, stderr)
    time.sleep(1)
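# run_bsub() is likewise not shown; a minimal sketch matching the call
# signature used in this variant, assuming it shells out to LSF's bsub
# (the exact flag set is a guess and should be adapted to the site's
# LSF configuration):
import subprocess

def run_bsub(queue, threads, mem, runtime, jobscript, jobname, stdout, stderr):
    cmd = ["bsub", "-q", queue, "-J", jobname,
           "-n", str(threads),
           "-R", "rusage[mem={}]".format(mem),
           "-W", str(runtime),
           "-o", stdout, "-e", stderr,
           jobscript]
    subprocess.run(cmd, check=True)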
def __init__(self,
             jobscript: PathLike,
             cluster_cmds: Optional[List[str]] = None,
             memory_units: Unit = Unit.MEGA,
             lsf_config: Optional[Config] = None):
    if cluster_cmds is None:
        cluster_cmds = []
    if lsf_config is None:
        lsf_config = Config()

    self._jobscript = jobscript
    self._cluster_cmd = " ".join(cluster_cmds)
    self._job_properties = read_job_properties(self._jobscript)
    self.random_string = OSLayer.get_uuid4_string()
    self._memory_units = memory_units
    self.lsf_config = lsf_config
from math import ceil, floor
from time import gmtime, strftime


def compute_node_configuration(jobscript, num_jobs):
    """
    Compute and return:
      * partition
      * number of nodes
      * number of parallel tasks per node
      * processing time
    """
    props = read_job_properties(jobscript=jobscript)
    job_threads = props['threads']
    job_gpus = props['resources']['gpus']
    job_mem = props['resources']['mem']
    job_time = props['resources']['time']
    partition = props['cluster']['partition']
    node_cores = props['cluster']['cores']
    node_gpus = props['cluster']['gpus']
    node_mem = props['cluster']['mem']
    node_time = props['cluster']['time']

    # convert node time ('%H:%M:%S') to minutes
    hours, minutes, seconds = (int(x) for x in node_time.split(':'))
    node_time = hours * 60 + minutes + seconds / 60.0

    # compute number of parallel tasks per node
    num_tasks = min(floor(node_cores / job_threads),
                    floor(node_mem / job_mem))
    # run more jobs on one node if compute time is very short
    overbooking = max(1, int(30 / job_time))
    # compute number of nodes
    num_nodes = ceil((num_jobs / overbooking) / num_tasks)
    # compute processing time with a buffer of 10%
    proc_time = job_time * overbooking * 1.1

    print('job_time: {}'.format(job_time))
    print('proc_time: {}'.format(proc_time))
    proc_time = strftime('%H:%M:%S', gmtime(proc_time * 60))
    print('Partition: {}'.format(partition))
    print('Num Nodes: {}'.format(num_nodes))
    print('Num Tasks: {}'.format(num_tasks))
    print('Proc Time: {}'.format(proc_time))
    print('Overbooking: {}'.format(overbooking))

    return (partition, num_nodes, num_tasks, proc_time)
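# Worked example with hypothetical numbers: 100 jobs, each needing 4 threads,
# 8 GB and 10 minutes, on nodes with 32 cores and 128 GB:
#   num_tasks   = min(floor(32 / 4), floor(128 / 8)) = min(8, 16) = 8
#   overbooking = max(1, int(30 / 10)) = 3
#   num_nodes   = ceil((100 / 3) / 8) = ceil(4.17) = 5
#   proc_time   = 10 * 3 * 1.1 = 33 minutes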
# cookiecutter arguments
SBATCH_DEFAULTS = """time=360 mem=16000 job-name=kalquant partition=holy-info,shared nodes=1 ntasks=6"""
CLUSTER_CONFIG = "config.yaml"
ADVANCED_ARGUMENT_CONVERSION = {"yes": True, "no": False}["yes"]

RESOURCE_MAPPING = {
    "time": ("time", "runtime", "walltime"),
    "mem": ("mem", "mem_mb", "ram", "memory"),
    "mem-per-cpu": ("mem-per-cpu", "mem_per_cpu", "mem_per_thread"),
    "nodes": ("nodes", "nnodes"),
}

# parse job
jobscript = slurm_utils.parse_jobscript()
job_properties = read_job_properties(jobscript)

sbatch_options = {}
cluster_config = slurm_utils.load_cluster_config(CLUSTER_CONFIG)

# 1) sbatch default arguments
sbatch_options.update(slurm_utils.parse_sbatch_defaults(SBATCH_DEFAULTS))

# 2) cluster_config defaults
sbatch_options.update(cluster_config["__default__"])

# 3) Convert resources (no unit conversion!) and threads
sbatch_options.update(
    slurm_utils.convert_job_properties(job_properties, RESOURCE_MAPPING))
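# The full profile continues with rule-specific cluster_config overrides and
# then submits. A sketch of the closing step, assuming slurm_utils exposes a
# submit_job(jobscript, **options) helper as in the standard Snakemake slurm
# profile this snippet appears to derive from:
jobid = slurm_utils.submit_job(jobscript, **sbatch_options)
print(jobid)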
parser.add_argument("dependencies", nargs="*", help="{{dependencies}} string given by snakemake\n") parser.add_argument( "jobscript", help= "Snakemake generated shell script with commands to execute snakemake rule\n" ) args = parser.parse_args() if args.dependencies: dependencies = '-d afterok:' + ':'.join(args.dependencies) else: dependencies = '' job_properties = read_job_properties(args.jobscript) cluster_param = {} job_resources = job_properties["resources"] if not "mem" in job_resources: warnings.warn( "Rule {rule} has no memory specified, set to default.".format( **job_properties)) sample = job_properties['wildcards'].get('sample', '') jobname = job_properties['rule'] if sample: jobname = jobname + '-' + sample # do something useful with the threads
import os
import sys

from python_utils.hpc import get_loc
from snakemake.utils import read_job_properties

loc = get_loc()
submit_cmd = loc.submit_job_cmd

job_cmd = sys.argv[1]
job_properties = read_job_properties(job_cmd)
submit_cmd = submit_cmd.replace('{threads}',
                                str(job_properties.get('threads', 1)))
submit_cmd = submit_cmd.replace(
    '{resources.mem_mb}',
    str(job_properties.get('resources', {}).get('mem_mb', 2000)))

job_name = (job_properties.get('rule')
            or job_properties.get('groupid')
            or 'umccrise')
submit_cmd = submit_cmd.replace('{job_name}', job_name)

cmd = f'{submit_cmd} {job_cmd}'
sys.stderr.write(cmd + '\n')
os.system(cmd)
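# Usage sketch: Snakemake invokes a submit script like this one via the
# --cluster option and appends the generated jobscript path as the final
# argument, e.g. (script name is hypothetical):
#
#   snakemake --jobs 100 --cluster "python cluster_submit.py"
#
# which is why these scripts read the jobscript from sys.argv.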
#!/usr/bin/env python3
import os
import re
import sys

from snakemake.utils import read_job_properties

LOGDIR = sys.argv[-2]
jobscript = sys.argv[-1]
mo = re.match(r'(\S+)/snakejob\.\S+\.(\d+)\.sh', jobscript)
assert mo
sm_tmpdir, sm_jobid = mo.groups()
props = read_job_properties(jobscript)

# set up job name, project name
jobname = "{rule}-{jobid}".format(rule=props["rule"], jobid=sm_jobid)
if props["params"].get("logid"):
    jobname = "{rule}-{id}".format(rule=props["rule"],
                                   id=props["params"]["logid"])
cmdline = "bsub -P {proj_name} -J {jobname} -r ".format(
    proj_name='viral_ngs', jobname=jobname)

# log file output
if "-N" not in props["params"].get("LSF", ""):
    cmdline += "-oo {logdir}/LSF-{jobname}.txt ".format(
        logdir=LOGDIR, jobname=jobname)

# pass memory resource request to LSF
mem = props.get('resources', {}).get('mem')
if mem:
    cmdline += '-R "rusage[mem={}]" -M {} '.format(mem, 2 * int(mem))

# rule-specific LSF parameters (e.g. queue, runtime)
cmdline += props["params"].get("LSF", "") + " "
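# The snippet ends mid-assembly; the conventional last step (a sketch,
# assuming no dependency handling is needed) appends the jobscript and
# hands the command line to the shell:
cmdline += jobscript
os.system(cmdline)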
import os
import sys

sys.path.append(os.path.expanduser("~/lib/python3"))
from snakemake.utils import read_job_properties

DEFAULT_PROPERTIES = {
    "queue": ("day", "-q {queue}"),
    "threads": ("1", "-n {threads}"),
    "memory": (8, "-M {memory}"),
    "exclusive": ("", "-x"),
    "log_dir": ("jobs/job%J.out", "-o {log_dir}"),
}

jobscript = sys.argv[1]
job_properties = read_job_properties(jobscript)["params"]
# os.system("echo " + str(read_job_properties(jobscript)))
# os.system("echo {} >>test.out".format(str(job_properties)))


def get_job_property(prop):
    if prop in job_properties:
        return (job_properties[prop], DEFAULT_PROPERTIES[prop][1])
    elif prop in DEFAULT_PROPERTIES:
        return DEFAULT_PROPERTIES[prop]
    else:
        return (None, None)
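# A sketch of how get_job_property() might be consumed downstream to build
# the final bsub command (entirely hypothetical; the flag templates come
# from DEFAULT_PROPERTIES above):
cmd = "bsub "
for prop in DEFAULT_PROPERTIES:
    value, flag = get_job_property(prop)
    if value not in (None, ""):
        cmd += flag.format(**{prop: value}) + " "
cmd += jobscript
os.system(cmd)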