def concoct_the_hell(name):
    # Bin the merged assembly with CONCOCT: build the job script from the
    # raw_* templates and submit it.
    concoct_dir = merged_ass + name + "/concoct/"
    if not os.path.exists(concoct_dir):
        os.makedirs(concoct_dir)
    script = raw_concoct_slurm % (concoct_dir, name, concoct_dir, concoct_dir)
    script += raw_concoct % (stats_out + name + "_contig_coverages_for_concoct.csv",
                             merged_ass + name + "/454AllContigs.fna",
                             2000, concoct_dir)
    with open(concoct_dir + "concoct_slurm_script.sh", "w") as handle:
        handle.writelines(script)
    sh.sbatch(concoct_dir + "concoct_slurm_script.sh")
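The raw_concoct and raw_concoct_slurm names are %-format template strings defined elsewhere in the module and not shown here. As a rough illustration only (the flags are an assumption based on CONCOCT's command line, not the original template), raw_concoct could look like:

    # Hypothetical template, matching the argument order used above:
    # (coverage csv, contig fasta, length cutoff, output directory)
    raw_concoct = """
    concoct --coverage_file %s --composition_file %s --length_threshold %d --basename %s
    """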
def merge(self):
    # Merge the per-k Ray assemblies that finished successfully with newbler.
    fastas = [self.out_dir + "ray_" + str(k) + "/Contigs.fasta"
              for k in self.success_ks
              if os.path.exists(self.out_dir + "ray_" + str(k) + "/Contigs.fasta")]
    if not os.path.exists(self.merge_dir):
        os.makedirs(self.merge_dir)
    in_size = sum([os.stat(f).st_size for f in fastas])
    newbler = raw_newbler % (self.merge_dir, " ".join(fastas))
    header = raw_newbler_slurm % (self.merge_dir, self.name, self.merge_dir, self.merge_dir)
    with open(self.merge_dir + "merge_slurm_script.sh", "w") as handle:
        handle.writelines(header + newbler)
    sh.sbatch(self.merge_dir + "merge_slurm_script.sh")
def merge_set(asses, name):
    # Merge the merged assemblies of a set of Assembly objects with newbler.
    fastas = [ass.merge_dir + "454AllContigs.fna" for ass in asses]
    out_dir = merged_ass + name + "/"   # the original omitted the trailing slash
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    in_size = sum([os.stat(f).st_size for f in fastas])
    newbler = raw_newbler % (out_dir, " ".join(fastas))
    header = raw_newbler_slurm % (out_dir, "merged_" + name, out_dir, out_dir)
    with open(out_dir + "merge_slurm_script.sh", "w") as handle:
        handle.writelines(header + newbler)
    sh.sbatch(out_dir + "merge_slurm_script.sh")
def map2ref(self, ref):
    # Map this sample's read pair against an external reference.
    map_dir = self.out_dir + "mapper_2ref/"
    if not os.path.exists(map_dir):   # the sibling map() methods create this dir; added here for safety
        os.makedirs(map_dir)
    slurm_header = raw_map_slurm % (self.out_dir, self.name, "2ref", map_dir, map_dir)
    mapper_cmd = raw_map % (self.pair, self.name, ref, "ray_2ref", map_dir)
    self.map_script = slurm_header + mapper_cmd
    with open(map_dir + "map_slurm_script_2ref.sh", "w") as handle:
        handle.writelines(self.map_script)
    sh.sbatch(map_dir + "map_slurm_script_2ref.sh")
    print "Launched mapper for", self.name, "with reference", ref
def run_singles(self, k=41):
    # Launch a single-k Ray assembly of the read pair.
    self.arg = "-p " + " ".join(self.pair)
    script = make_slurm_header(name="ray_" + self.name + "_" + str(k),
                               path=self.raw_ass + str(k) + "/")
    script = script + raw_ray_single % (k, self.raw_ass + str(k) + "/ass/", self.arg)
    with open(self.raw_ass + str(k) + "/slurm_script.sh", "w") as handle:
        handle.writelines(script)
    sh.sbatch(self.raw_ass + str(k) + "/slurm_script.sh")
def map_merged(self):
    # Map the reads back onto the merged assembly, if it exists.
    if os.path.exists(self.merge_dir + "454AllContigs.fna"):
        slurm_header = raw_map_slurm % (self.out_dir, self.name, "merged",
                                        self.merge_dir, self.merge_dir)
        mapper_cmd = raw_map % (self.reads, self.name,
                                self.merge_dir + "454AllContigs.fna",
                                "merged", self.merge_dir + "mapper/")
        self.map_script = slurm_header + mapper_cmd
        with open(self.merge_dir + "map_slurm_script.sh", "w") as handle:
            handle.writelines(self.map_script)
        sh.sbatch(self.merge_dir + "map_slurm_script.sh")
        print "Launched mapper for", self.name, "with merged assembly"
def merge_final():
    # Merge the merged assemblies of all samples into one final assembly.
    asses = [Assembly(p.replace(".fastq.gz", ""))
             for p in os.listdir(raw_path)
             if ".fastq.gz" in p and not "clean" in p]
    fastas = [ass.merge_dir + "454AllContigs.fna" for ass in asses]
    if not os.path.exists(merged_ass):
        os.makedirs(merged_ass)
    in_size = sum([os.stat(f).st_size for f in fastas])
    newbler = raw_newbler % (merged_ass, " ".join(fastas))
    header = raw_newbler_slurm % (merged_ass, "merged_all", merged_ass, merged_ass)
    with open(merged_ass + "merge_slurm_script.sh", "w") as handle:
        handle.writelines(header + newbler)
    sh.sbatch(merged_ass + "merge_slurm_script.sh")
def unmapped_pulling(self):
    # Extract the reads that do not map back to the merged assembly.
    if os.path.exists(self.out_dir + "merged/454AllContigs.fna"):
        slurm_header = make_slurm_header(name="unmapped_" + self.name,
                                         path=self.merge_dir + "unmapped_reads/")
        mapper_cmd = raw_unmap % (self.merge_dir + "454AllContigs.fna",
                                  self.pair[0], self.pair[1],
                                  self.merge_dir + "unmapped_reads/unmapped.fastq")
        self.map_script = slurm_header + mapper_cmd
        with open(self.merge_dir + "unmapped_reads/unmap_slurm_script.sh", "w") as handle:
            handle.writelines(self.map_script)
        sh.sbatch(self.merge_dir + "unmapped_reads/unmap_slurm_script.sh")
        print "Launched unmapper for", self.name
def main(base_dir, cmd_name, id_range):
    # Submit cmd_name from the sbatch/ subdirectory of a slice of run directories.
    all_dirs = sorted(glob(join(base_dir, "*")))
    id_min, id_max = map(int, id_range.split(","))
    used_dirs = all_dirs[id_min:id_max]
    for each_dir in used_dirs:
        sbatch_dir = join(each_dir, "sbatch")
        sh.cd(sbatch_dir)   # sh.cd changes this process's cwd for the sbatch call below
        sh.sleep(0.5)       # throttle submissions slightly
        sh.sbatch(cmd_name)
        print(f"submitted {cmd_name} in {basename(each_dir)}")
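A minimal way to expose this function on the command line (a hypothetical wrapper, not part of the original snippet):

    if __name__ == "__main__":
        import sys
        # e.g. python submit_range.py /proj/runs run.sbatch 0,10
        main(sys.argv[1], sys.argv[2], sys.argv[3])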
def map_merged(self):
    # Variant of map_merged that keeps the mapping in its own mapper_merged/ directory.
    if os.path.exists(self.merge_dir + "454AllContigs.fna"):
        map_dir = self.out_dir + "mapper_merged/"
        if not os.path.exists(map_dir):
            os.makedirs(map_dir)
        slurm_header = raw_map_slurm % (self.out_dir, self.name, "merged", map_dir, map_dir)
        mapper_cmd = raw_map % (" ".join(self.pair), self.name,
                                self.merge_dir + "454AllContigs.fna",
                                "merged", map_dir)
        self.map_script = slurm_header + mapper_cmd
        with open(map_dir + "map_slurm_script_merged.sh", "w") as handle:
            handle.writelines(self.map_script)
        sh.sbatch(map_dir + "map_slurm_script_merged.sh")
        print "Launched mapper for", "merged", self.name
def map(self, k):
    # Map the read pair onto the k-mer-specific Ray assembly.
    if os.path.exists(self.out_dir + "ray_" + str(k) + "/Contigs.fasta"):
        map_dir = self.out_dir + "mapper_" + str(k) + "/"
        if not os.path.exists(map_dir):
            os.makedirs(map_dir)
        slurm_header = raw_map_slurm % (self.out_dir, self.name, str(k), map_dir, map_dir)
        mapper_cmd = raw_map % (" ".join(self.pair), self.name,
                                self.out_dir + "ray_" + str(k) + "/Contigs.fasta",
                                "ray_" + str(k),   # was str(self.k) in the original, a likely typo
                                map_dir)
        self.map_script = slurm_header + mapper_cmd
        with open(map_dir + "map_slurm_script_" + str(k) + ".sh", "w") as handle:
            handle.writelines(self.map_script)
        sh.sbatch(map_dir + "map_slurm_script_" + str(k) + ".sh")
        print "Launched mapper for", self.name, "with k-mer size", k
def run(self):
    # Build and submit the Ray assembly script, cleaning the reads with sickle
    # first in the single-library case.
    self.script = raw_slurm % (self.out_dir, self.name, str(self.k), self.out_dir, self.out_dir)
    if not self.coass:
        cleans = ("sickle se -f " + self.reads + " -t illumina -n -o "
                  + self.reads.replace("fastq.gz", "clean.fastq") + "\n"
                  if not os.path.exists(self.reads.replace(".fastq.gz", ".clean.fastq"))
                  else "")
        self.arg = self.reads.replace("fastq.gz", "clean.fastq")
        self.script += cleans   # the original appended this string character by character
    else:
        self.arg = " ".join([" -s " + r.replace("fastq.gz", "clean.fastq") for r in self.reads])
    self.script += raw_ray % (self.out_dir, self.out_dir + "ray", self.arg,
                              self.out_dir, self.out_dir + "slurm_script.sh")
    with open(self.out_dir + "current_k.txt", "w") as handle:
        handle.writelines([str(self.k)])
    with open(self.out_dir + "slurm_script.sh", "w") as handle:
        handle.writelines(self.script)
    sh.sbatch(self.out_dir + "slurm_script.sh")
def clean(self, project="b2011138", submit=False):
    # Quality-trim this sample's libraries with sickle.
    script = make_slurm_header(self.path, "clean_" + self.name, proj=project, cores=1)
    libraries = self.get_libraries()
    script += make_parallel_sickle_script(libraries, threads=1)
    with open(self.path + "clean_" + self.name + "_script.sh", "w") as handle:
        handle.writelines(script)
    if submit:
        clean_job = sh.sbatch(self.path + "clean_" + self.name + "_script.sh")
        print "Launched clean_" + self.name
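Many of these snippets lean on a make_slurm_header() helper that creates the working directory and returns the #SBATCH preamble as a string. Its implementation is not shown; the following is a minimal sketch under assumed flags and defaults, not the original code:

    import os

    def make_slurm_header(path, name, proj="b2011138", cores=16,
                          time="1-00:00:00", deps=None, constraint=None):
        # Create the working directory and return a SLURM preamble.
        if not os.path.exists(path):
            os.makedirs(path)
        lines = ["#!/bin/bash -l",
                 "#SBATCH -A " + proj,
                 "#SBATCH -J " + name,
                 "#SBATCH -n " + str(cores),
                 "#SBATCH -t " + time,
                 "#SBATCH -o " + path + name + ".out",
                 "#SBATCH -e " + path + name + ".err"]
        if constraint:
            lines.append("#SBATCH -C " + constraint)
        if deps:
            lines.append("#SBATCH --dependency=afterok:" + ":".join(str(d) for d in deps))
        return "\n".join(lines) + "\n"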
def run(self):
    # Build and submit the Ray assembly script for paired libraries, with an
    # optional sickle cleaning step guarded by a file-existence test in bash.
    if not self.coass:
        cleans = ["sickle pe -f " + self.dir + p[0][0] + " -r " + self.dir + p[0][1]
                  + " -t sanger -n"
                  + " -o " + self.clean_dir + p[1][0] + " -p " + self.clean_dir + p[1][1]
                  + " -s " + self.clean_dir + p[2] + "\n"
                  for p in zip(self.pairs, self.cleans, self.singles)]
        # flatten [["-p "], [reads...]] pairs into one argument list
        self.arg = sum([sum([["-p "], [self.clean_dir + r for r in p]], []) for p in self.cleans], [])
    else:
        self.arg = sum([sum([["-p "], [r for r in p]], []) for p in self.cleans], [])
    self.script = raw_slurm % (self.out_dir, self.name, str(self.k), self.out_dir, self.out_dir)
    if not self.coass:
        self.script += "if [ ! -e " + self.clean_dir + self.singles[0] + " ]\nthen\n"
        for c in cleans:
            self.script += c
        self.script += "fi"
    self.script += raw_ray % (self.out_dir, self.out_dir + "ray", " ".join(self.arg),
                              self.out_dir, self.out_dir + "slurm_script.sh")
    with open(self.out_dir + "current_k.txt", "w") as handle:
        handle.writelines([str(self.k)])
    with open(self.out_dir + "slurm_script.sh", "w") as handle:
        handle.writelines(self.script)
    sh.sbatch(self.out_dir + "slurm_script.sh")
def launch(self): """Make the script file and return the newly created job id""" # Make script file # self.make_script() # Do it # sbatch_out = sh.sbatch(self.script_path) jobs.expire() # Message # print Color.i_blu + "SLURM:" + Color.end + " " + str(sbatch_out), # Return id # self.id = int(re.findall("Submitted batch job ([0-9]+)", str(sbatch_out))[0]) return self.id
def launch(self):
    # Make script file #
    self.write_script()
    # Do it #
    sbatch_out = sh.sbatch(self.script_path)
    jobs.expire()
    # Message #
    print Color.i_blu + "SLURM:" + Color.end + " " + str(sbatch_out),
    # Clean up #
    if not self.save_script: os.remove(self.script_path)
    # Return id #
    self.id = int(re.findall("Submitted batch job ([0-9]+)", str(sbatch_out))[0])
    return self.id
def launch(self): """Make the script file and return the newly created job id""" # Make script file # self.make_script() # Do it # sbatch_out = sh.sbatch(self.script_path) jobs.expire() # Message # print(Color.i_blu + "SLURM:" + Color.end + " " + str(sbatch_out), ) # Return id # self.id = int( re.findall("Submitted batch job ([0-9]+)", str(sbatch_out))[0]) return self.id
def diginorm(self, deps=None, submit=False, threads=4):
    # Digitally normalise all reads of this sample with khmer.
    path = self.sample_path
    script = make_slurm_header(path, "khmer_the_hell_" + self.name, self.project,
                               time="4-00:00:00", deps=deps, cores=threads)
    script += make_khmer_script(self.get_all_reads(), path + "normalised_reads.fastq", threads)
    with open(path + "diginorming.sh", "w") as handle:
        handle.writelines(script)
    if submit:
        job = sh.sbatch(path + "diginorming.sh")
        if not self.job_ids.has_key('diginorming'):
            self.job_ids['diginorming'] = job.split()[-1]
        print "Launched diginorming on", self.name
        with open(self.job_file, 'w') as handle:
            json.dump(self.job_ids, handle)
def map_binannot(self, infasta, name, submit=False, deps=None):
    # Annotate a single bin fasta.
    path = os.path.dirname(infasta) + "/annotation/"
    script = make_slurm_header(path, "annotate_bin_" + name, self.project,
                               time="1-00:00:00", deps=deps)
    script += make_bin_bmfa(infasta, path, name=name)
    with open(path + "annotate_bin.sh", "w") as handle:
        handle.writelines(script)
    if submit:
        job = sh.sbatch(path + "annotate_bin.sh")
        if not self.job_ids.has_key('annotate'):
            self.job_ids['annotate'] = {}
        self.job_ids['annotate'][name] = job.split()[-1]   # the original wrote to 'maps', a likely typo
        print "Launched annotation on", infasta
        with open(self.job_file, 'w') as handle:
            json.dump(self.job_ids, handle)
def clean_all(self, submit=False):
    # Quality-trim the libraries of every sample in one job.
    script = make_slurm_header(self.sample_path, "clean_all", self.project)
    libraries = sum([s.get_libraries() for s in self.samples], [])
    script += make_parallel_sickle_script(libraries)
    with open(self.sample_path + "clean_all_script.sh", "w") as handle:
        handle.writelines(script)
    if submit:
        clean_job = sh.sbatch(self.sample_path + "clean_all_script.sh")
        self.job_ids['clean_all'] = clean_job.split()[-1]
        print "Launched clean_all"
        with open(self.job_file, 'w') as handle:
            json.dump(self.job_ids, handle)
def besst_scaffold(self, reference, bowtie_only=False, submit=False, deps=None):
    # Scaffold the reference assembly with BESST, using the existing mappings.
    maps_path = os.path.dirname(reference) + "/mapping_" + os.path.basename(reference) + "/"
    path = os.path.dirname(reference) + "/BESST_scaffolding/"
    script = make_slurm_header(path, "BESST_scaff_of_" + os.path.basename(reference),
                               self.project, time="4-00:00:00", deps=deps, cores=1)
    script += make_besst_script(reference, path, self.get_clean_sample_dict(), maps_path)
    with open(path + "besst_script.sh", "w") as handle:
        handle.writelines(script)
    if submit:
        job = sh.sbatch(path + "besst_script.sh")
        if not self.job_ids.has_key('scaffolds'):
            self.job_ids['scaffolds'] = {}
        self.job_ids['scaffolds'][reference] = job.split()[-1]
        print "Launched scaffolder on", reference   # the original printed "mapper", a copy-paste slip
        with open(self.job_file, 'w') as handle:
            json.dump(self.job_ids, handle)
def concocting(self, reference, coverage_csv, min_len=1000, kmer=4, submit=False, deps=None):
    # Bin the reference assembly with CONCOCT.
    covfile = coverage_csv
    path = os.path.dirname(reference) + "/binning/"
    script = make_slurm_header(path, "concocting_of_" + os.path.basename(reference),
                               self.project, time="4-00:00:00", deps=deps)
    script += make_concoct_script(covfile, reference, path, min_len, kmer)
    with open(path + "concoct_script.sh", "w") as handle:
        handle.writelines(script)
    if submit:
        job = sh.sbatch(path + "concoct_script.sh")
        if not self.job_ids.has_key('bins'):
            self.job_ids['bins'] = {}
        self.job_ids['bins'][reference] = job.split()[-1]
        print "Launched binning on", reference
        with open(self.job_file, 'w') as handle:
            json.dump(self.job_ids, handle)
def normalize(self, project, submit=False, shuffle=True):
    # Digitally normalise this sample's reads with khmer, optionally shuffling
    # the library order first.
    cores = 4
    script = make_slurm_header(self.normalize_path, "khmer_norm_" + self.name,
                               proj=project, cores=cores, time="7-00:00:00")
    libraries = self.get_all_reads()
    if shuffle:
        random.shuffle(libraries)
    script += make_khmer_script(libraries, self.normalized_sample, threads=cores)
    with open(self.path + "normed_" + self.name + "_script.sh", "w") as handle:
        handle.writelines(script)
    if submit:
        clean_job = sh.sbatch(self.path + "normed_" + self.name + "_script.sh")
        print "Launched normed_" + self.name   # the original printed "clean_", a copy-paste slip
def map_samples(self, reference, bowtie_only=False, submit=False, deps=None):
    # Map every cleaned sample against the reference with bbmap.
    path = os.path.dirname(reference) + "/mapping_" + os.path.basename(reference) + "/"
    script = make_slurm_header(path, "mapping_samples_to_" + os.path.basename(reference),
                               self.project, time="4-00:00:00", deps=deps)
    script += make_bbmapping_script(reference, path, self.get_clean_sample_dict(),
                                    only_bowtie=bowtie_only)
    # script += make_star_mapping_script(reference, path, self.get_clean_sample_dict())
    # script += make_mapping_script(reference, path, self.get_clean_sample_dict(), only_bowtie=bowtie_only)
    with open(path + "mapping_script.sh", "w") as handle:
        handle.writelines(script)
    if submit:
        job = sh.sbatch(path + "mapping_script.sh")
        if not self.job_ids.has_key('maps'):
            self.job_ids['maps'] = {}
        self.job_ids['maps'][reference] = job.split()[-1]
        print "Launched mapper on", reference
        with open(self.job_file, 'w') as handle:
            json.dump(self.job_ids, handle)
def minimus_scaffolder(self, name="megahit", submit=False):
    # Scaffold an assembly with minimus, depending on the assembly job if known.
    if self.job_ids.has_key('assembly'):
        deps = [self.job_ids['assembly'][name]]
    else:
        deps = ['1']
    path = self.raw_ass + name + "/"
    script = make_slurm_header(path, "scaffminimus_" + name, self.project,
                               constraint="medium", time="4-00:00:00", deps=deps)
    script += make_minimus_script(path + "contigs.fasta", path + "scaffolds.fasta")
    with open(path + "scaffminimus_script.sh", "w") as handle:
        handle.writelines(script)
    if submit:
        job = sh.sbatch(path + "scaffminimus_script.sh")
        if not self.job_ids.has_key('scaff'):
            self.job_ids['scaff'] = {}
        self.job_ids['scaff'][name] = job.split()[-1]
        print "Launched minimus scaffolding of", name   # the original printed "megahit assembly", a copy-paste slip
        with open(self.job_file, 'w') as handle:
            json.dump(self.job_ids, handle)
def annotate_all_bins(self, path, submit=False, deps=None, min_size=None):
    # Annotate every bin fasta in path, optionally skipping bins below min_size bytes.
    if min_size:
        bins = [".".join(b.split(".")[:-1]) for b in os.listdir(path)
                if "bin" in b and b[-5:] == "fasta" and os.path.getsize(path + b) > min_size]
    else:
        bins = [".".join(b.split(".")[:-1]) for b in os.listdir(path)
                if "bin" in b and b[-5:] == "fasta"]
    for b in bins:
        nproc = 1
        if os.path.getsize(path + b + ".fasta") > 5000000:
            nproc = 16   # give large bins more cores
        script = make_slurm_header(path + b + "/", "annotate_" + b, self.project,
                                   time="12:00:00", deps=deps, cores=nproc)
        script += make_bin_bmfa(path + b + ".fasta", path + b, name=b, threads=nproc)
        with open(path + b + "/" + "annotate_bin.sh", "w") as handle:
            handle.writelines(script)
        if submit:
            job = sh.sbatch(path + b + "/" + "annotate_bin.sh")
            if not self.job_ids.has_key('annotate'):
                self.job_ids['annotate'] = {}
            print "Launched annotater on", b
            with open(self.job_file, 'w') as handle:
                json.dump(self.job_ids, handle)
def megahit_assembly(self, name="megahit", max_read_len=291, submit=False, reads=None, deps=None):
    # Co-assemble all reads with megahit, depending on the cleaning job if known.
    if not deps:
        if self.job_ids.has_key('clean_all'):
            deps = [self.job_ids['clean_all']]
        else:
            deps = ['1']
    path = self.raw_ass + name + "/"
    if not reads:
        reads = self.get_all_reads()
    script = make_slurm_header(path, "assembly_" + name, self.project,
                               constraint="fat", time="7-00:00:00", deps=deps)
    script += make_megahit_script(reads, path)
    with open(path + "megahit_assembly_script.sh", "w") as handle:
        handle.writelines(script)
    if submit:
        job = sh.sbatch(path + "megahit_assembly_script.sh")
        if not self.job_ids.has_key('assembly'):
            self.job_ids['assembly'] = {}
        self.job_ids['assembly'][name] = job.split()[-1]
        print "Launched megahit assembly named", name
        with open(self.job_file, 'w') as handle:
            json.dump(self.job_ids, handle)
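make_megahit_script() is another helper whose implementation is not shown. A hypothetical sketch of what it could emit, assuming the reads are passed as comma-separated single-end files via megahit's -r flag (the original may well differ):

    def make_megahit_script(reads, path, threads=16):
        # megahit refuses a pre-existing output directory, hence the subfolder.
        return ("megahit -r " + ",".join(reads)
                + " -t " + str(threads)
                + " -o " + path + "megahit_out/\n")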
def reconcoct_all_good_bins(self, covfile, path, submit=False, deps=None, min_contamination=10):
    # Re-run CONCOCT on bins whose checkm contamination exceeds the threshold.
    bins = [".".join(b.split(".")[:-1]) for b in os.listdir(path) if "bin" in b and "fasta" in b]
    bins = [b for b in bins if b != "bin_non-bin"]
    contams = {}
    for b in bins:
        with open(path + b + "/checkm.txt") as handle:
            lines = handle.readlines()
        if len(lines) > 3:
            contams[b] = float([l.split()[13] for l in lines if b in l][0])
    bins = [b for b, c in contams.iteritems() if c > min_contamination]
    bins = [b for b in bins if not os.path.exists(path + b + "/" + b + "_non-bin.fasta")]
    for b in bins:
        nproc = 16
        script = make_slurm_header(path + b + "/", "reconcoct_" + b, self.project,
                                   time="12:00:00", deps=deps, cores=nproc)
        script += make_reconcoct_script(covfile, path + b + ".fasta", b + "_", cutoff=5000)
        with open(path + b + "/" + "reconcoct_bin.sh", "w") as handle:
            handle.writelines(script)
        if submit:
            job = sh.sbatch(path + b + "/" + "reconcoct_bin.sh")
            print "Launched re-concocter on", b
def _submit(self, execution):
    url = os.path.join(
        execution.storage.config["url"],
        execution.storage.config["directory"] or "",
        execution.submission_id,
    )
    # script path
    script_path = "engine/slurm_" + pendulum.now().strftime("%d-%m-%Y_%H-%M-%S")
    with open_fs(url) as filesystem:
        filesystem.makedirs(script_path, recreate=True)
    script_url = os.path.join(url, script_path)
    # submission
    project = json.dumps(execution.project.options).replace('"', '\\"')
    code = f"""
    import machinable as ml
    e = ml.Execution.from_storage('{abspath(url)}')
    e.set_engine('native')
    e.set_storage({execution.storage.config})
    e.set_project(ml.Project.from_json('{project}'))
    e.filter(lambda i, component, _: component == '$COMPONENT_ID')
    e.submit()
    """.replace("\n    ", ";")[1:-1]
    submission = (
        f'cd {execution.project.directory_path};\n{self.python} -c "{code};"\n'
    )
    for (
        index,
        execution_type,
        component,
        components,
        storage,
        resources,
        args,
        kwargs,
    ) in execution.schedule.iterate(execution.storage.config):
        component_id = component["flags"]["COMPONENT_ID"]
        component_path = os.path.join(url.replace("osfs://", ""), component_id)
        os.makedirs(component_path, exist_ok=True)
        script = f"{self.shebang}\n"
        script += f'#SBATCH --job-name="{execution.submission_id}:{component_id}"\n'
        script += f"#SBATCH -o {os.path.join(component_path, 'output.log')}\n"
        script += "#SBATCH --open-mode=append\n"
        _c = mapped_config(component)
        _cc = mapped_config(components)
        script += _wrap(
            call_with_context(
                self.before_script,
                execution=execution,
                index=index,
                execution_type=execution_type,
                component=_c,
                components=_cc,
                config=_c.config,
                flags=_c.flags,
                storage=storage,
                resources=resources,
                args=args,
                kwargs=kwargs,
            )
        )
        script += submission.replace("$COMPONENT_ID", component_id)
        script += _wrap(
            call_with_context(
                self.after_script,
                execution=execution,
                index=index,
                execution_type=execution_type,
                component=_c,
                components=_cc,
                config=_c.config,
                flags=_c.flags,
                storage=storage,
                resources=resources,
                args=args,
                kwargs=kwargs,
            )
        )
        # write script to disk
        target = os.path.join(script_url.replace("osfs://", ""), f"{component_id}.sh")
        if target.find("://") != -1:
            raise ValueError("Slurm engine only supports local storages")
        with open(target, "w") as f:
            f.write(script)
        st = os.stat(target)
        os.chmod(target, st.st_mode | stat.S_IEXEC)
        # submit to slurm
        try:
            sbatch_arguments = []
            for k, v in self.canonicalize_resources(resources).items():
                sbatch_arguments.append(k)
                if v not in [None, True]:
                    sbatch_arguments.append(str(v))
            sbatch_arguments.append(target)
            p = sh.sbatch(*sbatch_arguments)
            output = p.stdout.decode("utf-8")
            try:
                job_id = int(output.rsplit(" ", maxsplit=1)[-1])
            except ValueError:
                job_id = False
            info = self.serialize()
            info.update({
                "job_id": job_id,
                "cmd": "sbatch " + " ".join(sbatch_arguments),
                "script": target,
            })
            execution.set_result(info, echo=False)
            execution.storage.save_file(f"{component_id}/engine/info.json", info)
        except Exception as ex:
            if isinstance(ex, sh.ErrorReturnCode):
                message = ex.stderr.decode("utf-8")
            else:
                message = exception_to_str(ex)
            execution.set_result(
                ExecutionException(message, reason="engine_failure"),
                echo=True,
            )
    total = len(execution.schedule._result)
    success = len(execution.schedule._result) - execution.failures
    self.log(f"Submitted {success}/{total} jobs successfully")
    return execution
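The loop above flattens whatever self.canonicalize_resources() returns into the sbatch argv, appending the value only when it is not None or True (so boolean flags stay bare). A rough sketch of the behaviour that contract implies (hypothetical; the real method lives on the engine class):

    def canonicalize_resources(self, resources):
        # {"mem": "16G", "--gpus": 1, "exclusive": True}
        #   -> ["--mem", "16G", "--gpus", "1", "--exclusive"] once flattened above
        canonical = {}
        for key, value in (resources or {}).items():
            flag = key if key.startswith("-") else "--" + key
            canonical[flag] = value
        return canonical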
#!/usr/bin/env python
import os
from sh import sbatch

ROOT = os.path.dirname(__file__)

for username in ('test1', 'test2'):
    for i in range(500):
        sbatch('-A', username, os.path.join(ROOT, 'job.py'))
def hmmer_the_hell(self, database="~/glob/data/pfam/Pfam-A.hmm"):
    script = make_slurm_header(name="hmmer_" + self.name + "_" + database.split("/")[-1],
                               path=self.merge_dir + "annotation/")
    script += raw_hmm_pipe % (database,
                              self.merge_dir + "annotation/full.faa",
                              self.merge_dir + "annotation/" + database.split("/")[-1] + ".out")
    with open(self.merge_dir + "annotation/" + "hmmer_slurm_script.sh", "w") as handle:
        handle.writelines(script)
    sh.sbatch(self.merge_dir + "annotation/" + "hmmer_slurm_script.sh")
        --load_decoder saved_models/{decoder} \
        --unsup_loss_weight {unsup_loss_weight} \
        --patience 5 \
        --max_num_trial 3 \
        --lr_decay {lr_decay} \
        --save_to saved_models/{job_name} 2>>logs/{job_name}.log

    python exp.py \
        --cuda \
        --mode test \
        --load_model saved_models/{job_name}.bin \
        --beam_size 15 \
        --test_file ../data/django/test.bin \
        --decode_max_time_step 100 2>>logs/{job_name}.log
    """.format(train_data=train_data, unlabeled_data=unlabeled_data,
               encoder=encoder, decoder=decoder,
               unsup_loss_weight=unsup_loss_weight, lr_decay=lr_decay,
               job_name=job_name))

cmd = sbatch('--gres', 'gpu:1',
             '--job-name', job_name,
             '--mem', 15000,                        # memory
             '--cpus-per-task', 8,                  # number of cpus
             '--time', 0,                           # wait time: unlimited
             '--output', 'logs/%s.out' % job_name,  # redirect stdout to file
             job_script)
print cmd.stdout
print('\tsubmitting: %s' % job_script)

slurm_script = """#!/usr/bin/env python
import sh, sys

jobs = []
for job_script in {job_scripts}:
    job = sh.bash(job_script, _bg=True, _out=sys.stdout)
    jobs.append(job)

for job in jobs:
    job.wait()
""".format(job_scripts='[%s]' % ', '.join(['"%s"' % job_script for job_script in args.job_scripts]))

job = sbatch('--gres', 'gpu:1',
             '--job-name', args.job_name,
             '--mem', args.mem_per_job * job_num,             # memory
             '--cpus-per-task', args.cpus_per_job * job_num,  # number of cpus
             '--time', 0,                                     # wait time: unlimited
             '--output', args.output,                         # assume you don't need stdout
             _in=slurm_script)
print(job.stdout)
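Note that this last example never writes the generated batch script to disk: sbatch reads the script from stdin when no script file argument is given, which is what the _in=slurm_script keyword relies on. The same mechanism in isolation:

    import sh

    # Submit a throwaway script straight from memory (no file on disk).
    sh.sbatch('--job-name', 'demo', '--output', 'demo.out',
              _in="#!/bin/bash\necho hello from job $SLURM_JOB_ID\n")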