Example #1
def concoct_the_hell(name):
    concoct_dir = merged_ass + name + "/concoct/"
    if not os.path.exists(concoct_dir):
        os.makedirs(concoct_dir)
    script = raw_concoct_slurm % (concoct_dir, name, concoct_dir, concoct_dir)
    script += raw_concoct % (stats_out + name + "_contig_coverages_for_concoct.csv", merged_ass + name + "/454AllContigs.fna", 2000, concoct_dir)
    with open(concoct_dir + "concoct_slurm_script.sh", "w") as handle:
        handle.write(script)
    sh.sbatch(concoct_dir + "concoct_slurm_script.sh")
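Nearly every snippet in this section follows the same shape: fill a raw_* template string with paths and a job name, write the result to a .sh file, and hand that file to sh.sbatch. A condensed, self-contained sketch of that pattern (the template and the helper name are illustrative, not taken from any of the quoted projects):

import os
import sh  # pip install sh

RAW_SLURM = "#!/bin/bash\n#SBATCH -J %s\n%s\n"  # minimal illustrative template

def write_and_submit(job_name, command, work_dir):
    # fill the template, write the batch script, then submit it with sbatch
    os.makedirs(work_dir, exist_ok=True)
    script_path = os.path.join(work_dir, "slurm_script.sh")
    with open(script_path, "w") as handle:
        handle.write(RAW_SLURM % (job_name, command))
    sh.sbatch(script_path)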
Example #2
 def merge(self):
     fastas = [self.out_dir + "ray_" + str(k) + "/Contigs.fasta" for k in self.success_ks if os.path.exists(self.out_dir + "ray_" + str(k) + "/Contigs.fasta")]
     if not os.path.exists(self.merge_dir):
         os.makedirs(self.merge_dir)
     in_size = sum([os.stat(f).st_size for f in fastas])
     newbler = raw_newbler % (self.merge_dir, " ".join(fastas))
     header = raw_newbler_slurm % ( self.merge_dir, self.name, self.merge_dir, self.merge_dir)
     with open(self.merge_dir + "merge_slurm_script.sh","w") as handle:
         handle.writelines(header + newbler)
     sh.sbatch(self.merge_dir + "merge_slurm_script.sh")
Example #3
def merge_set(asses, name):
    out_dir = merged_ass + name + "/"
    fastas = [ass.merge_dir + "454AllContigs.fna" for ass in asses]
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    in_size = sum([os.stat(f).st_size for f in fastas])
    newbler = raw_newbler % (out_dir, " ".join(fastas))
    header = raw_newbler_slurm % (out_dir, "merged_" + name, out_dir, out_dir)
    with open(out_dir + "merge_slurm_script.sh", "w") as handle:
        handle.write(header + newbler)
    sh.sbatch(out_dir + "merge_slurm_script.sh")
Example #4
def map2ref(self, ref):
    map_dir = self.out_dir + "mapper_2ref/"
    if not os.path.exists(map_dir):
        os.makedirs(map_dir)
    slurm_header = raw_map_slurm % (self.out_dir, self.name, "2ref", map_dir, map_dir)
    mapper_cmd = raw_map % (self.pair, self.name, ref, "ray_2ref", map_dir)
    self.map_script = slurm_header + mapper_cmd

    with open(map_dir + "map_slurm_script_2ref.sh", "w") as handle:
        handle.writelines(self.map_script)
    sh.sbatch(map_dir + "map_slurm_script_2ref.sh")
    print("Launched mapper for", self.name, "with reference", ref)
Example #5
def run_singles(self, k = 41):
    self.arg = "-p " + " ".join(self.pair)
    script = make_slurm_header(name = "ray_" + self.name + "_" + str(k), path = self.raw_ass + str(k) + "/")
    script += raw_ray_single % (k, self.raw_ass + str(k) + "/ass/", self.arg)
    with open(self.raw_ass + str(k) + "/slurm_script.sh", "w") as handle:
        handle.writelines(script)
    sh.sbatch(self.raw_ass + str(k) + "/slurm_script.sh")
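make_slurm_header is a project-specific helper, but the call sites in this section (name, path, proj, cores, time, deps, constraint) pin its shape down fairly well; note that callers open files under path without creating it first, so the helper presumably makes the directory too. A speculative reconstruction using only standard sbatch directives, with guessed defaults:

import os

def make_slurm_header(path, name, proj = None, cores = 16, time = "1-00:00:00", deps = None, constraint = None):
    # reconstructed from the call sites in this section; defaults are guesses
    if not os.path.exists(path):
        os.makedirs(path)
    header = "#!/bin/bash\n"
    header += "#SBATCH -J %s\n" % name
    header += "#SBATCH -o %s\n" % (path + name + ".out")
    header += "#SBATCH -n %d\n" % cores
    header += "#SBATCH -t %s\n" % time
    if proj:
        header += "#SBATCH -A %s\n" % proj  # accounting project
    if constraint:
        header += "#SBATCH -C %s\n" % constraint  # node feature constraint
    if deps:
        header += "#SBATCH -d afterok:%s\n" % ":".join(str(d) for d in deps)
    return header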
Example #6
def map_merged(self):
    if os.path.exists(self.merge_dir + "454AllContigs.fna"):
        slurm_header = raw_map_slurm % (self.out_dir, self.name, "merged", self.merge_dir, self.merge_dir)
        mapper_cmd = raw_map % (self.reads, self.name, self.merge_dir + "454AllContigs.fna", "merged", self.merge_dir + "mapper/")
        self.map_script = slurm_header + mapper_cmd

        with open(self.merge_dir + "map_slurm_script.sh", "w") as handle:
            handle.writelines(self.map_script)
        sh.sbatch(self.merge_dir + "map_slurm_script.sh")
        print("Launched mapper for", self.name, "with merged assembly")
Example #7
def merge_final():
    asses = [Assembly(p.replace(".fastq.gz","")) for p in os.listdir(raw_path) if ".fastq.gz" in p and not "clean" in p]
    fastas = [ass.merge_dir + "454AllContigs.fna" for ass in asses]
    if not os.path.exists(merged_ass):
        os.makedirs(merged_ass)
    in_size = sum([os.stat(f).st_size for f in fastas])
    newbler = raw_newbler % (merged_ass, " ".join(fastas))
    header = raw_newbler_slurm % ( merged_ass,"merged_all", merged_ass,merged_ass)
    with open(merged_ass + "merge_slurm_script.sh","w") as handle:
        handle.writelines(header + newbler)
    sh.sbatch(merged_ass + "merge_slurm_script.sh")
Example #8
def unmapped_pulling(self):
    if os.path.exists(self.out_dir + "merged/454AllContigs.fna"):
        slurm_header = make_slurm_header(name = "unmapped_" + self.name, path = self.merge_dir + "unmapped_reads/")
        mapper_cmd = raw_unmap % (self.merge_dir + "454AllContigs.fna", self.pair[0], self.pair[1], self.merge_dir + "unmapped_reads/unmapped.fastq")
        self.map_script = slurm_header + mapper_cmd

        with open(self.merge_dir + "unmapped_reads/unmap_slurm_script.sh", "w") as handle:
            handle.writelines(self.map_script)
        sh.sbatch(self.merge_dir + "unmapped_reads/unmap_slurm_script.sh")
        print("Launched unmapper for", self.name)
Example #9
def main(base_dir, cmd_name, id_range):
    all_dirs = sorted(glob(join(base_dir, "*")))
    id_min, id_max = map(int, id_range.split(","))
    used_dirs = all_dirs[id_min:id_max]
    for each_dir in used_dirs:
        sbatch_dir = join(each_dir, "sbatch")
        sh.cd(sbatch_dir)
        sh.sleep(0.5)
        sh.sbatch(cmd_name)
        print(f"submitted {cmd_name} in {basename(each_dir)}")
Example #10
def map_merged(self):
    if os.path.exists(self.merge_dir + "454AllContigs.fna"):
        map_dir = self.out_dir + "mapper_merged/"
        if not os.path.exists(map_dir):
            os.makedirs(map_dir)

        slurm_header = raw_map_slurm % (self.out_dir, self.name, "merged", map_dir, map_dir)
        mapper_cmd = raw_map % (" ".join(self.pair), self.name, self.merge_dir + "454AllContigs.fna", "merged", map_dir)
        self.map_script = slurm_header + mapper_cmd

        with open(map_dir + "map_slurm_script_merged.sh", "w") as handle:
            handle.writelines(self.map_script)
        sh.sbatch(map_dir + "map_slurm_script_merged.sh")
        print("Launched mapper for merged", self.name)
Example #11
def map(self, k):
    if os.path.exists(self.out_dir + "ray_" + str(k) + "/Contigs.fasta"):
        map_dir = self.out_dir + "mapper_" + str(k) + "/"
        if not os.path.exists(map_dir):
            os.makedirs(map_dir)

        slurm_header = raw_map_slurm % (self.out_dir, self.name, str(k), map_dir, map_dir)
        mapper_cmd = raw_map % (" ".join(self.pair), self.name, self.out_dir + "ray_" + str(k) + "/Contigs.fasta", "ray_" + str(k), map_dir)
        self.map_script = slurm_header + mapper_cmd

        with open(map_dir + "map_slurm_script_" + str(k) + ".sh", "w") as handle:
            handle.writelines(self.map_script)
        sh.sbatch(map_dir + "map_slurm_script_" + str(k) + ".sh")
        print("Launched mapper for", self.name, "with k-mer size", k)
Example #12
def run(self):
    self.script = raw_slurm % (self.out_dir, self.name, str(self.k), self.out_dir, self.out_dir)
    if not self.coass:
        clean_reads = self.reads.replace("fastq.gz", "clean.fastq")
        # only run sickle if the cleaned reads do not exist yet
        if not os.path.exists(clean_reads):
            self.script += "sickle se -f " + self.reads + " -t illumina -n -o " + clean_reads + "\n"
        self.arg = clean_reads
    else:
        self.arg = " ".join([" -s " + r.replace("fastq.gz", "clean.fastq") for r in self.reads])

    self.script += raw_ray % (self.out_dir, self.out_dir + "ray", self.arg, self.out_dir, self.out_dir + "slurm_script.sh")
    with open(self.out_dir + "current_k.txt", "w") as handle:
        handle.writelines([str(self.k)])
    with open(self.out_dir + "slurm_script.sh", "w") as handle:
        handle.writelines(self.script)
    sh.sbatch(self.out_dir + "slurm_script.sh")
Example #13
def clean(self, project = "b2011138", submit = False):
    script = make_slurm_header(self.path, "clean_" + self.name, proj = project, cores = 1)

    libraries = self.get_libraries()

    script += make_parallel_sickle_script(libraries, threads = 1)

    with open(self.path + "clean_" + self.name + "_script.sh", "w") as handle:
        handle.writelines(script)
    if submit:
        clean_job = sh.sbatch(self.path + "clean_" + self.name + "_script.sh")
        print("Launched clean_" + self.name)
Example #14
def run(self):
    self.script = raw_slurm % (self.out_dir, self.name, str(self.k), self.out_dir, self.out_dir)
    if not self.coass:
        cleans = ["sickle pe -f " + self.dir + p[0][0] + " -r " + self.dir + p[0][1] +
                  " -t sanger -n -o " + self.clean_dir + p[1][0] + " -p " + self.clean_dir + p[1][1] +
                  " -s " + self.clean_dir + p[2] + "\n"
                  for p in zip(self.pairs, self.cleans, self.singles)]
        self.arg = sum([["-p "] + [self.clean_dir + r for r in p] for p in self.cleans], [])
        # only run sickle if the cleaned singles file does not exist yet
        self.script += "if [ ! -e " + self.clean_dir + self.singles[0] + " ]\nthen\n"
        for c in cleans:
            self.script += c
        self.script += "fi"
    else:
        self.arg = sum([["-p "] + list(p) for p in self.cleans], [])

    self.script += raw_ray % (self.out_dir, self.out_dir + "ray", " ".join(self.arg), self.out_dir, self.out_dir + "slurm_script.sh")
    with open(self.out_dir + "current_k.txt", "w") as handle:
        handle.writelines([str(self.k)])
    with open(self.out_dir + "slurm_script.sh", "w") as handle:
        handle.writelines(self.script)
    sh.sbatch(self.out_dir + "slurm_script.sh")
Example #15
def launch(self):
    """Make the script file and return the newly created job id"""
    # Make script file #
    self.make_script()
    # Do it #
    sbatch_out = sh.sbatch(self.script_path)
    jobs.expire()
    # Message #
    print(Color.i_blu + "SLURM:" + Color.end + " " + str(sbatch_out), end="")
    # Return id #
    self.id = int(re.findall("Submitted batch job ([0-9]+)", str(sbatch_out))[0])
    return self.id
Example #16
def launch(self):
    # Make script file #
    self.write_script()
    # Do it #
    sbatch_out = sh.sbatch(self.script_path)
    jobs.expire()
    # Message #
    print(Color.i_blu + "SLURM:" + Color.end + " " + str(sbatch_out), end="")
    # Clean up #
    if not self.save_script:
        os.remove(self.script_path)
    # Return id #
    self.id = int(re.findall("Submitted batch job ([0-9]+)", str(sbatch_out))[0])
    return self.id
Example #17
 def launch(self):
     """Make the script file and return the newly created job id"""
     # Make script file #
     self.make_script()
     # Do it #
     sbatch_out = sh.sbatch(self.script_path)
     jobs.expire()
     # Message #
     print(Color.i_blu + "SLURM:" + Color.end + " " + str(sbatch_out), end="")
     # Return id #
     self.id = int(
         re.findall("Submitted batch job ([0-9]+)", str(sbatch_out))[0])
     return self.id
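Examples #15-#17 (and #19 below) all recover the job id by scraping sbatch's "Submitted batch job N" banner. That step in isolation, as a sketch (the function name is illustrative):

import re
import sh  # pip install sh

def submit_and_get_id(script_path):
    # sbatch prints "Submitted batch job <id>" on success
    sbatch_out = str(sh.sbatch(script_path))
    match = re.search(r"Submitted batch job ([0-9]+)", sbatch_out)
    if match is None:
        raise RuntimeError("unexpected sbatch output: " + sbatch_out)
    return int(match.group(1))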
Example #18
def diginorm(self, deps = None, submit = False, threads = 4):
    path = self.sample_path
    script = make_slurm_header(path, "khmer_the_hell_" + self.name, self.project, time = "4-00:00:00", deps = deps, cores = threads)
    script += make_khmer_script(self.get_all_reads(), path + "normalised_reads.fastq", threads)
    with open(path + "diginorming.sh", "w") as handle:
        handle.writelines(script)
    if submit:
        job = sh.sbatch(path + "diginorming.sh")
        if 'diginorming' not in self.job_ids:
            self.job_ids['diginorming'] = job.split()[-1]
        print("Launched diginorming on", self.name)
        with open(self.job_file, 'w') as handle:
            json.dump(self.job_ids, handle)
Example #19
def launch(self):
    # Make script file #
    self.write_script()
    # Do it #
    sbatch_out = sh.sbatch(self.script_path)
    jobs.expire()
    # Message #
    print(Color.i_blu + "SLURM:" + Color.end + " " + str(sbatch_out), end="")
    # Clean up #
    if not self.save_script:
        os.remove(self.script_path)
    # Return id #
    self.id = int(re.findall("Submitted batch job ([0-9]+)", str(sbatch_out))[0])
    return self.id
Example #20
def map_binannot(self, infasta, name, submit = False, deps = None):
    path = os.path.dirname(infasta) + "/annotation/"
    script = make_slurm_header(path, "annotate_bin_" + name, self.project, time = "1-00:00:00", deps = deps)
    script += make_bin_bmfa(infasta, path, name = name)
    with open(path + "annotate_bin.sh", "w") as handle:
        handle.writelines(script)
    if submit:
        job = sh.sbatch(path + "annotate_bin.sh")
        if 'annotate' not in self.job_ids:
            self.job_ids['annotate'] = {}
        self.job_ids['annotate'][name] = job.split()[-1]
        print("Launched annotation on", infasta)
        with open(self.job_file, 'w') as handle:
            json.dump(self.job_ids, handle)
Example #21
def clean_all(self, submit = False):
    script = make_slurm_header(self.sample_path, "clean_all", self.project)

    libraries = sum([s.get_libraries() for s in self.samples], [])

    script += make_parallel_sickle_script(libraries)

    with open(self.sample_path + "clean_all_script.sh", "w") as handle:
        handle.writelines(script)
    if submit:
        clean_job = sh.sbatch(self.sample_path + "clean_all_script.sh")
        self.job_ids['clean_all'] = clean_job.split()[-1]
        print("Launched clean_all")
        with open(self.job_file, 'w') as handle:
            json.dump(self.job_ids, handle)
Example #22
def besst_scaffold(self, reference, bowtie_only = False, submit = False, deps = None):
    maps_path = os.path.dirname(reference) + "/mapping_" + os.path.basename(reference) + "/"
    path = os.path.dirname(reference) + "/BESST_scaffolding/"
    script = make_slurm_header(path, "BESST_scaff_of_" + os.path.basename(reference), self.project, time = "4-00:00:00", deps = deps, cores = 1)
    script += make_besst_script(reference, path, self.get_clean_sample_dict(), maps_path)
    with open(path + "besst_script.sh", "w") as handle:
        handle.writelines(script)
    if submit:
        job = sh.sbatch(path + "besst_script.sh")
        if 'scaffolds' not in self.job_ids:
            self.job_ids['scaffolds'] = {}
        self.job_ids['scaffolds'][reference] = job.split()[-1]
        print("Launched scaffolding on", reference)
        with open(self.job_file, 'w') as handle:
            json.dump(self.job_ids, handle)
Example #23
def concocting(self, reference, coverage_csv, min_len = 1000, kmer = 4, submit = False, deps = None):
    covfile = coverage_csv
    path = os.path.dirname(reference) + "/binning/"
    script = make_slurm_header(path, "concocting_of_" + os.path.basename(reference), self.project, time = "4-00:00:00", deps = deps)
    script += make_concoct_script(covfile, reference, path, min_len, kmer)
    with open(path + "concoct_script.sh", "w") as handle:
        handle.writelines(script)
    if submit:
        job = sh.sbatch(path + "concoct_script.sh")
        if 'bins' not in self.job_ids:
            self.job_ids['bins'] = {}
        self.job_ids['bins'][reference] = job.split()[-1]
        print("Launched binning on", reference)
        with open(self.job_file, 'w') as handle:
            json.dump(self.job_ids, handle)
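Several of these methods thread recorded job ids through a deps argument so that later pipeline stages wait for earlier ones. A minimal sketch of the same idea using sbatch's documented --dependency flag directly (the helper name and the example call are illustrative):

import sh  # pip install sh

def submit_after(script_path, dep_ids):
    # wait for the given jobs to finish successfully before starting
    args = []
    if dep_ids:
        args.append("--dependency=afterok:" + ":".join(str(d) for d in dep_ids))
    args.append(script_path)
    out = str(sh.sbatch(*args))
    return out.split()[-1]  # job id, same trick as job.split()[-1] above

# e.g. run binning only once the mapping job has completed:
# map_id = submit_after("mapping_script.sh", [])
# bin_id = submit_after("concoct_script.sh", [map_id])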
Example #24
def normalize(self, project, submit = False, shuffle = True):
    cores = 4
    script = make_slurm_header(self.normalize_path, "khmer_norm_" + self.name, proj = project, cores = cores, time = "7-00:00:00")

    libraries = self.get_all_reads()
    if shuffle:
        random.shuffle(libraries)

    script += make_khmer_script(libraries, self.normalized_sample, threads = cores)

    with open(self.path + "normed_" + self.name + "_script.sh", "w") as handle:
        handle.writelines(script)
    if submit:
        norm_job = sh.sbatch(self.path + "normed_" + self.name + "_script.sh")
        print("Launched khmer_norm_" + self.name)
Example #25
def map_samples(self, reference, bowtie_only = False, submit = False, deps = None):
    path = os.path.dirname(reference) + "/mapping_" + os.path.basename(reference) + "/"
    script = make_slurm_header(path, "mapping_samples_to_" + os.path.basename(reference), self.project, time = "4-00:00:00", deps = deps)
    script += make_bbmapping_script(reference, path, self.get_clean_sample_dict(), only_bowtie = bowtie_only)
#    script += make_star_mapping_script(reference, path, self.get_clean_sample_dict())
#    script += make_mapping_script(reference, path, self.get_clean_sample_dict(), only_bowtie = bowtie_only)
    with open(path + "mapping_script.sh", "w") as handle:
        handle.writelines(script)
    if submit:
        job = sh.sbatch(path + "mapping_script.sh")
        if 'maps' not in self.job_ids:
            self.job_ids['maps'] = {}
        self.job_ids['maps'][reference] = job.split()[-1]
        print("Launched mapper on", reference)
        with open(self.job_file, 'w') as handle:
            json.dump(self.job_ids, handle)
Example #26
def minimus_scaffolder(self, name = "megahit", submit = False):
    if 'assembly' in self.job_ids:
        deps = [self.job_ids['assembly'][name]]
    else:
        deps = ['1']

    path = self.raw_ass + name + "/"
    script = make_slurm_header(path, "scaffminimus_" + name, self.project, constraint = "medium", time = "4-00:00:00", deps = deps)
    script += make_minimus_script(path + "contigs.fasta", path + "scaffolds.fasta")
    with open(path + "scaffminimus_script.sh", "w") as handle:
        handle.writelines(script)
    if submit:
        job = sh.sbatch(path + "scaffminimus_script.sh")
        if 'scaff' not in self.job_ids:
            self.job_ids['scaff'] = {}
        self.job_ids['scaff'][name] = job.split()[-1]
        print("Launched minimus scaffolding of", name)
        with open(self.job_file, 'w') as handle:
            json.dump(self.job_ids, handle)
Example #27
def annotate_all_bins(self, path, submit = False, deps = None, min_size = None):
    bins = [".".join(b.split(".")[:-1]) for b in os.listdir(path)
            if "bin" in b and b.endswith("fasta")]
    if min_size:
        bins = [b for b in bins if os.path.getsize(path + b + ".fasta") > min_size]

    for b in bins:
        nproc = 1
        if os.path.getsize(path + b + ".fasta") > 5000000:
            nproc = 16
        script = make_slurm_header(path + b + "/", "annotate_" + b, self.project, time = "12:00:00", deps = deps, cores = nproc)
        script += make_bin_bmfa(path + b + ".fasta", path + b, name = b, threads = nproc)
        with open(path + b + "/" + "annotate_bin.sh", "w") as handle:
            handle.writelines(script)
        if submit:
            job = sh.sbatch(path + b + "/" + "annotate_bin.sh")
            if 'annotate' not in self.job_ids:
                self.job_ids['annotate'] = {}
            print("Launched annotater on", b)
            with open(self.job_file, 'w') as handle:
                json.dump(self.job_ids, handle)
Example #28
def megahit_assembly(self, name = "megahit", max_read_len = 291, submit = False, reads = None, deps = None):
    if not deps:
        if 'clean_all' in self.job_ids:
            deps = [self.job_ids['clean_all']]
        else:
            deps = ['1']
    path = self.raw_ass + name + "/"
    if not reads:
        reads = self.get_all_reads()

    script = make_slurm_header(path, "assembly_" + name, self.project, constraint = "fat", time = "7-00:00:00", deps = deps)
    script += make_megahit_script(reads, path)
    with open(path + "megahit_assembly_script.sh", "w") as handle:
        handle.writelines(script)
    if submit:
        job = sh.sbatch(path + "megahit_assembly_script.sh")
        if 'assembly' not in self.job_ids:
            self.job_ids['assembly'] = {}
        self.job_ids['assembly'][name] = job.split()[-1]
        print("Launched megahit assembly named", name)
        with open(self.job_file, 'w') as handle:
            json.dump(self.job_ids, handle)
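A number of these methods persist job ids to a JSON file so that later runs can declare dependencies on earlier submissions. The bookkeeping in isolation, as a sketch (the file name and stage keys are illustrative):

import json
import os
import sh  # pip install sh

JOB_FILE = "job_ids.json"  # illustrative path

def submit_and_record(stage, script_path, job_file = JOB_FILE):
    # load any previously recorded ids, submit, record, save
    job_ids = {}
    if os.path.exists(job_file):
        with open(job_file) as handle:
            job_ids = json.load(handle)
    out = str(sh.sbatch(script_path))
    job_ids[stage] = out.split()[-1]  # "Submitted batch job <id>"
    with open(job_file, "w") as handle:
        json.dump(job_ids, handle)
    return job_ids[stage]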
Example #29
def reconcoct_all_good_bins(self, covfile, path, submit = False, deps = None, min_contamination = 10):
    bins = [".".join(b.split(".")[:-1]) for b in os.listdir(path) if "bin" in b and "fasta" in b]
    bins = [b for b in bins if b != "bin_non-bin"]
    contams = {}
    for b in bins:
        with open(path + b + "/checkm.txt") as handle:
            lines = handle.readlines()
        if len(lines) > 3:
            contams[b] = float([l.split()[13] for l in lines if b in l][0])

    bins = [b for b, c in contams.items() if c > min_contamination]
    bins = [b for b in bins if not os.path.exists(path + b + "/" + b + "_non-bin.fasta")]
    for b in bins:
        nproc = 16
        script = make_slurm_header(path + b + "/", "reconcoct_" + b, self.project, time = "12:00:00", deps = deps, cores = nproc)
        script += make_reconcoct_script(covfile, path + b + ".fasta", b + "_", cutoff = 5000)

        with open(path + b + "/" + "reconcoct_bin.sh", "w") as handle:
            handle.writelines(script)
        if submit:
            job = sh.sbatch(path + b + "/" + "reconcoct_bin.sh")
            print("Launched re-concocter on", b)
Example #30
    def _submit(self, execution):
        url = os.path.join(
            execution.storage.config["url"],
            execution.storage.config["directory"] or "",
            execution.submission_id,
        )

        # script path
        script_path = "engine/slurm_" + pendulum.now().strftime(
            "%d-%m-%Y_%H-%M-%S")
        with open_fs(url) as filesystem:
            filesystem.makedirs(script_path, recreate=True)
        script_url = os.path.join(url, script_path)

        # submission
        project = json.dumps(execution.project.options).replace('"', '\\"')
        code = f"""
        import machinable as ml
        e = ml.Execution.from_storage('{abspath(url)}')
        e.set_engine('native')
        e.set_storage({execution.storage.config})
        e.set_project(ml.Project.from_json('{project}'))
        e.filter(lambda i, component, _: component == '$COMPONENT_ID')
        e.submit()
        """.replace("\n        ", ";")[1:-1]
        submission = (
            f'cd {execution.project.directory_path};\n{self.python} -c "{code};"\n'
        )

        for (
                index,
                execution_type,
                component,
                components,
                storage,
                resources,
                args,
                kwargs,
        ) in execution.schedule.iterate(execution.storage.config):
            component_id = component["flags"]["COMPONENT_ID"]
            component_path = os.path.join(url.replace("osfs://", ""),
                                          component_id)
            os.makedirs(component_path, exist_ok=True)
            script = f"{self.shebang}\n"
            script += f'#SBATCH --job-name="{execution.submission_id}:{component_id}"\n'
            script += f"#SBATCH -o {os.path.join(component_path,  'output.log')}\n"
            script += "#SBATCH --open-mode=append\n"
            _c = mapped_config(component)
            _cc = mapped_config(components)
            script += _wrap(
                call_with_context(
                    self.before_script,
                    execution=execution,
                    index=index,
                    execution_type=execution_type,
                    component=_c,
                    components=_cc,
                    config=_c.config,
                    flags=_c.flags,
                    storage=storage,
                    resources=resources,
                    args=args,
                    kwargs=kwargs,
                ))
            script += submission.replace("$COMPONENT_ID", component_id)
            script += _wrap(
                call_with_context(
                    self.after_script,
                    execution=execution,
                    index=index,
                    execution_type=execution_type,
                    component=_c,
                    components=_cc,
                    config=_c.config,
                    flags=_c.flags,
                    storage=storage,
                    resources=resources,
                    args=args,
                    kwargs=kwargs,
                ))

            # write script to disk
            target = os.path.join(script_url.replace("osfs://", ""),
                                  f"{component_id}.sh")

            if target.find("://") != -1:
                raise ValueError("Slurm engine only supports local storages")

            with open(target, "w") as f:
                f.write(script)
            st = os.stat(target)
            os.chmod(target, st.st_mode | stat.S_IEXEC)

            # submit to slurm
            try:
                sbatch_arguments = []
                for k, v in self.canonicalize_resources(resources).items():
                    sbatch_arguments.append(k)
                    if v not in [None, True]:
                        sbatch_arguments.append(str(v))
                sbatch_arguments.append(target)
                p = sh.sbatch(*sbatch_arguments)
                output = p.stdout.decode("utf-8")
                try:
                    job_id = int(output.rsplit(" ", maxsplit=1)[-1])
                except ValueError:
                    job_id = False
                info = self.serialize()
                info.update({
                    "job_id": job_id,
                    "cmd": "sbatch " + " ".join(sbatch_arguments),
                    "script": target,
                })
                execution.set_result(info, echo=False)
                execution.storage.save_file(f"{component_id}/engine/info.json",
                                            info)
            except Exception as ex:
                if isinstance(ex, sh.ErrorReturnCode):
                    message = ex.stderr.decode("utf-8")
                else:
                    message = exception_to_str(ex)
                execution.set_result(
                    ExecutionException(message, reason="engine_failure"),
                    echo=True,
                )

        total = len(execution.schedule._result)
        success = len(execution.schedule._result) - execution.failures
        self.log(f"Submitted {success}/{total} jobs successfully")

        return execution
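The submission loop above flattens a resources mapping into sbatch's argument list via canonicalize_resources, treating None/True values as bare flags. That flattening on its own, as a sketch (the mapping shown is illustrative):

import sh  # pip install sh

def resources_to_sbatch_args(resources, target):
    # flatten {flag: value} into the argv list sh.sbatch expects
    args = []
    for flag, value in resources.items():
        args.append(flag)
        # flags like --exclusive take no value; mirror the None/True check above
        if value not in (None, True):
            args.append(str(value))
    args.append(target)
    return args

# sh.sbatch(*resources_to_sbatch_args({"--mem": "4G", "--cpus-per-task": 2}, "job.sh"))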
Example #31
#!/usr/bin/env python
import os
from sh import sbatch


ROOT = os.path.dirname(__file__)



for username in ('test1', 'test2'):
    for i in range(500):
        sbatch('-A', username, os.path.join(ROOT, 'job.py'))

Example #32
 def hmmer_the_hell(self, database = "~/glob/data/pfam/Pfam-A.hmm"):
     script = make_slurm_header(name = "hmmer_" + self.name + "_" + database.split("/")[-1], path = self.merge_dir + "annotation/" )
     script += raw_hmm_pipe  % (database, self.merge_dir + "annotation/full.faa",  self.merge_dir + "annotation/" + database.split("/")[-1] + ".out")
     with open(self.merge_dir + "annotation/" + "hmmer_slurm_script.sh","w") as handle:
         handle.writelines(script)
     sh.sbatch(self.merge_dir + "annotation/" + "hmmer_slurm_script.sh")
Example #33
    --load_decoder saved_models/{decoder} \
    --unsup_loss_weight {unsup_loss_weight} \
    --patience 5 \
    --max_num_trial 3 \
    --lr_decay {lr_decay} \
    --save_to saved_models/{job_name} 2>>logs/{job_name}.log

python exp.py \
	--cuda \
    --mode test \
    --load_model saved_models/{job_name}.bin \
    --beam_size 15 \
    --test_file ../data/django/test.bin \
    --decode_max_time_step 100 2>>logs/{job_name}.log

""".format(train_data=train_data, unlabeled_data=unlabeled_data,
           encoder=encoder, decoder=decoder,
           unsup_loss_weight=unsup_loss_weight,
           lr_decay=lr_decay,
           job_name=job_name))

cmd = sbatch('--gres', 'gpu:1',
             '--job-name', job_name,
             '--mem', 15000,  # memory
             '--cpus-per-task', 8,  # number of cpus
             '--time', 0,  # wait time: unlimited
             '--output', 'logs/%s.out' % job_name,  # redirect stdout to file
             job_script)

print(cmd.stdout)
Example #34
    print('\tsubmitting: %s' % job_script)

slurm_script = """#!/usr/bin/env python

import sh, sys
jobs = []
for job_script in {job_scripts}:
	job = sh.bash(job_script, _bg=True, _out=sys.stdout)
	jobs.append(job)

for job in jobs: job.wait()
""".format(job_scripts='[%s]' %
           ', '.join(['"%s"' % job_script for job_script in args.job_scripts]))

job = sbatch(
    '--gres',
    'gpu:1',
    '--job-name',
    args.job_name,
    '--mem',
    args.mem_per_job * job_num,  # memory
    '--cpus-per-task',
    args.cpus_per_job * job_num,  # number of cpus
    '--time',
    0,  # wait time: unlimited
    '--output',
    args.output,  # assume you don't need stdout
    _in=slurm_script)

print(job.stdout)
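The _in keyword used above is the sh library's way of feeding a string to the child process's stdin, which works here because sbatch reads the batch script from stdin when no script file argument is given. A minimal sketch of just that mechanism (job name and output path are arbitrary):

import sh  # pip install sh

# sbatch accepts the batch script on stdin when no file is passed;
# sh's _in keyword supplies that stdin as a string
script = "#!/bin/bash\necho hello from $SLURM_JOB_ID\n"
job = sh.sbatch('--job-name', 'stdin-demo', '--output', 'demo.out', _in=script)
print(job.stdout)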