def test_qsub_cmd(self):
    """Check the command lines built by SgeOptions.qsub_cmd.

    Two configurations are compared against literal expected strings:
    one built from ``unique_id`` only, and one that also sets a queue and
    an environment name and asks for ``wait_before_exit`` and
    ``depend_on_jobs``.
    """
    script = "a.sh"

    # Only unique_id given.
    plain_opts = SgeOptions(unique_id=100)
    expected_plain = ("qsub -cwd -V -S /bin/bash -pe smp 1 "
                      "-e /dev/null -o /dev/null a.sh")
    self.assertEqual(plain_opts.qsub_cmd(script, num_threads=1),
                     expected_plain)

    # Custom queue / environment name, plus wait and dependency options.
    custom_opts = SgeOptions(unique_id=100,
                             sge_queue="my_sge_queue",
                             sge_env_name="orte")
    expected_custom = ("qsub -cwd -V -S /bin/bash -pe orte 1 "
                       "-q my_sge_queue -sync y -hold_jid 1,2,3 "
                       "-e /dev/null -o /dev/null a.sh")
    actual_custom = custom_opts.qsub_cmd(script,
                                         num_threads=1,
                                         wait_before_exit=True,
                                         depend_on_jobs=['1', '2', '3'])
    self.assertEqual(actual_custom, expected_custom)
def run(self):
    """Execute ice_partial.py all|one|split|i|merge.

    Builds the runner object matching ``self.args.subCommand``, logs the
    runner's command string and then runs it.

    Returns:
        0 on success; 1 if the sub-command is unknown or if building or
        running the runner raised an exception (the traceback is logged).
    """
    cmd = self.args.subCommand
    logging.info("Running {f} {cmd} v{v}.".format(f=op.basename(__file__),
                                                   cmd=cmd,
                                                   v=get_version()))
    # Stays empty in the failure log if we fail before the runner exists.
    cmd_str = ""
    try:
        args = self.args
        if cmd == "all":
            sge_opts = SgeOptions(unique_id=args.unique_id,
                                  use_sge=args.use_sge,
                                  max_sge_jobs=args.max_sge_jobs,
                                  blasr_nproc=args.blasr_nproc)
            obj = IceAllPartials(
                root_dir=args.root_dir,
                fasta_filenames=args.fasta_filenames.split(','),
                ref_fasta=args.ref_fasta,
                out_pickle=args.out_pickle,
                sge_opts=sge_opts,
                ccs_fofn=args.ccs_fofn,
                tmp_dir=args.tmp_dir)
        elif cmd == "one":
            # Only assign nfl reads in the given input_fasta file to isoforms
            obj = IcePartialOne(input_fasta=args.input_fasta,
                                ref_fasta=args.ref_fasta,
                                out_pickle=args.out_pickle,
                                ccs_fofn=args.ccs_fofn,
                                done_filename=args.done_filename,
                                blasr_nproc=args.blasr_nproc,
                                tmp_dir=args.tmp_dir)
        elif cmd == "split":
            obj = IcePartialSplit(root_dir=args.root_dir,
                                  nfl_fa=args.nfl_fa,
                                  N=args.N)
        elif cmd == "i":
            obj = IcePartialI(root_dir=args.root_dir,
                              i=args.i,
                              ccs_fofn=args.ccs_fofn,
                              blasr_nproc=args.blasr_nproc,
                              tmp_dir=args.tmp_dir)
        elif cmd == "merge":
            obj = IcePartialMerge(root_dir=args.root_dir, N=args.N)
        else:
            raise ValueError(
                "Unknown command passed to {f}: {cmd}.".format(
                    f=op.basename(__file__), cmd=cmd))

        cmd_str = obj.cmd_str()
        logging.info("Running CMD: {cmd_str}".format(cmd_str=cmd_str))
        obj.run()
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and
        # SystemExit must propagate instead of being reported as an
        # ordinary failure of the sub-command.
        logging.exception("Exiting {cmd_str} with return code 1.".format(
            cmd_str=cmd_str))
        return 1
    return 0
def main(query_filename, target_filename, output_dir):
    """Run DalignerRunner on one query/target pair.

    The runner is configured with fixed settings (non-FL, same strand
    only, inputs not yet converted, SGE enabled with ``SgeOptions(100)``).

    Returns:
        Whatever ``DalignerRunner.run`` returns for ``output_dir``.
    """
    sge_opts = SgeOptions(100)
    runner = DalignerRunner(
        query_filename=query_filename,
        target_filename=target_filename,
        is_FL=False,
        same_strand_only=True,
        query_converted=False,
        target_converted=False,
        use_sge=True,
        sge_opts=sge_opts,
    )
    result = runner.run(output_dir=output_dir)
    return result
def __init__(self, root_dir, subread_set, nproc):
    """Set up an IceQuiverRTC rooted at *root_dir*.

    Creates ``<root_dir>/tmp`` and then initialises the parent class with
    SGE disabled, using *nproc* for both blasr and quiver and passing
    *subread_set* as ``bas_fofn``.
    """
    mkdir(op.join(root_dir, "tmp"))

    # Local-only execution: no SGE, no SGE job slots.
    local_opts = SgeOptions(unique_id=12345,
                            use_sge=False,
                            max_sge_jobs=0,
                            blasr_nproc=nproc,
                            quiver_nproc=nproc)

    super(IceQuiverRTC, self).__init__(root_dir=root_dir,
                                       bas_fofn=subread_set,
                                       fasta_fofn=None,
                                       sge_opts=local_opts,
                                       prog_name="IceQuiver")
def test_run(self):
    """Test run(output_dir, min_match_len, sensitive_mode).

    Runs on SGE (only when ``qstat`` succeeds) and then locally, checking
    after each run that every las / la4ice output exists; finally calls
    ``clean_run`` and checks that those outputs are gone.
    """
    output_attrs = ("las_filenames", "la4ice_filenames")

    def _assert_outputs_exist():
        """Assert every las and la4ice output file is present."""
        for attr in output_attrs:
            for path in getattr(self.runner, attr):
                print("Checking existance of " + path)
                self.assertTrue(op.exists(path))

    def _assert_outputs_removed():
        """Assert every las and la4ice output file has been deleted."""
        for attr in output_attrs:
            for path in getattr(self.runner, attr):
                print("Checking %s has been removed.\n" % path)
                self.assertTrue(not op.exists(path))

    # A zero status from `qstat` is taken to mean SGE is usable.
    sge_available = backticks('qstat')[1] == 0
    if sge_available:
        self.runner.use_sge = True
        self.runner.sge_opts = SgeOptions(100)
        mknewdir(self.out_dir)
        self.runner.run(output_dir=self.out_dir)
        _assert_outputs_exist()

    # Run locally
    self.runner.use_sge = False
    mknewdir(self.out_dir)
    self.runner.run(output_dir=self.out_dir)
    _assert_outputs_exist()

    # clean all output
    self.runner.clean_run()
    _assert_outputs_removed()
def run(self):
    """Create the output directory and run DalignerRunner on the CLI args.

    SGE settings and the query/target files are taken from ``self.args``;
    both inputs are passed as not yet converted.
    """
    logging.info("Running {f} v{v}.".format(f=op.basename(__file__),
                                             v=self.getVersion()))
    opts = self.args
    mkdir(opts.output_dir)

    sge_opts = SgeOptions(
        unique_id=opts.unique_id,
        use_sge=opts.use_sge,
        max_sge_jobs=opts.max_sge_jobs,
        blasr_nproc=opts.blasr_nproc,
        sge_env_name=opts.sge_env_name,
        sge_queue=opts.sge_queue,
    )
    runner = DalignerRunner(
        query_filename=opts.query_fasta,
        target_filename=opts.target_fasta,
        is_FL=opts.is_FL,
        same_strand_only=opts.same_strand_only,
        query_converted=False,
        target_converted=False,
        use_sge=opts.use_sge,
        sge_opts=sge_opts,
    )
    runner.run(output_dir=opts.output_dir)
def run(self):
    """Build the option objects from ``self.args`` and run Polish.

    Returns:
        0 when Polish completes; 1 when constructing or running it raises
        an Exception (the message is logged and the traceback printed).
    """
    logging.info("Running {f} v{v}.".format(f=op.basename(__file__),
                                             v=self.getVersion()))
    opts = self.args

    sge_opts = SgeOptions(
        unique_id=opts.unique_id,
        use_sge=opts.use_sge,
        max_sge_jobs=opts.max_sge_jobs,
        quiver_nproc=opts.quiver_nproc,
        blasr_nproc=opts.blasr_nproc,
        sge_env_name=opts.sge_env_name,
        sge_queue=opts.sge_queue,
    )
    ipq_opts = IceQuiverHQLQOptions(
        hq_isoforms_fa=opts.hq_isoforms_fa,
        hq_isoforms_fq=opts.hq_isoforms_fq,
        lq_isoforms_fa=opts.lq_isoforms_fa,
        lq_isoforms_fq=opts.lq_isoforms_fq,
        qv_trim_5=opts.qv_trim_5,
        qv_trim_3=opts.qv_trim_3,
        hq_quiver_min_accuracy=opts.hq_quiver_min_accuracy,
    )

    try:
        polisher = Polish(
            root_dir=opts.root_dir,
            nfl_fa=opts.nfl_fa,
            bas_fofn=opts.bas_fofn,
            ccs_fofn=opts.ccs_fofn,
            sge_opts=sge_opts,
            ice_opts=IceOptions(),
            ipq_opts=ipq_opts,
            tmp_dir=opts.tmp_dir,
        )
        polisher.run()
    except Exception as err:
        logging.error(str(err))
        import traceback
        traceback.print_exc()
        return 1
    else:
        return 0
def test_sge_job_runner(self):
    """Submit four short sleep commands through sge_job_runner.

    Script files left over in ``self.out_dir`` are removed first. The
    optional ``done_script`` argument is not exercised here, and the
    returned value is not checked.
    """
    cmds = ["sleep 5"] * 4
    script_files = [
        op.join(self.out_dir, "test_sge_job_runner_%s.sh" % idx)
        for idx, _ in enumerate(cmds)
    ]

    # Remove any script files from a previous run.
    for stale_script in script_files:
        backticks('rm %s' % stale_script)

    sge_job_runner(cmds,
                   script_files=script_files,
                   num_threads_per_job=1,
                   sge_opts=SgeOptions(100),
                   qsub_try_times=1)
def run(self):
    """Run the classify, cluster or subset sub-command.

    The sub-command is read from ``self._subCommand``; any other value
    raises PBTranscriptException, which is handled like every other
    failure here.

    Returns:
        0 on success; 1 if anything raised an Exception (the traceback is
        logged).
    """
    cmd = self._subCommand
    try:
        args = self.args
        if cmd == 'classify':
            chimera_opts = ChimeraDetectionOptions(
                min_seq_len=args.min_seq_len,
                min_score=args.min_score,
                min_dist_from_end=args.min_dist_from_end,
                max_adjacent_hit_dist=args.max_adjacent_hit_dist,
                primer_search_window=args.primer_search_window,
                detect_chimera_nfl=args.detect_chimera_nfl)
            worker = Classifier(
                reads_fn=args.readsFN,
                out_dir=args.outDir,
                out_reads_fn=args.outReadsFN,
                primer_fn=args.primerFN,
                primer_report_fn=args.primerReportFN,
                summary_fn=args.summary_fn,
                cpus=args.cpus,
                change_read_id=True,
                opts=chimera_opts,
                out_flnc_fn=args.flnc_fa,
                out_nfl_fn=args.nfl_fa,
                ignore_polyA=args.ignore_polyA,
                reuse_dom=args.reuse_dom,
                ignore_empty_output=args.ignore_empty_output)
        elif cmd == 'cluster':
            ice_opts = IceOptions(
                quiver=args.quiver,
                use_finer_qv=args.use_finer_qv,
                targeted_isoseq=args.targeted_isoseq,
                flnc_reads_per_split=args.flnc_reads_per_split,
                nfl_reads_per_split=args.nfl_reads_per_split,
                num_clusters_per_bin=args.num_clusters_per_bin)
            sge_opts = SgeOptions(
                unique_id=args.unique_id,
                use_sge=args.use_sge,
                max_sge_jobs=args.max_sge_jobs,
                blasr_nproc=args.blasr_nproc,
                quiver_nproc=args.quiver_nproc,
                sge_queue=args.sge_queue,
                sge_env_name=args.sge_env_name)
            ipq_opts = IceQuiverHQLQOptions(
                qv_trim_5=args.qv_trim_5,
                qv_trim_3=args.qv_trim_3,
                hq_quiver_min_accuracy=args.hq_quiver_min_accuracy,
                hq_isoforms_fa=args.hq_isoforms_fa,
                hq_isoforms_fq=args.hq_isoforms_fq,
                lq_isoforms_fa=args.lq_isoforms_fa,
                lq_isoforms_fq=args.lq_isoforms_fq)
            worker = Cluster(
                root_dir=args.root_dir,
                flnc_fa=args.flnc_fa,
                nfl_fa=args.nfl_fa,
                bas_fofn=args.bas_fofn,
                ccs_fofn=args.ccs_fofn,
                out_fa=args.consensusFa,
                sge_opts=sge_opts,
                ice_opts=ice_opts,
                ipq_opts=ipq_opts,
                report_fn=args.report_fn,
                summary_fn=args.summary_fn,
                output_pickle_file=args.pickle_fn,
                tmp_dir=args.tmp_dir)
        elif cmd == 'subset':
            rules = SubsetRules(FL=args.FL,
                                nonChimeric=args.nonChimeric)
            worker = ReadsSubsetExtractor(
                inFN=args.readsFN,
                outFN=args.outFN,
                rules=rules,
                ignore_polyA=args.ignore_polyA,
                printReadLengthOnly=args.printReadLengthOnly)
        else:
            raise PBTranscriptException(
                cmd,
                "Unknown command passed to pbtranscript:" + args.subName)

        # Every recognised sub-command ends by running its worker.
        worker.run()
    except Exception:
        logging.exception("Exiting pbtranscript with return code 1.")
        return 1
    return 0
def run(self):
    """Execute ice_quiver.py all|i|merge|postprocess.

    Builds the runner object matching ``self.args.subCommand``, logs the
    runner's command string and then runs it.

    Returns:
        0 on success; 1 if the sub-command is unknown or if building or
        running the runner raised an exception (the traceback is logged).
    """
    cmd = self.args.subCommand
    logging.info("Running {f} {cmd} v{v}.".format(f=op.basename(__file__),
                                                   cmd=cmd,
                                                   v=get_version()))
    # Stays empty in the failure log if we fail before the runner exists.
    cmd_str = ""
    try:
        args = self.args

        def _make_sge_opts():
            """SGE options shared by the 'all' and 'i' sub-commands."""
            return SgeOptions(unique_id=args.unique_id,
                              use_sge=args.use_sge,
                              max_sge_jobs=args.max_sge_jobs,
                              blasr_nproc=args.blasr_nproc,
                              quiver_nproc=args.quiver_nproc)

        def _make_ipq_opts():
            """HQ/LQ options shared by 'all' and 'postprocess'."""
            return IceQuiverHQLQOptions(
                hq_isoforms_fa=args.hq_isoforms_fa,
                hq_isoforms_fq=args.hq_isoforms_fq,
                lq_isoforms_fa=args.lq_isoforms_fa,
                lq_isoforms_fq=args.lq_isoforms_fq,
                qv_trim_5=args.qv_trim_5,
                qv_trim_3=args.qv_trim_3,
                hq_quiver_min_accuracy=args.hq_quiver_min_accuracy)

        if cmd == "all":
            sge_opts = _make_sge_opts()
            ipq_opts = _make_ipq_opts()
            obj = IceQuiverAll(root_dir=args.root_dir,
                               bas_fofn=args.bas_fofn,
                               fasta_fofn=None,
                               sge_opts=sge_opts,
                               ipq_opts=ipq_opts,
                               tmp_dir=args.tmp_dir)
        elif cmd == "i":
            obj = IceQuiverI(root_dir=args.root_dir,
                             i=args.i,
                             N=args.N,
                             bas_fofn=args.bas_fofn,
                             fasta_fofn=None,
                             sge_opts=_make_sge_opts(),
                             tmp_dir=args.tmp_dir)
        elif cmd == "merge":
            obj = IceQuiverMerge(root_dir=args.root_dir, N=args.N)
        elif cmd == "postprocess":
            obj = IceQuiverPostprocess(
                root_dir=args.root_dir,
                ipq_opts=_make_ipq_opts(),
                use_sge=args.use_sge,
                quit_if_not_done=args.quit_if_not_done,
                summary_fn=args.summary_fn,
                report_fn=args.report_fn)
        else:
            raise ValueError(
                "Unknown command passed to {f}: {cmd}.".format(
                    f=op.basename(__file__), cmd=cmd))

        cmd_str = obj.cmd_str()
        logging.info("Running CMD: {cmd_str}".format(cmd_str=cmd_str))
        obj.run()
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and
        # SystemExit must propagate instead of being reported as an
        # ordinary failure of the sub-command.
        logging.exception("Exiting {cmd_str} with return code 1.".format(
            cmd_str=cmd_str))
        return 1
    return 0
def args_runner(args):
    """Run the five pipeline stages driven by *args*.

    Stages, in order:
      1. separate FLNC reads into bins;
      2. run Cluster on each bin (bins whose HQ polished isoforms already
         exist are skipped);
      3. combine the isoforms from all bins;
      4. map HQ isoforms with GMAP and sort;
      5. post-mapping analysis (collapse, abundance, filter).

    Returns:
        0 once every stage has finished.

    Raises:
        ValueError: if ``args.collapsed_filtered_fn`` does not end with a
            FASTA (.fa/.fasta) or FASTQ (.fq/.fastq) extension.
    """
    logging.info("%s arguments are:\n%s\n", __file__, args)

    # sanity check arguments
    _sanity_check_args(args)

    # make option objects
    ice_opts = IceOptions(
        quiver=args.quiver,
        use_finer_qv=args.use_finer_qv,
        targeted_isoseq=args.targeted_isoseq,
        ece_penalty=args.ece_penalty,
        ece_min_len=args.ece_min_len,
        flnc_reads_per_split=args.flnc_reads_per_split,
        nfl_reads_per_split=args.nfl_reads_per_split)
    sge_opts = SgeOptions(
        unique_id=args.unique_id,
        use_sge=args.use_sge,
        max_sge_jobs=args.max_sge_jobs,
        blasr_nproc=args.blasr_nproc,
        quiver_nproc=args.quiver_nproc,
        gcon_nproc=args.gcon_nproc,
        sge_env_name=args.sge_env_name,
        sge_queue=args.sge_queue)
    ipq_opts = IceQuiverHQLQOptions(
        qv_trim_5=args.qv_trim_5,
        qv_trim_3=args.qv_trim_3,
        hq_quiver_min_accuracy=args.hq_quiver_min_accuracy)

    # (1) separate flnc reads into bins
    logging.info("Separating FLNC reads into bins.")
    tofu_f = TofuFiles(tofu_dir=args.tofu_dir)
    separator = SeparateFLNCRunner(
        flnc_fa=args.flnc_fa,
        root_dir=args.tofu_dir,
        out_pickle=tofu_f.separate_flnc_pickle,
        bin_size_kb=args.bin_size_kb,
        bin_by_primer=args.bin_by_primer,
        bin_manual=args.bin_manual,
        max_base_limit_MB=args.max_base_limit_MB)
    separator.run()

    flnc_files = SeparateFLNCBase.convert_pickle_to_sorted_flnc_files(
        tofu_f.separate_flnc_pickle)
    logging.info("Separated FLNC reads bins are %s", flnc_files)

    # (2) apply 'pbtranscript cluster' to each bin
    # run ICE/Quiver (the whole thing), providing the fasta_fofn
    logging.info("Running ICE/Polish on separated FLNC reads bins.")
    split_dirs = []
    for flnc_file in flnc_files:
        split_dir = op.join(realpath(op.dirname(flnc_file)), "cluster_out")
        mkdir(split_dir)
        # Recorded even when the bin is skipped below, so the combine
        # stage still sees it.
        split_dirs.append(split_dir)
        cur_out_cons = op.join(split_dir, "consensus_isoforms.fasta")

        ipq_f = IceQuiverPostprocess(root_dir=split_dir, ipq_opts=ipq_opts)
        if op.exists(ipq_f.quivered_good_fq):
            logging.warning("HQ polished isoforms %s already exist. SKIP!",
                            ipq_f.quivered_good_fq)
            continue

        logging.info("Running ICE/Quiver on %s", split_dir)
        rmpath(cur_out_cons)

        cluster = Cluster(
            root_dir=split_dir,
            flnc_fa=flnc_file,
            nfl_fa=args.nfl_fa,
            bas_fofn=args.bas_fofn,
            ccs_fofn=args.ccs_fofn,
            fasta_fofn=args.fasta_fofn,
            out_fa=cur_out_cons,
            sge_opts=sge_opts,
            ice_opts=ice_opts,
            ipq_opts=ipq_opts)

        if not args.mem_debug:
            cluster.run()
        else:
            # DEBUG: run under memory_profiler and append the timing and
            # memory figures to mem_debug.log.
            from memory_profiler import memory_usage
            started = time.time()
            mem_usage = memory_usage(cluster.run, interval=60)
            finished = time.time()
            with open('mem_debug.log', 'a') as log:
                log.write("Running ICE/Quiver on {0} took {1} secs.\n".format(
                    split_dir, finished - started))
                log.write("Maximum memory usage: {0}\n".format(
                    max(mem_usage)))
                log.write("Memory usage: {0}\n".format(mem_usage))

        if not args.keep_tmp_files:
            # By default, delete all temporary files.
            # NOTE(review): Popen returns without waiting, so these
            # removals may still be in progress when later stages start.
            logging.info("Deleting %s", ipq_f.tmp_dir)
            subprocess.Popen(['rm', '-rf', '%s' % ipq_f.tmp_dir])
            logging.info("Deleting %s", ipq_f.quivered_dir)
            subprocess.Popen(['rm', '-rf', '%s' % ipq_f.quivered_dir])

    # (3) merge polished isoform cluster from all bins
    logging.info("Merging isoforms from all bins to %s.",
                 tofu_f.combined_dir)
    combiner = CombineRunner(
        combined_dir=tofu_f.combined_dir,
        sample_name=get_sample_name(args.sample_name),
        split_dirs=split_dirs,
        ipq_opts=ipq_opts)
    combiner.run()
    if args.summary_fn is not None:
        ln(tofu_f.all_cluster_summary_fn, args.summary_fn)
    if args.report_fn is not None:
        ln(tofu_f.all_cluster_report_fn, args.report_fn)

    # (4) map HQ isoforms to GMAP reference genome
    map_isoforms_and_sort(
        input_filename=tofu_f.all_hq_fq,
        sam_filename=tofu_f.sorted_gmap_sam,
        gmap_db_dir=args.gmap_db,
        gmap_db_name=args.gmap_name,
        gmap_nproc=args.gmap_nproc)

    # (5) post mapping to genome analysis, including
    #     * collapse polished HQ isoform clusters into groups
    #     * count abundance of collapsed isoform groups
    #     * filter collapsed isoforms based on abundance info
    logging.info("Post mapping to genome analysis.")
    out_isoforms = args.collapsed_filtered_fn
    # The input format follows the requested output format.
    if out_isoforms.endswith((".fa", ".fasta")):
        in_isoforms = tofu_f.all_hq_fa
    elif out_isoforms.endswith((".fq", ".fastq")):
        in_isoforms = tofu_f.all_hq_fq
    else:
        raise ValueError("Output file %s must be FASTA or FASTQ!" %
                         out_isoforms)

    post_mapping_to_genome_runner(
        in_isoforms=in_isoforms,
        in_sam=tofu_f.sorted_gmap_sam,
        in_pickle=tofu_f.hq_lq_prefix_dict_pickle,
        out_isoforms=args.collapsed_filtered_fn,
        out_gff=args.gff_fn,
        out_abundance=args.abundance_fn,
        out_group=args.group_fn,
        out_read_stat=args.read_stat_fn,
        min_aln_coverage=args.min_aln_coverage,
        min_aln_identity=args.min_aln_identity,
        min_flnc_coverage=args.min_flnc_coverage,
        max_fuzzy_junction=args.max_fuzzy_junction,
        allow_extra_5exon=args.allow_extra_5exon,
        min_count=args.min_count)

    return 0
def test_sanity_check_sge(self):
    """Check that IceUtils.sanity_check_sge returns a true value.

    Called with ``SgeOptions(100)`` and ``self.outDir``.
    """
    sge_opts = SgeOptions(100)
    outcome = IceUtils.sanity_check_sge(sge_opts, self.outDir)
    self.assertTrue(outcome)
def tes_daligner_against_ref_use_sge(self):
    """Test daligner_against_ref() using fake prob model on sge.

    Delegates to ``_test_daligner_against_ref`` with SGE enabled and
    default-constructed SgeOptions.

    NOTE(review): the method name starts with ``tes_`` rather than
    ``test``, so name-based test collection will presumably skip it;
    confirm whether that is intentional.
    """
    sge_opts = SgeOptions()
    self._test_daligner_against_ref(
        test_name="test_daligner_against_ref_use_sge",
        use_sge=True,
        sge_opts=sge_opts,
    )