def run(self):
    """Execute ice_partial.py all|one|split|i|merge.

    Builds the worker object that matches ``self.args.subCommand``, logs
    the worker's command string and then runs it.

    Returns:
        0 when the worker ran to completion; 1 when the sub-command is
        unknown or when building/running the worker raised an
        ``Exception`` (the traceback is logged).

    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not
    intercepted, so Ctrl-C and explicit exits propagate to the caller
    instead of being reported as a generic failure.
    """
    cmd = self.args.subCommand
    logging.info("Running {f} {cmd} v{v}.".format(f=op.basename(__file__),
                                                  cmd=cmd, v=get_version()))

    # Stays empty in the failure message if we fail before the worker
    # object could report its own command string.
    cmd_str = ""
    try:
        args = self.args
        if cmd == "all":
            sge_opts = SgeOptions(unique_id=args.unique_id,
                                  use_sge=args.use_sge,
                                  max_sge_jobs=args.max_sge_jobs,
                                  blasr_nproc=args.blasr_nproc)
            obj = IceAllPartials(
                root_dir=args.root_dir,
                fasta_filenames=args.fasta_filenames.split(','),
                ref_fasta=args.ref_fasta,
                out_pickle=args.out_pickle,
                sge_opts=sge_opts,
                sa_file=args.sa_file,
                ccs_fofn=args.ccs_fofn)
        elif cmd == "one":
            # Only assign nfl reads in the given input_fasta file to isoforms
            obj = IcePartialOne(input_fasta=args.input_fasta,
                                ref_fasta=args.ref_fasta,
                                out_pickle=args.out_pickle,
                                sa_file=args.sa_file,
                                ccs_fofn=args.ccs_fofn,
                                done_filename=args.done_filename,
                                blasr_nproc=args.blasr_nproc,
                                use_finer_qv=args.use_finer_qv)
        elif cmd == "split":
            obj = IcePartialSplit(root_dir=args.root_dir,
                                  nfl_fa=args.nfl_fa,
                                  N=args.N)
        elif cmd == "i":
            obj = IcePartialI(root_dir=args.root_dir,
                              i=args.i,
                              ccs_fofn=args.ccs_fofn,
                              blasr_nproc=args.blasr_nproc)
        elif cmd == "merge":
            obj = IcePartialMerge(root_dir=args.root_dir,
                                  N=args.N)
        else:
            # Raised inside the try on purpose: an unknown sub-command is
            # logged and reported through the same return-code path.
            raise ValueError(
                "Unknown command passed to {f}: {cmd}.".format(
                    f=op.basename(__file__), cmd=cmd))

        cmd_str = obj.cmd_str()
        logging.info("Running CMD: {cmd_str}".format(cmd_str=cmd_str))
        obj.run()
    except Exception:
        # Top-level boundary: log the traceback and convert the failure
        # into a non-zero return code for the caller.
        logging.exception("Exiting {cmd_str} with return code 1.".format(
            cmd_str=cmd_str))
        return 1
    return 0
def run(self):
    """Run the polishing pipeline end to end.

    Steps, in execution order:

    1. Call ``self.get_sa_file()`` for the final consensus suffix array.
    2. Default ``self.fasta_fofn`` to ``<nfl_dir>/input.fasta.fofn`` when
       it is None, then call ``convert_fofn_to_fasta`` on
       ``self.bas_fofn``.
    3. Split ``self.nfl_fa`` (non-full-length reads) with ``splitFasta``
       into files of ``self.ice_opts.nfl_reads_per_split`` reads each.
    4. Run ``IceAllPartials`` over the split files against
       ``self.final_consensus_fa``, writing ``self.nfl_all_pickle_fn``.
    5. Run ``IceQuiver``. Per the original author's description this bins
       clusters, builds cmp.h5 files and calls quiver to polish the
       isoforms in each bin.
    6. Run ``IcePostQuiver`` with the hq/lq isoform fa|fq output paths.
       Per the original description this separates high quality from
       low quality isoforms by a QV error threshold.

    The three stage objects stay available afterwards as ``self.icep``,
    ``self.iceq`` and ``self.icepq``.
    """
    info = logging.INFO

    # --- Suffix array for final.consensus.fa -------------------------------
    generating_msg = "Generating suffix array for {f}".format(
        f=self.final_consensus_sa)
    self.add_log(generating_msg, level=info)
    # The return value is not kept: the suffix array path handed to
    # IceAllPartials is recomputed further down.
    self.get_sa_file()

    # --- input.fasta.fofn from the bas/bax.h5 fofn -------------------------
    self.add_log("Creating fasta fofn from bas/bax.h5 fofn", level=info)
    if self.fasta_fofn is None:
        self.fasta_fofn = op.join(self.nfl_dir, "input.fasta.fofn")
    self.add_log("bas fofn={f}".format(f=self.bas_fofn))
    self.add_log("fasta fofn={f}".format(f=self.fasta_fofn))
    convert_fofn_to_fasta(fofn_filename=self.bas_fofn,
                          out_filename=self.fasta_fofn,
                          fasta_out_dir=self.nfl_dir)

    # --- Split the nfl reads into smaller fasta files ----------------------
    # Chunks are written into self.nfl_dir with the prefix "input.split".
    split_head = "Splitting {nfl} into ".format(nfl=self.nfl_fa)
    split_tail = "smaller files each containing {n} reads.".format(
        n=self.ice_opts.nfl_reads_per_split)
    self.add_log(split_head + split_tail, level=info)
    self._nfl_splitted_fas = splitFasta(
        input_fasta=self.nfl_fa,
        reads_per_split=self.ice_opts.nfl_reads_per_split,
        out_dir=self.nfl_dir,
        out_prefix="input.split")
    self.add_log("Splitted files are: " + "\n".join(self._nfl_splitted_fas),
                 level=info)

    # --- Process the nfl reads of every split fasta ------------------------
    self.add_log("IceAllPartials initiated.", level=info)
    # NOTE(review): the existence check is made on the consensus *fasta*,
    # not on the suffix array file itself - confirm this is intended.
    if op.exists(self.final_consensus_fa):
        suffix_array = self.final_consensus_sa
    else:
        suffix_array = None
    self.icep = IceAllPartials(root_dir=self.root_dir,
                               fasta_filenames=self._nfl_splitted_fas,
                               ref_fasta=self.final_consensus_fa,
                               out_pickle=self.nfl_all_pickle_fn,
                               sge_opts=self.sge_opts,
                               sa_file=suffix_array,
                               ccs_fofn=self.ccs_fofn)
    self.icep.run()
    self.add_log("IceAllPartials completed.", level=info)

    # --- Quiver polishing ---------------------------------------------------
    self.add_log("IceQuiver initiated.", level=info)
    self.iceq = IceQuiver(root_dir=self.root_dir,
                          bas_fofn=self.bas_fofn,
                          fasta_fofn=self.fasta_fofn,
                          sge_opts=self.sge_opts)
    self.iceq.run()
    self.add_log("IceQuiver finished.", level=info)

    # --- Post-quiver collection of hq / lq isoforms -------------------------
    self.add_log("IcePostQuiver initiated.", level=info)
    self.icepq = IcePostQuiver(root_dir=self.root_dir,
                               hq_isoforms_fa=self.hq_isoforms_fa,
                               hq_isoforms_fq=self.hq_isoforms_fq,
                               lq_isoforms_fa=self.lq_isoforms_fa,
                               lq_isoforms_fq=self.lq_isoforms_fq,
                               use_sge=self.sge_opts.use_sge,
                               quit_if_not_done=False)
    self.icepq.run()
    self.add_log("IcePostQuiver finished.", level=info)
def run(self):
    """Run the polishing pipeline end to end (daligner-based variant).

    Steps, in execution order:

    1. Default ``self.fasta_fofn`` to ``<nfl_dir>/input.fasta.fofn`` when
       it is None; call ``convert_fofn_to_fasta`` on ``self.bas_fofn``
       only if that fofn file does not exist yet.
    2. Split ``self.nfl_fa`` (non-full-length reads) with ``splitFasta``
       into files of ``self.nfl_reads_per_split`` reads each.
    3. Build a dazz DB for ``self.final_consensus_fa``.
    4. Run ``IceAllPartials`` over the split files against
       ``self.final_consensus_fa``, writing ``self.nfl_all_pickle_fn``.
    5. Run ``IceQuiver``. Per the original author's description this bins
       clusters, builds cmp.h5 files and calls quiver to polish the
       isoforms in each bin.
    6. Run ``IceQuiverPostprocess`` configured by ``self.ipq_opts``. Per
       the original description this separates high quality from low
       quality isoforms by a QV error threshold.

    The three stage objects stay available afterwards as ``self.icep``,
    ``self.iceq`` and ``self.icepq``.
    """
    info = logging.INFO

    # No suffix array is generated here: the sa_file argument of
    # IceAllPartials is passed as None further down.

    # --- input.fasta.fofn from the bas/bax.h5 fofn -------------------------
    self.add_log("Creating fasta fofn from bas/bax.h5 fofn", level=info)
    if self.fasta_fofn is None:
        self.fasta_fofn = op.join(self.nfl_dir, "input.fasta.fofn")
    self.add_log("bas fofn={f}".format(f=self.bas_fofn))
    self.add_log("fasta fofn={f}".format(f=self.fasta_fofn))

    if not op.exists(self.fasta_fofn):
        convert_fofn_to_fasta(fofn_filename=self.bas_fofn,
                              out_filename=self.fasta_fofn,
                              fasta_out_dir=self.nfl_dir,
                              cpus=self.sge_opts.blasr_nproc)
    else:
        # An existing fofn is reused as-is.
        self.add_log("No need to run convert_fofn_to_fasta.")

    # --- Split the nfl reads into smaller fasta files ----------------------
    # Chunks are written into self.nfl_dir with the prefix "input.split".
    split_head = "Splitting {nfl} into ".format(nfl=self.nfl_fa)
    split_tail = "smaller files each containing {n} reads.".format(
        n=self.nfl_reads_per_split)
    self.add_log(split_head + split_tail, level=info)
    self._nfl_splitted_fas = splitFasta(
        input_fasta=self.nfl_fa,
        reads_per_split=self.nfl_reads_per_split,
        out_dir=self.nfl_dir,
        out_prefix="input.split")
    self.add_log("Splitted files are: " + "\n".join(self._nfl_splitted_fas),
                 level=info)

    # --- Dazz DB for the final consensus fasta ------------------------------
    consensus_handler = DazzIDHandler(self.final_consensus_fa, False)
    DalignerRunner.make_db(consensus_handler.dazz_filename)
    self.add_log("Dazz DB made for: " + consensus_handler.dazz_filename,
                 level=info)

    # --- Process the nfl reads of every split fasta ------------------------
    self.add_log("Initializing IceAllPartials.", level=info)
    # TODO: since we are switching to daligner, sa_file is just given as
    # None for now; remove sa_file completely once daligner is mature.
    self.icep = IceAllPartials(root_dir=self.root_dir,
                               fasta_filenames=self._nfl_splitted_fas,
                               ref_fasta=self.final_consensus_fa,
                               out_pickle=self.nfl_all_pickle_fn,
                               sge_opts=self.sge_opts,
                               sa_file=None,
                               ccs_fofn=self.ccs_fofn)
    self.add_log("IceAllPartials log: {f}.".format(f=self.icep.log_fn),
                 level=info)
    self.icep.run()
    self.add_log("IceAllPartials completed.", level=info)

    # --- Quiver polishing ---------------------------------------------------
    self.add_log("Initializing IceQuiver.", level=info)
    self.iceq = IceQuiver(root_dir=self.root_dir,
                          bas_fofn=self.bas_fofn,
                          fasta_fofn=self.fasta_fofn,
                          sge_opts=self.sge_opts)
    self.add_log("IceQuiver log: {f}.".format(f=self.iceq.log_fn),
                 level=info)
    self.iceq.run()
    self.add_log("IceQuiver finished.", level=info)

    # --- Quiver post-processing ---------------------------------------------
    self.add_log("Initializing IceQuiverPostprocess.", level=info)
    self.icepq = IceQuiverPostprocess(root_dir=self.root_dir,
                                      use_sge=self.sge_opts.use_sge,
                                      quit_if_not_done=False,
                                      ipq_opts=self.ipq_opts)
    self.add_log("IceQuiverPostprocess log: {f}.".format(f=self.icepq.log_fn),
                 level=info)
    self.icepq.run()
    self.add_log("IceQuiverPostprocess finished.", level=info)