def tofu_wrap_main():
    """Top-level tofu_wrap driver.

    Pipeline: (1) split input full-length non-chimeric (flnc) reads into
    size/primer bins, (2) ensure a fasta FOFN exists for Quiver, (3) run
    ICE/Quiver per bin, (4) combine HQ/LQ Quiver results, (5) collapse HQ
    isoforms against a GMAP reference, (6) compute abundance.

    Exits with -1 on invalid arguments; raises Exception on missing input
    files. All parameters come from the command line (argparse).
    """
    import ast  # local import: only needed to parse --bin_manual safely

    parser = argparse.ArgumentParser()
    add_cluster_arguments(parser)
    parser.add_argument("--bin_size_kb", default=1, type=int, help="Bin size by kb (default: 1)")
    parser.add_argument("--bin_manual", default=None, help="Bin manual (ex: (1,2,3,5)), overwrites bin_size_kb")
    parser.add_argument("--bin_by_primer", default=False, action="store_true",
                        help="Instead of binning by size, bin by primer (overwrites --bin_size_kb and --bin_manual)")
    parser.add_argument("--max_base_limit_MB", default=600, type=int,
                        help="Maximum number of bases per partitioned bin, in MB (default: 600)")
    parser.add_argument("--gmap_name", default="hg19", help="GMAP DB name (default: hg19)")
    parser.add_argument("--gmap_db", default="/home/UNIXHOME/etseng/share/gmap_db_new/",
                        help="GMAP DB location (default: /home/UNIXHOME/etseng/share/gmap_db_new/)")
    parser.add_argument("--output_seqid_prefix", type=str, default=None,
                        help="Output seqid prefix. If not given, a random ID is generated")
    parser.add_argument("--mem_debug", default=False, action="store_true", help=argparse.SUPPRESS)
    args = parser.parse_args()

    # DEBUG: memory_profiler is only imported when profiling is requested,
    # so normal runs do not require the package.
    if args.mem_debug:
        from memory_profiler import memory_usage

    # #################################################################
    # SANITY CHECKS
    if not args.quiver:
        print >> sys.stderr, "--quiver must be turned on for tofu_wrap. Quit."
        sys.exit(-1)
    if args.nfl_fa is None:
        print >> sys.stderr, "--nfl_fa must be provided for tofu_wrap. Quit."
        sys.exit(-1)
    if not os.path.exists(args.gmap_db):
        print >> sys.stderr, "GMAP DB location not valid: {0}. Quit.".format(args.gmap_db)
        sys.exit(-1)
    if not os.path.exists(os.path.join(args.gmap_db, args.gmap_name)):
        print >> sys.stderr, "GMAP name not valid: {0}. Quit.".format(args.gmap_name)
        sys.exit(-1)
    # #################################################################

    # BUG FIX: original read bare `output_seqid_prefix` (NameError);
    # the user-provided prefix lives on `args`.
    tofu_prefix = binascii.b2a_hex(os.urandom(3)) if args.output_seqid_prefix is None \
        else args.output_seqid_prefix

    ice_opts = IceOptions(cDNA_size=args.cDNA_size, quiver=args.quiver)
    sge_opts = SgeOptions(unique_id=args.unique_id,
                          use_sge=args.use_sge,
                          max_sge_jobs=args.max_sge_jobs,
                          blasr_nproc=args.blasr_nproc,
                          quiver_nproc=args.quiver_nproc)
    ipq_opts = IceQuiverHQLQOptions(qv_trim_5=args.qv_trim_5,
                                    qv_trim_3=args.qv_trim_3,
                                    hq_quiver_min_accuracy=args.hq_quiver_min_accuracy,
                                    hq_isoforms_fa=args.hq_isoforms_fa,
                                    hq_isoforms_fq=args.hq_isoforms_fq,
                                    lq_isoforms_fa=args.lq_isoforms_fa,
                                    lq_isoforms_fq=args.lq_isoforms_fq)
    # ex: all_quivered_hq.100_30_0.99.fastq
    quiver_hq_filename = "all_quivered_hq.{0}_{1}_{2:.2f}.fastq".format(
        args.qv_trim_5, args.qv_trim_3, args.hq_quiver_min_accuracy)
    quiver_lq_filename = "all_quivered_lq.fastq"

    # (1) separate input flnc into size bins or primers
    if args.bin_by_primer:
        split_files = sep_flnc_by_primer(args.flnc_fa, args.root_dir)
    else:
        # SECURITY: ast.literal_eval instead of eval() — --bin_manual is
        # user-controlled text such as "(1,2,3,5)"; literal_eval parses
        # Python literals only and cannot execute code.
        bin_manual = ast.literal_eval(args.bin_manual) if args.bin_manual is not None else None
        split_files = sep_flnc_by_size(args.flnc_fa, args.root_dir,
                                       bin_size_kb=args.bin_size_kb,
                                       bin_manual=bin_manual,
                                       max_base_limit_MB=args.max_base_limit_MB)
    print >> sys.stderr, "split input {0} into {1} bins".format(args.flnc_fa, len(split_files))

    # (2) if fasta_fofn already is there, use it; otherwise make it first
    if args.quiver and args.fasta_fofn is None:
        print >> sys.stderr, "Making fasta_fofn now"
        nfl_dir = os.path.abspath(os.path.join(args.root_dir, "fasta_fofn_files"))
        if not os.path.exists(nfl_dir):
            os.makedirs(nfl_dir)
        args.fasta_fofn = os.path.join(nfl_dir, 'input.fasta.fofn')
        print >> sys.stderr, "fasta_fofn", args.fasta_fofn
        print >> sys.stderr, "nfl_dir", nfl_dir
        convert_fofn_to_fasta(fofn_filename=args.bas_fofn,
                              out_filename=args.fasta_fofn,
                              fasta_out_dir=nfl_dir,
                              cpus=args.blasr_nproc)
    else:
        # A fasta_fofn was supplied: verify it and every file it lists.
        if not os.path.exists(args.fasta_fofn):
            raise Exception("fasta_fofn {0} does not exist!".format(args.fasta_fofn))
        for line in open(args.fasta_fofn):
            fn = line.strip()  # renamed from `file` (shadowed the builtin)
            if len(fn) > 0 and not os.path.exists(fn):
                raise Exception("File {0} does not exists in {1}".format(fn, args.fasta_fofn))

    # (3) run ICE/Quiver (the whole thing), providing the fasta_fofn
    split_dirs = []
    for cur_file in split_files:
        cur_dir = os.path.dirname(cur_file)
        split_dirs.append(cur_dir)
        cur_out_cons = os.path.join(cur_dir, args.consensusFa)
        hq_quiver = os.path.join(cur_dir, quiver_hq_filename)
        if os.path.exists(hq_quiver):
            # Bin already finished in a previous run; resume-friendly skip.
            print >> sys.stderr, "{0} already exists. SKIP!".format(hq_quiver)
            continue
        print >> sys.stderr, "running ICE/Quiver on", cur_dir
        start_t = time.time()
        obj = Cluster(root_dir=cur_dir,
                      flnc_fa=cur_file,
                      nfl_fa=args.nfl_fa,
                      bas_fofn=args.bas_fofn,
                      ccs_fofn=args.ccs_fofn,
                      fasta_fofn=args.fasta_fofn,
                      out_fa=cur_out_cons,
                      sge_opts=sge_opts,
                      ice_opts=ice_opts,
                      ipq_opts=ipq_opts,
                      report_fn=args.report_fn,
                      summary_fn=args.summary_fn,
                      nfl_reads_per_split=args.nfl_reads_per_split)
        # DEBUG: wrap obj.run in memory_usage sampling when --mem_debug is on.
        if args.mem_debug:
            mem_usage = memory_usage(obj.run, interval=60)
            end_t = time.time()
            with open('mem_debug.log', 'a') as f:
                f.write("Running ICE/Quiver on {0} took {1} secs.\n".format(cur_dir, end_t - start_t))
                f.write("Maximum memory usage: {0}\n".format(max(mem_usage)))
                f.write("Memory usage: {0}\n".format(mem_usage))
        else:
            obj.run()

    combined_dir = os.path.join(args.root_dir, 'combined')
    if not os.path.exists(combined_dir):
        os.makedirs(combined_dir)

    # (4) combine quivered HQ/LQ results
    hq_filename, lq_filename, hq_pre_dict, lq_pre_dict = \
        combine_quiver_results(split_dirs, combined_dir,
                               quiver_hq_filename, quiver_lq_filename,
                               tofu_prefix)
    with open(os.path.join(args.root_dir, 'combined', 'combined.hq_lq_pre_dict.pickle'), 'w') as f:
        dump({'HQ': hq_pre_dict, 'LQ': lq_pre_dict}, f)

    # (5) collapse quivered HQ results
    collapse_prefix_hq = run_collapse_sam(hq_filename, args.gmap_db, args.gmap_name,
                                          cpus=args.blasr_nproc)

    # (6) make abundance
    get_abundance(collapse_prefix_hq, hq_pre_dict, collapse_prefix_hq)
def tofu_wrap_main():
    """Top-level tofu_wrap driver (extended variant).

    Pipeline: (1) split input full-length non-chimeric (flnc) reads into
    size/primer bins, (2) ensure a fasta FOFN exists for Quiver, (3) run
    ICE/Quiver per bin, (4) combine HQ/LQ Quiver results, (5) collapse HQ
    isoforms against a GMAP reference, (6) compute abundance, (7) filter
    by FL count and remove 5' degraded subset isoforms.

    Exits with -1 on invalid arguments; raises Exception on missing input
    files. All parameters come from the command line (argparse).
    """
    import ast  # local import: only needed to parse --bin_manual safely

    parser = argparse.ArgumentParser(prog='tofu_wrap')
    add_cluster_arguments(parser, show_sge_env_name=True, show_sge_queue=True)
    parser.add_argument("--bin_size_kb", default=1, type=int, help="Bin size by kb (default: 1)")
    parser.add_argument("--bin_manual", default=None, help="Bin manual (ex: (1,2,3,5)), overwrites bin_size_kb")
    parser.add_argument("--bin_by_primer", default=False, action="store_true",
                        help="Instead of binning by size, bin by primer (overwrites --bin_size_kb and --bin_manual)")
    parser.add_argument("--max_base_limit_MB", default=600, type=int,
                        help="Maximum number of bases per partitioned bin, in MB (default: 600)")
    parser.add_argument("--gmap_name", default="hg19", help="GMAP DB name (default: hg19)")
    parser.add_argument("--gmap_db", default="/home/UNIXHOME/etseng/share/gmap_db_new/",
                        help="GMAP DB location (default: /home/UNIXHOME/etseng/share/gmap_db_new/)")
    parser.add_argument("--output_seqid_prefix", type=str, default=None,
                        help="Output seqid prefix. If not given, a random ID is generated")
    parser.add_argument("--mem_debug", default=False, action="store_true", help=argparse.SUPPRESS)
    parser.add_argument("--max_fuzzy_junction", default=5, type=int,
                        help="Max fuzzy junction (default: 5 bp)")
    # argparse's version action prints and exits on its own.
    parser.add_argument("--version", action='version', version='%(prog)s ' + str(get_version()))
    args = parser.parse_args()

    # DEBUG: memory_profiler is only imported when profiling is requested,
    # so normal runs do not require the package.
    if args.mem_debug:
        from memory_profiler import memory_usage

    # #################################################################
    # SANITY CHECKS
    if not args.quiver:
        print >> sys.stderr, "--quiver must be turned on for tofu_wrap. Quit."
        sys.exit(-1)
    if args.nfl_fa is None:
        print >> sys.stderr, "--nfl_fa must be provided for tofu_wrap. Quit."
        sys.exit(-1)
    if not os.path.exists(args.gmap_db):
        print >> sys.stderr, "GMAP DB location not valid: {0}. Quit.".format(args.gmap_db)
        sys.exit(-1)
    if not os.path.exists(os.path.join(args.gmap_db, args.gmap_name)):
        print >> sys.stderr, "GMAP name not valid: {0}. Quit.".format(args.gmap_name)
        sys.exit(-1)
    # #################################################################

    # Random 6-hex-char prefix unless the user supplied one.
    tofu_prefix = binascii.b2a_hex(os.urandom(3)) if args.output_seqid_prefix is None \
        else args.output_seqid_prefix

    ice_opts = IceOptions(quiver=args.quiver,
                          use_finer_qv=args.use_finer_qv,
                          targeted_isoseq=args.targeted_isoseq,
                          ece_penalty=args.ece_penalty,
                          ece_min_len=args.ece_min_len,
                          )
    sge_opts = SgeOptions(unique_id=args.unique_id,
                          use_sge=args.use_sge,
                          max_sge_jobs=args.max_sge_jobs,
                          blasr_nproc=args.blasr_nproc,
                          quiver_nproc=args.quiver_nproc,
                          gcon_nproc=args.gcon_nproc,
                          sge_env_name=args.sge_env_name,
                          sge_queue=args.sge_queue)
    ipq_opts = IceQuiverHQLQOptions(qv_trim_5=args.qv_trim_5,
                                    qv_trim_3=args.qv_trim_3,
                                    hq_quiver_min_accuracy=args.hq_quiver_min_accuracy,
                                    hq_isoforms_fa=args.hq_isoforms_fa,
                                    hq_isoforms_fq=args.hq_isoforms_fq,
                                    lq_isoforms_fa=args.lq_isoforms_fa,
                                    lq_isoforms_fq=args.lq_isoforms_fq)
    # ex: all_quivered_hq.100_30_0.99.fastq
    quiver_hq_filename = "all_quivered_hq.{0}_{1}_{2:.2f}.fastq".format(
        args.qv_trim_5, args.qv_trim_3, args.hq_quiver_min_accuracy)
    quiver_lq_filename = "all_quivered_lq.fastq"

    # (1) separate input flnc into size bins or primers
    if args.bin_by_primer:
        split_files = sep_flnc_by_primer(args.flnc_fa, os.path.abspath(args.root_dir))
    else:
        # SECURITY: ast.literal_eval instead of eval() — --bin_manual is
        # user-controlled text such as "(1,2,3,5)"; literal_eval parses
        # Python literals only and cannot execute code.
        bin_manual = ast.literal_eval(args.bin_manual) if args.bin_manual is not None else None
        split_files = sep_flnc_by_size(args.flnc_fa, args.root_dir,
                                       bin_size_kb=args.bin_size_kb,
                                       bin_manual=bin_manual,
                                       max_base_limit_MB=args.max_base_limit_MB)
    print >> sys.stderr, "split input {0} into {1} bins".format(args.flnc_fa, len(split_files))

    # (2) if fasta_fofn already is there, use it; otherwise make it first
    if args.quiver and args.fasta_fofn is None:
        print >> sys.stderr, "Making fasta_fofn now"
        nfl_dir = os.path.abspath(os.path.join(args.root_dir, "fasta_fofn_files"))
        if not os.path.exists(nfl_dir):
            os.makedirs(nfl_dir)
        args.fasta_fofn = os.path.join(nfl_dir, 'input.fasta.fofn')
        print >> sys.stderr, "fasta_fofn", args.fasta_fofn
        print >> sys.stderr, "nfl_dir", nfl_dir
        convert_fofn_to_fasta(fofn_filename=args.bas_fofn,
                              out_filename=args.fasta_fofn,
                              fasta_out_dir=nfl_dir,
                              cpus=args.blasr_nproc)
    else:
        # A fasta_fofn was supplied: verify it and every file it lists.
        if not os.path.exists(args.fasta_fofn):
            raise Exception("fasta_fofn {0} does not exist!".format(args.fasta_fofn))
        for line in open(args.fasta_fofn):
            fn = line.strip()  # renamed from `file` (shadowed the builtin)
            if len(fn) > 0 and not os.path.exists(fn):
                raise Exception("File {0} does not exists in {1}".format(fn, args.fasta_fofn))

    # (3) run ICE/Quiver (the whole thing), providing the fasta_fofn
    split_dirs = []
    for cur_file in split_files:
        cur_dir = os.path.abspath(os.path.dirname(cur_file))
        split_dirs.append(cur_dir)
        cur_out_cons = os.path.join(cur_dir, args.consensusFa)
        hq_quiver = os.path.join(cur_dir, quiver_hq_filename)
        if os.path.exists(hq_quiver):
            # Bin already finished in a previous run; resume-friendly skip.
            print >> sys.stderr, "{0} already exists. SKIP!".format(hq_quiver)
            continue
        print >> sys.stderr, "running ICE/Quiver on", cur_dir
        start_t = time.time()
        # realpath() so downstream (possibly SGE-distributed) steps see
        # absolute, symlink-resolved inputs regardless of cwd.
        obj = Cluster(root_dir=cur_dir,
                      flnc_fa=cur_file,
                      nfl_fa=realpath(args.nfl_fa),
                      bas_fofn=realpath(args.bas_fofn),
                      ccs_fofn=realpath(args.ccs_fofn),
                      fasta_fofn=realpath(args.fasta_fofn),
                      out_fa=cur_out_cons,
                      sge_opts=sge_opts,
                      ice_opts=ice_opts,
                      ipq_opts=ipq_opts,
                      report_fn=args.report_fn,
                      summary_fn=args.summary_fn,
                      nfl_reads_per_split=args.nfl_reads_per_split)
        # DEBUG: wrap obj.run in memory_usage sampling when --mem_debug is on.
        if args.mem_debug:
            mem_usage = memory_usage(obj.run, interval=60)
            end_t = time.time()
            with open('mem_debug.log', 'a') as f:
                f.write("Running ICE/Quiver on {0} took {1} secs.\n".format(cur_dir, end_t - start_t))
                f.write("Maximum memory usage: {0}\n".format(max(mem_usage)))
                f.write("Memory usage: {0}\n".format(mem_usage))
        else:
            obj.run()

    combined_dir = os.path.join(args.root_dir, 'combined')
    if not os.path.exists(combined_dir):
        os.makedirs(combined_dir)

    # (4) combine quivered HQ/LQ results
    hq_filename, lq_filename, hq_pre_dict, lq_pre_dict = \
        combine_quiver_results(split_dirs, combined_dir,
                               quiver_hq_filename, quiver_lq_filename,
                               tofu_prefix)
    with open(os.path.join(args.root_dir, 'combined', 'combined.hq_lq_pre_dict.pickle'), 'w') as f:
        dump({'HQ': hq_pre_dict, 'LQ': lq_pre_dict}, f)

    # (5) collapse quivered HQ results
    collapse_prefix_hq = run_collapse_sam(hq_filename, args.gmap_db, args.gmap_name,
                                          cpus=args.blasr_nproc,
                                          max_fuzzy_junction=args.max_fuzzy_junction,
                                          dun_merge_5_shorter=True)

    # (6) make abundance
    get_abundance(collapse_prefix_hq, hq_pre_dict, collapse_prefix_hq)

    # (7) run filtering & removing subsets in no5merge
    # Targeted runs use a stricter FL count cutoff (5) than whole-sample runs (2).
    if args.targeted_isoseq:
        run_filtering_by_count(collapse_prefix_hq, collapse_prefix_hq + '.min_fl_5', min_count=5)
        run_filtering_away_subsets(collapse_prefix_hq + '.min_fl_5',
                                   collapse_prefix_hq + '.min_fl_5.filtered',
                                   args.max_fuzzy_junction)
    else:
        run_filtering_by_count(collapse_prefix_hq, collapse_prefix_hq + '.min_fl_2', min_count=2)
        run_filtering_away_subsets(collapse_prefix_hq + '.min_fl_2',
                                   collapse_prefix_hq + '.min_fl_2.filtered',
                                   args.max_fuzzy_junction)
def tofu_wrap_main():
    """Top-level tofu_wrap driver (minimal variant, no mem-debug/base-limit).

    Pipeline: (1) split input full-length non-chimeric (flnc) reads into
    size/primer bins, (2) ensure a fasta FOFN exists for Quiver, (3) run
    ICE/Quiver per bin, (4) combine HQ/LQ Quiver results, (5) collapse HQ
    isoforms against a GMAP reference, (6) compute abundance.

    Exits with -1 on invalid arguments; raises Exception on missing input
    files. All parameters come from the command line (argparse).
    """
    import ast  # local import: only needed to parse --bin_manual safely

    parser = argparse.ArgumentParser()
    add_cluster_arguments(parser)
    parser.add_argument("--bin_size_kb", default=1, type=int, help="Bin size by kb (default: 1)")
    parser.add_argument("--bin_manual", default=None, help="Bin manual (ex: (1,2,3,5)), overwrites bin_size_kb")
    parser.add_argument("--bin_by_primer", default=False, action="store_true",
                        help="Instead of binning by size, bin by primer (overwrites --bin_size_kb and --bin_manual)")
    parser.add_argument("--gmap_name", default="hg19", help="GMAP DB name (default: hg19)")
    parser.add_argument("--gmap_db", default="/home/UNIXHOME/etseng/share/gmap_db_new/",
                        help="GMAP DB location (default: /home/UNIXHOME/etseng/share/gmap_db_new/)")
    parser.add_argument("--output_seqid_prefix", type=str, default=None,
                        help="Output seqid prefix. If not given, a random ID is generated")
    args = parser.parse_args()

    # #################################################################
    # SANITY CHECKS
    if not args.quiver:
        print >> sys.stderr, "--quiver must be turned on for tofu_wrap. Quit."
        sys.exit(-1)
    if args.nfl_fa is None:
        print >> sys.stderr, "--nfl_fa must be provided for tofu_wrap. Quit."
        sys.exit(-1)
    if not os.path.exists(args.gmap_db):
        print >> sys.stderr, "GMAP DB location not valid: {0}. Quit.".format(args.gmap_db)
        sys.exit(-1)
    if not os.path.exists(os.path.join(args.gmap_db, args.gmap_name)):
        print >> sys.stderr, "GMAP name not valid: {0}. Quit.".format(args.gmap_name)
        sys.exit(-1)
    # #################################################################

    # BUG FIX: original read bare `output_seqid_prefix` (NameError);
    # the user-provided prefix lives on `args`.
    tofu_prefix = binascii.b2a_hex(os.urandom(3)) if args.output_seqid_prefix is None \
        else args.output_seqid_prefix

    ice_opts = IceOptions(cDNA_size=args.cDNA_size, quiver=args.quiver)
    sge_opts = SgeOptions(unique_id=args.unique_id,
                          use_sge=args.use_sge,
                          max_sge_jobs=args.max_sge_jobs,
                          blasr_nproc=args.blasr_nproc,
                          quiver_nproc=args.quiver_nproc)
    ipq_opts = IceQuiverHQLQOptions(qv_trim_5=args.qv_trim_5,
                                    qv_trim_3=args.qv_trim_3,
                                    hq_quiver_min_accuracy=args.hq_quiver_min_accuracy,
                                    hq_isoforms_fa=args.hq_isoforms_fa,
                                    hq_isoforms_fq=args.hq_isoforms_fq,
                                    lq_isoforms_fa=args.lq_isoforms_fa,
                                    lq_isoforms_fq=args.lq_isoforms_fq)
    # ex: all_quivered_hq.100_30_0.99.fastq
    quiver_hq_filename = "all_quivered_hq.{0}_{1}_{2:.2f}.fastq".format(
        args.qv_trim_5, args.qv_trim_3, args.hq_quiver_min_accuracy)
    quiver_lq_filename = "all_quivered_lq.fastq"

    # (1) separate input flnc into size bins or primers
    if args.bin_by_primer:
        split_files = sep_flnc_by_primer(args.flnc_fa, args.root_dir)
    else:
        # SECURITY: ast.literal_eval instead of eval() — --bin_manual is
        # user-controlled text such as "(1,2,3,5)"; literal_eval parses
        # Python literals only and cannot execute code.
        bin_manual = ast.literal_eval(args.bin_manual) if args.bin_manual is not None else None
        split_files = sep_flnc_by_size(args.flnc_fa, args.root_dir,
                                       bin_size_kb=args.bin_size_kb,
                                       bin_manual=bin_manual)
    print >> sys.stderr, "split input {0} into {1} bins".format(args.flnc_fa, len(split_files))

    # (2) if fasta_fofn already is there, use it; otherwise make it first
    if args.quiver and args.fasta_fofn is None:
        print >> sys.stderr, "Making fasta_fofn now"
        nfl_dir = os.path.abspath(os.path.join(args.root_dir, "fasta_fofn_files"))
        if not os.path.exists(nfl_dir):
            os.makedirs(nfl_dir)
        args.fasta_fofn = os.path.join(nfl_dir, 'input.fasta.fofn')
        print >> sys.stderr, "fasta_fofn", args.fasta_fofn
        print >> sys.stderr, "nfl_dir", nfl_dir
        convert_fofn_to_fasta(fofn_filename=args.bas_fofn,
                              out_filename=args.fasta_fofn,
                              fasta_out_dir=nfl_dir,
                              cpus=args.blasr_nproc)
    else:
        # A fasta_fofn was supplied: verify it and every file it lists.
        if not os.path.exists(args.fasta_fofn):
            raise Exception("fasta_fofn {0} does not exist!".format(args.fasta_fofn))
        for line in open(args.fasta_fofn):
            fn = line.strip()  # renamed from `file` (shadowed the builtin)
            if len(fn) > 0 and not os.path.exists(fn):
                raise Exception("File {0} does not exists in {1}".format(fn, args.fasta_fofn))

    # (3) run ICE/Quiver (the whole thing), providing the fasta_fofn
    split_dirs = []
    for cur_file in split_files:
        cur_dir = os.path.dirname(cur_file)
        split_dirs.append(cur_dir)
        cur_out_cons = os.path.join(cur_dir, args.consensusFa)
        hq_quiver = os.path.join(cur_dir, quiver_hq_filename)
        if os.path.exists(hq_quiver):
            # Bin already finished in a previous run; resume-friendly skip.
            print >> sys.stderr, "{0} already exists. SKIP!".format(hq_quiver)
            continue
        print >> sys.stderr, "running ICE/Quiver on", cur_dir
        obj = Cluster(root_dir=cur_dir,
                      flnc_fa=cur_file,
                      nfl_fa=args.nfl_fa,
                      bas_fofn=args.bas_fofn,
                      ccs_fofn=args.ccs_fofn,
                      fasta_fofn=args.fasta_fofn,
                      out_fa=cur_out_cons,
                      sge_opts=sge_opts,
                      ice_opts=ice_opts,
                      ipq_opts=ipq_opts,
                      report_fn=args.report_fn,
                      summary_fn=args.summary_fn,
                      nfl_reads_per_split=args.nfl_reads_per_split)
        obj.run()

    combined_dir = os.path.join(args.root_dir, 'combined')
    if not os.path.exists(combined_dir):
        os.makedirs(combined_dir)

    # (4) combine quivered HQ/LQ results
    hq_filename, lq_filename, hq_pre_dict, lq_pre_dict = \
        combine_quiver_results(split_dirs, combined_dir,
                               quiver_hq_filename, quiver_lq_filename,
                               prefix=tofu_prefix)
    # NOTE(review): this writes the pickle to the current working directory,
    # not to root_dir/combined/ as sibling variants do — confirm intended.
    with open('combined.hq_lq_pre_dict.pickle', 'w') as f:
        dump({'HQ': hq_pre_dict, 'LQ': lq_pre_dict}, f)

    # (5) collapse quivered HQ results
    collapse_prefix_hq = run_collapse_sam(hq_filename, args.gmap_db, args.gmap_name,
                                          cpus=args.blasr_nproc)

    # (6) make abundance
    get_abundance(collapse_prefix_hq, hq_pre_dict, collapse_prefix_hq)
def run(self):
    """Run classify, cluster, polish or subset.

    Dispatches on self.args.subCommand; returns 0 on success, 1 on any
    exception (which is logged with its traceback).
    """
    sub_cmd = self.args.subCommand
    try:
        if sub_cmd == 'classify':
            # Classify: separate full-length vs non-full-length reads and
            # detect chimeras using the configured thresholds.
            chim_opts = ChimeraDetectionOptions(
                min_seq_len=self.args.min_seq_len,
                min_score=self.args.min_score,
                min_dist_from_end=self.args.min_dist_from_end,
                max_adjacent_hit_dist=self.args.max_adjacent_hit_dist,
                primer_search_window=self.args.primer_search_window,
                detect_chimera_nfl=self.args.detect_chimera_nfl)
            worker = Classifier(
                reads_fn=self.args.readsFN,
                out_dir=self.args.outDir,
                out_reads_fn=self.args.outReadsFN,
                primer_fn=self.args.primerFN,
                primer_report_fn=self.args.primerReportFN,
                summary_fn=self.args.summary_fn,
                cpus=self.args.cpus,
                change_read_id=True,
                opts=chim_opts,
                out_flnc_fn=self.args.flnc_fa,
                out_nfl_fn=self.args.nfl_fa,
                ignore_polyA=self.args.ignore_polyA,
                reuse_dom=self.args.reuse_dom)
            worker.run()
        elif sub_cmd == 'cluster':
            # Cluster: ICE clustering (+ optional Quiver polish) of flnc reads.
            ice_opts = IceOptions(
                cDNA_size=self.args.cDNA_size,
                quiver=self.args.quiver,
                use_finer_qv=self.args.use_finer_qv)
            sge_opts = SgeOptions(
                unique_id=self.args.unique_id,
                use_sge=self.args.use_sge,
                max_sge_jobs=self.args.max_sge_jobs,
                blasr_nproc=self.args.blasr_nproc,
                quiver_nproc=self.args.quiver_nproc)
            ipq_opts = IceQuiverHQLQOptions(
                qv_trim_5=self.args.qv_trim_5,
                qv_trim_3=self.args.qv_trim_3,
                hq_quiver_min_accuracy=self.args.hq_quiver_min_accuracy,
                hq_isoforms_fa=self.args.hq_isoforms_fa,
                hq_isoforms_fq=self.args.hq_isoforms_fq,
                lq_isoforms_fa=self.args.lq_isoforms_fa,
                lq_isoforms_fq=self.args.lq_isoforms_fq)
            worker = Cluster(
                root_dir=self.args.root_dir,
                flnc_fa=self.args.flnc_fa,
                nfl_fa=self.args.nfl_fa,
                bas_fofn=self.args.bas_fofn,
                ccs_fofn=self.args.ccs_fofn,
                fasta_fofn=self.args.fasta_fofn,
                out_fa=self.args.consensusFa,
                sge_opts=sge_opts,
                ice_opts=ice_opts,
                ipq_opts=ipq_opts,
                report_fn=self.args.report_fn,
                summary_fn=self.args.summary_fn,
                nfl_reads_per_split=self.args.nfl_reads_per_split)
            worker.run()
        elif sub_cmd == 'subset':
            # Subset: extract reads matching the FL/non-chimeric rules.
            rules = SubsetRules(FL=self.args.FL, nonChimeric=self.args.nonChimeric)
            worker = ReadsSubsetExtractor(
                inFN=self.args.readsFN,
                outFN=self.args.outFN,
                rules=rules,
                ignore_polyA=self.args.ignore_polyA,
                printReadLengthOnly=self.args.printReadLengthOnly)
            worker.run()
        else:
            raise PBTranscriptException(
                sub_cmd,
                "Unknown command passed to pbtranscript.py:" + self.args.subName)
    except Exception:
        logging.exception("Exiting pbtranscript with return code 1.")
        return 1
    return 0
def run(self):
    """Run classify, cluster, polish or subset.

    Dispatches on self.args.subCommand; returns 0 on success, 1 on any
    exception (which is logged with its traceback).
    """
    cmd = self.args.subCommand
    try:
        if cmd == 'classify':
            # Classify: separate full-length vs non-full-length reads and
            # detect chimeras using the configured thresholds.
            opts = ChimeraDetectionOptions(
                min_seq_len=self.args.min_seq_len,
                min_score=self.args.min_score,
                min_dist_from_end=self.args.min_dist_from_end,
                max_adjacent_hit_dist=self.args.max_adjacent_hit_dist,
                primer_search_window=self.args.primer_search_window)
            obj = Classifier(
                reads_fn=self.args.readsFN,
                out_dir=self.args.outDir,
                out_reads_fn=self.args.outReadsFN,
                primer_fn=self.args.primerFN,
                primer_report_fn=self.args.primerReportFN,
                summary_fn=self.args.summary_fn,
                cpus=self.args.cpus,
                change_read_id=True,
                opts=opts,
                out_flnc_fn=self.args.flnc_fa,
                out_nfl_fn=self.args.nfl_fa,
                ignore_polyA=self.args.ignore_polyA)
            obj.run()
        elif cmd == 'cluster':
            # Cluster: ICE clustering (+ optional Quiver polish) of flnc reads.
            ice_opts = IceOptions(
                cDNA_size=self.args.cDNA_size,
                quiver=self.args.quiver)
            sge_opts = SgeOptions(
                unique_id=self.args.unique_id,
                use_sge=self.args.use_sge,
                max_sge_jobs=self.args.max_sge_jobs,
                blasr_nproc=self.args.blasr_nproc,
                quiver_nproc=self.args.quiver_nproc)
            obj = Cluster(
                root_dir=self.args.root_dir,
                flnc_fa=self.args.flnc_fa,
                nfl_fa=self.args.nfl_fa,
                bas_fofn=self.args.bas_fofn,
                ccs_fofn=self.args.ccs_fofn,
                out_fa=self.args.consensusFa,
                sge_opts=sge_opts,
                ice_opts=ice_opts,
                hq_isoforms_fa=self.args.hq_isoforms_fa,
                hq_isoforms_fq=self.args.hq_isoforms_fq,
                lq_isoforms_fa=self.args.lq_isoforms_fa,
                lq_isoforms_fq=self.args.lq_isoforms_fq,
                report_fn=self.args.report_fn,
                summary_fn=self.args.summary_fn)
            obj.run()
        elif cmd == 'subset':
            # Subset: extract reads matching the FL/non-chimeric rules.
            rules = SubsetRules(FL=self.args.FL, nonChimeric=self.args.nonChimeric)
            obj = ReadsSubsetExtractor(
                inFN=self.args.readsFN,
                outFN=self.args.outFN,
                rules=rules,
                ignore_polyA=self.args.ignore_polyA,
                printReadLengthOnly=self.args.printReadLengthOnly)
            obj.run()
        else:
            raise PBTranscriptException(
                cmd, "Unknown command passed to pbtranscript.py:" + self.args.subName)
    except Exception as err:
        # BUG FIX: logging.error(str(err)) discarded the traceback;
        # logging.exception logs the same message plus the stack trace,
        # matching the sibling dispatcher variant.
        logging.exception(str(err))
        return 1
    return 0