def get_argument_parser():
    """Build the top-level command line parser.

    The base parser is created by ``get_default_argparser_with_base_opts``
    and extended with three subcommands -- ``classify``, ``cluster`` and
    ``subset`` -- whose name is stored in ``subCommand``, plus a
    ``--profile`` flag.

    Returns:
        The fully configured parser.
    """
    parser = get_default_argparser_with_base_opts(
        get_version(),
        "Toolkit for cDNA analysis.",
        default_level="WARN")
    commands = parser.add_subparsers(dest="subCommand")

    # Subcommand: classify
    classify_desc = ("Classify reads based on whether they are "
                     "non-chimeric, full length and have their 5', "
                     "3' and poly A tail seen.")
    classify_parser = commands.add_parser('classify',
                                          description=classify_desc)
    add_classify_arguments(_wrap_parser(classify_parser))

    # Subcommand: cluster
    cluster_desc = ('Discover consensus isoforms based on '
                    'quality controlled non-chimeric, '
                    'full length reads to reference genome.')
    cluster_parser = commands.add_parser('cluster', description=cluster_desc)
    add_cluster_arguments(_wrap_parser(cluster_parser))

    # Subcommand: subset
    subset_parser = commands.add_parser(
        'subset', description='Subset annotated reads in FASTA format.')
    add_subset_arguments(_wrap_parser(subset_parser))

    parser.add_argument("--profile",
                        action="store_true",
                        help="Print runtime profile at exit")
    return parser
def get_argument_parser():
    """Return the main parser with classify/cluster/subset subcommands.

    Each subcommand gets its own subparser (the chosen name ends up in
    ``subCommand``); the matching ``add_*_arguments`` helper fills in its
    options. A global ``--profile`` switch is added last.
    """
    root = get_default_argparser_with_base_opts(get_version(),
                                                "Toolkit for cDNA analysis.",
                                                default_level="WARN")
    sub = root.add_subparsers(dest="subCommand")

    # (subcommand name, description, function that registers its arguments),
    # listed in registration order.
    subcommands = (
        ('classify',
         "Classify reads based on whether they are "
         "non-chimeric, full length and have their 5', "
         "3' and poly A tail seen.",
         add_classify_arguments),
        ('cluster',
         'Discover consensus isoforms based on '
         'quality controlled non-chimeric, '
         'full length reads to reference genome.',
         add_cluster_arguments),
        ('subset',
         'Subset annotated reads in FASTA format.',
         add_subset_arguments),
    )
    for name, description, register_arguments in subcommands:
        subparser = sub.add_parser(name, description=description)
        register_arguments(_wrap_parser(subparser))

    root.add_argument("--profile", action="store_true",
                      help="Print runtime profile at exit")
    return root
class Constants(object):
    """Constants used in pbtranscript.tasks.gather_gmap_sam"""
    # NOTE: keep the docstring above unchanged unless the parser description
    # should change too -- inside a class body ``__doc__`` resolves to the
    # class docstring, and PARSER_DESC below is assigned from it.
    PARSER_DESC = __doc__

    # Tool identifier; also the module that DRIVER executes.
    TOOL_ID = "pbtranscript.tasks.gather_gmap_sam"
    # Command prefix built from TOOL_ID (note the trailing space).
    DRIVER = "python -m %s --resolved-tool-contract " % TOOL_ID

    # Package version as reported by get_version().
    VERSION = get_version()

    CHUNK_KEY = "$chunk.sam_id"
def get_parser():
    """Create the ``tofu_wrap`` argument parser.

    Registers the two positional file arguments, then delegates to the
    shared ``add_*_arguments`` helpers (in the order below) and finally adds
    a "Misc arguments" group.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(prog='tofu_wrap')

    # Positional input / output files.
    flnc_help = ("Input full-length non-chimeric reads in FASTA or "
                 "ContigSet format "
                 "(e.g., isoseq_flnc.fasta|contigset.xml)")
    parser.add_argument("flnc_fa", type=str, help=flnc_help)

    collapsed_help = ("Output collapsed filtered isoforms in FASTA/FASTQ "
                      "format (e.g., tofu_out.fastq)")
    parser.add_argument("collapsed_filtered_fn", type=str,
                        help=collapsed_help)

    # Non-full-length reads and how they are split.
    parser = add_nfl_fa_argument(parser, positional=False, required=True)
    parser.add_argument(
        "--nfl_reads_per_split",
        type=int,
        dest="nfl_reads_per_split",
        default=60000,
        help="Number of nFL reads per split file (default: 60000)")

    parser = add_fofn_arguments(parser, ccs_fofn=True, bas_fofn=True,
                                fasta_fofn=True)

    # tofu output arguments
    parser = add_tofu_output_arguments(parser)

    # Ice options, including --quiver
    parser = add_ice_arguments(parser)

    # SGE options
    parser = add_sge_arguments(parser, blasr_nproc=True, quiver_nproc=True,
                               gcon_nproc=True)

    # IceQuiver HQ/LQ QV options
    parser = add_ice_post_quiver_hq_lq_qv_arguments(parser)

    # separate_flnc options
    parser = add_separate_flnc_arguments(parser)

    # map to gmap reference options
    parser = add_gmap_arguments(parser)

    # post mapping to genome options
    parser = add_post_mapping_to_genome_arguments(parser)

    misc = parser.add_argument_group("Misc arguments")
    # Hidden from --help output.
    misc.add_argument("--mem_debug", default=False, action="store_true",
                      help=argparse.SUPPRESS)
    misc.add_argument(
        "--keep_tmp_files", default=False, action="store_true",
        help="False: delete tmp files; True: keep tmp files (default: False).")
    version_text = '%(prog)s ' + str(get_version())
    misc.add_argument("--version", action='version', version=version_text)

    return parser
def run(self):
    """Execute ice_partial.py all|one|split|i|merge.

    Builds the worker object that matches ``self.args.subCommand``, logs its
    command string and runs it.

    Returns:
        0 on success. 1 if the subcommand is unknown or if building/running
        the worker raised an ``Exception`` (the traceback is logged).
    """
    cmd = self.args.subCommand
    logging.info("Running {f} {cmd} v{v}.".format(
        f=op.basename(__file__), cmd=cmd, v=get_version()))

    # Stays empty in the failure log if we fail before the worker exists.
    cmd_str = ""
    try:
        args = self.args
        if cmd == "all":
            sge_opts = SgeOptions(unique_id=args.unique_id,
                                  use_sge=args.use_sge,
                                  max_sge_jobs=args.max_sge_jobs,
                                  blasr_nproc=args.blasr_nproc)
            obj = IceAllPartials(
                root_dir=args.root_dir,
                fasta_filenames=args.fasta_filenames.split(','),
                ref_fasta=args.ref_fasta,
                out_pickle=args.out_pickle,
                sge_opts=sge_opts,
                ccs_fofn=args.ccs_fofn,
                tmp_dir=args.tmp_dir)
        elif cmd == "one":
            # Only assign nfl reads in the given input_fasta file to isoforms
            obj = IcePartialOne(input_fasta=args.input_fasta,
                                ref_fasta=args.ref_fasta,
                                out_pickle=args.out_pickle,
                                ccs_fofn=args.ccs_fofn,
                                done_filename=args.done_filename,
                                blasr_nproc=args.blasr_nproc,
                                tmp_dir=args.tmp_dir)
        elif cmd == "split":
            obj = IcePartialSplit(root_dir=args.root_dir,
                                  nfl_fa=args.nfl_fa,
                                  N=args.N)
        elif cmd == "i":
            obj = IcePartialI(root_dir=args.root_dir,
                              i=args.i,
                              ccs_fofn=args.ccs_fofn,
                              blasr_nproc=args.blasr_nproc,
                              tmp_dir=args.tmp_dir)
        elif cmd == "merge":
            obj = IcePartialMerge(root_dir=args.root_dir, N=args.N)
        else:
            raise ValueError(
                "Unknown command passed to {f}: {cmd}.".format(
                    f=op.basename(__file__), cmd=cmd))

        cmd_str = obj.cmd_str()
        logging.info("Running CMD: {cmd_str}".format(cmd_str=cmd_str))
        obj.run()
    except Exception:
        # Deliberately not a bare ``except:``: KeyboardInterrupt and
        # SystemExit must propagate instead of being reported as a failed
        # command.
        logging.exception("Exiting {cmd_str} with return code 1.".format(
            cmd_str=cmd_str))
        return 1
    return 0
def get_base_contract_parser(Constants=BaseConstants, default_level="WARN"):
    """Create a parser via ``get_pbparser`` from a constants holder.

    Args:
        Constants: object providing ``TOOL_ID``, ``PARSER_DESC`` and
            ``DRIVER_EXE``; ``TOOL_ID`` is used both as tool id and as name.
        default_level: default log level handed to ``get_pbparser``.

    Returns:
        Whatever ``get_pbparser`` returns for these settings.
    """
    return get_pbparser(
        tool_id=Constants.TOOL_ID,
        version=get_version(),
        name=Constants.TOOL_ID,
        description=Constants.PARSER_DESC,
        driver_exe=Constants.DRIVER_EXE,
        nproc=SymbolTypes.MAX_NPROC,
        # One-element tuple: only a temporary directory is requested.
        resource_types=(ResourceTypes.TMP_DIR,),
        default_level=default_level,
    )
def get_base_contract_parser(Constants=BaseConstants, default_level="WARN"):
    """Return the ``get_pbparser`` parser configured from ``Constants``.

    ``Constants`` must expose ``TOOL_ID`` (used as tool id and name),
    ``PARSER_DESC`` and ``DRIVER_EXE``. ``default_level`` is forwarded
    unchanged as the parser's default log level.
    """
    # Values are evaluated here in the same order as the keyword arguments
    # they feed.
    settings = dict(
        tool_id=Constants.TOOL_ID,
        version=get_version(),
        name=Constants.TOOL_ID,
        description=Constants.PARSER_DESC,
        driver_exe=Constants.DRIVER_EXE,
        nproc=SymbolTypes.MAX_NPROC,
        resource_types=(ResourceTypes.TMP_DIR,),
        default_level=default_level)
    contract_parser = get_pbparser(**settings)
    return contract_parser
def run(self):
    """Execute ice_partial.py all|one|split|i|merge.

    Selects the worker class from ``self.args.subCommand``, constructs it
    from the parsed arguments, logs its command string and runs it.

    Returns:
        0 if the worker ran without raising; 1 if the subcommand is not
        recognised or an ``Exception`` was raised (logged with traceback).
    """
    cmd = self.args.subCommand
    script = op.basename(__file__)
    logging.info("Running {f} {cmd} v{v}.".format(f=script, cmd=cmd,
                                                  v=get_version()))
    cmd_str = ""
    try:
        args = self.args
        if cmd == "all":
            sge_opts = SgeOptions(unique_id=args.unique_id,
                                  use_sge=args.use_sge,
                                  max_sge_jobs=args.max_sge_jobs,
                                  blasr_nproc=args.blasr_nproc)
            obj = IceAllPartials(
                root_dir=args.root_dir,
                fasta_filenames=args.fasta_filenames.split(','),
                ref_fasta=args.ref_fasta,
                out_pickle=args.out_pickle,
                sge_opts=sge_opts,
                ccs_fofn=args.ccs_fofn,
                tmp_dir=args.tmp_dir)
        elif cmd == "one":
            # Only assign nfl reads in the given input_fasta file to isoforms
            obj = IcePartialOne(input_fasta=args.input_fasta,
                                ref_fasta=args.ref_fasta,
                                out_pickle=args.out_pickle,
                                ccs_fofn=args.ccs_fofn,
                                done_filename=args.done_filename,
                                blasr_nproc=args.blasr_nproc,
                                tmp_dir=args.tmp_dir)
        elif cmd == "split":
            obj = IcePartialSplit(root_dir=args.root_dir,
                                  nfl_fa=args.nfl_fa,
                                  N=args.N)
        elif cmd == "i":
            obj = IcePartialI(root_dir=args.root_dir,
                              i=args.i,
                              ccs_fofn=args.ccs_fofn,
                              blasr_nproc=args.blasr_nproc,
                              tmp_dir=args.tmp_dir)
        elif cmd == "merge":
            obj = IcePartialMerge(root_dir=args.root_dir, N=args.N)
        else:
            raise ValueError("Unknown command passed to {f}: {cmd}.".format(
                f=op.basename(__file__), cmd=cmd))

        cmd_str = obj.cmd_str()
        logging.info("Running CMD: {cmd_str}".format(cmd_str=cmd_str))
        obj.run()
    except Exception:
        # ``except Exception`` rather than a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit are not converted into exit code 1.
        logging.exception("Exiting {cmd_str} with return code 1.".format(
            cmd_str=cmd_str))
        return 1
    return 0
def main():
    """Entry point: split a fasta file into smaller chunks.

    Parses the command line with ``get_args``, configures a module logger at
    DEBUG level, logs the script name and version, then calls ``splitFasta``
    with the parsed options.
    """
    import logging
    from pbtranscript.__init__ import get_version

    logger = logging.getLogger(__name__)
    options = get_args()

    # Imported only after argument parsing, as before.
    from pbtranscript.Utils import setup_log
    setup_log(alog=logger, level=logging.DEBUG)

    banner = "Running {f} v{v}.".format(f=op.basename(__file__),
                                        v=get_version())
    logger.info(banner)

    splitFasta(
        input_fasta=options.input_fasta,
        reads_per_split=options.reads_per_split,
        out_dir=options.out_dir,
        out_prefix=options.out_prefix)
def run(self):
    """Convert the input fofn to fasta using the parsed arguments.

    Calls ``convert_fofn_to_fasta`` with ``input_fofn``, ``fasta_fofn`` and
    ``fasta_out_dir`` from ``self.args``; existing output is never
    overwritten (``force_overwrite=False``).

    Returns:
        0 on success, 1 if the conversion raised an ``Exception`` (the
        traceback is logged).
    """
    logging.info("Running {f} v{v}.".format(f=op.basename(__file__),
                                            v=get_version()))
    args = self.args
    try:
        convert_fofn_to_fasta(fofn_filename=args.input_fofn,
                              out_filename=args.fasta_fofn,
                              fasta_out_dir=args.fasta_out_dir,
                              force_overwrite=False)
    except Exception:
        # Not a bare ``except:``: KeyboardInterrupt and SystemExit should
        # propagate rather than be logged as a conversion failure.
        logging.exception("Failed to convert fofn {f} to fasta.".format(
            f=args.input_fofn))
        return 1
    return 0
def run(self):
    """Run the fofn-to-fasta conversion described by ``self.args``.

    Returns:
        0 when ``convert_fofn_to_fasta`` completes, 1 when it raises an
        ``Exception``; in the latter case the traceback is logged together
        with the offending fofn path.
    """
    logging.info("Running {f} v{v}.".format(f=op.basename(__file__),
                                            v=get_version()))
    args = self.args
    try:
        convert_fofn_to_fasta(
            fofn_filename=args.input_fofn,
            out_filename=args.fasta_fofn,
            fasta_out_dir=args.fasta_out_dir,
            force_overwrite=False)
    except Exception:
        # Narrowed from a bare ``except:`` so Ctrl-C (KeyboardInterrupt) and
        # SystemExit are not swallowed and turned into return code 1.
        logging.exception(
            "Failed to convert fofn {f} to fasta.".format(f=args.input_fofn))
        return 1
    return 0
def run(self):
    """Merge results by running ``IceQuiverMerge``.

    Builds an ``IceQuiverMerge`` from ``self.args.root_dir`` and
    ``self.args.N``, records its command string and runs it.

    Returns:
        0 on success, 1 if constructing or running the merge raised an
        ``Exception`` (the traceback is logged).
    """
    logging.info("Running {f} v{v}.".format(f=op.basename(__file__),
                                            v=get_version()))
    # Stays empty in the failure log if construction itself fails.
    cmd_str = ""
    try:
        args = self.args
        iceqm = IceQuiverMerge(root_dir=args.root_dir, N=args.N)
        cmd_str = iceqm.cmd_str()
        iceqm.run()
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # propagate instead of being reported as return code 1.
        logging.exception("Exiting {cmd_str} with return code 1.".format(
            cmd_str=cmd_str))
        return 1
    return 0
def set_parser():
    """Create the command line parser.

    Positional arguments: ``input_fasta``, ``output_prefix`` and
    ``consensus_id``. Options: ``--nproc`` (int, default 8), ``--maxScore``
    (int, default -1000) and ``--version``/``-v``.

    Returns:
        The configured ``ArgumentParser``.
    """
    cli = ArgumentParser()

    # Required positionals, in command line order.
    positionals = (
        ("input_fasta", "Input fasta filename"),
        ("output_prefix", "Output filename prefix (ex: g_consensus)"),
        ("consensus_id", "Consensus sequence ID name (ex: consensus)"),
    )
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, help=arg_help)

    # Integer-valued tuning options.
    cli.add_argument("--nproc", default=8, type=int,
                     help="Number of processes")
    cli.add_argument("--maxScore", default=-1000, type=int,
                     help="blasr maxScore")

    version_text = '%(prog)s ' + get_version()
    cli.add_argument("--version", "-v", action='version',
                     version=version_text)
    return cli
def run(self):
    """Execute ice_fa2fq.py.

    Validates ``in_fa`` and ``ccs_fofn`` from ``self.args``, builds the
    command string for logging, then calls ``ice_fa2fq`` to produce
    ``out_fq``.

    Returns:
        0 on success, 1 if validation or conversion raised an ``Exception``
        (the traceback is logged).
    """
    logging.info("Running {f} v{v}.".format(f=op.basename(__file__),
                                            v=get_version()))
    # Stays empty in the failure log if validation fails first.
    cmd_str = ""
    try:
        args = self.args
        in_fa = args.in_fa
        ccs_fofn = args.ccs_fofn
        out_fq = args.out_fq

        self.validate_inputs(in_fa=in_fa, ccs_fofn=ccs_fofn)
        cmd_str = self.cmd_str(in_fa=in_fa, ccs_fofn=ccs_fofn,
                               out_fq=out_fq)
        ice_fa2fq(in_fa=in_fa, ccs_fofn=ccs_fofn, out_fq=out_fq)
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # are not swallowed and converted into return code 1.
        logging.exception(
            "Exiting {cmd} with return code 1.".format(cmd=cmd_str))
        return 1
    return 0
def _logConfigs(self):
    """Log configuration.

    Overwrites ``self.configFN`` with three lines: the pbtranscript version,
    ``str(self.ice_opts)`` and ``str(self.sge_opts)``.
    """
    # Default buffering instead of ``buffering=0``: Python 3 only allows
    # unbuffered I/O for binary files and raises ValueError in text mode.
    # The ``with`` block closes (and therefore flushes) the file right after
    # the writes, so the content on disk is the same.
    with open(self.configFN, 'w') as f:
        f.write('pbtranscript ' + get_version() + "\n")
        f.write(str(self.ice_opts) + "\n")
        f.write(str(self.sge_opts) + "\n")
def getVersion(self):
    """Return the version reported by ``get_version()``."""
    version = get_version()
    return version
def getVersion(self):
    """Return the value of ``get_version()`` unchanged."""
    current_version = get_version()
    return current_version
def run(self):
    """Execute ice_quiver.py all|postprocess.

    Builds the worker object for ``self.args.subCommand``, logs its command
    string and runs it.

    NOTE: the ``i`` and ``merge`` subcommands are currently disabled in this
    implementation; they are rejected like any other unknown command.

    Returns:
        0 on success. 1 if the subcommand is not handled or if building or
        running the worker raised an ``Exception`` (the traceback is logged).
    """
    cmd = self.args.subCommand
    logging.info("Running {f} {cmd} v{v}.".format(
        f=op.basename(__file__), cmd=cmd, v=get_version()))

    # Stays empty in the failure log if we fail before the worker exists.
    cmd_str = ""
    try:
        args = self.args
        if cmd == "all":
            sge_opts = SgeOptions2(unique_id=args.unique_id,
                                   use_sge=args.use_sge,
                                   max_sge_jobs=args.max_sge_jobs,
                                   blasr_nproc=args.blasr_nproc,
                                   arrow_nproc=args.arrow_nproc,
                                   sge_env_name=args.sge_env_name,
                                   sge_queue=args.sge_queue,
                                   qsub_extra=args.qsub_extra)
            ipq_opts = IceArrowHQLQOptions2(
                hq_isoforms_fa=args.hq_isoforms_fa,
                hq_isoforms_fq=args.hq_isoforms_fq,
                lq_isoforms_fa=args.lq_isoforms_fa,
                lq_isoforms_fq=args.lq_isoforms_fq,
                qv_trim_5=args.qv_trim_5,
                qv_trim_3=args.qv_trim_3,
                hq_arrow_min_accuracy=args.hq_arrow_min_accuracy)
            obj = IceArrowAll2(root_dir=args.root_dir,
                               subread_xml=args.subread_xml,
                               sge_opts=sge_opts,
                               ipq_opts=ipq_opts,
                               tmp_dir=args.tmp_dir)
        elif cmd == "postprocess":
            # Unlike "all", postprocess also passes hq_min_full_length_reads.
            ipq_opts = IceArrowHQLQOptions2(
                hq_isoforms_fa=args.hq_isoforms_fa,
                hq_isoforms_fq=args.hq_isoforms_fq,
                lq_isoforms_fa=args.lq_isoforms_fa,
                lq_isoforms_fq=args.lq_isoforms_fq,
                qv_trim_5=args.qv_trim_5,
                qv_trim_3=args.qv_trim_3,
                hq_arrow_min_accuracy=args.hq_arrow_min_accuracy,
                hq_min_full_length_reads=args.hq_min_full_length_reads)
            obj = IceArrowPostProcess2(
                root_dir=args.root_dir,
                ipq_opts=ipq_opts,
                quit_if_not_done=args.quit_if_not_done,
                summary_fn=args.summary_fn,
                report_fn=args.report_fn)
        else:
            raise ValueError(
                "Unknown command passed to {f}: {cmd}.".format(
                    f=op.basename(__file__), cmd=cmd))

        cmd_str = obj.cmd_str()
        logging.info("Running CMD: {cmd_str}".format(cmd_str=cmd_str))
        obj.run()
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # propagate instead of being reported as a failed command.
        logging.exception("Exiting {cmd_str} with return code 1.".format(
            cmd_str=cmd_str))
        return 1
    return 0
def run(self):
    """Execute ice_partial.py all|one|split|merge.

    Builds the worker object for ``self.args.subCommand``, logs its command
    string and runs it.

    NOTE: the ``i`` subcommand is currently disabled in this implementation
    and is rejected like any other unknown command.

    Returns:
        0 on success. 1 if the subcommand is not handled or if building or
        running the worker raised an ``Exception`` (the traceback is logged).
    """
    cmd = self.args.subCommand
    logging.info("Running {f} {cmd} v{v}.".format(
        f=op.basename(__file__), cmd=cmd, v=get_version()))

    # Stays empty in the failure log if we fail before the worker exists.
    cmd_str = ""
    try:
        args = self.args

        if cmd in ('all', 'one'):
            # currently user NOT allowed to set full missed start/end
            # (we also set it to 30/10 bp which is more stringent than in
            # IceIterative2, which is hard set to 50/30)
            ice_opts = IceOptions2(ece_penalty=args.ece_penalty,
                                   ece_min_len=args.ece_min_len,
                                   max_missed_start=args.max_missed_start,
                                   max_missed_end=args.max_missed_end,
                                   full_missed_start=30,
                                   full_missed_end=10,
                                   min_match_len=50,
                                   aligner_choice=args.aligner_choice)

        if cmd == "all":
            sge_opts = SgeOptions2(unique_id=args.unique_id,
                                   use_sge=args.use_sge,
                                   max_sge_jobs=args.max_sge_jobs,
                                   sge_queue=args.sge_queue,
                                   sge_env_name=args.sge_env_name,
                                   qsub_extra=args.qsub_extra)
            # fastq_filenames is optional; pass None through untouched.
            if args.fastq_filenames is not None:
                fastq_filenames = args.fastq_filenames.split(',')
            else:
                fastq_filenames = None
            obj = IceAllPartials2(
                root_dir=args.root_dir,
                fasta_filenames=args.fasta_filenames.split(','),
                fastq_filenames=fastq_filenames,
                ref_fasta=args.ref_fasta,
                out_pickle=args.out_pickle,
                ice_opts=ice_opts,
                sge_opts=sge_opts,
                cpus=args.cpus,
                tmp_dir=args.tmp_dir)
        elif cmd == "one":
            # Only assign nfl reads in the given input_fasta file to isoforms
            # "one" is always run locally so no need for SGE option
            obj = IcePartialOne2(input_fasta=args.input_fasta,
                                 input_fastq=args.input_fastq,
                                 ref_fasta=args.ref_fasta,
                                 out_pickle=args.out_pickle,
                                 done_filename=args.done_filename,
                                 ice_opts=ice_opts,
                                 cpus=args.cpus,
                                 tmp_dir=args.tmp_dir)
        elif cmd == "split":
            obj = IcePartialSplit2(root_dir=args.root_dir,
                                   nfl_fa=args.nfl_fa,
                                   nfl_fq=args.nfl_fq,
                                   N=args.N)
        elif cmd == "merge":
            obj = IcePartialMerge(root_dir=args.root_dir, N=args.N)
        else:
            raise ValueError(
                "Unknown command passed to {f}: {cmd}.".format(
                    f=op.basename(__file__), cmd=cmd))

        cmd_str = obj.cmd_str()
        logging.info("Running CMD: {cmd_str}".format(cmd_str=cmd_str))
        obj.run()
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # propagate instead of being reported as a failed command.
        logging.exception("Exiting {cmd_str} with return code 1.".format(
            cmd_str=cmd_str))
        return 1
    return 0
def run(self):
    """Execute ice_quiver.py all|i|merge|postprocess.

    Builds the worker object for ``self.args.subCommand``, logs its command
    string and runs it.

    Returns:
        0 on success. 1 if the subcommand is unknown or if building or
        running the worker raised an ``Exception`` (the traceback is logged).
    """
    cmd = self.args.subCommand
    logging.info("Running {f} {cmd} v{v}.".format(
        f=op.basename(__file__), cmd=cmd, v=get_version()))

    # Stays empty in the failure log if we fail before the worker exists.
    cmd_str = ""
    try:
        args = self.args
        if cmd == "all":
            sge_opts = SgeOptions(unique_id=args.unique_id,
                                  use_sge=args.use_sge,
                                  max_sge_jobs=args.max_sge_jobs,
                                  blasr_nproc=args.blasr_nproc,
                                  quiver_nproc=args.quiver_nproc)
            ipq_opts = IceQuiverHQLQOptions(
                hq_isoforms_fa=args.hq_isoforms_fa,
                hq_isoforms_fq=args.hq_isoforms_fq,
                lq_isoforms_fa=args.lq_isoforms_fa,
                lq_isoforms_fq=args.lq_isoforms_fq,
                qv_trim_5=args.qv_trim_5,
                qv_trim_3=args.qv_trim_3,
                hq_quiver_min_accuracy=args.hq_quiver_min_accuracy)
            obj = IceQuiverAll(root_dir=args.root_dir,
                               bas_fofn=args.bas_fofn,
                               fasta_fofn=None,
                               sge_opts=sge_opts,
                               ipq_opts=ipq_opts,
                               tmp_dir=args.tmp_dir)
        elif cmd == "i":
            sge_opts = SgeOptions(unique_id=args.unique_id,
                                  use_sge=args.use_sge,
                                  max_sge_jobs=args.max_sge_jobs,
                                  blasr_nproc=args.blasr_nproc,
                                  quiver_nproc=args.quiver_nproc)
            obj = IceQuiverI(root_dir=args.root_dir,
                             i=args.i,
                             N=args.N,
                             bas_fofn=args.bas_fofn,
                             fasta_fofn=None,
                             sge_opts=sge_opts,
                             tmp_dir=args.tmp_dir)
        elif cmd == "merge":
            obj = IceQuiverMerge(root_dir=args.root_dir, N=args.N)
        elif cmd == "postprocess":
            ipq_opts = IceQuiverHQLQOptions(
                hq_isoforms_fa=args.hq_isoforms_fa,
                hq_isoforms_fq=args.hq_isoforms_fq,
                lq_isoforms_fa=args.lq_isoforms_fa,
                lq_isoforms_fq=args.lq_isoforms_fq,
                qv_trim_5=args.qv_trim_5,
                qv_trim_3=args.qv_trim_3,
                hq_quiver_min_accuracy=args.hq_quiver_min_accuracy)
            obj = IceQuiverPostprocess(
                root_dir=args.root_dir,
                ipq_opts=ipq_opts,
                use_sge=args.use_sge,
                quit_if_not_done=args.quit_if_not_done,
                summary_fn=args.summary_fn,
                report_fn=args.report_fn)
        else:
            raise ValueError("Unknown command passed to {f}: {cmd}.".format(
                f=op.basename(__file__), cmd=cmd))

        cmd_str = obj.cmd_str()
        logging.info("Running CMD: {cmd_str}".format(cmd_str=cmd_str))
        obj.run()
    except Exception:
        # Narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
        # propagate instead of being reported as a failed command.
        logging.exception("Exiting {cmd_str} with return code 1.".format(
            cmd_str=cmd_str))
        return 1
    return 0