def main():
    """
    Command-line entry point: builds the argument parser, validates the
    option combinations, stores the derived run settings on ``args``
    (reads, platform, read_type, out_dir, log_file, asm_config) and
    launches either the full pipeline or the polisher-only mode.

    Returns 0 when the run completes and 1 when one of the pipeline
    exceptions listed in the ``except`` clause below is caught (the error
    is logged first). Invalid command lines are reported via
    ``parser.error``.
    """
    def check_int_range(value, min_val, max_val, require_odd=False):
        """
        argparse ``type`` callback: converts value to int and raises
        ArgumentTypeError if it is outside [min_val, max_val] or,
        when require_odd is set, if it is even
        """
        ival = int(value)
        if not min_val <= ival <= max_val:
            raise argparse.ArgumentTypeError(
                "value should be in the range [{0}, {1}]".format(min_val,
                                                                 max_val))
        if require_odd and ival % 2 == 0:
            raise argparse.ArgumentTypeError("should be an odd number")
        return ival

    #One row per input mode: (option, dest, platform, read type, help).
    #The same table drives option registration and mode selection below.
    read_modes = [
        ("--pacbio-raw", "pacbio_raw", "pacbio", "raw",
         "PacBio regular CLR reads (<20%% error)"),
        ("--pacbio-corr", "pacbio_corrected", "pacbio", "corrected",
         "PacBio reads that were corrected with other methods (<3%% error)"),
        ("--pacbio-hifi", "pacbio_hifi", "pacbio", "hifi",
         "PacBio HiFi reads (<1%% error)"),
        ("--nano-raw", "nano_raw", "nano", "raw",
         "ONT regular reads, pre-Guppy5 (<20%% error)"),
        ("--nano-corr", "nano_corrected", "nano", "corrected",
         "ONT reads that were corrected with other methods (<3%% error)"),
        ("--nano-hq", "nano_hq", "nano", "nano_hq",
         "ONT high-quality reads: Guppy5+ SUP or Q20 (<5%% error)"),
        #platform is arbitrary for subassemblies
        ("--subassemblies", "subassemblies", "pacbio", "subasm",
         "[deprecated] high-quality contigs input"),
    ]

    parser = argparse.ArgumentParser(
        description="Assembly of long reads with repeat graphs",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        usage=_usage(), epilog=_epilog())

    #exactly one input mode has to be given
    read_group = parser.add_mutually_exclusive_group(required=True)
    for option, dest, _platform, _read_type, help_str in read_modes:
        read_group.add_argument(option, dest=dest, nargs="+", default=None,
                                metavar="path", help=help_str)

    parser.add_argument("-g", "--genome-size", dest="genome_size",
                        metavar="size", required=False, default=None,
                        help="estimated genome size (for example, 5m or 2.6g)")
    parser.add_argument("-o", "--out-dir", dest="out_dir",
                        default=None, required=True,
                        metavar="path", help="Output directory")
    parser.add_argument("-t", "--threads", dest="threads",
                        type=lambda v: check_int_range(v, 1, 128),
                        default=1, metavar="int",
                        help="number of parallel threads [1]")
    parser.add_argument("-i", "--iterations", dest="num_iters",
                        type=lambda v: check_int_range(v, 0, 10),
                        default=1, metavar="int",
                        help="number of polishing iterations [1]")
    parser.add_argument("-m", "--min-overlap", dest="min_overlap",
                        type=lambda v: check_int_range(v, 1000, 10000),
                        default=None, metavar="int",
                        help="minimum overlap between reads [auto]")
    parser.add_argument("--asm-coverage", dest="asm_coverage",
                        metavar="int", default=None, type=int,
                        help="reduced coverage for initial "
                             "disjointig assembly [not set]")
    parser.add_argument("--hifi-error", dest="hifi_error",
                        metavar="float", default=None, type=float,
                        help="[deprecated] same as --read-error")
    parser.add_argument("--read-error", dest="read_error",
                        metavar="float", default=None, type=float,
                        help="adjust parameters for given read error rate "
                             "(as fraction e.g. 0.03)")
    parser.add_argument("--extra-params", dest="extra_params",
                        metavar="extra_params", required=False, default=None,
                        help="extra configuration parameters list "
                             "(comma-separated)")
    parser.add_argument("--plasmids", action="store_true",
                        dest="plasmids", default=False,
                        help="unused (retained for backward compatibility)")
    parser.add_argument("--meta", action="store_true",
                        dest="meta", default=False,
                        help="metagenome / uneven coverage mode")
    parser.add_argument("--keep-haplotypes", action="store_true",
                        dest="keep_haplotypes", default=False,
                        help="do not collapse alternative haplotypes")
    parser.add_argument("--no-alt-contigs", action="store_true",
                        dest="no_alt_contigs", default=False,
                        help="do not output contigs representing "
                             "alternative haplotypes")
    parser.add_argument("--scaffold", action="store_true",
                        dest="scaffold", default=False,
                        help="enable scaffolding using graph "
                             "[disabled by default]")
    parser.add_argument("--trestle", action="store_true",
                        dest="trestle", default=False,
                        help="[deprecated] enable Trestle "
                             "[disabled by default]")
    parser.add_argument("--polish-target", dest="polish_target",
                        metavar="path", required=False,
                        help="run polisher on the target sequence")
    parser.add_argument("--resume", action="store_true",
                        dest="resume", default=False,
                        help="resume from the last completed stage")
    parser.add_argument("--resume-from", dest="resume_from",
                        metavar="stage_name", default=None,
                        help="resume from a custom stage")
    parser.add_argument("--stop-after", dest="stop_after",
                        metavar="stage_name", default=None,
                        help="stop after the specified stage completed")
    #parser.add_argument("--kmer-size", dest="kmer_size",
    #                    type=lambda v: check_int_range(v, 11, 31, require_odd=True),
    #                    default=None, help="kmer size (default: auto)")
    parser.add_argument("--debug", action="store_true",
                        dest="debug", default=False,
                        help="enable debug output")
    parser.add_argument("-v", "--version", action="version",
                        version=_version())
    args = parser.parse_args()

    if args.asm_coverage and (args.genome_size is None):
        parser.error("--asm-coverage option requires genome size estimate "
                     "(--genome-size)")
    if args.asm_coverage and args.meta:
        parser.error("--asm-coverage is incompatible with --meta")

    #the deprecated --hifi-error only applies if --read-error was not given
    if args.hifi_error and not args.read_error:
        args.read_error = args.hifi_error
    if args.read_error and (args.pacbio_raw or args.nano_raw):
        parser.error("--read-error can only be used with corr/hq/hifi modes")
    #An explicit range check: a plain "> 1" comparison lets negative values
    #and NaN through into the divergence parameters set below.
    if args.read_error is not None and not 0 <= args.read_error <= 1:
        parser.error("--read-error expressed as a decimal fraction, "
                     "e.g. 0.01 or 0.03")

    def add_extra_params(params):
        #extends the comma-separated list if one is already set
        if args.extra_params:
            args.extra_params += "," + params
        else:
            args.extra_params = params

    if args.read_error:
        add_extra_params("assemble_ovlp_divergence={0},"
                         "repeat_graph_ovlp_divergence={0}"
                         .format(args.read_error))
    if args.no_alt_contigs:
        add_extra_params("remove_alt_edges=1")

    #the options are mutually exclusive, so at most one row matches
    for _option, dest, platform, read_type, _help_str in read_modes:
        selected_reads = getattr(args, dest)
        if selected_reads:
            args.reads = selected_reads
            args.platform = platform
            args.read_type = read_type

    if not os.path.isdir(args.out_dir):
        os.mkdir(args.out_dir)
    args.out_dir = os.path.abspath(args.out_dir)
    args.reads = [os.path.abspath(r) for r in args.reads]

    args.log_file = os.path.join(args.out_dir, "flye.log")
    _enable_logging(args.log_file, args.debug, overwrite=False)

    #the assembler config file is selected by read type
    args.asm_config = os.path.join(cfg.vals["pkg_root"],
                                   cfg.vals["bin_cfg"][args.read_type])

    #warnings are emitted only now, after logging has been enabled
    if args.plasmids:
        logger.warning("--plasmids mode is no longer available. Command line "
                       "option will be removed in the future versions")
    if args.trestle:
        logger.warning("--trestle mode is being deprecated. "
                       "It will be removed in the future versions.")
    if args.subassemblies:
        logger.warning("--subassemblies mode is being deprecated. "
                       "It will be removed in the future versions.")

    try:
        aln.check_binaries()
        pol.check_binaries()
        asm.check_binaries()
        repeat.check_binaries()

        if not args.polish_target:
            _run(args)
        else:
            _run_polisher_only(args)

    except (AlignmentException, pol.PolishException,
            asm.AssembleException, repeat.RepeatException,
            ResumeException, fp.FastaError, ConfigException) as e:
        logger.error(e)
        logger.error("Pipeline aborted")
        return 1

    return 0
def main():
    """
    Command-line entry point: parses and validates the options, stores the
    derived run settings on ``args`` (reads, platform, read_type, out_dir,
    log_file, asm_config) and launches either the full pipeline or the
    polisher-only mode.

    Returns 0 when the run completes and 1 when one of the pipeline
    exceptions listed in the ``except`` clause below is caught.
    """
    def check_int_range(value, min_val, max_val, require_odd=False):
        """
        argparse ``type`` callback: converts value to int and raises
        ArgumentTypeError if it is outside [min_val, max_val] or,
        when require_odd is set, if it is even
        """
        ival = int(value)
        if not min_val <= ival <= max_val:
            raise argparse.ArgumentTypeError(
                "value should be in the range [{0}, {1}]".format(min_val,
                                                                 max_val))
        if require_odd and ival % 2 == 0:
            raise argparse.ArgumentTypeError("should be an odd number")
        return ival

    #One row per input mode: (option, dest, platform, read type, help).
    #The same table drives option registration and mode selection below.
    read_modes = [
        ("--pacbio-raw", "pacbio_raw", "pacbio", "raw",
         "PacBio raw reads"),
        ("--pacbio-corr", "pacbio_corrected", "pacbio", "corrected",
         "PacBio corrected reads"),
        ("--pacbio-hifi", "pacbio_hifi", "pacbio", "hifi",
         "PacBio HiFi reads"),
        ("--nano-raw", "nano_raw", "nano", "raw",
         "ONT raw reads"),
        ("--nano-corr", "nano_corrected", "nano", "corrected",
         "ONT corrected reads"),
        #platform is arbitrary for subassemblies
        ("--subassemblies", "subassemblies", "pacbio", "subasm",
         "high-quality contigs input"),
    ]

    parser = argparse.ArgumentParser(
        description="Assembly of long reads with repeat graphs",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        usage=_usage(), epilog=_epilog())

    #exactly one input mode has to be given
    read_group = parser.add_mutually_exclusive_group(required=True)
    for option, dest, _platform, _read_type, help_str in read_modes:
        read_group.add_argument(option, dest=dest, nargs="+", default=None,
                                metavar="path", help=help_str)

    parser.add_argument("-g", "--genome-size", dest="genome_size",
                        metavar="size", required=False, default=None,
                        help="estimated genome size (for example, 5m or 2.6g)")
    parser.add_argument("-o", "--out-dir", dest="out_dir",
                        default=None, required=True,
                        metavar="path", help="Output directory")
    parser.add_argument("-t", "--threads", dest="threads",
                        type=lambda v: check_int_range(v, 1, 128),
                        default=1, metavar="int",
                        help="number of parallel threads [1]")
    parser.add_argument("-i", "--iterations", dest="num_iters",
                        type=lambda v: check_int_range(v, 0, 10),
                        default=1, metavar="int",
                        help="number of polishing iterations [1]")
    parser.add_argument("-m", "--min-overlap", dest="min_overlap",
                        type=lambda v: check_int_range(v, 1000, 10000),
                        default=None, metavar="int",
                        help="minimum overlap between reads [auto]")
    parser.add_argument("--asm-coverage", dest="asm_coverage",
                        metavar="int", default=None, type=int,
                        help="reduced coverage for initial "
                             "disjointig assembly [not set]")
    parser.add_argument("--plasmids", action="store_true",
                        dest="plasmids", default=False,
                        help="rescue short unassembled plasmids")
    parser.add_argument("--meta", action="store_true",
                        dest="meta", default=False,
                        help="metagenome / uneven coverage mode")
    parser.add_argument("--keep-haplotypes", action="store_true",
                        dest="keep_haplotypes", default=False,
                        help="do not collapse alternative haplotypes")
    parser.add_argument("--trestle", action="store_true",
                        dest="trestle", default=False,
                        help="enable Trestle [disabled]")
    parser.add_argument("--polish-target", dest="polish_target",
                        metavar="path", required=False,
                        help="run polisher on the target sequence")
    parser.add_argument("--resume", action="store_true",
                        dest="resume", default=False,
                        help="resume from the last completed stage")
    parser.add_argument("--resume-from", dest="resume_from",
                        metavar="stage_name", default=None,
                        help="resume from a custom stage")
    parser.add_argument("--stop-after", dest="stop_after",
                        metavar="stage_name", default=None,
                        help="stop after the specified stage completed")
    #parser.add_argument("--kmer-size", dest="kmer_size",
    #                    type=lambda v: check_int_range(v, 11, 31, require_odd=True),
    #                    default=None, help="kmer size (default: auto)")
    parser.add_argument("--debug", action="store_true",
                        dest="debug", default=False,
                        help="enable debug output")
    parser.add_argument("-v", "--version", action="version",
                        version=_version())
    args = parser.parse_args()

    if args.asm_coverage and (args.genome_size is None):
        parser.error("--asm-coverage option requires genome size estimate "
                     "(--genome-size)")
    if args.asm_coverage and args.meta:
        parser.error("--asm-coverage is incompatible with --meta")

    #if not args.genome_size and not args.polish_target:
    #    parser.error("Genome size argument (-g/--genome-size) "
    #                 "is required for assembly")

    #the options are mutually exclusive, so at most one row matches
    for _option, dest, platform, read_type, _help_str in read_modes:
        selected_reads = getattr(args, dest)
        if selected_reads:
            args.reads = selected_reads
            args.platform = platform
            args.read_type = read_type

    if not os.path.isdir(args.out_dir):
        os.mkdir(args.out_dir)
    args.out_dir = os.path.abspath(args.out_dir)

    args.log_file = os.path.join(args.out_dir, "flye.log")
    _enable_logging(args.log_file, args.debug, overwrite=False)

    #the assembler config file is selected by read type
    bin_cfg_name = cfg.vals["bin_cfg"][args.read_type]
    args.asm_config = os.path.join(cfg.vals["pkg_root"], bin_cfg_name)

    try:
        aln.check_binaries()
        pol.check_binaries()
        asm.check_binaries()
        repeat.check_binaries()

        if args.polish_target:
            _run_polisher_only(args)
        else:
            _run(args)

    except (AlignmentException, pol.PolishException,
            asm.AssembleException, repeat.RepeatException,
            ResumeException, fp.FastaError) as e:
        logger.error(e)
        logger.error("Pipeline aborted")
        return 1

    return 0
def main():
    """
    Command-line entry point: parses the options, stores the derived run
    settings on ``args`` (reads, platform, read_type, out_dir, log_file,
    asm_config), checks the external binaries and runs the pipeline.

    Returns 0 when the run completes and 1 when one of the pipeline
    exceptions listed in the ``except`` clause below is caught.
    """
    def check_int_range(value, min_val, max_val, require_odd=False):
        """
        argparse ``type`` callback: converts value to int and raises
        ArgumentTypeError if it is outside [min_val, max_val] or,
        when require_odd is set, if it is even
        """
        ival = int(value)
        if not min_val <= ival <= max_val:
            raise argparse.ArgumentTypeError(
                "value should be in range [{0}, {1}]".format(min_val,
                                                             max_val))
        if require_odd and ival % 2 == 0:
            raise argparse.ArgumentTypeError("should be an odd number")
        return ival

    #One row per input mode: (option, dest, platform, read type, help).
    #The same table drives option registration and mode selection below.
    read_modes = [
        ("--pacbio-raw", "pacbio_raw", "pacbio", "raw",
         "PacBio raw reads"),
        ("--pacbio-corr", "pacbio_corrected", "pacbio", "corrected",
         "PacBio corrected reads"),
        ("--nano-raw", "nano_raw", "nano", "raw",
         "ONT raw reads"),
        ("--nano-corr", "nano_corrected", "nano", "corrected",
         "ONT corrected reads"),
        #platform is arbitrary for subassemblies
        ("--subassemblies", "subassemblies", "pacbio", "subasm",
         "high-quality contigs input"),
    ]

    parser = argparse.ArgumentParser(
        description="Assembly of long and error-prone reads",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        usage=_usage(), epilog=_epilog())

    #exactly one input mode has to be given
    read_group = parser.add_mutually_exclusive_group(required=True)
    for option, dest, _platform, _read_type, help_str in read_modes:
        read_group.add_argument(option, dest=dest, nargs="+", default=None,
                                metavar="path", help=help_str)

    parser.add_argument("-g", "--genome-size", dest="genome_size",
                        metavar="size", required=True,
                        help="estimated genome size (for example, 5m or 2.6g)")
    parser.add_argument("-o", "--out-dir", dest="out_dir",
                        default=None, required=True,
                        metavar="path", help="Output directory")
    parser.add_argument("-t", "--threads", dest="threads",
                        type=lambda v: check_int_range(v, 1, 128),
                        default=1, metavar="int",
                        help="number of parallel threads [1]")
    parser.add_argument("-i", "--iterations", dest="num_iters",
                        type=lambda v: check_int_range(v, 0, 10),
                        default=1, metavar="int",
                        help="number of polishing iterations [1]")
    parser.add_argument("-m", "--min-overlap", dest="min_overlap",
                        type=lambda v: check_int_range(v, 1000, 10000),
                        default=None, metavar="int",
                        help="minimum overlap between reads [auto]")
    parser.add_argument("--asm-coverage", dest="asm_coverage",
                        metavar="int", default=None, type=int,
                        help="reduced coverage for initial "
                             "contig assembly [not set]")
    parser.add_argument("--resume", action="store_true",
                        dest="resume", default=False,
                        help="resume from the last completed stage")
    parser.add_argument("--resume-from", dest="resume_from",
                        metavar="stage_name", default=None,
                        help="resume from a custom stage")
    #parser.add_argument("--kmer-size", dest="kmer_size",
    #                    type=lambda v: check_int_range(v, 11, 31, require_odd=True),
    #                    default=None, help="kmer size (default: auto)")
    parser.add_argument("--debug", action="store_true",
                        dest="debug", default=False,
                        help="enable debug output")
    parser.add_argument("-v", "--version", action="version",
                        version=_version())
    args = parser.parse_args()

    #the options are mutually exclusive, so at most one row matches
    for _option, dest, platform, read_type, _help_str in read_modes:
        selected_reads = getattr(args, dest)
        if selected_reads:
            args.reads = selected_reads
            args.platform = platform
            args.read_type = read_type

    if not os.path.isdir(args.out_dir):
        os.mkdir(args.out_dir)
    args.out_dir = os.path.abspath(args.out_dir)

    args.log_file = os.path.join(args.out_dir, "flye.log")
    _enable_logging(args.log_file, args.debug, overwrite=False)

    _set_kmer_size(args)
    #the assembler config file is selected by read type
    bin_cfg_name = cfg.vals["bin_cfg"][args.read_type]
    args.asm_config = os.path.join(cfg.vals["pkg_root"], bin_cfg_name)

    try:
        aln.check_binaries()
        pol.check_binaries()
        asm.check_binaries()
        repeat.check_binaries()
        _run(args)
    except (aln.AlignmentException, pol.PolishException,
            asm.AssembleException, repeat.RepeatException,
            ResumeException) as e:
        logger.error(e)
        return 1

    return 0