Example #1
0
def main():
    """Command-line entry point: parse options, prepare the run, start it.

    The function builds the argument parser, validates option combinations,
    derives ``args.reads`` / ``args.platform`` / ``args.read_type`` from the
    selected read-mode option, creates the output directory, enables logging
    and then calls ``_run(args)`` (or ``_run_polisher_only(args)`` when
    ``--polish-target`` is given).

    Returns:
        0 when the run finished, 1 when one of the pipeline exceptions
        listed in the ``except`` clause below was caught and logged.
        Invalid command lines are reported through ``parser.error``,
        which terminates the program instead of returning.
    """
    def check_int_range(value, min_val, max_val, require_odd=False):
        """argparse ``type=`` helper: integer within [min_val, max_val].

        Raises argparse.ArgumentTypeError when ``value`` is not an integer,
        is out of range, or is even while ``require_odd`` is set.
        """
        try:
            ival = int(value)
        except ValueError:
            # Without this, argparse reports "invalid <lambda> value"
            # because the type callables below are anonymous lambdas.
            raise argparse.ArgumentTypeError(
                "expected an integer, got '{0}'".format(value))
        if ival < min_val or ival > max_val:
            raise argparse.ArgumentTypeError("value should be in the "
                                             "range [{0}, {1}]".format(
                                                 min_val, max_val))
        if require_odd and ival % 2 == 0:
            raise argparse.ArgumentTypeError("should be an odd number")
        return ival

    parser = argparse.ArgumentParser(
        description="Assembly of long reads with repeat graphs",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        usage=_usage(),
        epilog=_epilog())

    # Exactly one read-mode option must be given; each takes 1+ paths.
    read_group = parser.add_mutually_exclusive_group(required=True)
    read_group.add_argument(
        "--pacbio-raw", dest="pacbio_raw", default=None,
        metavar="path", nargs="+",
        help="PacBio regular CLR reads (<20%% error)")
    read_group.add_argument(
        "--pacbio-corr", dest="pacbio_corrected", default=None,
        metavar="path", nargs="+",
        help="PacBio reads that were corrected with other methods (<3%% error)"
    )
    read_group.add_argument(
        "--pacbio-hifi", dest="pacbio_hifi", default=None,
        metavar="path", nargs="+",
        help="PacBio HiFi reads (<1%% error)")
    read_group.add_argument(
        "--nano-raw", dest="nano_raw", nargs="+", default=None,
        metavar="path",
        help="ONT regular reads, pre-Guppy5 (<20%% error)")
    read_group.add_argument(
        "--nano-corr", dest="nano_corrected", nargs="+", default=None,
        metavar="path",
        help="ONT reads that were corrected with other methods (<3%% error)")
    read_group.add_argument(
        "--nano-hq", dest="nano_hq", nargs="+", default=None,
        metavar="path",
        help="ONT high-quality reads: Guppy5+ SUP or Q20 (<5%% error)")
    read_group.add_argument(
        "--subassemblies", dest="subassemblies", nargs="+", default=None,
        metavar="path",
        help="[deprecated] high-quality contigs input")

    parser.add_argument(
        "-g", "--genome-size", dest="genome_size", metavar="size",
        required=False, default=None,
        help="estimated genome size (for example, 5m or 2.6g)")
    parser.add_argument(
        "-o", "--out-dir", dest="out_dir", default=None, required=True,
        metavar="path",
        help="Output directory")
    parser.add_argument(
        "-t", "--threads", dest="threads",
        type=lambda v: check_int_range(v, 1, 128),
        default=1, metavar="int",
        help="number of parallel threads [1]")
    parser.add_argument(
        "-i", "--iterations", dest="num_iters",
        type=lambda v: check_int_range(v, 0, 10),
        default=1, metavar="int",
        help="number of polishing iterations [1]")
    parser.add_argument(
        "-m", "--min-overlap", dest="min_overlap", metavar="int",
        type=lambda v: check_int_range(v, 1000, 10000),
        default=None,
        help="minimum overlap between reads [auto]")
    parser.add_argument(
        "--asm-coverage", dest="asm_coverage", metavar="int",
        default=None, type=int,
        help="reduced coverage for initial "
        "disjointig assembly [not set]")
    parser.add_argument(
        "--hifi-error", dest="hifi_error", metavar="float",
        default=None, type=float,
        help="[deprecated] same as --read-error")
    parser.add_argument(
        "--read-error", dest="read_error", metavar="float",
        default=None, type=float,
        help=
        "adjust parameters for given read error rate (as fraction e.g. 0.03)")
    parser.add_argument(
        "--extra-params", dest="extra_params", metavar="extra_params",
        required=False, default=None,
        help="extra configuration parameters list (comma-separated)")
    parser.add_argument(
        "--plasmids", action="store_true", dest="plasmids", default=False,
        help="unused (retained for backward compatibility)")
    parser.add_argument(
        "--meta", action="store_true", dest="meta", default=False,
        help="metagenome / uneven coverage mode")
    parser.add_argument(
        "--keep-haplotypes", action="store_true", dest="keep_haplotypes",
        default=False,
        help="do not collapse alternative haplotypes")
    parser.add_argument(
        "--no-alt-contigs", action="store_true", dest="no_alt_contigs",
        default=False,
        help="do not output contigs representing alternative haplotypes")
    parser.add_argument(
        "--scaffold", action="store_true", dest="scaffold", default=False,
        help="enable scaffolding using graph [disabled by default]")
    parser.add_argument(
        "--trestle", action="store_true", dest="trestle", default=False,
        help="[deprecated] enable Trestle [disabled by default]")
    parser.add_argument(
        "--polish-target", dest="polish_target", metavar="path",
        required=False,
        help="run polisher on the target sequence")
    parser.add_argument(
        "--resume", action="store_true", dest="resume", default=False,
        help="resume from the last completed stage")
    parser.add_argument(
        "--resume-from", dest="resume_from", metavar="stage_name",
        default=None,
        help="resume from a custom stage")
    parser.add_argument(
        "--stop-after", dest="stop_after", metavar="stage_name",
        default=None,
        help="stop after the specified stage completed")
    #parser.add_argument("--kmer-size", dest="kmer_size",
    #                    type=lambda v: check_int_range(v, 11, 31, require_odd=True),
    #                    default=None, help="kmer size (default: auto)")
    parser.add_argument(
        "--debug", action="store_true", dest="debug", default=False,
        help="enable debug output")
    parser.add_argument(
        "-v", "--version", action="version", version=_version())
    args = parser.parse_args()

    def append_extra_params(params):
        """Append ``params`` to the comma-separated args.extra_params."""
        if args.extra_params:
            args.extra_params += "," + params
        else:
            args.extra_params = params

    # --- cross-option validation -------------------------------------
    if args.asm_coverage and (args.genome_size is None):
        parser.error(
            "--asm-coverage option requires genome size estimate (--genome-size)"
        )

    if args.asm_coverage and args.meta:
        parser.error("--asm-coverage is incompatible with --meta")

    # --hifi-error is the deprecated spelling; --read-error wins if both set
    if args.hifi_error and not args.read_error:
        args.read_error = args.hifi_error
    if args.read_error and (args.pacbio_raw or args.nano_raw):
        parser.error("--read-error can only be used with corr/hq/hifi modes")
    # Written as "not (0 <= x <= 1)" so that negative values and NaN are
    # rejected as well as values above 1.
    if args.read_error and not 0 <= args.read_error <= 1:
        parser.error(
            "--read-error expressed as a decimal fraction, e.g. 0.01 or 0.03")

    # --- options that translate into extra configuration parameters ---
    if args.read_error:
        append_extra_params(
            "assemble_ovlp_divergence={0},repeat_graph_ovlp_divergence={0}"
            .format(args.read_error))

    if args.no_alt_contigs:
        append_extra_params("remove_alt_edges=1")

    # --- map the chosen read-mode option to reads/platform/read_type ---
    read_modes = (
        # (argparse dest,     platform, read_type)
        ("pacbio_raw",        "pacbio", "raw"),
        ("pacbio_corrected",  "pacbio", "corrected"),
        ("pacbio_hifi",       "pacbio", "hifi"),
        ("nano_raw",          "nano",   "raw"),
        ("nano_corrected",    "nano",   "corrected"),
        ("nano_hq",           "nano",   "nano_hq"),
        ("subassemblies",     "pacbio", "subasm"),  # platform is arbitrary
    )
    for dest, platform, read_type in read_modes:
        selected_reads = getattr(args, dest)
        if selected_reads:
            args.reads = selected_reads
            args.platform = platform
            args.read_type = read_type

    # --- filesystem and logging setup ---------------------------------
    if not os.path.isdir(args.out_dir):
        os.mkdir(args.out_dir)
    args.out_dir = os.path.abspath(args.out_dir)

    args.reads = [os.path.abspath(r) for r in args.reads]

    args.log_file = os.path.join(args.out_dir, "flye.log")
    _enable_logging(args.log_file, args.debug, overwrite=False)

    # configuration file is looked up by read type
    args.asm_config = os.path.join(cfg.vals["pkg_root"],
                                   cfg.vals["bin_cfg"][args.read_type])

    # Deprecation notices are emitted only now, after logging is enabled.
    if args.plasmids:
        logger.warning(
            "--plasmids mode is no longer available. Command line option will be removed in the future versions"
        )
    if args.trestle:
        logger.warning(
            "--trestle mode is being deprecated. It will be removed in the future versions."
        )
    if args.subassemblies:
        logger.warning(
            "--subassemblies mode is being deprecated. It will be removed in the future versions."
        )

    try:
        aln.check_binaries()
        pol.check_binaries()
        asm.check_binaries()
        repeat.check_binaries()

        if args.polish_target:
            _run_polisher_only(args)
        else:
            _run(args)

    except (AlignmentException, pol.PolishException, asm.AssembleException,
            repeat.RepeatException, ResumeException, fp.FastaError,
            ConfigException) as e:
        logger.error(e)
        logger.error("Pipeline aborted")
        return 1

    return 0
Example #2
0
def main():
    """Command-line entry point: parse options, prepare the run, start it.

    Builds the argument parser, validates the ``--asm-coverage``
    combinations, derives ``args.reads`` / ``args.platform`` /
    ``args.read_type`` from the selected read-mode option, creates the
    output directory, enables logging and then calls ``_run(args)`` (or
    ``_run_polisher_only(args)`` when ``--polish-target`` is given).

    Returns:
        0 when the run finished, 1 when one of the pipeline exceptions
        listed in the ``except`` clause below was caught and logged.
        Invalid command lines are reported through ``parser.error``,
        which terminates the program instead of returning.
    """
    def check_int_range(value, min_val, max_val, require_odd=False):
        """argparse ``type=`` helper: integer within [min_val, max_val].

        Raises argparse.ArgumentTypeError when ``value`` is not an integer,
        is out of range, or is even while ``require_odd`` is set.
        """
        try:
            ival = int(value)
        except ValueError:
            # Without this, argparse reports "invalid <lambda> value"
            # because the type callables below are anonymous lambdas.
            raise argparse.ArgumentTypeError(
                "expected an integer, got '{0}'".format(value))
        if ival < min_val or ival > max_val:
            raise argparse.ArgumentTypeError("value should be in the "
                                             "range [{0}, {1}]".format(
                                                 min_val, max_val))
        if require_odd and ival % 2 == 0:
            raise argparse.ArgumentTypeError("should be an odd number")
        return ival

    parser = argparse.ArgumentParser(
        description="Assembly of long reads with repeat graphs",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        usage=_usage(),
        epilog=_epilog())

    # Exactly one read-mode option must be given; each takes 1+ paths.
    read_group = parser.add_mutually_exclusive_group(required=True)
    read_group.add_argument(
        "--pacbio-raw", dest="pacbio_raw", default=None,
        metavar="path", nargs="+",
        help="PacBio raw reads")
    read_group.add_argument(
        "--pacbio-corr", dest="pacbio_corrected", default=None,
        metavar="path", nargs="+",
        help="PacBio corrected reads")
    read_group.add_argument(
        "--pacbio-hifi", dest="pacbio_hifi", default=None,
        metavar="path", nargs="+",
        help="PacBio HiFi reads")
    read_group.add_argument(
        "--nano-raw", dest="nano_raw", nargs="+", default=None,
        metavar="path",
        help="ONT raw reads")
    read_group.add_argument(
        "--nano-corr", dest="nano_corrected", nargs="+", default=None,
        metavar="path",
        help="ONT corrected reads")
    read_group.add_argument(
        "--subassemblies", dest="subassemblies", nargs="+", default=None,
        metavar="path",
        help="high-quality contigs input")

    parser.add_argument(
        "-g", "--genome-size", dest="genome_size", metavar="size",
        required=False, default=None,
        help="estimated genome size (for example, 5m or 2.6g)")
    parser.add_argument(
        "-o", "--out-dir", dest="out_dir", default=None, required=True,
        metavar="path",
        help="Output directory")
    parser.add_argument(
        "-t", "--threads", dest="threads",
        type=lambda v: check_int_range(v, 1, 128),
        default=1, metavar="int",
        help="number of parallel threads [1]")
    parser.add_argument(
        "-i", "--iterations", dest="num_iters",
        type=lambda v: check_int_range(v, 0, 10),
        default=1, metavar="int",
        help="number of polishing iterations [1]")
    parser.add_argument(
        "-m", "--min-overlap", dest="min_overlap", metavar="int",
        type=lambda v: check_int_range(v, 1000, 10000),
        default=None,
        help="minimum overlap between reads [auto]")
    parser.add_argument(
        "--asm-coverage", dest="asm_coverage", metavar="int",
        default=None, type=int,
        help="reduced coverage for initial "
        "disjointig assembly [not set]")
    parser.add_argument(
        "--plasmids", action="store_true", dest="plasmids", default=False,
        help="rescue short unassembled plasmids")
    parser.add_argument(
        "--meta", action="store_true", dest="meta", default=False,
        help="metagenome / uneven coverage mode")
    parser.add_argument(
        "--keep-haplotypes", action="store_true", dest="keep_haplotypes",
        default=False,
        help="do not collapse alternative haplotypes")
    parser.add_argument(
        "--trestle", action="store_true", dest="trestle", default=False,
        help="enable Trestle [disabled]")
    parser.add_argument(
        "--polish-target", dest="polish_target", metavar="path",
        required=False,
        help="run polisher on the target sequence")
    parser.add_argument(
        "--resume", action="store_true", dest="resume", default=False,
        help="resume from the last completed stage")
    parser.add_argument(
        "--resume-from", dest="resume_from", metavar="stage_name",
        default=None,
        help="resume from a custom stage")
    parser.add_argument(
        "--stop-after", dest="stop_after", metavar="stage_name",
        default=None,
        help="stop after the specified stage completed")
    #parser.add_argument("--kmer-size", dest="kmer_size",
    #                    type=lambda v: check_int_range(v, 11, 31, require_odd=True),
    #                    default=None, help="kmer size (default: auto)")
    parser.add_argument(
        "--debug", action="store_true", dest="debug", default=False,
        help="enable debug output")
    parser.add_argument(
        "-v", "--version", action="version", version=_version())
    args = parser.parse_args()

    # --- cross-option validation -------------------------------------
    if args.asm_coverage and (args.genome_size is None):
        parser.error(
            "--asm-coverage option requires genome size estimate (--genome-size)"
        )

    if args.asm_coverage and args.meta:
        parser.error("--asm-coverage is incompatible with --meta")

    #if not args.genome_size and not args.polish_target:
    #    parser.error("Genome size argument (-g/--genome-size) "
    #                 "is required for assembly")

    # --- map the chosen read-mode option to reads/platform/read_type ---
    read_modes = (
        # (argparse dest,     platform, read_type)
        ("pacbio_raw",        "pacbio", "raw"),
        ("pacbio_corrected",  "pacbio", "corrected"),
        ("pacbio_hifi",       "pacbio", "hifi"),
        ("nano_raw",          "nano",   "raw"),
        ("nano_corrected",    "nano",   "corrected"),
        ("subassemblies",     "pacbio", "subasm"),  # platform is arbitrary
    )
    for dest, platform, read_type in read_modes:
        selected_reads = getattr(args, dest)
        if selected_reads:
            args.reads = selected_reads
            args.platform = platform
            args.read_type = read_type

    # --- filesystem and logging setup ---------------------------------
    if not os.path.isdir(args.out_dir):
        os.mkdir(args.out_dir)
    args.out_dir = os.path.abspath(args.out_dir)

    args.log_file = os.path.join(args.out_dir, "flye.log")
    _enable_logging(args.log_file, args.debug, overwrite=False)

    # configuration file is looked up by read type
    args.asm_config = os.path.join(cfg.vals["pkg_root"],
                                   cfg.vals["bin_cfg"][args.read_type])

    try:
        aln.check_binaries()
        pol.check_binaries()
        asm.check_binaries()
        repeat.check_binaries()

        if args.polish_target:
            _run_polisher_only(args)
        else:
            _run(args)

    except (AlignmentException, pol.PolishException, asm.AssembleException,
            repeat.RepeatException, ResumeException, fp.FastaError) as e:
        logger.error(e)
        logger.error("Pipeline aborted")
        return 1

    return 0
Example #3
0
File: main.py Project: pythseq/Flye
def main():
    """Command-line entry point: parse options, prepare the run, start it.

    Builds the argument parser, derives ``args.reads`` / ``args.platform``
    / ``args.read_type`` from the selected read-mode option, creates the
    output directory, enables logging, calls ``_set_kmer_size(args)`` and
    finally ``_run(args)``.

    Returns:
        0 when the run finished, 1 when one of the pipeline exceptions
        listed in the ``except`` clause below was caught and logged.
    """
    def check_int_range(value, min_val, max_val, require_odd=False):
        """argparse ``type=`` helper: integer within [min_val, max_val].

        Raises argparse.ArgumentTypeError when ``value`` is not an integer,
        is out of range, or is even while ``require_odd`` is set.
        """
        try:
            ival = int(value)
        except ValueError:
            # Without this, argparse reports "invalid <lambda> value"
            # because the type callables below are anonymous lambdas.
            raise argparse.ArgumentTypeError(
                "expected an integer, got '{0}'".format(value))
        if ival < min_val or ival > max_val:
            raise argparse.ArgumentTypeError("value should be in "
                                             "range [{0}, {1}]".format(
                                                 min_val, max_val))
        if require_odd and ival % 2 == 0:
            raise argparse.ArgumentTypeError("should be an odd number")
        return ival

    parser = argparse.ArgumentParser(
        description="Assembly of long and error-prone reads",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        usage=_usage(),
        epilog=_epilog())

    # Exactly one read-mode option must be given; each takes 1+ paths.
    read_group = parser.add_mutually_exclusive_group(required=True)
    read_group.add_argument(
        "--pacbio-raw", dest="pacbio_raw", default=None,
        metavar="path", nargs="+",
        help="PacBio raw reads")
    read_group.add_argument(
        "--pacbio-corr", dest="pacbio_corrected", default=None,
        metavar="path", nargs="+",
        help="PacBio corrected reads")
    read_group.add_argument(
        "--nano-raw", dest="nano_raw", nargs="+", default=None,
        metavar="path",
        help="ONT raw reads")
    read_group.add_argument(
        "--nano-corr", dest="nano_corrected", nargs="+", default=None,
        metavar="path",
        help="ONT corrected reads")
    read_group.add_argument(
        "--subassemblies", dest="subassemblies", nargs="+", default=None,
        metavar="path",
        help="high-quality contigs input")

    parser.add_argument(
        "-g", "--genome-size", dest="genome_size", metavar="size",
        required=True,
        help="estimated genome size (for example, 5m or 2.6g)")
    parser.add_argument(
        "-o", "--out-dir", dest="out_dir", default=None, required=True,
        metavar="path",
        help="Output directory")

    parser.add_argument(
        "-t", "--threads", dest="threads",
        type=lambda v: check_int_range(v, 1, 128),
        default=1, metavar="int",
        help="number of parallel threads [1]")
    parser.add_argument(
        "-i", "--iterations", dest="num_iters",
        type=lambda v: check_int_range(v, 0, 10),
        default=1, metavar="int",
        help="number of polishing iterations [1]")
    parser.add_argument(
        "-m", "--min-overlap", dest="min_overlap", metavar="int",
        type=lambda v: check_int_range(v, 1000, 10000),
        default=None,
        help="minimum overlap between reads [auto]")
    parser.add_argument(
        "--asm-coverage", dest="asm_coverage", metavar="int",
        default=None, type=int,
        help="reduced coverage for initial "
        "contig assembly [not set]")

    parser.add_argument(
        "--resume", action="store_true", dest="resume", default=False,
        help="resume from the last completed stage")
    parser.add_argument(
        "--resume-from", dest="resume_from", metavar="stage_name",
        default=None,
        help="resume from a custom stage")
    #parser.add_argument("--kmer-size", dest="kmer_size",
    #                    type=lambda v: check_int_range(v, 11, 31, require_odd=True),
    #                    default=None, help="kmer size (default: auto)")
    parser.add_argument(
        "--debug", action="store_true", dest="debug", default=False,
        help="enable debug output")
    parser.add_argument(
        "-v", "--version", action="version", version=_version())
    args = parser.parse_args()

    # --- map the chosen read-mode option to reads/platform/read_type ---
    read_modes = (
        # (argparse dest,     platform, read_type)
        ("pacbio_raw",        "pacbio", "raw"),
        ("pacbio_corrected",  "pacbio", "corrected"),
        ("nano_raw",          "nano",   "raw"),
        ("nano_corrected",    "nano",   "corrected"),
        ("subassemblies",     "pacbio", "subasm"),  # platform is arbitrary
    )
    for dest, platform, read_type in read_modes:
        selected_reads = getattr(args, dest)
        if selected_reads:
            args.reads = selected_reads
            args.platform = platform
            args.read_type = read_type

    # --- filesystem and logging setup ---------------------------------
    if not os.path.isdir(args.out_dir):
        os.mkdir(args.out_dir)
    args.out_dir = os.path.abspath(args.out_dir)

    args.log_file = os.path.join(args.out_dir, "flye.log")
    _enable_logging(args.log_file, args.debug, overwrite=False)

    _set_kmer_size(args)
    # configuration file is looked up by read type
    args.asm_config = os.path.join(cfg.vals["pkg_root"],
                                   cfg.vals["bin_cfg"][args.read_type])

    try:
        aln.check_binaries()
        pol.check_binaries()
        asm.check_binaries()
        repeat.check_binaries()
        _run(args)
    except (aln.AlignmentException, pol.PolishException, asm.AssembleException,
            repeat.RepeatException, ResumeException) as e:
        logger.error(e)
        return 1

    return 0