Example #1
def main():
    """main function
    """

    default_parser = default_argparser(CFG_DIR)
    parser = argparse.ArgumentParser(description=__doc__.format(
        PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version()),
                                     parents=[default_parser])

    parser._optionals.title = "Arguments"
    # pipeline specific args
    parser.add_argument(
        '-1',
        "--fq1",
        nargs="+",
        help="FastQ file/s (gzip only)."
        " Multiple input files supported (auto-sorted)."
        " Note: each file (or pair) gets a unique read-group id."
        " Collides with --sample-cfg.")
    parser.add_argument(
        '-2',
        "--fq2",
        nargs="+",
        help="FastQ file/s (if paired) (gzip only). See also --fq1")
    parser.add_argument('-s',
                        "--sample",
                        help="Sample name. Collides with --sample-cfg.")

    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    aux_logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if os.path.exists(args.outdir):
        logger.fatal("Output directory %s already exists", args.outdir)
        sys.exit(1)

    # samples is a dictionary with sample names as key (mostly just
    # one) and readunit keys as value. readunits is a dict with
    # readunits (think: fastq pairs with attributes) as value
    if args.sample_cfg:
        if any([args.fq1, args.fq2, args.sample]):
            logger.fatal(
                "Config file overrides fastq and sample input arguments."
                " Use one or the other")
            sys.exit(1)
        if not os.path.exists(args.sample_cfg):
            logger.fatal("Config file %s does not exist", args.sample_cfg)
            sys.exit(1)
        samples, readunits = get_samples_and_readunits_from_cfgfile(
            args.sample_cfg)
    else:
        if not all([args.fq1, args.sample]):
            logger.fatal("Need at least fq1 and sample without config file")
            sys.exit(1)

        readunits = get_readunits_from_args(args.fq1, args.fq2)
        # all readunits go into this one sample specified on the command-line
        samples = dict()
        samples[args.sample] = list(readunits.keys())

    # FIXME now exported to ref.cfg. how to auto check there?
    #if not os.path.exists(args.reffa):
    #    logger.fatal("Reference '%s' doesn't exist", args.reffa)
    #    sys.exit(1)
    #
    #for p in ['bwa', 'samtools']:
    #    if not ref_is_indexed(args.reffa, p):
    #        logger.fatal("Reference '%s' doesn't appear to be indexed"
    #                     " with %s", args.reffa, p)
    #        sys.exit(1)

    # turn arguments into cfg_dict that gets merged into pipeline config
    #
    cfg_dict = dict()
    cfg_dict['readunits'] = readunits
    cfg_dict['samples'] = samples
    cfg_dict['mark_dups'] = MARK_DUPS

    pipeline_handler = PipelineHandler(
        PIPELINE_NAME,
        PIPELINE_BASEDIR,
        args,
        cfg_dict,
        cluster_cfgfile=get_cluster_cfgfile(CFG_DIR))
    pipeline_handler.setup_env()
    pipeline_handler.submit(args.no_run)
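
The -v/-q handling shared by all of these wrappers maps repeat counts onto the stdlib logging levels, which are spaced 10 apart. A minimal, self-contained sketch of just that mechanism (the logger name and simulated argv are illustrative):

import argparse
import logging

parser = argparse.ArgumentParser()
parser.add_argument('-v', '--verbose', action='count', default=0)
parser.add_argument('-q', '--quiet', action='count', default=0)
args = parser.parse_args(['-vv'])  # simulate "script -vv"

# WARNING is 30, so each -v subtracts one level and each -q adds one:
# -vv lands on DEBUG (10), -qq on CRITICAL (50) and -qqq on 60,
# which is above every level and silences logging entirely.
level = logging.WARNING + 10 * args.quiet - 10 * args.verbose
logging.basicConfig(level=level)
logging.getLogger(__name__).debug("visible because -vv selects DEBUG")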
Example #2
def main():
    """main function
    """

    parser = argparse.ArgumentParser(
        description=__doc__.format(PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version())
    )

    # generic args
    parser.add_argument("-o", "--outdir", required=True, help="Output directory (may not exist)")
    parser.add_argument("--name", help="Give this analysis run a name (used in email and report)")
    parser.add_argument("--no-mail", action="store_true", help="Don't send mail on completion")
    # site = get_site()
    default = get_default_queue("slave")
    parser.add_argument(
        "-w", "--slave-q", default=default, help="Queue to use for slave jobs (default: {})".format(default)
    )
    default = get_default_queue("master")
    parser.add_argument(
        "-m", "--master-q", default=default, help="Queue to use for master job (default: {})".format(default)
    )
    parser.add_argument("-n", "--no-run", action="store_true")
    parser.add_argument("-v", "--verbose", action="count", default=0, help="Increase verbosity")
    parser.add_argument("-q", "--quiet", action="count", default=0, help="Decrease verbosity")
    cfg_group = parser.add_argument_group("Configuration files (advanced)")
    cfg_group.add_argument(
        "--sample-cfg", help="Config-file (YAML) listing samples and readunits." " Collides with the fq and bam options"
    )
    for name, descr in [("references", "reference sequences"), ("params", "parameters"), ("modules", "modules")]:
        default = os.path.abspath(os.path.join(CFG_DIR, "{}.yaml".format(name)))
        cfg_group.add_argument(
            "--{}-cfg".format(name),
            default=default,
            help="Config-file (yaml) for {}. (default: {})".format(descr, default),
        )

    # pipeline specific args
    parser.add_argument(
        "--normal-fq1",
        nargs="+",
        help="Normal FastQ file/s (gzip only)."
        " Multiple input files supported (auto-sorted)."
        " Note: each file (or pair) gets a unique read-group id."
        " Collides with --sample-cfg.",
    )
    parser.add_argument("--normal-fq2", nargs="+", help="Normal FastQ file/s (if paired) (gzip only). See also --fq1")
    parser.add_argument(
        "--tumor-fq1",
        nargs="+",
        help="Tumor FastQ file/s (gzip only)."
        " Multiple input files supported (auto-sorted)."
        " Note: each file (or pair) gets a unique read-group id."
        " Collides with --sample-cfg.",
    )
    parser.add_argument("--tumor-fq2", nargs="+", help="Tumor FastQ file/s (if paired) (gzip only). See also --fq1")
    parser.add_argument("-t", "--seqtype", required=True, choices=["WGS", "WES", "targeted"], help="Sequencing type")
    parser.add_argument(
        "-l",
        "--intervals",
        help="Intervals file (e.g. bed file) listing regions of interest." " Required for WES and targeted sequencing.",
    )
    parser.add_argument("-D", "--dont-mark-dups", action="store_true", help="Don't mark duplicate reads")
    parser.add_argument(
        "--normal-bam",
        help="Advanced: Injects normal BAM (overwrites normal-fq options)."
        " WARNING: reference and postprocessing need to match pipeline requirements",
    )
    parser.add_argument(
        "--tumor-bam",
        help="Advanced: Injects tumor BAM (overwrites tumor-fq options)."
        " WARNING: reference and postprocessing need to match pipeline requirements",
    )

    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    aux_logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if os.path.exists(args.outdir):
        logger.fatal("Output directory %s already exists", args.outdir)
        sys.exit(1)

    # samples is a dictionary with sample names as key (mostly just
    # one) and readunit keys as value. readunits is a dict with
    # readunits (think: fastq pairs with attributes) as value
    if args.sample_cfg:
        if any([args.normal_fq1, args.normal_fq2, args.tumor_fq1, args.tumor_fq2, args.normal_bam, args.tumor_bam]):
            logger.fatal("Config file overrides fastq and sample input arguments." " Use one or the other")
            sys.exit(1)
        if not os.path.exists(args.sample_cfg):
            logger.fatal("Config file %s does not exist", args.sample_cfg)
            sys.exit(1)
        samples, readunits = get_samples_and_readunits_from_cfgfile(args.sample_cfg)
    else:
        samples = dict()

        if args.normal_bam:
            normal_readunits = dict()
            samples["normal"] = []
            assert os.path.exists(args.normal_bam)
        else:
            if not all([args.normal_fq1, args.tumor_fq1]):
                logger.fatal("Need at least fq1 and sample without config file")
                sys.exit(1)
            normal_readunits = get_readunits_from_args(args.normal_fq1, args.normal_fq2)
            samples["normal"] = list(normal_readunits.keys())

        if args.tumor_bam:
            tumor_readunits = dict()
            samples["tumor"] = []
            assert os.path.exists(args.tumor_bam)
        else:
            tumor_readunits = get_readunits_from_args(args.tumor_fq1, args.tumor_fq2)
            samples["tumor"] = list(tumor_readunits.keys())

        readunits = dict(normal_readunits)
        readunits.update(tumor_readunits)

    assert sorted(samples) == sorted(["normal", "tumor"])

    # FIXME how to
    # if not os.path.exists(reffa):
    #    logger.fatal("Reference '%s' doesn't exist", reffa)
    #    sys.exit(1)
    #
    # for p in ['bwa', 'samtools']:
    #    if not ref_is_indexed(reffa, p):
    #        logger.fatal("Reference '%s' doesn't appear to be indexed with %s", reffa, p)
    #        sys.exit(1)

    if args.seqtype in ["WES", "targeted"]:
        if not args.intervals:
            logger.fatal("Analysis of exome and targeted sequence runs requires a bed file")
            sys.exit(1)
        else:
            if not os.path.exists(args.intervals):
                logger.fatal("Intervals file %s does not exist", args.sample_cfg)
                sys.exit(1)
            logger.warning("Compatilibity between interval file and" " reference not checked")  # FIXME

    # turn arguments into user_data that gets merged into pipeline config
    #
    # generic data first
    user_data = dict()
    user_data["mail_on_completion"] = not args.no_mail
    user_data["readunits"] = readunits
    user_data["samples"] = samples
    if args.name:
        user_data["analysis_name"] = args.name

    user_data["seqtype"] = args.seqtype
    user_data["intervals"] = args.intervals
    user_data["mark_dups"] = not args.dont_mark_dups

    pipeline_handler = PipelineHandler(
        PIPELINE_NAME,
        PIPELINE_BASEDIR,
        args.outdir,
        user_data,
        master_q=args.master_q,
        slave_q=args.slave_q,
        params_cfgfile=args.params_cfg,
        modules_cfgfile=args.modules_cfg,
        refs_cfgfile=args.references_cfg,
        cluster_cfgfile=get_cluster_cfgfile(CFG_DIR),
    )

    pipeline_handler.setup_env()

    # inject existing BAM by symlinking (everything upstream is temporary anyway)
    for sample, bam in [("normal", args.normal_bam), ("tumor", args.tumor_bam)]:
        if bam:
            # target as defined in Snakefile!
            target = os.path.join(args.outdir, "out", sample, "{}.bwamem.lofreq.dedup.lacer.bam".format(sample))
            os.makedirs(os.path.dirname(target))
            os.symlink(os.path.abspath(bam), target)

    pipeline_handler.submit(args.no_run)
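
The samples/readunits split used throughout these examples is easiest to see with concrete values: readunits carries the fastq-level detail, while samples only points at readunit keys. The attribute names inside a readunit below are assumptions for illustration, since get_readunits_from_args() is not shown in these listings:

# Hypothetical shapes only; the real readunit attributes may differ.
readunits = {
    "unit-1": {"fq1": "/data/normal_R1.fastq.gz", "fq2": "/data/normal_R2.fastq.gz"},
    "unit-2": {"fq1": "/data/tumor_R1.fastq.gz", "fq2": "/data/tumor_R2.fastq.gz"},
}
# samples maps a sample name to the keys of the readunits it owns,
# mirroring the normal/tumor layout built in example #2.
samples = {
    "normal": ["unit-1"],
    "tumor": ["unit-2"],
}
assert sorted(samples) == sorted(["normal", "tumor"])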
Example #3
def main():
    """main function
    """

    parser = argparse.ArgumentParser(description=__doc__.format(
        PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version()))

    # generic args
    parser.add_argument('-c', "--config",
                        help="Config file (YAML) listing samples and readunits."
                        " Collides with -1, -2 and -s")
    parser.add_argument('-o', "--outdir", required=True,
                        help="Output directory (must not exist)")
    parser.add_argument('--no-mail', action='store_true',
                        help="Don't send mail on completion")
    site = get_site()
    default = DEFAULT_SLAVE_Q.get(site, None)
    parser.add_argument('-w', '--slave-q', default=default,
                        help="Queue to use for slave jobs (default: {})".format(default))
    default = DEFAULT_MASTER_Q.get(site, None)
    parser.add_argument('-m', '--master-q', default=default,
                        help="Queue to use for master job (default: {})".format(default))
    parser.add_argument('-n', '--no-run', action='store_true')
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")

    # pipeline specific args
    parser.add_argument('-1', "--fq1", nargs="+",
                        help="FastQ file/s (gzip only)."
                        " Multiple input files supported (auto-sorted)."
                        " Note: each file (or pair) gets a unique read-group id."
                        " Collides with -c.")
    parser.add_argument('-2', "--fq2", nargs="+",
                        help="FastQ file/s (if paired) (gzip only). See also --fq1")
    parser.add_argument('-s', "--sample",
                        help="Sample name. Collides with -c.")
    parser.add_argument('-C', "--cuffdiff", action='store_true',
                        dest="run_cuffdiff",
                        help="Also run cuffdiff")
    parser.add_argument('-S', '--stranded', action='store_true',
                        help="Stranded library prep (default is unstranded)")

    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)
    aux_logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    if os.path.exists(args.outdir):
        logger.fatal("Output directory %s already exists", args.outdir)
        sys.exit(1)

    # samples is a dictionary with sample names as key (mostly just
    # one) and readunit keys as value. readunits is a dict with
    # readunits (think: fastq pairs with attributes) as value
    if args.config:
        if any([args.fq1, args.fq2, args.sample]):
            logger.fatal("Config file overrides fastq and sample input arguments."
                         " Use one or the other")
            sys.exit(1)
        if not os.path.exists(args.config):
            logger.fatal("Config file %s does not exist", args.config)
            sys.exit(1)
        samples, readunits = get_samples_and_readunits_from_cfgfile(args.config)
    else:
        if not all([args.fq1, args.sample]):
            logger.fatal("Need at least fq1 and sample without config file")
            sys.exit(1)

        readunits = get_readunits_from_args(args.fq1, args.fq2)
        # all readunits go into this one sample specified on the command-line
        samples = dict()
        samples[args.sample] = list(readunits.keys())

    # FIXME checks on reffa index (currently not exposed via args)


    # turn arguments into user_data that gets merged into pipeline config
    #
    # generic data first
    user_data = dict()
    user_data['mail_on_completion'] = not args.no_mail
    user_data['readunits'] = readunits
    user_data['samples'] = samples

    user_data['stranded'] = args.stranded
    user_data['run_cuffdiff'] = args.run_cuffdiff
    user_data['paired_end'] = any(ru.get('fq2') for ru in readunits.values())
    if user_data['paired_end']:
        assert all(ru.get('fq2') for ru in readunits.values()), (
            "Can't handle mix of paired-end and single-end")

    pipeline_handler = PipelineHandler(
        PIPELINE_NAME, PIPELINE_BASEDIR,
        args.outdir, user_data, site=site,
        master_q=args.master_q, slave_q=args.slave_q)
    pipeline_handler.setup_env()
    pipeline_handler.submit(args.no_run)
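
Example #3 derives its queue defaults from the execution site before the parser is built, so --help already shows the right queue. A sketch of that pattern with made-up site names and queue tables (get_site, DEFAULT_SLAVE_Q and their values are stand-ins, not the project's actual definitions):

import argparse

def get_site():
    return "siteA"  # stand-in for the real site detection

DEFAULT_SLAVE_Q = {"siteA": "short.q"}  # assumed shape of the queue table

site = get_site()
default = DEFAULT_SLAVE_Q.get(site, None)  # None if the site is unknown
parser = argparse.ArgumentParser()
parser.add_argument('-w', '--slave-q', default=default,
                    help="Queue to use for slave jobs (default: {})".format(default))
print(parser.parse_args([]).slave_q)  # short.q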
Example #4
def main():
    """main function
    """

    default_parser = default_argparser(CFG_DIR, with_readunits=True)
    parser = configargparse.ArgumentParser(description=__doc__.format(
        PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version()),
                                           parents=[default_parser])

    parser._optionals.title = "Arguments"
    # pipeline specific args
    parser.add_argument('-t',
                        "--seqtype",
                        required=True,
                        choices=['WGS', 'WES', 'targeted'],
                        help="Sequencing type")
    parser.add_argument('-l',
                        "--bed",
                        help="Bed file listing regions of interest."
                        " Required for WES and targeted sequencing.")
    parser.add_argument('-D',
                        '--dont-mark-dups',
                        action='store_true',
                        help="Don't mark duplicate reads")
    # raw bam not possible because the pipeline splits on the fly into chromosomes
    parser.add_argument(
        '--proc-bam',
        help="Advanced: Injects processed BAM (overwrites fq options)."
        " WARNING: reference and pre-processing need to match pipeline requirements"
    )
    parser.add_argument('--bam-only',
                        action='store_true',
                        help="Don't call variants, just process BAM file")

    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    aux_logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if os.path.exists(args.outdir):
        logger.fatal("Output directory %s already exists", args.outdir)
        sys.exit(1)

    # samples is a dictionary with sample names as key (mostly just
    # one) and readunit keys as value. readunits is a dict with
    # readunits (think: fastq pairs with attributes) as value
    if args.sample_cfg:
        if any([args.fq1, args.fq2, args.sample, args.proc_bam]):
            logger.fatal("Config file overrides fastq and sample arguments."
                         " Use one or the other")
            sys.exit(1)
        if not os.path.exists(args.sample_cfg):
            logger.fatal("Config file %s does not exist", args.sample_cfg)
            sys.exit(1)
        samples, readunits = get_samples_and_readunits_from_cfgfile(
            args.sample_cfg)

    else:  # no sample config, so input is either fastq or existing bam
        samples = dict()

        if not args.sample:
            logger.fatal("Need sample name if not using config file")
            sys.exit(1)

        if args.proc_bam:
            assert not args.fq1, "BAM injection overwrites fastq arguments"
            assert os.path.exists(args.proc_bam)
            readunits = dict()
            samples[args.sample] = []

        elif args.fq1:
            readunits = get_readunits_from_args(args.fq1, args.fq2)
            # all readunits go into this one sample specified on the command-line
            samples[args.sample] = list(readunits.keys())

        else:
            logger.fatal(
                "Need at least one fastq file as argument if not using a config file"
            )
            sys.exit(1)

    if args.seqtype in ['WES', 'targeted']:
        if not args.bed:
            logger.fatal(
                "Analysis of exome and targeted sequence runs requires a bed file"
            )
            sys.exit(1)
        else:
            if not os.path.exists(args.bed):
                logger.fatal("Bed file %s does not exist", args.sample_cfg)
                sys.exit(1)

    # turn arguments into cfg_dict (gets merged with other configs late)
    #
    cfg_dict = dict()
    cfg_dict['readunits'] = readunits
    cfg_dict['samples'] = samples
    cfg_dict['seqtype'] = args.seqtype
    # always safe; might be used for WGS as well
    cfg_dict['intervals'] = os.path.abspath(args.bed) if args.bed else None
    cfg_dict['mark_dups'] = not args.dont_mark_dups
    cfg_dict['bam_only'] = args.bam_only

    pipeline_handler = PipelineHandler(
        PIPELINE_NAME,
        PIPELINE_BASEDIR,
        args,
        cfg_dict,
        cluster_cfgfile=get_cluster_cfgfile(CFG_DIR))
    pipeline_handler.setup_env()

    # Inject existing BAM by symlinking (everything upstream is temporary anyway)
    # WARNING: filename has to match definition in Snakefile!
    if args.proc_bam:
        target = os.path.join(args.outdir, "out", args.sample,
                              "{}.bwamem.lofreq".format(args.sample))
        if cfg_dict['mark_dups']:
            target += ".dedup"
        target += ".lacer.bam"
        os.makedirs(os.path.dirname(target))
        os.symlink(os.path.abspath(args.proc_bam), target)

    pipeline_handler.submit(args.no_run)
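
The BAM injection in examples #2, #4 and #7 only works because the symlink lands exactly where the Snakefile expects its intermediate output; note that a bare os.makedirs() fails if the directory already exists. A slightly more defensive helper (a sketch, not the project's code; the filename pattern is copied from the listings but not verified against the Snakefile):

import os

def inject_bam(bam, target):
    # target must match the path the Snakefile defines for this stage
    assert os.path.exists(bam), "BAM to inject does not exist"
    os.makedirs(os.path.dirname(target), exist_ok=True)
    os.symlink(os.path.abspath(bam), target)

# e.g. inject_bam(args.proc_bam,
#                 os.path.join(args.outdir, "out", args.sample,
#                              "{}.bwamem.lofreq.dedup.lacer.bam".format(args.sample)))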
Example #5
def main():
    """main function
    """

    parser = argparse.ArgumentParser(description=__doc__.format(
        PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version()))

    # generic args
    parser.add_argument('-o', "--outdir", required=True,
                        help="Output directory (may not exist)")
    parser.add_argument('--name',
                        help="Give this analysis run a name (used in email and report)")
    parser.add_argument('--no-mail', action='store_true',
                        help="Don't send mail on completion")
    #site = get_site()
    default = get_default_queue('slave')
    parser.add_argument('-w', '--slave-q', default=default,
                        help="Queue to use for slave jobs (default: {})".format(default))
    default = get_default_queue('master')
    parser.add_argument('-m', '--master-q', default=default,
                        help="Queue to use for master job (default: {})".format(default))
    parser.add_argument('-n', '--no-run', action='store_true')
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")
    cfg_group = parser.add_argument_group('Configuration files (advanced)')
    cfg_group.add_argument('--sample-cfg',
                           help="Config-file (YAML) listing samples and readunits."
                           " Collides with -1, -2 and -s")
    for name, descr in [("references", "reference sequences"),
                        ("params", "parameters"),
                        ("modules", "modules")]:
        default = os.path.abspath(os.path.join(CFG_DIR, "{}.yaml".format(name)))
        cfg_group.add_argument('--{}-cfg'.format(name),
                               default=default,
                               help="Config-file (yaml) for {}. (default: {})".format(descr, default))
        
    # pipeline specific args
    parser.add_argument('-1', "--fq1", nargs="+",
                        help="FastQ file/s (gzip only)."
                        " Multiple input files supported (auto-sorted)."
                        " Note: each file (or pair) gets a unique read-group id."
                        " Collides with --sample-cfg.")
    parser.add_argument('-2', "--fq2", nargs="+",
                        help="FastQ file/s (if paired) (gzip only). See also --fq1")
    parser.add_argument('-s', "--sample",
                        help="Sample name. Collides with --sample-cfg.")
    parser.add_argument('-t', "--seqtype", required=True,
                        choices=['WGS', 'WES', 'targeted'],
                        help="Sequencing type")
    parser.add_argument('-l', "--intervals",
                        help="Intervals file (e.g. bed file) listing regions of interest."
                        " Required for WES and targeted sequencing.")
    parser.add_argument('-D', '--dont-mark-dups', action='store_true',
                        help="Don't mark duplicate reads")

    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)
    aux_logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    if os.path.exists(args.outdir):
        logger.fatal("Output directory %s already exists", args.outdir)
        sys.exit(1)

    # samples is a dictionary with sample names as key (mostly just
    # one) and readunit keys as value. readunits is a dict with
    # readunits (think: fastq pairs with attributes) as value
    if args.sample_cfg:
        if any([args.fq1, args.fq2, args.sample]):
            logger.fatal("Config file overrides fastq and sample input arguments."
                         " Use one or the other")
            sys.exit(1)
        if not os.path.exists(args.sample_cfg):
            logger.fatal("Config file %s does not exist", args.sample_cfg)
            sys.exit(1)
        samples, readunits = get_samples_and_readunits_from_cfgfile(args.sample_cfg)
    else:
        if not all([args.fq1, args.sample]):
            logger.fatal("Need at least fq1 and sample without config file")
            sys.exit(1)

        readunits = get_readunits_from_args(args.fq1, args.fq2)
        # all readunits go into this one sample specified on the command-line
        samples = dict()
        samples[args.sample] = list(readunits.keys())

    # FIXME how to?
    #for p in ['bwa', 'samtools']:
    #    if not ref_is_indexed(args.reffa, p):
    #        logger.fatal("Reference '%s' doesn't appear to be indexed with %s", args.reffa, p)
    #        sys.exit(1)

    if args.seqtype in ['WES', 'targeted']:
        if not args.intervals:
            logger.fatal("Analysis of exome and targeted sequence runs requires a bed file")
            sys.exit(1)
        else:
            if not os.path.exists(args.intervals):
                logger.fatal("Intervals file %s does not exist", args.sample_cfg)
                sys.exit(1)
            logger.warning("Compatilibity between interval file and"
                           " reference not checked")# FIXME

    # turn arguments into user_data that gets merged into pipeline config
    #
    # generic data first
    user_data = dict()
    user_data['mail_on_completion'] = not args.no_mail
    user_data['readunits'] = readunits
    user_data['samples'] = samples
    if args.name:
        user_data['analysis_name'] = args.name

    user_data['seqtype'] = args.seqtype
    user_data['intervals'] = args.intervals
    user_data['mark_dups'] = not args.dont_mark_dups

    pipeline_handler = PipelineHandler(
        PIPELINE_NAME, PIPELINE_BASEDIR,
        args.outdir, user_data,
        master_q=args.master_q,
        slave_q=args.slave_q,
        params_cfgfile=args.params_cfg,
        modules_cfgfile=args.modules_cfg,
        refs_cfgfile=args.references_cfg,
        cluster_cfgfile=get_cluster_cfgfile(CFG_DIR))
    
    pipeline_handler.setup_env()
    pipeline_handler.submit(args.no_run)
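
Every wrapper builds its --help description by formatting the module docstring with the pipeline name and version. A self-contained sketch of the templating idea (the docstring text and the placeholder values are invented):

"""Wrapper for {PIPELINE_NAME} (version {PIPELINE_VERSION})."""

import argparse

PIPELINE_NAME = "demo-pipeline"  # placeholder, not the real value

def get_pipeline_version():
    return "0.0.0"  # placeholder, not the real value

parser = argparse.ArgumentParser(description=__doc__.format(
    PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version()))
parser.parse_args([])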
Example #6
File: vipr.py Project: kohjy-ag/pipelines
def main():
    """main function
    """

    default_parser = default_argparser(CFG_DIR, allow_missing_cfgfile=True)
    parser = argparse.ArgumentParser(description=__doc__.format(
        PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version()),
                                     parents=[default_parser])

    parser._optionals.title = "Arguments"
    # pipeline specific args
    parser.add_argument(
        '-1',
        "--fq1",
        nargs="+",
        help="FastQ file/s (gzip only)."
        " Multiple input files supported (auto-sorted)."
        " Note: each file (or pair) gets a unique read-group id."
        " Collides with --sample-cfg.")
    parser.add_argument(
        '-2',
        "--fq2",
        nargs="+",
        help="FastQ file/s (if paired) (gzip only). See also --fq1")
    parser.add_argument('-s',
                        "--sample",
                        help="Sample name. Collides with --sample-cfg.")
    parser.add_argument(
        '-r', "--reffa", required=True,
        help="Reference genome")  # FIXME create local copy for indexing?
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    aux_logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if os.path.exists(args.outdir):
        logger.fatal("Output directory %s already exists", args.outdir)
        sys.exit(1)

    # samples is a dictionary with sample names as key (mostly just
    # one) and readunit keys as value. readunits is a dict with
    # readunits (think: fastq pairs with attributes) as value
    if args.sample_cfg:
        if any([args.fq1, args.fq2, args.sample]):
            logger.fatal("Config file overrides fastq and sample arguments."
                         " Use one or the other")
            sys.exit(1)
        if not os.path.exists(args.sample_cfg):
            logger.fatal("Config file %s does not exist", args.sample_cfg)
            sys.exit(1)
        samples, readunits = get_samples_and_readunits_from_cfgfile(
            args.sample_cfg)

    else:  # no sample config, so input is either fastq or existing bam
        samples = dict()

        if not args.sample:
            logger.fatal("Need sample name if not using config file")
            sys.exit(1)

        if args.fq1:
            readunits = get_readunits_from_args(args.fq1, args.fq2)
            # all readunits go into this one sample specified on the command-line
            samples[args.sample] = list(readunits.keys())

        else:
            logger.fatal(
                "Need at least one fastq file as argument if not using a config file"
            )
            sys.exit(1)

    for ru in readunits.values():
        assert ru['fq2'], (
            "FastQ R2 missing, but assemblers assume paired-end reads")

    # turn arguments into cfg_dict (gets merged with other configs late)
    #
    cfg_dict = dict()
    cfg_dict['readunits'] = readunits
    cfg_dict['samples'] = samples
    assert os.path.exists(args.reffa)
    # FIXME only works because the references yaml is missing and thus not overwritten
    cfg_dict['references'] = {'genome': os.path.abspath(args.reffa)}

    pipeline_handler = PipelineHandler(
        PIPELINE_NAME,
        PIPELINE_BASEDIR,
        args,
        cfg_dict,
        cluster_cfgfile=get_cluster_cfgfile(CFG_DIR))
    pipeline_handler.setup_env()
    pipeline_handler.submit(args.no_run)
Example #7
def main():
    """main function
    """

    default_parser = default_argparser(CFG_DIR)
    parser = configargparse.ArgumentParser(description=__doc__.format(
        PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version()),
                                           parents=[default_parser])

    parser._optionals.title = "Arguments"
    # pipeline specific args
    parser.add_argument(
        "--normal-fq1",
        nargs="+",
        help="Normal FastQ file/s (gzip only)."
        " Multiple input files supported (auto-sorted)."
        " Note: each file (or pair) gets a unique read-group id."
        " Collides with --sample-cfg.")
    parser.add_argument(
        '--normal-fq2',
        nargs="+",
        help=
        "Normal FastQ file/s (if paired) (gzip only). See also --normal-fq1")
    parser.add_argument(
        "--tumor-fq1",
        nargs="+",
        help="Tumor FastQ file/s (gzip only)."
        " Multiple input files supported (auto-sorted)."
        " Note: each file (or pair) gets a unique read-group id."
        " Collides with --sample-cfg.")
    parser.add_argument(
        '--tumor-fq2',
        nargs="+",
        help="Tumor FastQ file/s (if paired) (gzip only). See also --tumor-fq1"
    )
    parser.add_argument('-t',
                        "--seqtype",
                        required=True,
                        choices=['WGS', 'WES', 'targeted'],
                        help="Sequencing type")
    parser.add_argument('-l',
                        "--bed",
                        help="Bed file listing regions of interest."
                        " Required for WES and targeted sequencing.")
    parser.add_argument('-D',
                        '--dont-mark-dups',
                        action='store_true',
                        help="Don't mark duplicate reads")
    parser.add_argument(
        '--normal-bam',
        help="Advanced: Injects normal BAM (overwrites normal-fq options)."
        " WARNING: reference and postprocessing need to match pipeline requirements"
    )
    parser.add_argument(
        '--tumor-bam',
        help="Advanced: Injects tumor BAM (overwrites tumor-fq options)."
        " WARNING: reference and postprocessing need to match pipeline requirements"
    )

    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    aux_logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if os.path.exists(args.outdir):
        logger.fatal("Output directory %s already exists", args.outdir)
        sys.exit(1)

    # samples is a dictionary with sample names as key (mostly just
    # one) and readunit keys as value. readunits is a dict with
    # readunits (think: fastq pairs with attributes) as value
    if args.sample_cfg:
        if any([
                args.normal_fq1, args.normal_fq2, args.tumor_fq1,
                args.tumor_fq2, args.normal_bam, args.tumor_bam
        ]):
            logger.fatal(
                "Config file overrides fastq and sample input arguments."
                " Use one or the other")
            sys.exit(1)
        if not os.path.exists(args.sample_cfg):
            logger.fatal("Config file %s does not exist", args.sample_cfg)
            sys.exit(1)
        samples, readunits = get_samples_and_readunits_from_cfgfile(
            args.sample_cfg)
    else:
        samples = dict()

        if args.normal_bam:
            normal_readunits = dict()
            samples["normal"] = []
            assert os.path.exists(args.normal_bam)
        else:
            if not all([args.normal_fq1, args.tumor_fq1]):
                logger.fatal(
                    "Need at least normal and tumor fq1 when not using a config file")
                sys.exit(1)
            normal_readunits = get_readunits_from_args(args.normal_fq1,
                                                       args.normal_fq2)
            samples["normal"] = list(normal_readunits.keys())

        if args.tumor_bam:
            tumor_readunits = dict()
            samples["tumor"] = []
            assert os.path.exists(args.tumor_bam)
        else:
            tumor_readunits = get_readunits_from_args(args.tumor_fq1,
                                                      args.tumor_fq2)
            samples["tumor"] = list(tumor_readunits.keys())

        readunits = dict(normal_readunits)
        readunits.update(tumor_readunits)

    assert sorted(samples) == sorted(["normal", "tumor"])

    # FIXME how to
    # if not os.path.exists(reffa):
    #    logger.fatal("Reference '%s' doesn't exist", reffa)
    #    sys.exit(1)
    #
    #for p in ['bwa', 'samtools']:
    #    if not ref_is_indexed(reffa, p):
    #        logger.fatal("Reference '%s' doesn't appear to be indexed with %s", reffa, p)
    #        sys.exit(1)

    if args.seqtype in ['WES', 'targeted']:
        if not args.bed:
            logger.fatal(
                "Analysis of exome and targeted sequence runs requires a bed file"
            )
            sys.exit(1)
        else:
            if not os.path.exists(args.bed):
                logger.fatal("Bed file %s does not exist", args.sample_cfg)
                sys.exit(1)
            logger.warning("Compatilibity between bed file and"
                           " reference not checked")  # FIXME

    # turn arguments into cfg_dict that gets merged into pipeline config
    #
    cfg_dict = dict()
    cfg_dict['readunits'] = readunits
    cfg_dict['samples'] = samples

    cfg_dict['seqtype'] = args.seqtype
    cfg_dict['intervals'] = os.path.abspath(args.bed) if args.bed else None
    cfg_dict['mark_dups'] = not args.dont_mark_dups

    pipeline_handler = PipelineHandler(
        PIPELINE_NAME,
        PIPELINE_BASEDIR,
        args,
        cfg_dict,
        cluster_cfgfile=get_cluster_cfgfile(CFG_DIR))

    pipeline_handler.setup_env()

    # inject existing BAM by symlinking (everything upstream is temporary anyway)
    for sample, bam in [("normal", args.normal_bam),
                        ("tumor", args.tumor_bam)]:
        if bam:
            # target as defined in Snakefile!
            target = os.path.join(
                args.outdir, "out", sample,
                "{}.bwamem.lofreq.dedup.lacer.bam".format(sample))
            os.makedirs(os.path.dirname(target))
            os.symlink(os.path.abspath(bam), target)

    pipeline_handler.submit(args.no_run)
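
The "Collides with --sample-cfg" constraints are enforced by hand after parse_args(). For a simple two-way collision, argparse can express the same rule declaratively; this is an alternative technique, not what the listings do (and it scales poorly here, where --sample-cfg collides with several options at once):

import argparse

parser = argparse.ArgumentParser()
group = parser.add_mutually_exclusive_group()
group.add_argument("--sample-cfg", help="YAML listing samples and readunits")
group.add_argument("-s", "--sample", help="Sample name")

parser.parse_args(["-s", "mysample"])                  # accepted
# parser.parse_args(["-s", "x", "--sample-cfg", "y"])  # argparse exits with an error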
Example #8
def main():
    """main function
    """

    default_parser = default_argparser(CFG_DIR, with_readunits=True)
    parser = configargparse.ArgumentParser(description=__doc__.format(
        PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version()),
                                           parents=[default_parser])

    parser._optionals.title = "Arguments"
    # pipeline specific args
    default = 2000
    parser.add_argument(
        "--fragment-length",
        type=int,
        default=default,
        help="Fragment length argument for Bowtie (default {})".format(
            default))
    default = 200
    parser.add_argument(
        "--extsize",
        type=int,
        default=default,
        help=
        "extsize argument for MACS2; only used for single-end reads (default {})"
        .format(default))
    default = -100
    parser.add_argument(
        "--shift",
        type=int,
        default=default,
        help=
        "shift argument for MACS2; only used for single-end reads (default {})"
        .format(default))
    default = 250
    parser.add_argument(
        "--peak-ext-bp",
        type=int,
        default=default,
        help="Extension around peaks for bed creation (default {})".format(
            default))
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    aux_logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if os.path.exists(args.outdir):
        logger.fatal("Output directory %s already exists", args.outdir)
        sys.exit(1)

    # samples is a dictionary with sample names as key (mostly just
    # one) and readunit keys as value. readunits is a dict with
    # readunits (think: fastq pairs with attributes) as value
    if args.sample_cfg:
        if any([args.fq1, args.fq2, args.sample]):
            logger.fatal(
                "Config file overrides fastq and sample input arguments."
                " Use one or the other")
            sys.exit(1)
        if not os.path.exists(args.sample_cfg):
            logger.fatal("Config file %s does not exist", args.sample_cfg)
            sys.exit(1)
        samples, readunits = get_samples_and_readunits_from_cfgfile(
            args.sample_cfg)
    else:
        if not all([args.fq1, args.sample]):
            logger.fatal("Need at least fq1 and sample without config file")
            sys.exit(1)

        readunits = get_readunits_from_args(args.fq1, args.fq2)
        # all readunits go into this one sample specified on the command-line
        samples = dict()
        samples[args.sample] = list(readunits.keys())

    # turn arguments into cfg_dict that gets merged into pipeline config
    #
    cfg_dict = dict()
    cfg_dict['readunits'] = readunits
    cfg_dict['samples'] = samples
    cfg_dict['paired_end'] = any(ru.get('fq2') for ru in readunits.values())
    if cfg_dict['paired_end']:
        assert all(ru.get('fq2') for ru in readunits.values()), (
            "Can't handle mix of paired-end and single-end")
    cfg_dict['mapper'] = 'bowtie2'  # FIXME fixed for now
    # cfg_dict["bowtie2_custom_args"]
    # cfg_dict['platform']
    # cfg_dict['center']
    # cfg_dict["macs2_custom_args"]

    cfg_dict['fragment_length'] = args.fragment_length
    cfg_dict['shift'] = args.shift
    cfg_dict['extsize'] = args.extsize
    cfg_dict["peak_ext_bp"] = args.peak_ext_bp

    pipeline_handler = PipelineHandler(
        PIPELINE_NAME,
        PIPELINE_BASEDIR,
        args,
        cfg_dict,
        cluster_cfgfile=get_cluster_cfgfile(CFG_DIR))
    pipeline_handler.setup_env()
    pipeline_handler.submit(args.no_run)
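
Example #8 repeats the same default-variable/format-into-help dance for each numeric option. A small helper keeps a default and its help text in sync (a refactoring sketch, not the project's API):

import argparse

def add_int_arg(parser, flag, default, descr):
    # the help string always echoes the actual default
    parser.add_argument(flag, type=int, default=default,
                        help="{} (default {})".format(descr, default))

parser = argparse.ArgumentParser()
add_int_arg(parser, "--fragment-length", 2000, "Fragment length argument for Bowtie")
add_int_arg(parser, "--extsize", 200, "extsize argument for MACS2")
add_int_arg(parser, "--shift", -100, "shift argument for MACS2")
add_int_arg(parser, "--peak-ext-bp", 250, "Extension around peaks for bed creation")
args = parser.parse_args([])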
Example #9
def main():
    """main function
    """

    default_parser = default_argparser(CFG_DIR, with_readunits=True)
    parser = configargparse.ArgumentParser(description=__doc__.format(
        PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version()),
                                           parents=[default_parser])

    parser._optionals.title = "Arguments"
    # pipeline specific args
    default = ['kraken', 'metaphlan2']
    parser.add_argument("-p",
                        "--profilers",
                        nargs='+',
                        default=default,
                        help="Profilers to run (default = {}".format(
                            ", ".join(default)))

    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    aux_logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if os.path.exists(args.outdir):
        logger.fatal("Output directory %s already exists", args.outdir)
        sys.exit(1)

    # samples is a dictionary with sample names as key (mostly just
    # one) and readunit keys as value. readunits is a dict with
    # readunits (think: fastq pairs with attributes) as value
    if args.sample_cfg:
        if any([args.fq1, args.fq2, args.sample]):
            logger.fatal("Config file overrides fastq and sample arguments."
                         " Use one or the other")
            sys.exit(1)
        if not os.path.exists(args.sample_cfg):
            logger.fatal("Config file %s does not exist", args.sample_cfg)
            sys.exit(1)
        samples, readunits = get_samples_and_readunits_from_cfgfile(
            args.sample_cfg)

    else:  # no sample config, so input is either fastq or existing bam
        samples = dict()

        if not args.sample:
            logger.fatal("Need sample name if not using config file")
            sys.exit(1)

        if args.fq1:
            readunits = get_readunits_from_args(args.fq1, args.fq2)
            # all readunits go into this one sample specified on the command-line
            samples[args.sample] = list(readunits.keys())

        else:
            logger.fatal(
                "Need at least one fastq file as argument if not using a config file"
            )
            sys.exit(1)

    for ru in readunits.values():
        assert ru['fq2'], (
            "FastQ R2 missing, but pipelines requires paired-end reads")

    # turn arguments into cfg_dict (gets merged with other configs late)
    #
    cfg_dict = dict()
    cfg_dict['readunits'] = readunits
    cfg_dict['samples'] = samples
    cfg_dict['profilers'] = args.profilers

    pipeline_handler = PipelineHandler(
        PIPELINE_NAME,
        PIPELINE_BASEDIR,
        args,
        cfg_dict,
        cluster_cfgfile=get_cluster_cfgfile(CFG_DIR))
    pipeline_handler.setup_env()
    pipeline_handler.submit(args.no_run)
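
The -p/--profilers option in example #9 accepts arbitrary strings, so an unknown profiler name would only fail later inside the pipeline. Adding choices= makes argparse reject it up front; the valid set below is assumed to equal the default, which may be narrower than what the pipeline actually supports:

import argparse

parser = argparse.ArgumentParser()
default = ['kraken', 'metaphlan2']
parser.add_argument("-p", "--profilers", nargs='+', default=default,
                    choices=default,  # assumption: the defaults are the full valid set
                    help="Profilers to run (default = {})".format(", ".join(default)))

print(parser.parse_args([]).profilers)   # ['kraken', 'metaphlan2']
# parser.parse_args(["-p", "bogus"]) would exit with an argparse error.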
Example #10
def main():
    """main function
    """

    default_parser = default_argparser(CFG_DIR, with_readunits=True)
    parser = configargparse.ArgumentParser(description=__doc__.format(
        PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version()),
                                           parents=[default_parser])

    parser._optionals.title = "Arguments"
    # pipeline specific args: none
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    aux_logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if os.path.exists(args.outdir):
        logger.fatal("Output directory %s already exists", args.outdir)
        sys.exit(1)

    # samples is a dictionary with sample names as key (mostly just
    # one) and readunit keys as value. readunits is a dict with
    # readunits (think: fastq pairs with attributes) as value
    if args.sample_cfg:
        if any([args.fq1, args.fq2, args.sample]):
            logger.fatal(
                "Config file overrides fastq and sample input arguments."
                " Use one or the other")
            sys.exit(1)
        if not os.path.exists(args.sample_cfg):
            logger.fatal("Config file %s does not exist", args.sample_cfg)
            sys.exit(1)
        samples, readunits = get_samples_and_readunits_from_cfgfile(
            args.sample_cfg)
    else:
        if not all([args.fq1, args.sample]):
            logger.fatal("Need at least fq1 and sample without config file")
            sys.exit(1)

        readunits = get_readunits_from_args(args.fq1, args.fq2)
        # all readunits go into this one sample specified on the command-line
        samples = dict()
        samples[args.sample] = list(readunits.keys())

    # FIXME implement checks on reffa index (currently not exposed via args)

    # turn arguments into cfg_dict that gets merged into pipeline config
    #
    cfg_dict = dict()
    cfg_dict['readunits'] = readunits
    cfg_dict['samples'] = samples

    pipeline_handler = PipelineHandler(
        PIPELINE_NAME,
        PIPELINE_BASEDIR,
        args,
        cfg_dict,
        cluster_cfgfile=get_cluster_cfgfile(CFG_DIR))
    pipeline_handler.setup_env()
    pipeline_handler.submit(args.no_run)
Example #11
def main():
    """main function
    """

    default_parser = default_argparser(CFG_DIR)
    parser = argparse.ArgumentParser(description=__doc__.format(
        PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version()),
                                     parents=[default_parser])

    parser._optionals.title = "Arguments"
    # pipeline specific args
    parser.add_argument(
        '-1',
        "--fq1",
        nargs="+",
        help="FastQ file/s (gzip only)."
        " Multiple input files supported (auto-sorted)."
        " Note: each file (or pair) gets a unique read-group id."
        " Collides with --sample-cfg.")
    parser.add_argument(
        '-2',
        "--fq2",
        nargs="+",
        help="FastQ file/s (if paired) (gzip only). See also --fq1")
    parser.add_argument('-s',
                        "--sample",
                        help="Sample name. Collides with --sample-cfg.")
    parser.add_argument('-C',
                        "--cuffdiff",
                        action='store_true',
                        dest="run_cuffdiff",
                        help="Also run cuffdiff")
    parser.add_argument('-S',
                        '--stranded',
                        action='store_true',
                        help="Stranded library prep (default is unstranded)")

    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    aux_logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if os.path.exists(args.outdir):
        logger.fatal("Output directory %s already exists", args.outdir)
        sys.exit(1)

    # samples is a dictionary with sample names as key (mostly just
    # one) and readunit keys as value. readunits is a dict with
    # readunits (think: fastq pairs with attributes) as value
    if args.sample_cfg:
        if any([args.fq1, args.fq2, args.sample]):
            logger.fatal(
                "Config file overrides fastq and sample input arguments."
                " Use one or the other")
            sys.exit(1)
        if not os.path.exists(args.sample_cfg):
            logger.fatal("Config file %s does not exist", args.sample_cfg)
            sys.exit(1)
        samples, readunits = get_samples_and_readunits_from_cfgfile(
            args.sample_cfg)
    else:
        if not all([args.fq1, args.sample]):
            logger.fatal("Need at least fq1 and sample without config file")
            sys.exit(1)

        readunits = get_readunits_from_args(args.fq1, args.fq2)
        # all readunits go into this one sample specified on the command-line
        samples = dict()
        samples[args.sample] = list(readunits.keys())

    # FIXME checks on reffa index (currently not exposed via args)

    # turn arguments into cfg_dict that gets merged into pipeline config
    #
    cfg_dict = dict()
    cfg_dict['readunits'] = readunits
    cfg_dict['samples'] = samples

    cfg_dict['stranded'] = args.stranded
    cfg_dict['run_cuffdiff'] = args.run_cuffdiff
    cfg_dict['paired_end'] = any(ru.get('fq2') for ru in readunits.values())
    if cfg_dict['paired_end']:
        assert all(ru.get('fq2') for ru in readunits.values()), (
            "Can't handle mix of paired-end and single-end")

    pipeline_handler = PipelineHandler(
        PIPELINE_NAME,
        PIPELINE_BASEDIR,
        args,
        cfg_dict,
        cluster_cfgfile=get_cluster_cfgfile(CFG_DIR))
    pipeline_handler.setup_env()
    pipeline_handler.submit(args.no_run)
Example #12
def main():
    """main function
    """

    default_parser = default_argparser(CFG_DIR, with_readunits=True)
    parser = configargparse.ArgumentParser(description=__doc__.format(
        PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version()),
                                           parents=[default_parser])

    parser._optionals.title = "Arguments"
    # pipeline specific args
    parser.add_argument("--cuffdiff",
                        action='store_true',
                        dest="run_cuffdiff",
                        help="Also run cuffdiff")
    choices = ["none", "forward", "reverse"]
    default = "none"
    parser.add_argument(
        '--stranded',
        choices=choices,
        default=default,
        help=
        "Stranded library prep (default is {}; Following RSEM definition but see also"
        " http://chipster.csc.fi/manual/library-type-summary.html)".format(
            default))
    parser.add_argument(
        '--rsem-estimate-rspd',
        action='store_true',
        help="Estimate read start position distribution in RSEM")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    aux_logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if os.path.exists(args.outdir):
        logger.fatal("Output directory %s already exists", args.outdir)
        sys.exit(1)

    # samples is a dictionary with sample names as key (mostly just
    # one) and readunit keys as value. readunits is a dict with
    # readunits (think: fastq pairs with attributes) as value
    if args.sample_cfg:
        if any([args.fq1, args.fq2, args.sample]):
            logger.fatal(
                "Config file overrides fastq and sample input arguments."
                " Use one or the other")
            sys.exit(1)
        if not os.path.exists(args.sample_cfg):
            logger.fatal("Config file %s does not exist", args.sample_cfg)
            sys.exit(1)
        samples, readunits = get_samples_and_readunits_from_cfgfile(
            args.sample_cfg)
    else:
        if not all([args.fq1, args.sample]):
            logger.fatal("Need at least fq1 and sample without config file")
            sys.exit(1)

        readunits = get_readunits_from_args(args.fq1, args.fq2)
        # all readunits go into this one sample specified on the command-line
        samples = dict()
        samples[args.sample] = list(readunits.keys())

    # FIXME add checks on reffa index (currently not exposed via args)

    # turn arguments into cfg_dict that gets merged into pipeline config
    #
    cfg_dict = dict()
    cfg_dict['readunits'] = readunits
    cfg_dict['samples'] = samples
    cfg_dict['rsem_extra_args'] = ''
    if args.rsem_estimate_rspd:
        cfg_dict['rsem_extra_args'] += ' --estimate-rspd'
    cfg_dict['stranded'] = args.stranded
    cfg_dict['run_cuffdiff'] = args.run_cuffdiff
    cfg_dict['paired_end'] = any(ru.get('fq2') for ru in readunits.values())
    if cfg_dict['paired_end']:
        assert all(ru.get('fq2') for ru in readunits.values()), (
            "Can't handle mix of paired-end and single-end")

    pipeline_handler = PipelineHandler(
        PIPELINE_NAME,
        PIPELINE_BASEDIR,
        args,
        cfg_dict,
        cluster_cfgfile=get_cluster_cfgfile(CFG_DIR))
    pipeline_handler.setup_env()
    pipeline_handler.submit(args.no_run)
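
Example #12 accumulates rsem_extra_args by string concatenation, which gets fragile once options carry values that need quoting. Collecting flags in a list and joining at the end is a safer variant (a sketch, assuming the config consumer expects a single string as in the listing):

rsem_extra_args = []
estimate_rspd = True  # stands in for args.rsem_estimate_rspd
if estimate_rspd:
    rsem_extra_args.append('--estimate-rspd')

# join only when storing into the config, which expects one string
cfg_value = ' '.join(rsem_extra_args)
assert cfg_value == '--estimate-rspd'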
Example #13
def main():
    """main function
    """

    default_parser = default_argparser(CFG_DIR, with_readunits=True)
    parser = configargparse.ArgumentParser(description=__doc__.format(
        PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version()),
                                           parents=[default_parser])

    parser._optionals.title = "Arguments"
    # pipeline specific args
    parser.add_argument('-t',
                        "--seqtype",
                        required=True,
                        choices=['WGS', 'WES', 'targeted'],
                        help="Sequencing type")
    parser.add_argument('-l',
                        "--bed",
                        help="Bed file listing regions of interest."
                        " Required for WES and targeted sequencing.")
    default = 4
    parser.add_argument(
        "-c",
        "--hc-nct",
        default=default,
        type=int,
        help="Number of Haplotype Caller threads (per region cluster)."
        " Values>1 reported to make Haplotype Caller unstable (default={})".
        format(default))
    default = 100
    parser.add_argument(
        '-i',
        "--interval-padding",
        default=default,
        type=int,
        help="Interval padding (for non-WGS only; default = {})".format(
            default))
    parser.add_argument(
        '-j',
        "--joint-calls",
        action='store_true',
        help="Perform joint/cohort calling (requires multisample input)")
    parser.add_argument(
        '--raw-bam',
        help=
        "Advanced: Injects raw (pre-dedup, pre-BQSR etc.) BAM (overwrites fq options)."
        " WARNING: reference needs to match pipeline requirements")
    parser.add_argument(
        '--proc-bam',
        help=
        "Advanced: Injects processed (post-dedup, post-BQSR etc.) BAM (overwrites fq options)."
        " WARNING: reference and pre-processing need to match pipeline requirements"
    )
    # FIXME can be achieved with --until rule as well
    parser.add_argument('--bam-only',
                        action='store_true',
                        help="Only process up until BAM file")
    parser.add_argument('--gvcf-only',
                        action='store_true',
                        help="Only process up until GVCF file")

    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    aux_logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if os.path.exists(args.outdir):
        logger.fatal("Output directory %s already exists", args.outdir)
        sys.exit(1)

    # samples is a dictionary with sample names as key (mostly just
    # one) and readunit keys as value. readunits is a dict with
    # readunits (think: fastq pairs with attributes) as value
    if args.sample_cfg:
        if any([args.fq1, args.fq2, args.sample, args.raw_bam, args.proc_bam]):
            logger.fatal(
                "Config file overrides fastq, sample and BAM arguments."
                " Use one or the other")
            sys.exit(1)
        if not os.path.exists(args.sample_cfg):
            logger.fatal("Config file %s does not exist", args.sample_cfg)
            sys.exit(1)
        samples, readunits = get_samples_and_readunits_from_cfgfile(
            args.sample_cfg)

    else:  # no sample config, so input is either fastq or existing bam
        samples = dict()

        if not args.sample:
            logger.fatal("Need sample name if not using config file")
            sys.exit(1)

        if args.raw_bam or args.proc_bam:
            assert not args.fq1, ("BAM injection overwrites fastq arguments")

            if args.raw_bam:
                assert os.path.exists(args.raw_bam)
                assert not args.proc_bam, (
                    "Cannot inject raw and processed BAM")
            if args.proc_bam:
                assert os.path.exists(args.proc_bam)
                assert not args.raw_bam, (
                    "Cannot inject raw and processed BAM")

            readunits = dict()
            samples[args.sample] = []

        elif args.fq1:

            readunits = get_readunits_from_args(args.fq1, args.fq2)
            # all readunits go into this one sample specified on the command-line
            samples[args.sample] = list(readunits.keys())

        else:
            logger.fatal(
                "Need at least one fastq file as argument if not using a config file"
            )
            sys.exit(1)

    if args.seqtype in ['WES', 'targeted']:
        if not args.bed:
            logger.fatal(
                "Analysis of exome and targeted sequence runs requires a bed file"
            )
            sys.exit(1)
        else:
            if not os.path.exists(args.bed):
                logger.fatal("Bed file %s does not exist", args.sample_cfg)
                sys.exit(1)

    if args.joint_calls:
        if len(samples) < 2:
            logger.fatal("Need at least two samples for joint calling")
            sys.exit(1)

    # turn arguments into cfg_dict (gets merged with other configs later)
    #
    cfg_dict = dict()
    cfg_dict['readunits'] = readunits
    cfg_dict['samples'] = samples
    cfg_dict['seqtype'] = args.seqtype
    # always safe, might be used for WGS as well
    cfg_dict['intervals'] = os.path.abspath(args.bed) if args.bed else None
    cfg_dict['mark_dups'] = MARK_DUPS
    cfg_dict['bam_only'] = args.bam_only
    cfg_dict['gvcf_only'] = args.gvcf_only
    cfg_dict['hc_nct'] = args.hc_nct
    cfg_dict['joint_calls'] = args.joint_calls
    cfg_dict['interval_padding'] = args.interval_padding
    pipeline_handler = PipelineHandler(
        PIPELINE_NAME,
        PIPELINE_BASEDIR,
        args,
        cfg_dict,
        cluster_cfgfile=get_cluster_cfgfile(CFG_DIR))
    pipeline_handler.setup_env()

    # Inject existing BAM by symlinking (everything upstream is temporary anyway)
    # WARNING: filename has to match definition in Snakefile!
    if args.raw_bam:
        target = os.path.join(args.outdir, "out", args.sample,
                              "{}.bwamem.bam".format(args.sample))
        os.makedirs(os.path.dirname(target))
        os.symlink(os.path.abspath(args.raw_bam), target)

        src_bai = os.path.abspath(args.raw_bam) + ".bai"
        if os.path.exists(src_bai):
            os.symlink(src_bai, target + ".bai")

    elif args.proc_bam:
        target = os.path.join(args.outdir, "out", args.sample,
                              "{}.bwamem".format(args.sample))
        if cfg_dict['mark_dups']:
            target += ".dedup"
        if cfg_dict['seqtype'] != 'targeted':
            target += ".bqsr"
        target += ".bam"
        os.makedirs(os.path.dirname(target))
        os.symlink(os.path.abspath(args.proc_bam), target)
        if os.path.exists(os.path.abspath(args.proc_bam) + ".bai"):
            os.symlink(
                os.path.abspath(args.proc_bam) + ".bai", target + ".bai")

    pipeline_handler.submit(args.no_run)
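
# Usage sketch for the wrapper above (script and file names hypothetical;
# -t and -l are defined in the code above, the fastq/sample/outdir options
# are assumed to come from the shared default parser):
#
#   germline_wrapper.py -t WES -l targets.bed --sample sampleA \
#       --fq1 a_R1.fastq.gz --fq2 a_R2.fastq.gz --outdir out_germline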
Example #14
def main():
    """main function
    """

    parser = argparse.ArgumentParser(description=__doc__.format(
        PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version()))

    # generic args
    parser.add_argument('-c', "--config",
                        help="Config file (YAML) listing samples and readunits."
                        " Collides with -1, -2 and -s")
    parser.add_argument('-o', "--outdir", required=True,
                        help="Output directory (must not exist)")
    parser.add_argument('--no-mail', action='store_true',
                        help="Don't send mail on completion")
    site = get_site()
    default = DEFAULT_SLAVE_Q.get(site, None)
    parser.add_argument('-w', '--slave-q', default=default,
                        help="Queue to use for slave jobs (default: {})".format(default))
    default = DEFAULT_MASTER_Q.get(site, None)
    parser.add_argument('-m', '--master-q', default=default,
                        help="Queue to use for master job (default: {})".format(default))
    parser.add_argument('-n', '--no-run', action='store_true')
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Increase verbosity")
    parser.add_argument('-q', '--quiet', action='count', default=0,
                        help="Decrease verbosity")

    # pipeline specific args
    parser.add_argument('-1', "--fq1", nargs="+",
                        help="FastQ file/s (gzip only)."
                        " Multiple input files supported (auto-sorted)."
                        " Note: each file (or pair) gets a unique read-group id."
                        " Collides with -c.")
    parser.add_argument('-2', "--fq2", nargs="+",
                        help="FastQ file/s (if paired) (gzip only). See also --fq1")
    parser.add_argument('-s', "--sample",
                        help="Sample name. Collides with -c.")
    fake_pipeline_handler = PipelineHandler("FAKE", PIPELINE_BASEDIR, "FAKE", None)
    default_cfg = fake_pipeline_handler.read_default_config()
    default = default_cfg['references']['genome']
    # WARN do not change. This is just to set args.reffa (used later). Any
    # change here would require changes in dbsnp, hapmap, g1k, omni and mills as well
    parser.add_argument('-r', "--reffa", default=default,
                        help=argparse.SUPPRESS)
    parser.add_argument('-t', "--seqtype", required=True,
                        choices=['WGS', 'WES', 'targeted'],
                        help="Sequencing type")
    parser.add_argument('-l', "--intervals",
                        help="Intervals file (e.g. bed file) listing regions of interest."
                        " Required for WES and targeted sequencing.")

    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)
    aux_logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    if os.path.exists(args.outdir):
        logger.fatal("Output directory %s already exists", args.outdir)
        sys.exit(1)

    # samples is a dictionary with sample names as key (mostly just
    # one) and readunit keys as value. readunits is a dict with
    # readunits (think: fastq pairs with attributes) as value
    if args.config:
        if any([args.fq1, args.fq2, args.sample]):
            logger.fatal("Config file overrides fastq and sample input arguments."
                         " Use one or the other")
            sys.exit(1)
        if not os.path.exists(args.config):
            logger.fatal("Config file %s does not exist", args.config)
            sys.exit(1)
        samples, readunits = get_samples_and_readunits_from_cfgfile(args.config)
    else:
        if not all([args.fq1, args.sample]):
            logger.fatal("Need at least fq1 and sample without config file")
            sys.exit(1)

        readunits = get_readunits_from_args(args.fq1, args.fq2)
        # all readunits go into this one sample specified on the command-line
        samples = dict()
        samples[args.sample] = list(readunits.keys())

    if args.seqtype in ['WES', 'targeted']:
        if not args.intervals:
            logger.fatal("Analysis of exome and targeted sequence runs requires a bed file")
            sys.exit(1)
        else:
            if not os.path.exists(args.intervals):
                logger.fatal("Intervals file %s does not exist", args.config)
                sys.exit(1)
            logger.warning("Compatilibity between interval file and"
                           " reference not checked")# FIXME

    # turn arguments into user_data that gets merged into pipeline config
    #
    # generic data first
    user_data = dict()
    user_data['mail_on_completion'] = not args.no_mail
    user_data['readunits'] = readunits
    user_data['samples'] = samples

    user_data['num_chroms'] = len(list(chroms_and_lens_from_fasta(args.reffa)))
    user_data['seqtype'] = args.seqtype
    user_data['intervals'] = args.intervals  # always safe, might be used for WGS as well
    user_data['mark_dups'] = MARK_DUPS

    pipeline_handler = PipelineHandler(
        PIPELINE_NAME, PIPELINE_BASEDIR,
        args.outdir, user_data, site=site,
        master_q=args.master_q, slave_q=args.slave_q)
    pipeline_handler.setup_env()
    pipeline_handler.submit(args.no_run)
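
# Usage sketch for the wrapper above (script and file names hypothetical; all
# options shown are defined by the parser above):
#
#   variants_wrapper.py -t targeted -l targets.bed -s sampleA \
#       -1 a_R1.fastq.gz -2 a_R2.fastq.gz -o out_dir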
Example #15
def main():
    """main function
    """

    default_parser = default_argparser(CFG_DIR)
    parser = configargparse.ArgumentParser(description=__doc__.format(
        PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version()),
                                           parents=[default_parser])

    parser._optionals.title = "Arguments"
    # pipeline specific args
    parser.add_argument(
        "--control-fq1",
        nargs="+",
        help="Control FastQ file/s (gzip only)."
        " Multiple input files supported (auto-sorted)."
        " Note: each file (or pair) gets a unique read-group id."
        " Collides with --sample-cfg.")
    parser.add_argument(
        '--control-fq2',
        nargs="+",
        help=
        "Control FastQ file/s (if paired) (gzip only). See also --control-fq1")
    parser.add_argument(
        "--treatment-fq1",
        nargs="+",
        help="Treatment FastQ file/s (gzip only)."
        " Multiple input files supported (auto-sorted)."
        " Note: each file (or pair) gets a unique read-group id."
        " Collides with --sample-cfg.")
    parser.add_argument(
        '--treatment-fq2',
        nargs="+",
        help=
        "Treatment FastQ file/s (if paired) (gzip only). See also --treatment-fq1"
    )
    parser.add_argument(
        '--control-bam',
        help="Advanced: Injects control BAM (overwrites control-fq options)."
        " WARNING: reference and postprocessing need to match pipeline requirements"
    )
    parser.add_argument(
        '--treatment-bam',
        help="Advanced: Injects treatment BAM (overwrites treatment-fq options)."
        " WARNING: reference and postprocessing need to match pipeline requirements"
    )
    choices = ['bwa-aln', 'bwa-mem']
    default = choices[0]
    parser.add_argument('--mapper',
                        default=default,
                        choices=choices,
                        help="Mapper to use. One of {}. Default {}".format(
                            ",".join(choices), default))

    choices = ['TF', 'histone-narrow', 'histone-broad']  #, 'open-chromatin']
    parser.add_argument('-t',
                        '--peak-type',
                        required=True,
                        choices=choices,
                        help="Peak type. One of {}".format(",".join(choices)))
    parser.add_argument('--skip-macs2',
                        action='store_true',
                        help="Don't run MACS2")
    parser.add_argument('--skip-dfilter',
                        action='store_true',
                        help="Don't run DFilter")

    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    aux_logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if os.path.exists(args.outdir):
        logger.fatal("Output directory %s already exists", args.outdir)
        sys.exit(1)

    # samples is a dictionary with sample names as key (mostly just
    # one) and readunit keys as value. readunits is a dict with
    # readunits (think: fastq pairs with attributes) as value
    if args.sample_cfg:
        if any([
                args.control_fq1, args.control_fq2, args.treatment_fq1,
                args.treatment_fq2, args.control_bam, args.treatment_bam
        ]):
            logger.fatal(
                "Config file overrides fastq and sample input arguments."
                " Use one or the other")
            sys.exit(1)
        if not os.path.exists(args.sample_cfg):
            logger.fatal("Config file %s does not exist", args.sample_cfg)
            sys.exit(1)
        samples, readunits = get_samples_and_readunits_from_cfgfile(
            args.sample_cfg)
    else:
        samples = dict()

        if args.control_bam:
            control_readunits = dict()
            samples["control"] = []
            assert os.path.exists(args.control_bam)
        else:
            if not args.control_fq1:
                logger.fatal(
                    "Need control fq1 if not using a config file")
                sys.exit(1)
            control_readunits = get_readunits_from_args(
                args.control_fq1, args.control_fq2)
            samples["control"] = list(control_readunits.keys())

        if args.treatment_bam:
            treatment_readunits = dict()
            samples["treatment"] = []
            assert os.path.exists(args.treatment_bam)
        else:
            if not args.treatment_fq1:
                logger.fatal("Need treatment fq1 if not using a config file")
                sys.exit(1)
            treatment_readunits = get_readunits_from_args(
                args.treatment_fq1, args.treatment_fq2)
            samples["treatment"] = list(treatment_readunits.keys())

        readunits = dict(control_readunits)
        readunits.update(treatment_readunits)

    assert sorted(samples) == sorted(["control", "treatment"])

    # turn arguments into cfg_dict that gets merged into pipeline config
    #
    cfg_dict = dict()
    cfg_dict['readunits'] = readunits
    cfg_dict['samples'] = samples

    # either paired-end or not, but no mix allowed
    if all([ru.get('fq2') for ru in readunits.values()]):
        cfg_dict['paired_end'] = True
    elif not any([ru.get('fq2') for ru in readunits.values()]):
        cfg_dict['paired_end'] = False
    else:
        logger.fatal("Mixed paired-end and single-end not allowed")
        sys.exit(1)
    cfg_dict['peak_type'] = args.peak_type
    cfg_dict['mapper'] = args.mapper
    cfg_dict['skip_macs2'] = args.skip_macs2
    cfg_dict['skip_dfilter'] = args.skip_dfilter

    pipeline_handler = PipelineHandler(
        PIPELINE_NAME,
        PIPELINE_BASEDIR,
        args,
        cfg_dict,
        cluster_cfgfile=get_cluster_cfgfile(CFG_DIR))
    pipeline_handler.setup_env()

    if args.control_bam or args.treatment_bam:
        raise NotImplementedError("BAM injection not implemented yet")

    pipeline_handler.submit(args.no_run)
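
# Usage sketch for the wrapper above (script and file names hypothetical;
# the peak-type and fastq options are defined above, outdir is assumed to
# come from the shared default parser):
#
#   chipseq_wrapper.py -t TF --control-fq1 ctl_R1.fastq.gz \
#       --treatment-fq1 trt_R1.fastq.gz --outdir out_chipseq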
Example #16
def main():
    """main function
    """

    default_parser = default_argparser(CFG_DIR)
    parser = argparse.ArgumentParser(description=__doc__.format(
        PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version()),
                                     parents=[default_parser])

    parser._optionals.title = "Arguments"
    # pipeline specific args
    parser.add_argument('-1', "--fq1", nargs="+",
                        help="FastQ file/s (gzip only)."
                        " Multiple input files supported (auto-sorted)."
                        " Note: each file (or pair) gets a unique read-group id."
                        " Collides with --sample-cfg.")
    parser.add_argument('-2', "--fq2", nargs="+",
                        help="FastQ file/s (if paired) (gzip only). See also --fq1")
    parser.add_argument('-s', "--sample",
                        help="Sample name. Collides with --sample-cfg.")
    parser.add_argument('-D', '--dont-mark-dups', action='store_true',
                        help="Don't mark duplicate reads")

    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)
    aux_logger.setLevel(logging.WARN + 10*args.quiet - 10*args.verbose)

    if os.path.exists(args.outdir):
        logger.fatal("Output directory %s already exists", args.outdir)
        sys.exit(1)

    # samples is a dictionary with sample names as key (mostly just
    # one) and readunit keys as value. readunits is a dict with
    # readunits (think: fastq pairs with attributes) as value
    if args.sample_cfg:
        if any([args.fq1, args.fq2, args.sample]):
            logger.fatal("Config file overrides fastq and sample input arguments."
                         " Use one or the other")
            sys.exit(1)
        if not os.path.exists(args.sample_cfg):
            logger.fatal("Config file %s does not exist", args.sample_cfg)
            sys.exit(1)
        samples, readunits = get_samples_and_readunits_from_cfgfile(args.sample_cfg)
    else:
        if not all([args.fq1, args.sample]):
            logger.fatal("Need at least fq1 and sample without config file")
            sys.exit(1)

        readunits = get_readunits_from_args(args.fq1, args.fq2)
        # all readunits go into this one sample specified on the command-line
        samples = dict()
        samples[args.sample] = list(readunits.keys())

    # turn arguments into cfg_dict that gets merged into pipeline config
    #
    cfg_dict = dict()
    cfg_dict['readunits'] = readunits
    cfg_dict['samples'] = samples

    cfg_dict['mark_dups'] = not args.dont_mark_dups

    # create mongodb update command, used later, after submission
    #mongo_update_cmd = "{} -r {} -s STARTED".format(mongo_status_script, cfg_dict['run_num'])
    #mongo_update_cmd += " -a $ANALYSIS_ID -o {}".format(outdir)  # set in run.sh
    #if args.testing:
    #    mongo_update_cmd += " -t"

    pipeline_handler = PipelineHandler(
        PIPELINE_NAME, PIPELINE_BASEDIR,
        args, cfg_dict,
        cluster_cfgfile=get_cluster_cfgfile(CFG_DIR))

    pipeline_handler.setup_env()
    pipeline_handler.submit(args.no_run)
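
# Usage sketch for the wrapper above (script and file names hypothetical; -D
# and the fastq/sample options are defined above, outdir is assumed to come
# from the shared default parser):
#
#   bwa_wrapper.py --fq1 a_R1.fastq.gz --sample sampleA --outdir out_bam -D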
Example #17
def main():
    """main function
    """

    default_parser = default_argparser(CFG_DIR, with_readunits=True)
    parser = configargparse.ArgumentParser(description=__doc__.format(
        PIPELINE_NAME=PIPELINE_NAME, PIPELINE_VERSION=get_pipeline_version()),
                                           parents=[default_parser])

    parser._optionals.title = "Arguments"
    # pipeline specific args
    parser.add_argument('-c',
                        "--cell-barcodes",
                        required=True,
                        help="File listing cell barcodes")
    d = 200
    parser.add_argument(
        "--frag-len",
        default=d,
        type=int,
        help="Estimated fragment length (default={})".format(d))
    d = 20.0
    parser.add_argument(
        '--frag-len-sd',
        default=d,
        type=float,
        help="Estimated fragment length standard deviation (default={})".
        format(d))
    parser.add_argument(
        '--dedup',
        action="store_true",
        help="Run UMI-based deduplication (slow for large data-sets!)")
    args = parser.parse_args()

    # Repeatable -v and -q for setting logging level.
    # See https://www.reddit.com/r/Python/comments/3nctlm/what_python_tools_should_i_be_using_on_every/
    # and https://gist.github.com/andreas-wilm/b6031a84a33e652680d4
    # script -vv -> DEBUG
    # script -v -> INFO
    # script -> WARNING
    # script -q -> ERROR
    # script -qq -> CRITICAL
    # script -qqq -> no logging at all
    logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)
    aux_logger.setLevel(logging.WARN + 10 * args.quiet - 10 * args.verbose)

    if os.path.exists(args.outdir):
        logger.fatal("Output directory %s already exists", args.outdir)
        sys.exit(1)

    # samples is a dictionary with sample names as key (mostly just
    # one) and readunit keys as value. readunits is a dict with
    # readunits (think: fastq pairs with attributes) as value
    if args.sample_cfg:
        if any([args.fq1, args.fq2, args.sample]):
            logger.fatal(
                "Config file overrides fastq and sample input arguments."
                " Use one or the other")
            sys.exit(1)
        if not os.path.exists(args.sample_cfg):
            logger.fatal("Config file '%s' does not exist", args.sample_cfg)
            sys.exit(1)
        samples, readunits = get_samples_and_readunits_from_cfgfile(
            args.sample_cfg)
    else:
        if not all([args.fq1, args.sample]):
            logger.fatal("Need at least fq1 and sample without config file")
            sys.exit(1)

        readunits = get_readunits_from_args(args.fq1, args.fq2)
        # all readunits go into this one sample specified on the command-line
        samples = dict()
        samples[args.sample] = list(readunits.keys())

    for ru in readunits.values():
        assert ru.get('fq2'), (
            "FastQ R2 missing, but pipeline requires paired-end reads")

    # turn arguments into cfg_dict that gets merged into pipeline config
    #
    cfg_dict = dict()
    cfg_dict['readunits'] = readunits
    cfg_dict['samples'] = samples

    if not os.path.exists(args.cell_barcodes):
        logger.fatal("Cellular barcodes file '%s' does not exist",
                     args.cell_barcodes)
        sys.exit(1)

    cfg_dict['cell_barcodes'] = os.path.abspath(args.cell_barcodes)
    cfg_dict['frag_len'] = args.frag_len
    cfg_dict['frag_len_sd'] = args.frag_len_sd
    cfg_dict['no_dedup'] = not args.dedup
    cfg_dict['scrnapipe_transform'] = os.path.abspath(
        os.path.join(PIPELINE_BASEDIR, 'aux/transform.json'))
    cfg_dict['scrna_conf_template'] = os.path.abspath(
        os.path.join(PIPELINE_BASEDIR, 'aux/scrna.conf.template'))
    cfg_dict['adapters'] = os.path.abspath(
        os.path.join(PIPELINE_BASEDIR, 'aux/adapters.fa'))

    pipeline_handler = PipelineHandler(
        PIPELINE_NAME,
        PIPELINE_BASEDIR,
        args,
        cfg_dict,
        cluster_cfgfile=get_cluster_cfgfile(CFG_DIR))
    pipeline_handler.setup_env()
    pipeline_handler.submit(args.no_run)
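
# Usage sketch for the wrapper above (script and file names hypothetical;
# -c, --frag-len, --frag-len-sd and --dedup are defined above, the paired
# fastq/sample/outdir options are assumed to come from the shared default
# parser; note the pipeline requires paired-end reads):
#
#   scrna_wrapper.py -c barcodes.txt --sample sampleA \
#       --fq1 a_R1.fastq.gz --fq2 a_R2.fastq.gz --outdir out_scrna --dedup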