Beispiel #1
0
def proc_args(argv):
    """Parse the Seq2C command line and prepare the run configuration.

    Positional arguments are either a single sample-to-BAM table (any single
    argument that does not end with ``.bam``) or a list that is handed to
    ``find_bams``.

    :param argv: accepted for interface compatibility only; it is never read.
        ``parser.parse_args()`` is called without arguments, so options are
        taken from ``sys.argv``.
    :return: tuple ``(cnf, samples, target_bed, output_dir)`` where ``samples``
        is the list of ``source.TargQC_Sample`` objects sorted by
        ``key_to_sort()`` and ``target_bed`` is ``None`` when no ``--bed`` was
        given.
    """
    info(' '.join(sys.argv))
    info()

    # The usage example must name the real option: optparse rejects the
    # misspelled '--contols' because it is not a prefix of '--controls'.
    description = 'This script generates target QC reports for each BAM provided as an input. ' \
                  'Usage: ' + basename(__file__) + ' sample2bam.tsv --bed target.bed --controls sample1:sample2 -o results_dir'
    parser = OptionParser(description=description, usage=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)
    parser.add_option('-o', dest='output_dir', metavar='DIR', default=join(os.getcwd(), 'seq2c'))
    parser.add_option('--bed', dest='bed', help='BED file to run Seq2C analysis')
    parser.add_option('-c', '--controls', dest='controls', help='Optional control sample names for Seq2C. For multiple controls, separate them using :')
    parser.add_option('--seq2c-opts', dest='seq2c_opts', help='Options for the final lr2gene.pl script.')
    parser.add_option('--no-prep-bed', dest='prep_bed', help=SUPPRESS_HELP, action='store_false', default=True)

    (opts, args) = parser.parse_args()
    logger.is_debug = opts.debug

    if not args:
        parser.print_usage()
        sys.exit(1)
    if len(args) == 1 and not args[0].endswith('.bam'):
        # A single non-BAM argument is treated as the sample-to-BAM table.
        sample_names, bam_fpaths = read_samples(verify_file(args[0], is_critical=True, description='Input sample2bam.tsv'))
        bam_by_sample = OrderedDict(zip(sample_names, bam_fpaths))
    else:
        bam_by_sample = find_bams(args)

    # Without --bed the run configuration is selected with is_wgs=True.
    run_cnf = determine_run_cnf(opts, is_wgs=not opts.__dict__.get('bed'))
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)
    check_genome_resources(cnf)

    cnf.output_dir = adjust_path(cnf.output_dir)
    # Only the parent directory has to exist already; the output directory
    # itself is created here.
    verify_dir(dirname(cnf.output_dir), is_critical=True)
    safe_mkdir(cnf.output_dir)

    if not cnf.project_name:
        cnf.project_name = basename(cnf.output_dir)
    info('Project name: ' + cnf.project_name)

    cnf.proc_name = 'Seq2C'
    set_up_dirs(cnf)

    samples = [
        source.TargQC_Sample(name=s_name, dirpath=join(cnf.output_dir, s_name), bam=bam_fpath)
            for s_name, bam_fpath in bam_by_sample.items()]
    info('Samples: ')
    for s in samples:
        info('  ' + s.name)
    samples.sort(key=lambda _s: _s.key_to_sort())

    target_bed = verify_bed(cnf.bed, is_critical=True) if cnf.bed else None

    if not cnf.only_summary:
        cnf.qsub_runner = adjust_system_path(cnf.qsub_runner)
        if not cnf.qsub_runner: critical('Error: qsub-runner is not provided is sys-config.')
        verify_file(cnf.qsub_runner, is_critical=True)

    return cnf, samples, target_bed, cnf.output_dir
Beispiel #2
0
def main():
    """Command-line entry point: validate the inputs and run TargQC.

    Positional arguments are BAM file paths. They are made absolute,
    de-duplicated (keeping the order given on the command line) and checked
    with ``verify_bam``. The configuration is then assembled, the BED and
    features paths are resolved, and ``run_targqc`` is called. If it returns a
    report path, that path is sent by e-mail via ``send_email``.
    """
    info(' '.join(sys.argv))
    info()

    description = 'This script generates target QC reports for each BAM provided as an input.'
    parser = OptionParser(description=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser, threads=1)
    parser.add_option('--work-dir', dest='work_dir', metavar='DIR')
    parser.add_option('--log-dir', dest='log_dir')
    parser.add_option('--only-summary',
                      dest='only_summary',
                      action='store_true')
    parser.add_option('-o',
                      dest='output_dir',
                      metavar='DIR',
                      default=join(os.getcwd(), 'targetqc'))
    parser.add_option('--reannotate',
                      dest='reannotate',
                      action='store_true',
                      default=False,
                      help='re-annotate BED file with gene names')
    parser.add_option('--dedup',
                      dest='dedup',
                      action='store_true',
                      default=False,
                      help='count duplicates in coverage metrics')
    parser.add_option('--bed',
                      dest='bed',
                      help='BED file to run targetSeq and Seq2C analysis on.')
    parser.add_option(
        '--exons',
        '--exome',
        '--features',
        dest='features',
        help=
        'Annotated CDS/Exon/Gene/Transcripts BED file to make targetSeq exon/amplicon regions reports.'
    )

    (opts, args) = parser.parse_args()
    logger.is_debug = opts.debug

    if not args:
        critical('No BAMs provided to input.')

    # De-duplicate the BAM paths but keep the command-line order: going
    # through a plain set() would make the processing order arbitrary.
    bam_fpaths = []
    seen_fpaths = set()
    for arg in args:
        fpath = abspath(arg)
        if fpath not in seen_fpaths:
            seen_fpaths.add(fpath)
            bam_fpaths.append(fpath)

    bad_bam_fpaths = [fpath for fpath in bam_fpaths if not verify_bam(fpath)]
    if bad_bam_fpaths:
        critical('BAM files cannot be found, empty or not BAMs: ' +
                 ', '.join(bad_bam_fpaths))

    # NOTE(review): is_wgs is derived from a missing --bed, yet the BED is
    # verified as critical further down - confirm whether running without
    # --bed is meant to be supported here.
    run_cnf = determine_run_cnf(opts, is_wgs=not opts.__dict__.get('bed'))
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)

    if not cnf.project_name:
        cnf.project_name = basename(cnf.output_dir)
    info('Project name: ' + cnf.project_name)

    cnf.proc_name = 'TargQC'
    set_up_dirs(cnf)

    check_genome_resources(cnf)

    verify_bed(cnf.bed, is_critical=True)
    bed_fpath = adjust_path(cnf.bed)
    info('Using amplicons/capture panel ' + bed_fpath)

    # An explicit --features/--exons/--exome file wins over the genome default.
    features_bed_fpath = adjust_path(
        cnf.features) if cnf.features else adjust_path(cnf.genome.features)
    info('Features: ' + features_bed_fpath)

    genes_fpath = None
    if cnf.genes:
        genes_fpath = adjust_path(cnf.genes)
        info('Custom genes list: ' + genes_fpath)

    if not cnf.only_summary:
        cnf.qsub_runner = adjust_system_path(cnf.qsub_runner)
        if not cnf.qsub_runner:
            critical('Error: qsub-runner is not provided is sys-config.')
        verify_file(cnf.qsub_runner, is_critical=True)

    info('*' * 70)
    info()

    targqc_html_fpath = run_targqc(cnf, cnf.output_dir, bam_fpaths, bed_fpath,
                                   features_bed_fpath, genes_fpath)
    if targqc_html_fpath:
        send_email(
            cnf, 'TargQC report for ' + cnf.project_name + ':\n  ' +
            targqc_html_fpath)
Beispiel #3
0
def read_opts_and_cnfs(extra_opts,
                       key_for_sample_name=None,
                       required_keys=list(),
                       file_keys=list(),
                       dir_keys=list(),
                       description='',
                       extra_msg=None,
                       proc_name=None,
                       fpath_for_sample_name=None,
                       main_output_is_file=False,
                       main_output_is_dir=True):
    """Parse the command line and build the ``Config`` for a single-sample tool.

    :param extra_opts: list of ``(option_strings, add_option_kwargs)`` pairs
        registered before the common options. The list is copied, so the
        caller's object is not modified.
    :param key_for_sample_name: config key whose value (a file path) is used
        to derive the sample name when ``--sample`` is not given and
        ``fpath_for_sample_name`` is empty.
    :param required_keys: option ``dest`` names that must be present; they are
        listed in the usage text and checked with ``check_keys_presence``.
    :param file_keys: ``dest`` names holding file paths; only those that are
        also required are passed to ``check_dirs_and_files``.
    :param dir_keys: same as ``file_keys`` but for directories.
    :param description: description text passed to ``OptionParser``.
    :param extra_msg: not used by this function.
    :param proc_name: fallback for ``cnf.proc_name`` when ``--proc-name`` is
        not given.
    :param fpath_for_sample_name: file path used to derive the sample name
        when ``--sample`` is not given.
    :param main_output_is_file: when true, ``-o`` is bound to ``--output-file``.
    :param main_output_is_dir: when true (and ``main_output_is_file`` is
        false), ``-o`` is bound to ``--output-dir``.
    :return: the populated ``Config`` object.

    The mutable ``list()`` defaults are kept for signature compatibility; this
    function only reads them.
    """
    # Copy the list: extending the caller's object in place would leak the
    # common options into it and register duplicates on a second call.
    options = list(extra_opts or [])
    if main_output_is_file:
        options += [(['-o', '--output-file'],
                     dict(dest='output_file',
                          metavar='FILE',
                          help='Output file'))]
        options += [(
            ['--output-dir'],
            dict(
                dest='output_dir',
                metavar='DIR',
                help=
                'Output directory (or directory name in case of bcbio final dir)',
                default=os.getcwd()))]
    elif main_output_is_dir:
        options += [(
            ['-o', '--output-dir'],
            dict(
                dest='output_dir',
                metavar='DIR',
                help=
                'Output directory (or directory name in case of bcbio final dir)',
                default=os.getcwd()))]
        options += [(['--output-file'],
                     dict(dest='output_file',
                          metavar='FILE',
                          help='Output file'))]

    options += [
        (['-s', '--sample', '--name'],
         dict(
             dest='sample',
             metavar='NAME',
             help=
             'Sample name (default is part of name of the first parameter prior to the first - or .'
         )),
        (['-c', '--caller'],
         dict(
             dest='caller',
             metavar='CALLER_NAME',
             help=
             'Variant caller name (default is part of name of the first parameter between the first - and following .'
         )),
        (['-t', '--nt', '--threads'],
         dict(dest='threads', type='int', help='Number of threads')),
        (
            ['--clean'],
            dict(  # do not keep work directory
                dest='keep_intermediate',
                action='store_false',
                help=SUPPRESS_HELP)),
        (['--debug'],
         dict(dest='debug',
              action='store_true',
              default=False,
              help=SUPPRESS_HELP)),
        (['--reuse'],
         dict(
             dest='reuse_intermediate',
             help=
             'reuse intermediate non-empty files in the work dir from previous run',
             action='store_true')),
        (['--sys-cnf'],
         dict(
             dest='sys_cnf',
             metavar='SYS_CNF.yaml',
             help=
             'System configuration file with paths to external tools and genome resources. The default is  '
             '(see default one %s)' % defaults['sys_cnf'])),
        (['--run-cnf'],
         dict(
             dest='run_cnf',
             metavar='RUN_CNF.yaml',
             default=defaults['run_cnf_exome_seq'],
             help=
             'Customised run details: list of annotations/QC metrics/databases/filtering criteria. '
             'The default is %s' % defaults['run_cnf_exome_seq'])),
        (['--transcripts'], dict(dest='transcripts_fpath')),
        (['--work-dir'],
         dict(dest='work_dir', metavar='DIR', help=SUPPRESS_HELP)),
        (['--log-dir'], dict(dest='log_dir', metavar='DIR',
                             help=SUPPRESS_HELP)),
        (['--proc-name'], dict(dest='proc_name', help=SUPPRESS_HELP)),
        (['--project-name'], dict(dest='project_name')),
        (['--no-check'],
         dict(dest='no_check', action='store_true', help=SUPPRESS_HELP)),
        (['-g', '--genome'], dict(dest='genome')),
        (['--email'], dict(dest='email', help=SUPPRESS_HELP)),
        (['--done-marker'], dict(dest='done_marker', help=SUPPRESS_HELP)),
    ]

    parser = OptionParser(description=description)
    for opt_strings, opt_kwargs in options:
        parser.add_option(*opt_strings, **opt_kwargs)

    # Append the list of required options to the usage text.
    req_keys_usage = ''
    if required_keys:
        req_keys_usage = '\nRequired options:'
    for opt_strings, opt_kwargs in options:
        try:
            if opt_kwargs['dest'] in required_keys:
                req_keys_usage += '\n  ' + '/'.join(opt_strings)
        except Exception:
            # Best effort: a malformed option spec (e.g. one without 'dest')
            # is reported but must not stop the tool. Unlike a bare except,
            # KeyboardInterrupt and SystemExit are not swallowed.
            err(format_exc())
    parser.set_usage(parser.get_usage() + req_keys_usage)

    (opts, _positional) = parser.parse_args()
    logger.is_debug = opts.debug

    run_cnf = determine_run_cnf(opts, is_wgs=not opts.__dict__.get('bed'))
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)

    errors = check_keys_presence(cnf, required_keys)
    if errors:
        parser.print_help()
        critical(errors)
    # Only required paths are validated.
    file_keys = [k for k in file_keys if k in required_keys]
    dir_keys = [k for k in dir_keys if k in required_keys]
    errors = check_dirs_and_files(cnf, file_keys, dir_keys)
    if errors:
        critical(errors)

    if cnf.sample:
        cnf.sample = remove_quotes(cnf.sample)
    else:
        if not fpath_for_sample_name:
            if not key_for_sample_name:
                critical('Error: --sample must be provided in options.')

            fpath_for_sample_name = cnf[key_for_sample_name]
            if not fpath_for_sample_name:
                critical('Error: --sample or ' + (str(key_for_sample_name)) +
                         ' must be provided in options.')

        # Derive the name from whichever path was resolved above. Previously
        # this ran only when fpath_for_sample_name was NOT passed in, so an
        # explicitly supplied path left cnf.sample unset.
        cnf.sample = basename(fpath_for_sample_name).split('.')[0]

    if cnf.caller:
        cnf.caller = remove_quotes(cnf.caller)
    else:
        cnf.caller = None

    cnf.proc_name = cnf.proc_name or proc_name
    set_up_dirs(cnf)
    info(' '.join(sys.argv))
    info()

    return cnf
Beispiel #4
0
def proc_args(argv):
    """Parse the command line of the 'Variants' step and build its inputs.

    Positional arguments are handed to ``read_samples`` together with the
    configured caller name to obtain a sample-name -> VCF-path mapping; one
    ``source.VarSample`` is created per entry.

    :param argv: not read; ``parser.parse_args()`` is called without
        arguments and therefore takes the options from ``sys.argv``.
    :return: tuple ``(cnf, samples)`` with ``samples`` sorted by
        ``key_to_sort()``.
    """
    info(' '.join(sys.argv))
    info()

    parser = OptionParser(
        description='This script generates target QC reports for each BAM provided as an input.')
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)

    # (option strings, add_option keyword arguments), in registration order.
    option_table = [
        (('--log-dir',), dict(dest='log_dir')),
        (('--is-wgs',), dict(dest='is_wgs', action='store_true',
                             default=False, help='whole genome sequencing')),
        (('--is-deep-seq',), dict(dest='is_deep_seq', action='store_true',
                                  default=False,
                                  help='deep targeted sequencing')),
        (('--only-summary',), dict(dest='only_summary', action='store_true')),
        (('-o',), dict(dest='output_dir', metavar='DIR',
                       default=join(os.getcwd(), 'targetqc'))),
        (('-c', '--caller'), dict(dest='caller')),
        (('--qc',), dict(dest='qc', action='store_true', default=False)),
        (('--no-qc',), dict(dest='qc', action='store_false', default=False)),
        (('--qc-caption',), dict(dest='qc_caption', help=SUPPRESS_HELP)),
        (('--no-tsv',), dict(dest='tsv', action='store_false', default=True,
                             help=SUPPRESS_HELP)),
    ]
    for flag_names, flag_params in option_table:
        parser.add_option(*flag_names, **flag_params)

    opts, args = parser.parse_args()
    logger.is_debug = opts.debug

    if not args:
        critical('No vcf files provided to input.')

    # Run config is resolved before the system config.
    run_cnf = determine_run_cnf(opts,
                                is_wgs=opts.is_wgs,
                                is_targetseq=opts.is_deep_seq)
    sys_cnf = determine_sys_cnf(opts)
    cnf = Config(opts.__dict__, sys_cnf, run_cnf)

    vcf_fpath_by_sample = read_samples(args, cnf.caller)
    info()

    # The project name falls back to the output directory's base name.
    if not cnf.project_name:
        cnf.project_name = basename(cnf.output_dir)
    info('Project name: ' + cnf.project_name)

    cnf.proc_name = 'Variants'
    set_up_dirs(cnf)
    # The command line is logged a second time once set_up_dirs has run.
    info(' '.join(sys.argv))

    samples = []
    for sample_name, vcf_path in vcf_fpath_by_sample.items():
        sample_dir = join(cnf.output_dir, sample_name)
        samples.append(source.VarSample(sample_name, sample_dir, vcf=vcf_path))
    samples = sorted(samples, key=lambda smp: smp.key_to_sort())

    check_genome_resources(cnf)

    # The qsub runner is only needed when more than the summary is requested.
    if cnf.only_summary:
        return cnf, samples

    cnf.qsub_runner = adjust_system_path(cnf.qsub_runner)
    if not cnf.qsub_runner:
        critical('Error: qsub-runner is not provided is sys-config.')
    verify_file(cnf.qsub_runner, is_critical=True)

    return cnf, samples