def main():
    cnf = read_opts_and_cnfs(
        description='Plotting Seq2C results.',
        extra_opts=[
            (['--seq2c-results'], dict(
                dest='seq2c_tsv_fpath')
             ),
            (['--key-genes'], dict(
                dest='key_genes_fpath')
             ),
        ],
        required_keys=['seq2c_tsv_fpath', 'output_dir'],
        file_keys=['seq2c_tsv_fpath', 'key_genes'],
        key_for_sample_name=None,
    )
    check_system_resources(cnf)
    check_genome_resources(cnf)

    key_gene_names = None
    if cnf.key_genes_fpath:
        with open(cnf.key_genes_fpath) as f:
            key_gene_names = set([l.strip() for l in f.readlines() if l.strip() != ''])

    plot_fpath = draw_seq2c_plot(cnf, cnf.seq2c_tsv_fpath, cnf.sample, cnf.output_dir, key_gene_names)
    if plot_fpath:
        info('Saved plot to ' + plot_fpath)
Ejemplo n.º 2
0
def main(args):
    cnf = read_opts_and_cnfs(
        extra_opts=[
            (['--vcf', '--var'], dict(
                dest='vcf',
                help='variants to annotate')
             ),
            (['--bam'], dict(
                dest='bam',
                help='(outdated) used to generate some annotations by GATK')
             ),
            (['--match-normal-sample-name'], dict(
                dest='match_normal_normal_name')
             ),
            (['--clinical-reporting'], dict(
                dest='clinical_reporting',
                help='used to generate some annotations by GATK',
                action='store_true',
                default=None)
             ),
            (['--qc'], dict(
                dest='qc',
                action='store_true',
                default=True,
                help=SUPPRESS_HELP)
             ),
            (['--no-qc'], dict(
                dest='qc',
                action='store_false',
                help=SUPPRESS_HELP)
             ),
        ],
        required_keys=['vcf'],
        file_keys=['bam', 'vcf'],
        key_for_sample_name='vcf',
        proc_name=source.varannotate_name)

    check_system_resources(cnf,
        required=['java', 'perl', 'snpeff'],
        optional=['transcripts_fpath'])

    check_genome_resources(cnf)

    # info('Using variants ' + cnf['vcf'])
    # info('Using alignement ' + cnf['bam'])

    run_one(cnf, process_one, finalize_one)

    if not cnf['keep_intermediate']:
        shutil.rmtree(cnf['work_dir'])
Ejemplo n.º 3
0
def main():
    cnf = read_opts_and_cnfs(
        extra_opts=[
            (['--targqc-dir'], dict(dest='targqc_dirpath', )),
            (['--mutations'], dict(dest='mutations_fpath', )),
            (['--sv'], dict(dest='sv_fpath', )),
            (['--sv-vcf'], dict(dest='sv_vcf_fpath', )),
            (['--varqc'], dict(dest='varqc_json_fpath', )),
            (['--varqc-after'], dict(dest='varqc_after_json_fpath', )),
            (['--target-type'], dict(
                dest='target_type',
                default='panel',
            )),
            (['--bed'], dict(dest='bed_fpath', )),
            (['--seq2c'], dict(dest='seq2c_tsv_fpath', )),
            (['--project-level-report'], dict(dest='project_report_path', )),
            (['--targqc-html'], dict(dest='targqc_report_path', )),
            (['--match'], dict(dest='match_sample_name', )),
            (['--jira'], dict(dest='jira_url', )),
        ],
        key_for_sample_name=None,
        required_keys=[],
        file_keys=[
            'mutations_fpath',
            'varqc_json_fpath',
            'varqc_after_json_fpath',
            'bed_fpath',
            'seq2c_tsv_fpath',
            'sv_fpath',
            'sv_vcf_fpath',
            #'project_report_path',  # DO NOT UNCOMMENT! Project level report might not yet exist
        ],
        # do not check mutations_fpath! could be either of:
        #   vardict.PASS.txt,
        #   vardict-java.PASS.txt,
        #   vardict.single.PASS.txt,
        #   vardict.paired.PASS.txt,
        dir_keys=['targqc_dirpath'],
    )

    check_genome_resources(cnf)
    check_system_resources(cnf, required=['bedtools'], optional=[])

    clin_info = clinical_sample_info_from_cnf(cnf)
    html_fpath = make_clinical_report(cnf, clin_info,
                                      clin_info.sample.clinical_html)
    info('Clinical report: ' + html_fpath)

    if not cnf['keep_intermediate']:
        shutil.rmtree(cnf['work_dir'])
Ejemplo n.º 4
0
def main():
    cnf = read_opts_and_cnfs(extra_opts=[
        (['--bam'], dict(dest='bam', help='path to the BAM file')),
        (['--bed', '--capture',
          '--amplicons'], dict(dest='bed', help='capture panel/amplicons')),
        (['--pcr'],
         dict(
             dest='pcr',
             action='store_true',
             help='deduplication was not perfomed, thus do not try to dedup')),
    ],
                             required_keys=['bam'],
                             file_keys=['bam', 'bed'],
                             key_for_sample_name='bam',
                             proc_name=BCBioStructure.qualimap_name)

    index_bam(cnf, cnf.bam)
    info('Using alignment ' + cnf.bam)

    bed = ''
    if cnf.bed:
        bed = ' -gff ' + cnf.bed + ' '
        info('Using amplicons/capture panel ' + cnf.bed)

    qualimap = get_system_path(cnf, 'qualimap', is_critical=True)
    if not qualimap:
        critical('Cannot find qualimap')

    info()

    mem_cmdl = ''
    mem_m = get_qualimap_max_mem(cnf.bam)
    mem = str(int(mem_m)) + 'M'
    mem_cmdl = ' --java-mem-size=' + mem

    cmdline = (
        '{qualimap} bamqc --skip-duplicated -nt ' + str(cnf.threads) +
        mem_cmdl + ' -nr 5000 '
        '-bam {cnf.bam} -outdir {cnf.output_dir} {bed} -c -gd HUMAN').format(
            **locals())
    report_fpath = join(cnf.output_dir, 'qualimapReport.html')

    call(cnf,
         cmdline,
         output_fpath=report_fpath,
         stdout_to_outputfile=False,
         env_vars=dict(DISPLAY=None))

    info('Qualimap report: ' + str(report_fpath))
Ejemplo n.º 5
0
def proc_args(argv):
    cnf = read_opts_and_cnfs(
        extra_opts=[
            (['--bam'], dict(dest='bam', )),
        ],
        required_keys=['bam'],
        file_keys=['bam'],
    )

    check_genome_resources(cnf)

    if not cnf.bam:
        critical('No bam file provided to input')
    if not cnf.genome:
        critical('Please, specify the --genome option (e.g. --genome hg19)')

    return cnf
Ejemplo n.º 6
0
def main(args):
    cnf = read_opts_and_cnfs(
        extra_opts=[
            (['--var', '--vcf'], dict(
                dest='vcf',
                help='variants to evaluate')
             ),
        ],
        required_keys=['vcf'],
        file_keys=['vcf'],
        key_for_sample_name='vcf',
        proc_name=source.varqc_name,
    )

    check_system_resources(cnf)

    check_genome_resources(cnf)

    info('Using variants ' + cnf['vcf'])

    run_one(cnf, process_one, finalize_one)

    if not cnf['keep_intermediate']:
        shutil.rmtree(cnf['work_dir'])
Ejemplo n.º 7
0
def main(args):
    cnf = read_opts_and_cnfs(extra_opts=[
        (['--bam'], dict(dest='bam', help='a path to the BAM file to study')),
        (['-1'], dict(dest='l_fpath')), (['-2'], dict(dest='r_fpath')),
        (['--bed', '--capture', '--amplicons'],
         dict(dest='bed', help='a BED file for capture panel or amplicons')),
        (['--exons', '--exome', '--features'],
         dict(
             dest='features',
             help=
             'a BED file with real CDS/Exon/Gene/Transcript regions with annotations (default "features" is in system_config)'
         )),
        (['--exons-no-genes', '--features-no-genes'],
         dict(
             dest='features_no_genes',
             help=
             'a BED file with real CDS/Exon regions with annotations, w/o Gene/Transcript records (default "features" is in system_config)'
         )),
        (['--original-bed'],
         dict(dest='original_target_bed', help=SUPPRESS_HELP)),
        (['--original-exons', '--original-features'],
         dict(
             dest='original_features_bed',
             help='original features genes bed file path (just for reporting)')
         ),
        (['--reannotate'],
         dict(dest='reannotate',
              help='re-annotate BED file with gene names',
              action='store_true',
              default=False)),
        (['--no-prep-bed'],
         dict(dest='prep_bed',
              help='do not fix input beds and exons',
              action='store_false',
              default=True)),
        (['-e', '--extended'],
         dict(dest='extended',
              help='extended - flagged regions and missed variants',
              action='store_true',
              default=False)),
        (['--genes'], dict(dest='genes', help='custom list of genes')),
        (['--padding'],
         dict(
             dest='padding',
             help=
             'integer indicating the number of bases to extend each target region up and down-stream. '
             'Default is ' + str(defaults['coverage_reports']['padding']),
             type='int')),
        (['--no-dedup'],
         dict(dest='no_dedup', action='store_true', help=SUPPRESS_HELP)),
        (['--downsample-to'],
         dict(dest='downsample_to', type='int', help=SUPPRESS_HELP)),
        (['--downsampled'],
         dict(dest='downsampled', action='store_true', help=SUPPRESS_HELP)),
        (['--fastqc-dirpath'], dict(dest='fastqc_dirpath', help=SUPPRESS_HELP))
    ],
                             file_keys=['bam', 'l_fpath', 'r_fpath', 'bed'],
                             key_for_sample_name='bam')

    if cnf.padding:
        cnf.coverage_reports.padding = cnf.padding

    check_system_resources(cnf, required=['bedtools'], optional=[])

    check_genome_resources(cnf)

    features_bed = adjust_path(cnf.features) if cnf.features else adjust_path(
        cnf.genome.features)
    if features_bed:
        info('Features: ' + features_bed)
        features_bed = verify_file(features_bed)
    else:
        info('No features BED found')

    if cnf.bed:
        cnf.bed = verify_file(cnf.bed, is_critical=True)
        info('Using amplicons/capture panel ' + cnf.bed)
    elif features_bed:
        info('WGS, taking CDS as target')

    cnf.bam = verify_bam(cnf.bam, is_critical=True)

    reports = process_one(cnf,
                          cnf.output_dir,
                          cnf.bam,
                          features_bed=features_bed,
                          features_no_genes_bed=cnf.features_no_genes)
    summary_report, gene_report = reports[:2]

    info('')
    info('*' * 70)
    if summary_report.txt_fpath:
        info('Summary report: ' + summary_report.txt_fpath)
    if gene_report:
        if gene_report.txt_fpath:
            info('All regions: ' + gene_report.txt_fpath + ' (' +
                 str(len(gene_report.rows)) + ' regions)')

    if len(reports) > 2:
        selected_regions_report = reports[2]
        if selected_regions_report.txt_fpath:
            info('Flagged regions: ' + selected_regions_report.txt_fpath +
                 ' (' + str(len(selected_regions_report.rows)) + ' regions)')

    for fpaths in reports:
        if fpaths:
            ok = True
            info('Checking expected results...')
            if not isinstance(fpaths, list):
                fpaths = [fpaths]
            for fpath in fpaths:
                if isinstance(fpath, basestring):
                    if not verify_file(fpath):
                        ok = False
            if ok:
                info('The results are good.')

    if not cnf['keep_intermediate']:
        shutil.rmtree(cnf['work_dir'])
Ejemplo n.º 8
0
def main():
    if len(sys.argv[1]) < 0:
        critical('Usage: ' + __file__ +
                 ' Input_BED_file -g hg19 -o Annotated_BED_file')
    input_bed_fpath = verify_bed(sys.argv[1],
                                 is_critical=True,
                                 description='Input BED file for ' + __file__)

    cnf = read_opts_and_cnfs(
        description=
        'Annotating BED file based on reference features annotations.',
        extra_opts=[
            (['--reference'], dict(dest='reference')),
        ],
        required_keys=['output_file'],
        file_keys=['reference'],
        key_for_sample_name=None,
        fpath_for_sample_name=input_bed_fpath,
        main_output_is_file=True)
    check_system_resources(cnf)
    check_genome_resources(cnf)

    chr_order = get_chrom_order(cnf)

    features_fpath = adjust_path(cnf.genome.bed_annotation_features)
    if not verify_bed(features_fpath, 'Annotated reference BED file'):
        critical('Annotated reference is required')

    # features_and_beds = _split_reference_by_priority(cnf, features_fpath)

    bed = BedTool(input_bed_fpath).cut([0, 1, 2])

    info()

    annotated = None
    off_targets = None

    for feature in ['CDS', 'Exon', 'Transcript', 'Gene']:
        if bed:
            info('Extracting ' + feature + ' features from ' + features_fpath)
            features_bed = BedTool(features_fpath).filter(
                lambda x: x[6] == feature)

            info('Annotating based on ' + feature)
            new_annotated, off_targets = _annotate(cnf, bed, features_bed,
                                                   chr_order)
            if not annotated:
                annotated = new_annotated
                for a in annotated:
                    a.feature = feature
            else:
                annotated.extend(new_annotated)

            if off_targets:
                bed = BedTool([(r.chrom, r.start, r.end) for r in off_targets])

                # off_target_fpath = _save_regions(off_targets, join(work_dirpath, 'off_target_1.bed'))
                # log('Saved off target1 to ' + str(off_target_fpath))
                info()

    if annotated is not None and off_targets is not None:
        annotated.extend(off_targets)

    info()
    info('Saving annotated regions to ' + str(cnf.output_file))
    with open(cnf.output_file, 'w') as out:
        for region in sorted(annotated, key=lambda r: r.get_key()):
            out.write(str(region))

        # for r, overlap_size in overlaps:
        #     sys.stdout.write('\t' + '\t'.join([
        #         r.chrom, '{:,}'.format(r.start), '{:,}'.format(r.end), r.gene, r.exon, str(r.strand), r.feature, r.biotype,
        #         str(overlap_size),
        #         '{:.2f}%'.format(100.0 * overlap_size / (r.end - r.start))
        #     ]))
        # sys.stdout.write('\n')
    info('Done.')
Ejemplo n.º 9
0
def main(args):
    cnf = read_opts_and_cnfs(
        extra_opts=[
            (['--vcf', '--var'], dict(
                dest='vcf',
                help='variants to filter')
             ),
            (['--vcf2txt'], dict(
                dest='vcf2txt',
                help='variants in vcf2txt to filter')
             ),
            (['--cohort-freqs'], dict(
                dest='cohort_freqs_fpath',
                help='frequencies of variants in a cohort')
             ),
            (['--qc'], dict(
                dest='qc',
                action='store_true',
                default=True,
                help=SUPPRESS_HELP)
             ),
            (['--no-qc'], dict(
                dest='qc',
                action='store_false',
                help=SUPPRESS_HELP)
             ),
            (['--no-tsv'], dict(
                dest='tsv',
                action='store_false',
                default=True,
                help=SUPPRESS_HELP)
             ),
        ],
        required_keys=['vcf'],
        file_keys=['vcf'],
        key_for_sample_name='vcf',
        proc_name=source.varfilter_name + '_post')

    check_system_resources(cnf, required=['perl'])
    check_genome_resources(cnf)

    if not cnf.output_file:
        cnf.output_file = join(cnf.output_dir, (cnf.caller or 'variants') + '.txt')

    safe_mkdir(dirname(cnf.output_file))
    safe_mkdir(cnf.output_dir)

    if cnf.vcf.endswith('.vcf.gz') or cnf.vcf.endswith('.vcf'):
        verify_vcf(cnf.vcf, is_critical=True)

    if not cnf.vcf2txt:
        vcf2txt_res_fpath = run_vcf2txt(cnf, {cnf.sample: cnf.vcf}, cnf.output_file)
        if not vcf2txt_res_fpath:
            critical('vcf2txt run returned non-0')
        info('Saved vcf2txt output to ' + vcf2txt_res_fpath)
    else:
        cnf.vcf2txt = verify_file(cnf.vcf2txt, is_critical=True)
        info('Input is vcf2txt output, grepping by sample name ' + cnf.sample)
        vcf2txt_res_fpath = cnf.output_file
        with file_transaction(cnf.work_dir, vcf2txt_res_fpath) as tx:
            with open(cnf.vcf2txt) as f, open(tx, 'w') as out:
                for i, l in enumerate(f):
                    if l.strip():
                        if i == 0:
                            out.write(l)
                        else:
                            if l.split('\t')[0] == cnf.sample:
                                out.write(l)
        info('Using vcf2txt from ' + vcf2txt_res_fpath)

    # if is_local():
    #     vardict2mut_pl = get_script_cmdline(cnf, 'perl', join('VarDict', 'vardict2mut.pl'))
    #     info('Running vardict2mut perl')
    #     res = run_vardict2mut(cnf, vcf2txt_res_fpath,
    #         add_suffix(vcf2txt_res_fpath, source.mut_pass_suffix + '_perl'),
    #         vardict2mut_executable=vardict2mut_pl)
    #     if not res:
    #         critical('vardict2mut.pl run returned non-0')

    mut_fpath = run_vardict2mut(cnf, vcf2txt_res_fpath, add_suffix(vcf2txt_res_fpath, variant_filtering.mut_pass_suffix))
    if not mut_fpath:
        err('vardict2mut failed')
    else:
        info('Saved passed mutations to ' + mut_fpath)

        var_s = source.VarSample(cnf.sample, cnf.output_dir)
        var_s.anno_vcf_fpath = cnf.vcf
        var_s.varfilter_dirpath = var_s.dirpath

        ungz_anno_vcf_fpath = var_s.anno_vcf_fpath if not var_s.anno_vcf_fpath.endswith('.gz') else splitext(var_s.anno_vcf_fpath)[0]
        ungz_filt_vcf_fpath = join(cnf.output_dir, add_suffix(basename(ungz_anno_vcf_fpath), 'filt'))
        var_s.filt_vcf_fpath = ungz_filt_vcf_fpath + '.gz'

        var_s.variants_fpath = vcf2txt_res_fpath
        var_s.variants_pass_fpath = add_suffix(vcf2txt_res_fpath, source.mut_pass_suffix)

        ungz_pass_filt_vcf_fpath = add_suffix(ungz_filt_vcf_fpath, 'pass')
        var_s.pass_filt_vcf_fpath = add_suffix(var_s.filt_vcf_fpath, 'pass')

        filt_vcf = write_vcf(cnf, var_s, cnf.output_dir, cnf.caller, vcf2txt_res_fpath, mut_fpath)
        index_vcf(cnf, var_s.name, filt_vcf, cnf.caller)
        index_vcf(cnf, var_s.name, ungz_pass_filt_vcf_fpath, cnf.caller)

        if cnf.qc:
            report = qc.make_report(cnf, var_s.pass_filt_vcf_fpath, var_s)
            qc_dirpath = join(cnf.output_dir, 'qc')
            safe_mkdir(qc_dirpath)
            qc.save_report(cnf, report, var_s, cnf.caller, qc_dirpath, source.varqc_after_name)
            info('Saved QC to ' + qc_dirpath + ' (' + report.html_fpath + ')')
            info('-' * 70)
            info()

        if not cnf['keep_intermediate']:
            shutil.rmtree(cnf['work_dir'])

        info()
        info('*' * 70)
        info('Done filtering ' + var_s.name)