def main():
    """Build gene-expression heatmap HTML reports for each counts table.

    Logs the command line, parses the project parameters for the
    'expression' step, and then walks the counts file names paired with
    their report captions. A counts table that does not verify is first
    produced by annotating the matching 'raw/combined.*' file; afterwards
    the table is verified as critical and a heatmap report is written into
    the 'html' sub-directory of the expression directory.
    """
    info(' '.join(sys.argv))
    info()
    cnf, bcbio_structure = bcbio_summary_script_proc_params(
        'expression', BCBioStructure.expression_dir)

    step_greetings('Gene expression heatmaps summary for all samples')
    genes_dict, transcripts_dict = _get_gene_transcripts_id(cnf)

    # Captions are matched to counts files by position; zip() stops at the
    # shorter of the two sequences.
    captions = ['Gene counts', 'Exon counts', 'Gene TPM', 'Isoform TPM']
    for fname, caption in zip(bcbio_structure.counts_names, captions):
        base_name = fname.replace('.tsv', '')
        table_fpath = join(bcbio_structure.expression_dirpath, fname)

        if not verify_file(table_fpath, silent=True):
            # Annotated table is missing: derive it from the raw combined counts.
            raw_fpath = join(bcbio_structure.expression_dirpath, 'raw',
                             'combined.' + base_name)
            info('Annotating ' + caption + ' from ' + raw_fpath)
            annotate_gene_counts(cnf, raw_fpath, table_fpath, genes_dict)
        verify_file(table_fpath, is_critical=True, description=fname)

        # Truthy (the table path) only for the isoform TPM table, else False.
        keep_names = fname == 'isoform.sf.tpm' and table_fpath
        if keep_names:
            id_to_name = transcripts_dict
        else:
            id_to_name = genes_dict

        html_dirpath = safe_mkdir(
            join(bcbio_structure.expression_dirpath, 'html'))
        html_fpath = join(html_dirpath, base_name + '.html')

        make_gene_expression_heatmaps(
            cnf, bcbio_structure, table_fpath, id_to_name, html_fpath,
            caption, keep_gene_names=keep_names)
    info('Done')
def main():
    """Parse the TargQC summary options and run the TargQC summary.

    Accepts an optional capture/amplicon BED file and an optional features
    BED file on top of the common summary-script options, then calls
    summarize_targqc for all samples of the project.
    """
    info(' '.join(sys.argv))
    info()

    bed_opt = (
        ['--bed', '--capture', '--amplicons'],
        {'dest': 'bed',
         'help': 'BED file to run targetSeq and Seq2C analysis on.'},
    )
    features_opt = (
        ['--exons', '--exome', '--features'],
        {'dest': 'features',
         'help': 'Annotated CDS/Exons/Gene/Transcript BED file to make '
                 'targetSeq exon/amplicon regions reports.'},
    )
    cnf, bcbio_structure = bcbio_summary_script_proc_params(
        BCBioStructure.targqc_name,
        BCBioStructure.targqc_summary_dir,
        extra_opts=[bed_opt, features_opt])

    bed_fpath = adjust_path(cnf.bed)
    features_bed_fpath = adjust_path(cnf.features)

    # Fall back to one thread per sample when no thread count is configured.
    threads_num = cnf.threads or len(bcbio_structure.samples)

    summarize_targqc(
        cnf,
        threads_num,
        cnf.output_dir,
        bcbio_structure.samples,
        bed_fpath=bed_fpath,
        features_fpath=features_bed_fpath)
def proc_args():
    """Log the command line and parse the Seq2C summary-script parameters.

    Only one extra option is registered: a BED file, reachable as
    --bed / --capture / --amplicons and stored under 'bed'.

    Returns:
        The (cnf, bcbio_structure) pair produced by
        bcbio_summary_script_proc_params.
    """
    info(' '.join(sys.argv))
    info()

    bed_option = (['--bed', '--capture', '--amplicons'], {'dest': 'bed'})
    cnf, bcbio_structure = bcbio_summary_script_proc_params(
        BCBioStructure.seq2c_name,
        extra_opts=[bed_option],
    )
    return cnf, bcbio_structure
Exemple #4
0
def main(args):
    """Parse the options, check for bedtools, and process the whole project.

    Args:
        args: Not read by this function; kept for the caller's signature.

    After processing, the work directory is deleted unless the
    'keep_intermediate' setting is truthy.
    """
    extra_opts = [
        (['--mutations'], {'dest': 'mutations_fpath'}),
        (['--bed', '--capture', '--amplicons'],
         {'dest': 'bed',
          'help': 'a BED file for capture panel or amplicons'}),
    ]
    cnf, bcbio_structure = bcbio_summary_script_proc_params(
        BCBioStructure.targqc_name,
        BCBioStructure.targqc_summary_dir,
        extra_opts=extra_opts)

    check_system_resources(cnf, required=['bedtools'], optional=[])
    process_all(cnf, bcbio_structure)

    if cnf['keep_intermediate']:
        return
    shutil.rmtree(cnf['work_dir'])
def main():
    """Parse the Seq2C summary options and run Seq2C on the bcbio project.

    Registers the Seq2C-specific options (controls, lr2gene.pl options,
    BED file, re-annotation, WGS mode, BED preparation switch) on top of
    the common summary-script options, then hands the parsed configuration
    to run_seq2c_bcbio_structure.
    """
    info(' '.join(sys.argv))
    info()

    controls_opt = (['--controls', '-c'], {
        'dest': 'controls',
        'help': 'Optional control sample names for Seq2C. For multiple controls, separate them using :',
        'default': '',
    })
    seq2c_opts_opt = (['--seq2c-opts'], {
        'dest': 'seq2c_opts',
        'help': 'Options for the final lr2gene.pl script.',
        'default': '',
    })
    bed_opt = (['--bed', '--capture', '--amplicons'], {
        'dest': 'bed',
        'help': 'BED file to run targetSeq and Seq2C analysis on.',
    })
    reannotate_opt = (['--reannotate'], {
        'dest': 'reannotate',
        'help': 're-annotate BED file with gene names',
        'action': 'store_true',
        'default': False,
    })
    wgs_opt = (['--wgs'], {
        'dest': 'is_wgs',
        'action': 'store_true',
        'default': False,
    })
    # The flag switches 'prep_bed' off; it stays True when the flag is absent.
    no_prep_bed_opt = (['--no-prep-bed'], {
        'dest': 'prep_bed',
        'help': 'do not fix input beds and exons',
        'action': 'store_false',
        'default': True,
    })
    # Disabled option, kept for reference:
    # (['--dedup'], dict(
    #      dest='dedup',
    #      help='Remove duplicates from the input bedfile.')
    #  ),

    cnf, bcbio_structure = bcbio_summary_script_proc_params(
        BCBioStructure.seq2c_name,
        BCBioStructure.cnv_summary_dir,
        extra_opts=[
            controls_opt,
            seq2c_opts_opt,
            bed_opt,
            reannotate_opt,
            wgs_opt,
            no_prep_bed_opt,
        ],
    )

    run_seq2c_bcbio_structure(cnf, bcbio_structure)
Exemple #6
0
def main():
    """Write the combined FastQC HTML report for all samples.

    Logs the command line, parses the FastQC summary parameters, writes the
    combined report into the configured output directory and finally logs
    the path of the report.
    """
    info(' '.join(sys.argv))
    info()

    cnf, bcbio_structure = bcbio_summary_script_proc_params(
        BCBioStructure.fastqc_name, BCBioStructure.fastqc_dir)

    step_greetings('FastQC summary for all samples')

    # NOTE(review): the report name comes from source.fastqc_name while the
    # parameters above use BCBioStructure.fastqc_name - confirm they agree.
    report_fpath = join(cnf.output_dir, source.fastqc_name + '.html')
    write_fastqc_combo_report(cnf, report_fpath, bcbio_structure.samples)

    info()
    for message in ('*' * 70, 'Fastqc summary:', '  ' + report_fpath):
        info(message)
Exemple #7
0
def main():
    """Parse the variant-filtering options and filter the bcbio project.

    Logs the command line, registers the variant-filtering command-line
    options (their help texts quote the defaults found in
    defaults['variant_filtering']), parses them through
    bcbio_summary_script_proc_params and runs filter_bcbio_structure.

    Each option is a (flags, keyword-arguments) pair; the option 'type'
    values are given as strings ('int', 'float').
    """
    info(' '.join(sys.argv))
    info()

    description = '''
        The program filters an annotated VCF file by SnpEff using dbSNP and COSMIC,
        setting the value of the FILTER column.

        A novel variant (non-dbSNP, non-COSMIC) is considered false positive
        if all three conditions (-r -f -n) are met. False positive variants are
        annotated PASS in column FILTER if the conditions are satisfied, or with
        other value otherwise, where the value is ;-separated list of failed criteria.
        '''

    dfts = defaults['variant_filtering']
    # NOTE(review): several help texts format a default with %d although the
    # option type is 'float' (e.g. --min-mq, -q); %d drops any fractional
    # part - confirm the configured defaults are whole numbers.
    extra_opts = [
        (['--caller'],
         dict(
             dest='caller',
             help=
             'Variant caller name to process. If not set, processed all variant callers'
         )),
        (['--wgs'],
         dict(
             dest='is_wgs',
             action='store_true',
             default=False,
             help=
             'Splits vcf2txt runs by samples, thus turns off cohort filtering')
         ),
        (['-b', '--bias'],
         dict(dest='bias',
              action='store_true',
              help='Novel or dbSNP variants with strand bias "2;1" or "2;0" '
              'and AF < 0.3 will be considered as false positive.')),
        (['-M', '--min-mq'],
         dict(
             dest='min_mq',
             type='float',
             help='The filtering mean mapping quality score for variants. '
             'The raw variant will be filtered if the mean mapping quality '
             'score is less then specified. Default %d' % dfts['min_mq'],
         )),
        (['-D', '--filt-depth'],
         dict(
             dest='filt_depth',
             type='int',
             help='The filtering total depth. The raw variant will be filtered '
             'on first place if the total depth is less then [filt_depth]. '
             'Default %s' % str(dfts['filt_depth']),
         )),
        (['-V', '--min-vd'],
         dict(
             dest='min_vd',
             type='int',
             help=
             'The filtering variant depth. Variants with depth < [min_vd] will '
             'be considered false positive. Default is %d (meaning at least %d reads '
             'are needed for a variant)' % (dfts['min_vd'], dfts['min_vd']))),
        (['-m', '--maf'],
         dict(
             dest='maf',
             type='float',
             help='If there is MAF with frequency, it will be considered dbSNP '
             'regardless of COSMIC. Default MAF is %f' % dfts['maf'],
         )),
        (['-r', '--fraction'],
         dict(
             dest='fraction',
             type='float',
             help='When a novel variant is present in more than [fraction] '
             'of samples and mean allele frequency is less than [freq], '
             'it\'s considered as likely false positive. Default %f. '
             'Used with -f and -n' % dfts['fraction'],
         )),
        (['-F', '--ave-freq'],
         dict(
             dest='ave_freq',
             type='float',
             help='When the average allele frequency is also below the [freq], '
             'the variant is considered likely false positive. '
             'Default %f. Used with -r and -n' % dfts['ave_freq'],
         )),
        (['--min-hotspot-freq'],
         dict(
             dest='min_hotspot_freq',
             type='float',
             # Fixed: the two literals used to join as "...-f.Default: ...".
             help=
             'The minimum allele frequency hotspot somatic mutations, typically lower then -f. '
             'Default: 0.01 or half _min_freq_, whichever is less',
         )),
        (['-n'],
         dict(
             dest='sample_cnt',
             type='int',
             help=
             'When the variant is detected in greater or equal [sample_cnt] '
             'samples, the variant is considered likely false positive. '
             'Default %d. Used with -r and -f' % dfts['sample_cnt'],
         )),
        (['-R', '--max-ratio'],
         dict(
             dest='max_ratio',
             type='float',
             help=
             'When a variant is present in more than [fraction] of samples, '
             'and AF < 0.3, it\'s considered as likely false positive, '
             'even if it\'s in COSMIC. Default %f.' % dfts['max_ratio'],
         )),

        # This option moved to add_post_bcbio_args()
        # (['-f', '--min-freq'], dict(
        #     dest='min_freq',
        #     type='float',
        #     help='When individual allele frequency < freq for variants, '
        #          'it was considered likely false poitives. '
        #          'Default %f' % defaults['default_min_freq'],
        # )),
        (['-p'],
         dict(
             dest='min_p_mean',
             type='int',
             # Fixed: a separating space was missing before "Default".
             help='The minimum mean position in reads for variants. '
             'Default %d bp' % dfts['min_p_mean'],
         )),
        (['-q'],
         dict(
             dest='min_q_mean',
             type='float',
             # Fixed: a separating space was missing before "Default".
             help='The minimum mean base quality phred score for variant. '
             'Default %d' % dfts['min_q_mean'],
         )),
        (['-P'],
         dict(
             dest='filt_p_mean',
             type='int',
             help='The filtering mean position in reads for variants. '
             'The raw variant will be filtered on first place if the mean '
             'posititon is less then [filt_p_mean]. '
             'Default %s bp' % str(dfts['filt_p_mean']),
         )),
        (['-Q'],
         dict(
             dest='filt_q_mean',
             type='float',
             help='The filtering mean base quality phred score for variants. '
             'The raw variant will be filtered on first place  '
             'if the mean quality is less then [filt_q_mean]. '
             'Default %s' % str(dfts['filt_q_mean']),
         )),
        (['--sn'],
         dict(dest='signal_noise',
              type='int',
              help='Minimal signal/noise value. Default %d' %
              dfts['signal_noise'])),
        # Passing -u switches 'count_undetermined' off; it is True otherwise.
        (['-u'],
         dict(dest='count_undetermined',
              action='store_false',
              default=True,
              help='Undeteremined won\'t be counted for the sample count.')),
        (['-c', '--control'],
         dict(
             dest='control',
             help=
             'The control sample name. Any novel or COSMIC variants passing all '
             'above filters but also detected in Control sample will be deemed '
             'considered false positive. Use only when there\'s control sample.'
         )),
        (['--datahub-path'],
         dict(
             dest='datahub_path',
             help=
             'DataHub directory path to upload final MAFs and CNV (can be remote).',
         )),
    ]

    cnf, bcbio_structure = bcbio_summary_script_proc_params(
        BCBioStructure.varfilter_name,
        description=description,
        extra_opts=extra_opts)

    info('*' * 70)
    info()

    filter_bcbio_structure(cnf, bcbio_structure)