Beispiel #1
0
def haplotype_caller(core_req=16,
                     mem_req=12 * 1024,
                     in_bams=find('bam$', n='>0'),
                     in_bais=find('bai$', n='>0'),
                     in_target_bed=find('target.bed'),
                     out_vcf=out_dir('raw_variants.g.vcf')):
    """Return the shell command that runs GATK HaplotypeCaller in GVCF mode.

    :param core_req: value passed to ``-nct``.
    :param mem_req: forwarded to ``gatk()`` when building the launcher prefix.
    :param in_bams: input BAMs; converted with ``bam_list_to_inputs`` and
        placed after ``-I``.
    :param in_bais: not referenced in the command itself.
        NOTE(review): presumably declared so the indexes are tracked as task
        inputs -- confirm against the workflow framework.
    :param in_target_bed: rendered through ``arg('--intervals', ...)``.
    :param out_vcf: path passed to ``-o``.
    :returns: the formatted multi-line command string.
    """
    in_bams = bam_list_to_inputs(in_bams)
    intervals = arg('--intervals', in_target_bed)
    # NOTE(review): GATK may override -stand_call_conf / -stand_emit_conf when
    # --emitRefConfidence GVCF is used -- confirm for the GATK version in use.
    #
    # Every option sits on its own continuation line.  A backslash followed by
    # more text on the same line escapes the next space instead of the newline,
    # which hands the tool a stray " " argument.
    return r"""
        {gatk} \
        -T HaplotypeCaller \
        -R {s[ref][reference_fasta]} \
        -D {s[ref][dbsnp_vcf]} \
        -nct {core_req} \
        --emitRefConfidence GVCF \
        -stand_call_conf 30 \
        -stand_emit_conf 10 \
        -I {in_bams} \
        -o {out_vcf} \
        {intervals} \
        -A Coverage \
        -A GCContent \
        -A AlleleBalanceBySample \
        -A AlleleBalance \
        -A MappingQualityRankSumTest \
        -A InbreedingCoeff \
        -A FisherStrand \
        -A QualByDepth
    """.format(s=s, gatk=gatk(mem_req), **locals())
Beispiel #2
0
def indel_realigner(core_req=4,  # proxy for mem_req until i test mem_req out
                    mem_req=8 * 1024,
                    contig=None,
                    in_bams=find('bam$', n='>0'),
                    in_bais=find('bai$', n='>0'),
                    in_sites=find('denovo_realign_targets.bed'),
                    out_bam=out_dir('realigned.bam'),
                    out_bai=out_dir('realigned.bai')):
    """Return the shell commands that run GATK IndelRealigner and index the result.

    :param core_req: not referenced in the command (see the inline comment on
        the parameter).
    :param mem_req: forwarded to ``gatk()`` when building the launcher prefix.
    :param contig: rendered through ``arg('--intervals', contig)``.
    :param in_bams: input BAMs; converted with ``bam_list_to_inputs`` and
        placed after ``-I``.
    :param in_bais: not referenced in the command itself.
    :param in_sites: path passed to ``-targetIntervals``.
    :param out_bam: path passed to ``-o`` and then to ``samtools index``.
    :param out_bai: not referenced in the command itself.
    :returns: the formatted multi-line command string.
    :raises ValueError: if ``s['ref']['version']`` is neither ``'b37'`` nor
        ``'hg38'``.
    """
    in_bams = bam_list_to_inputs(in_bams)

    ref_version = s['ref']['version']
    if ref_version == 'b37':
        in_knowns = s['ref']['1kg_indel_vcf'], s['ref']['mills_vcf']
    elif ref_version == 'hg38':
        in_knowns = [s['ref']['mills_and_1kg_indel_vcf']]
    else:
        # Without this branch the function fails later with an opaque
        # UnboundLocalError on in_knowns.
        raise ValueError(
            "Unsupported reference version %r; expected 'b37' or 'hg38'"
            % (ref_version,))

    return r"""
        # IR does not support parallelization
        {gatk} \
        -T IndelRealigner \
        -R {s[ref][reference_fasta]} \
        -I {in_bams} \
        -o {out_bam} \
        -targetIntervals {in_sites} \
        {knowns} \
        -model USE_READS \
        --filter_bases_not_stored \
        {intervals}

        {s[opt][samtools]} index {out_bam}
    """.format(s=s,
               intervals=arg('--intervals', contig),
               gatk=gatk(mem_req),
               knowns=' '.join('-known %s' % p for p in in_knowns),
               **locals())
Beispiel #3
0
def realigner_target_creator(core_req=8,
                             mem_req=8 * 1024,
                             in_target_bed=find('target.bed'),
                             in_bams=find('bam$', n='>0'),
                             in_bais=find('bai$', n='>0'),
                             out_bams=forward('in_bams'),
                             out_bais=forward('in_bais'),
                             out_sites=out_dir('denovo_realign_targets.bed')):
    """Return the shell command that runs GATK RealignerTargetCreator.

    :param core_req: value passed to ``-nt``.
    :param mem_req: forwarded to ``gatk()`` when building the launcher prefix.
    :param in_target_bed: rendered through ``arg('--intervals', ...)``.
    :param in_bams: input BAMs; converted with ``bam_list_to_inputs`` and
        placed after ``-I``.
    :param in_bais: not referenced in the command itself.
    :param out_bams: declared with ``forward('in_bams')``; not referenced in
        the command.
    :param out_bais: declared with ``forward('in_bais')``; not referenced in
        the command.
    :param out_sites: path passed to ``-o``.
    :returns: the formatted multi-line command string.
    :raises ValueError: if ``s['ref']['version']`` is neither ``'b37'`` nor
        ``'hg38'``.
    """
    in_bams = bam_list_to_inputs(in_bams)

    ref_version = s['ref']['version']
    if ref_version == 'b37':
        in_knowns = s['ref']['1kg_indel_vcf'], s['ref']['mills_vcf']
    elif ref_version == 'hg38':
        in_knowns = [s['ref']['mills_and_1kg_indel_vcf']]
    else:
        # Without this branch the function fails later with an opaque
        # UnboundLocalError on in_knowns.
        raise ValueError(
            "Unsupported reference version %r; expected 'b37' or 'hg38'"
            % (ref_version,))

    # TODO should we pad intervals?  might be indels on perimeter that need realigner.  Not too worried because we're using HaplotypeCaller, though.
    return r"""
        #could add more knowns from ESP and other seq projects...
        {gatk} \
        -T RealignerTargetCreator \
        -R {s[ref][reference_fasta]} \
        -I {in_bams} \
        -o {out_sites} \
        {knowns} \
        -nt {core_req} \
        {args}
    """.format(s=s, gatk=gatk(mem_req),
               args=arg('--intervals', in_target_bed),
               knowns=' '.join('-known %s' % p for p in in_knowns),
               **locals())
Beispiel #4
0
def haplotype_caller(core_req=16,
                     mem_req=29 * 1024,
                     in_bams=find('bam$', n='>0'),
                     in_bais=find('bai$', n='>0'),
                     in_target_bed=find('target.bed'),
                     out_vcf=out_dir('raw_variants.g.vcf')):
    """Build the GATK HaplotypeCaller (GVCF mode) command line.

    ``in_bams`` is converted with ``bam_list_to_inputs`` and placed after
    ``-I``; ``in_target_bed`` is rendered via ``arg('--intervals', ...)``;
    ``core_req`` feeds ``-nct``; ``mem_req`` is handed to ``gatk()``; and
    ``out_vcf`` is the ``-o`` destination.  ``in_bais`` does not appear in
    the command text.

    Returns the formatted multi-line command string.
    """
    command_template = r"""
        {gatk} \
        -T HaplotypeCaller \
        -R {s[ref][reference_fasta]} \
        -D {s[ref][dbsnp_vcf]} \
        -nct {core_req} \
        --emitRefConfidence GVCF \
        -I {in_bams} \
        -o {out_vcf} \
        {intervals} \
        -A Coverage \
        -A GCContent \
        -A AlleleBalanceBySample \
        -A AlleleBalance \
        -A MappingQualityRankSumTest \
        -A InbreedingCoeff \
        -A FisherStrand \
        -A QualByDepth
    """
    bam_inputs = bam_list_to_inputs(in_bams)
    interval_arg = arg('--intervals', in_target_bed)
    # Only the placeholders the template actually uses are supplied.
    return command_template.format(
        s=s,
        gatk=gatk(mem_req),
        core_req=core_req,
        in_bams=bam_inputs,
        out_vcf=out_vcf,
        intervals=interval_arg,
    )
Beispiel #5
0
def indel_realigner(core_req=4,  # proxy for mem_req until i test mem_req out
                    mem_req=8 * 1024,
                    contig=None,
                    in_bams=find('bam$', n='>0'),
                    in_bais=find('bai$', n='>0'),
                    in_sites=find('denovo_realign_targets.bed'),
                    out_bam=out_dir('realigned.bam'),
                    out_bai=out_dir('realigned.bai')):
    """Build the GATK IndelRealigner command followed by a samtools index step.

    The ``-known`` VCFs are selected from ``s['ref']`` according to
    ``s['ref']['version']``.  ``contig`` is rendered through
    ``arg('--intervals', contig)``; ``in_sites`` is passed to
    ``-targetIntervals``; ``out_bam`` is both the ``-o`` target and the file
    given to ``samtools index``.  ``core_req``, ``in_bais`` and ``out_bai`` do
    not appear in the command text.

    Returns the formatted multi-line command string.

    Raises ValueError when the reference version is neither 'b37' nor 'hg38'.
    """
    in_bams = bam_list_to_inputs(in_bams)

    ref_version = s['ref']['version']
    if ref_version == 'b37':
        in_knowns = s['ref']['1kg_indel_vcf'], s['ref']['mills_vcf']
    elif ref_version == 'hg38':
        in_knowns = [s['ref']['mills_and_1kg_indel_vcf']]
    else:
        # Fail with a clear message rather than an UnboundLocalError on
        # in_knowns further down.
        raise ValueError(
            "Unsupported reference version %r; expected 'b37' or 'hg38'"
            % (ref_version,))

    return r"""
        # IR does not support parallelization
        {gatk} \
        -T IndelRealigner \
        -R {s[ref][reference_fasta]} \
        -I {in_bams} \
        -o {out_bam} \
        -targetIntervals {in_sites} \
        {knowns} \
        -model USE_READS \
        --filter_bases_not_stored \
        {intervals}

        {s[opt][samtools]} index {out_bam}
    """.format(s=s,
               intervals=arg('--intervals', contig),
               gatk=gatk(mem_req),
               knowns=' '.join('-known %s' % p for p in in_knowns),
               **locals())
Beispiel #6
0
def realigner_target_creator(core_req=8,
                             mem_req=8 * 1024,
                             in_target_bed=find('target.bed'),
                             in_bams=find('bam$', n='>0'),
                             in_bais=find('bai$', n='>0'),
                             out_bams=forward('in_bams'),
                             out_bais=forward('in_bais'),
                             out_sites=out_dir('denovo_realign_targets.bed')):
    """Build the GATK RealignerTargetCreator command line.

    The ``-known`` VCFs are selected from ``s['ref']`` according to
    ``s['ref']['version']``.  ``core_req`` feeds ``-nt``; ``in_target_bed`` is
    rendered through ``arg('--intervals', ...)``; ``out_sites`` is the ``-o``
    destination.  ``in_bais``, ``out_bams`` and ``out_bais`` do not appear in
    the command text.

    Returns the formatted multi-line command string.

    Raises ValueError when the reference version is neither 'b37' nor 'hg38'.
    """
    in_bams = bam_list_to_inputs(in_bams)

    ref_version = s['ref']['version']
    if ref_version == 'b37':
        in_knowns = s['ref']['1kg_indel_vcf'], s['ref']['mills_vcf']
    elif ref_version == 'hg38':
        in_knowns = [s['ref']['mills_and_1kg_indel_vcf']]
    else:
        # Fail with a clear message rather than an UnboundLocalError on
        # in_knowns further down.
        raise ValueError(
            "Unsupported reference version %r; expected 'b37' or 'hg38'"
            % (ref_version,))

    # TODO should we pad intervals?  might be indels on perimeter that need realigner.  Not too worried because we're using HaplotypeCaller, though.
    return r"""
        #could add more knowns from ESP and other seq projects...
        {gatk} \
        -T RealignerTargetCreator \
        -R {s[ref][reference_fasta]} \
        -I {in_bams} \
        -o {out_sites} \
        {knowns} \
        -nt {core_req} \
        {args}
    """.format(s=s, gatk=gatk(mem_req),
               args=arg('--intervals', in_target_bed),
               knowns=' '.join('-known %s' % p for p in in_knowns),
               **locals())