def haplotype_caller(core_req=16,
                     mem_req=12 * 1024,
                     in_bams=find('bam$', n='>0'),
                     in_bais=find('bai$', n='>0'),
                     in_target_bed=find('target.bed'),
                     out_vcf=out_dir('raw_variants.g.vcf')):
    """Build the shell command text for a GATK HaplotypeCaller run in GVCF mode.

    NOTE(review): another ``haplotype_caller`` is defined later in this file
    (``mem_req=29 * 1024``, without the ``-stand_call_conf`` /
    ``-stand_emit_conf`` options).  Being the later definition it rebinds the
    name, so this version looks shadowed -- confirm before relying on it.

    :param core_req: value substituted after ``-nct``.
    :param mem_req: passed to ``gatk()`` to build the launcher prefix.
    :param in_bams: input BAMs; converted with ``bam_list_to_inputs`` and
        substituted after ``-I``.
    :param in_bais: not referenced in the body; presumably declared so the
        workflow framework tracks the BAM indexes -- TODO confirm.
    :param in_target_bed: handed to ``arg('--intervals', ...)``.
    :param out_vcf: value substituted after ``-o``.
    :return: the formatted command string.
    """
    command = r"""
        {gatk} \
        -T HaplotypeCaller \
        -R {s[ref][reference_fasta]} \
        -D {s[ref][dbsnp_vcf]} \
        -nct {core_req} \
        --emitRefConfidence GVCF \
        -stand_call_conf 30 \
        -stand_emit_conf 10 \
        -I {in_bams} \
        -o {out_vcf} \
        {intervals} \
        -A Coverage \
        -A GCContent \
        -A AlleleBalanceBySample \
        -A AlleleBalance \
        -A MappingQualityRankSumTest \
        -A InbreedingCoeff \
        -A FisherStrand \
        -A QualByDepth
    """

    # Helpers are invoked in the same order as before: BAM inputs, interval
    # argument, then the GATK launcher.
    bam_inputs = bam_list_to_inputs(in_bams)
    interval_arg = arg('--intervals', in_target_bed)
    launcher = gatk(mem_req)

    return command.format(
        s=s,
        gatk=launcher,
        core_req=core_req,
        in_bams=bam_inputs,
        out_vcf=out_vcf,
        intervals=interval_arg,
    )
def indel_realigner(core_req=4,  # proxy for mem_req until mem_req has been tested
                    mem_req=8 * 1024,
                    contig=None,
                    in_bams=find('bam$', n='>0'),
                    in_bais=find('bai$', n='>0'),
                    in_sites=find('denovo_realign_targets.bed'),
                    out_bam=out_dir('realigned.bam'),
                    out_bai=out_dir('realigned.bai')):
    """Build the shell command text that runs GATK IndelRealigner and then
    indexes the realigned BAM with samtools.

    :param core_req: not used in the command (IndelRealigner is run without
        any parallelization option); see the inline comment on the parameter.
    :param mem_req: passed to ``gatk()`` to build the launcher prefix.
    :param contig: handed to ``arg('--intervals', ...)``; defaults to None.
    :param in_bams: input BAMs; converted with ``bam_list_to_inputs`` and
        substituted after ``-I``.
    :param in_bais: not referenced in the body; presumably declared so the
        workflow framework tracks the BAM indexes -- TODO confirm.
    :param in_sites: value substituted after ``-targetIntervals``.
    :param out_bam: value substituted after ``-o`` and given to
        ``samtools index``.
    :param out_bai: not referenced in the body; presumably the index output
        tracked by the framework -- TODO confirm.
    :return: the formatted command string.
    :raises ValueError: if ``s['ref']['version']`` is neither ``'b37'`` nor
        ``'hg38'``.
    """
    in_bams = bam_list_to_inputs(in_bams)

    # Choose the known-indel resources for the configured reference build.
    # Previously an unrecognised version left `in_knowns` unbound and the
    # function died later with an opaque UnboundLocalError.
    ref_version = s['ref']['version']
    if ref_version == 'b37':
        in_knowns = [s['ref']['1kg_indel_vcf'], s['ref']['mills_vcf']]
    elif ref_version == 'hg38':
        in_knowns = [s['ref']['mills_and_1kg_indel_vcf']]
    else:
        raise ValueError(
            "Unsupported reference version %r; expected 'b37' or 'hg38'"
            % (ref_version,))

    command = r"""
        # IR does not support parallelization
        {gatk} \
        -T IndelRealigner \
        -R {s[ref][reference_fasta]} \
        -I {in_bams} \
        -o {out_bam} \
        -targetIntervals {in_sites} \
        {knowns} \
        -model USE_READS \
        --filter_bases_not_stored \
        {intervals}

        {s[opt][samtools]} index {out_bam}
    """
    return command.format(
        s=s,
        intervals=arg('--intervals', contig),
        gatk=gatk(mem_req),
        knowns=' '.join('-known %s' % p for p in in_knowns),
        in_bams=in_bams,
        out_bam=out_bam,
        in_sites=in_sites,
    )
def realigner_target_creator(core_req=8,
                             mem_req=8 * 1024,
                             in_target_bed=find('target.bed'),
                             in_bams=find('bam$', n='>0'),
                             in_bais=find('bai$', n='>0'),
                             out_bams=forward('in_bams'),
                             out_bais=forward('in_bais'),
                             out_sites=out_dir('denovo_realign_targets.bed')):
    """Build the shell command text for a GATK RealignerTargetCreator run.

    :param core_req: value substituted after ``-nt``.
    :param mem_req: passed to ``gatk()`` to build the launcher prefix.
    :param in_target_bed: handed to ``arg('--intervals', ...)``.
    :param in_bams: input BAMs; converted with ``bam_list_to_inputs`` and
        substituted after ``-I``.
    :param in_bais: not referenced in the body; presumably declared so the
        workflow framework tracks the BAM indexes -- TODO confirm.
    :param out_bams: not referenced in the body; declared via
        ``forward('in_bams')``.
    :param out_bais: not referenced in the body; declared via
        ``forward('in_bais')``.
    :param out_sites: value substituted after ``-o``.
    :return: the formatted command string.
    :raises ValueError: if ``s['ref']['version']`` is neither ``'b37'`` nor
        ``'hg38'``.
    """
    in_bams = bam_list_to_inputs(in_bams)

    # Choose the known-indel resources for the configured reference build.
    # Previously an unrecognised version left `in_knowns` unbound and the
    # function died later with an opaque UnboundLocalError.
    ref_version = s['ref']['version']
    if ref_version == 'b37':
        in_knowns = [s['ref']['1kg_indel_vcf'], s['ref']['mills_vcf']]
    elif ref_version == 'hg38':
        in_knowns = [s['ref']['mills_and_1kg_indel_vcf']]
    else:
        raise ValueError(
            "Unsupported reference version %r; expected 'b37' or 'hg38'"
            % (ref_version,))

    # TODO should we pad intervals?  There might be indels on the perimeter
    # that need realignment.  Not too worried because we're using
    # HaplotypeCaller, though.
    command = r"""
        #could add more knowns from ESP and other seq projects...
        {gatk} \
        -T RealignerTargetCreator \
        -R {s[ref][reference_fasta]} \
        -I {in_bams} \
        -o {out_sites} \
        {knowns} \
        -nt {core_req} \
        {args}
    """
    return command.format(
        s=s,
        gatk=gatk(mem_req),
        args=arg('--intervals', in_target_bed),
        knowns=' '.join('-known %s' % p for p in in_knowns),
        in_bams=in_bams,
        out_sites=out_sites,
        core_req=core_req,
    )
def haplotype_caller(core_req=16,
                     mem_req=29 * 1024,
                     in_bams=find('bam$', n='>0'),
                     in_bais=find('bai$', n='>0'),
                     in_target_bed=find('target.bed'),
                     out_vcf=out_dir('raw_variants.g.vcf')):
    """Build the shell command text for a GATK HaplotypeCaller run in GVCF mode.

    NOTE(review): an earlier function with this same name appears in this
    file; this later definition is the one the name is bound to afterwards.

    :param core_req: value substituted after ``-nct``.
    :param mem_req: passed to ``gatk()`` to build the launcher prefix.
    :param in_bams: input BAMs; converted with ``bam_list_to_inputs`` and
        substituted after ``-I``.
    :param in_bais: not referenced in the body; presumably declared so the
        workflow framework tracks the BAM indexes -- TODO confirm.
    :param in_target_bed: handed to ``arg('--intervals', ...)``.
    :param out_vcf: value substituted after ``-o``.
    :return: the formatted command string.
    """
    # Collect every placeholder value explicitly; helper calls keep their
    # original order (BAM inputs, interval argument, GATK launcher).
    fields = {'core_req': core_req, 'out_vcf': out_vcf}
    fields['in_bams'] = bam_list_to_inputs(in_bams)
    fields['intervals'] = arg('--intervals', in_target_bed)
    fields['s'] = s
    fields['gatk'] = gatk(mem_req)

    command = r"""
        {gatk} \
        -T HaplotypeCaller \
        -R {s[ref][reference_fasta]} \
        -D {s[ref][dbsnp_vcf]} \
        -nct {core_req} \
        --emitRefConfidence GVCF \
        -I {in_bams} \
        -o {out_vcf} \
        {intervals} \
        -A Coverage \
        -A GCContent \
        -A AlleleBalanceBySample \
        -A AlleleBalance \
        -A MappingQualityRankSumTest \
        -A InbreedingCoeff \
        -A FisherStrand \
        -A QualByDepth
    """
    return command.format(**fields)