def GATKpreprocessing(infile, outfile):
    '''Reorder a BAM according to the reference fasta, add read groups,
    realign around indels and recalibrate base quality scores.

    The work is delegated to three PipelineExome steps, called in order:

    1. GATKReadGroups:   infile -> outfile with ".readgroups.bqsr"
    2. GATKIndelRealign: read-group file -> outfile with ".realign.bqsr"
    3. GATKBaseRecal:    realigned file -> outfile

    Each intermediate file is handed to IOTools.zapFile right after the
    step that consumes it has been called.

    Arguments
    ---------
    infile : string
        Input BAM file.
    outfile : string
        Final output file. The intermediate file names are built by
        replacing ".bqsr" in this name, so it must contain ".bqsr";
        otherwise both intermediates would be identical to outfile.

    PARAMS keys read: bwa_index_dir, genome, readgroup_library,
    readgroup_platform, readgroup_platform_unit, gatk_threads,
    gatk_dbsnp, gatk_solid_options.
    '''
    # reference fasta is "<bwa_index_dir>/<genome>.fa"
    genome = "%s/%s.fa" % (PARAMS["bwa_index_dir"], PARAMS["genome"])

    # intermediate names are derived from the final output name
    readgroups_bam = outfile.replace(".bqsr", ".readgroups.bqsr")
    realigned_bam = outfile.replace(".bqsr", ".realign.bqsr")

    PipelineExome.GATKReadGroups(infile, readgroups_bam, genome,
                                 PARAMS["readgroup_library"],
                                 PARAMS["readgroup_platform"],
                                 PARAMS["readgroup_platform_unit"])

    PipelineExome.GATKIndelRealign(readgroups_bam, realigned_bam, genome,
                                   PARAMS["gatk_threads"])
    # the read-group file has been consumed by the realignment step
    IOTools.zapFile(readgroups_bam)

    PipelineExome.GATKBaseRecal(realigned_bam, outfile, genome,
                                PARAMS["gatk_dbsnp"],
                                PARAMS["gatk_solid_options"])
    # the realigned file has been consumed by the recalibration step
    IOTools.zapFile(realigned_bam)
def realignMatchedSample(infile, outfile):
    '''Repeat indel realignment on the merged BAM of control and tumor.

    Realigning the merged file is meant to help avoid problems with
    sample-specific realignments.

    Arguments
    ---------
    infile : string
        Merged control/tumor BAM. It is passed to IOTools.zapFile once
        the realignment step has been called.
    outfile : string
        Output file handed to PipelineExome.GATKIndelRealign.

    PARAMS keys read: bwa_index_dir, genome.
    '''
    index_dir = PARAMS["bwa_index_dir"]
    genome_name = PARAMS["genome"]
    # reference fasta is "<bwa_index_dir>/<genome>.fa"
    reference_fasta = "%s/%s.fa" % (index_dir, genome_name)

    PipelineExome.GATKIndelRealign(infile, outfile, reference_fasta)

    # NOTE(review): presumably zapFile empties the input to save space
    # while keeping it for dependency tracking - confirm in IOTools.
    IOTools.zapFile(infile)