Ejemplo n.º 1
0
 def variantCalling_mutect2_partial(chromosome):
     """Call variants with Mutect2 for a single chromosome.

     Pairs the tumor and normal read-group BAMs of ``chromosome`` located
     under ``self.output_folder`` and submits them to ``gatk_docker`` with
     the "gatk_mutect2" command key. The calls are written to
     ``<sample>.chr<chromosome>.mutect2.vcf.gz`` and the log to
     ``<sample>.chr<chromosome>.Mutect2.log``, both in the output folder.
     """
     chrom_prefix = self.output_folder + self.sample_name + ".chr" + chromosome
     # The sample tags are also the BAM basenames without the extension.
     tumor_tag = self.sample_name + "_r_tumor.chr" + chromosome + "_RG"
     normal_tag = self.sample_name + "_r_normal.chr" + chromosome + "_RG"

     mutect2_args = {
         "input_tumor": self.output_folder + tumor_tag + ".bam",
         "input_normal": self.output_folder + normal_tag + ".bam",
         "normal-sample": normal_tag,
         "tumor-sample": tumor_tag,
         "output": chrom_prefix + ".mutect2.vcf.gz",
         "reference": reference_fasta,
     }
     # Options that were left disabled here: --disable-read-filter, dbsnp,
     # emit-reference-confidence=true, max-alternate-alleles=3.
     gatk_docker(
         "gatk_mutect2",
         mutect2_args,
         chrom_prefix + ".Mutect2.log",
         self.ram,
         self.docker_images_dict["gatk"],
     )
Ejemplo n.º 2
0
 def filterMutectCalls(vcf_file):
     """Filter a call set through the "gatk_filter_mutect" command.

     The filtered calls are written next to the input, with ``.vcf.gz``
     replaced by ``.filtered.vcf.gz``; the run is logged to
     ``<sample>.filter.log`` in ``self.output_folder``.
     """
     filter_args = {
         "variant": vcf_file,
         "output": vcf_file.replace(".vcf.gz", ".filtered.vcf.gz"),
     }
     log_path = self.output_folder + self.sample_name + ".filter.log"
     gatk_docker(
         "gatk_filter_mutect",
         filter_args,
         log_path,
         self.ram,
         self.docker_images_dict["gatk"],
     )
Ejemplo n.º 3
0
		def validateSam():
			"""Validate the integrity of the BWA alignment file.

			Runs the "gatk_validate_sam" command through ``gatk_docker`` in
			SUMMARY mode on ``<sample>.bwa.bam`` and logs the run to
			``<sample>.validateSamFile.log``, both under ``output_folder``.
			"""
			parameters_dict = {
				"input": output_folder + self.sample_name + ".bwa.bam",
				"MODE": "SUMMARY",
			}
			validate_log = output_folder + self.sample_name + ".validateSamFile.log"
			# The command key must be a string literal, and the log must be this
			# step's own file rather than the HaplotypeCaller log.
			gatk_docker("gatk_validate_sam", parameters_dict,
						validate_log, self.ram, self.docker_images_dict["gatk"])
Ejemplo n.º 4
0
		def buildRecalibrator():
			"""Build the base recalibration table (GATK base recalibrator).

			Sends ``sorted_bam``, the reference FASTA and the dbSNP known-sites
			VCF to the "gatk_build_recalibrator" command; the table is written
			to ``BaseRecalibrator_metrics`` and the run is logged to
			``<sample>.BaseRecalibrator.log`` under ``output_folder``.
			"""
			recal_args = {
				"input": sorted_bam,
				"output": BaseRecalibrator_metrics,
				"reference": reference_fasta,
				"known-sites": dbsnp_vcf,
				"use-original-qualities": "true",
			}
			log_path = output_folder + self.sample_name + ".BaseRecalibrator.log"
			gatk_docker(
				"gatk_build_recalibrator",
				recal_args,
				log_path,
				self.ram,
				self.docker_images_dict["gatk"],
			)
Ejemplo n.º 5
0
 def variantCalling_HaplotypeCaller(input_bam):
     """Call variants on ``input_bam`` via "gatk_haplotype_caller".

     The VCF path is ``input_bam`` with ".bam" replaced by ".vcf.gz"; the
     log path is ``input_bam`` with ".HaplotypeCaller.log" appended.
     """
     caller_args = {
         "input": input_bam,
         "output": input_bam.replace(".bam", ".vcf.gz"),
         "reference": reference_fasta,
     }
     # Options that were left disabled here: emit-reference-confidence=true,
     # max-alternate-alleles=3.
     gatk_docker(
         "gatk_haplotype_caller",
         caller_args,
         input_bam + ".HaplotypeCaller.log",
         self.ram,
         self.docker_images_dict["gatk"],
     )
Ejemplo n.º 6
0
		def addReadGroups():
			"""Add read groups to ``unsorted_bam`` (GATK).

			The library, platform and platform-unit IDs come from ``self``;
			the sample tag is ``self.sample_name``. Output is written to
			``ReadGroups_bam`` and the run is logged to
			``<sample>.ReadGroups.log`` in ``self.output_folder``.
			"""
			read_group_args = {"input": unsorted_bam, "output": ReadGroups_bam}
			# Read-group fields are appended after the I/O paths so the key
			# order matches input, output, RGLB, RGPL, RGPU, RGSM.
			read_group_args.update({
				"RGLB": self.lib_ID,
				"RGPL": self.pl_ID,
				"RGPU": self.pu_ID,
				"RGSM": self.sample_name,
			})
			log_path = self.output_folder + self.sample_name + ".ReadGroups.log"
			gatk_docker(
				"gatk_add_read_groups",
				read_group_args,
				log_path,
				self.ram,
				self.docker_images_dict["gatk"],
			)
Ejemplo n.º 7
0
 def addReadGroups(t_n):
     """Add read groups to ``markDuplicates_bam`` for one sample role (GATK).

     ``t_n`` is appended to the sample name as ``<sample>_r_<t_n>`` for the
     RGSM tag and is also embedded in the log file name,
     ``<t_n_sample_name>.ReadGroups.<t_n>.log``.
     """
     sample_tag = self.sample_name + "_r_" + t_n
     read_group_args = {
         "input": markDuplicates_bam,
         "output": ReadGroups_bam,
         "RGLB": self.lib_ID,
         "RGPL": self.pl_ID,
         "RGPU": self.pu_ID,
         "RGSM": sample_tag,
     }
     log_path = self.output_folder + t_n_sample_name + ".ReadGroups." + t_n + ".log"
     gatk_docker(
         "gatk_add_read_groups",
         read_group_args,
         log_path,
         self.ram,
         self.docker_images_dict["gatk"],
     )
Ejemplo n.º 8
0
 def addReadGroups_partial_bam(chromosome, input_bam):
     """Add read groups to a partial BAM (GATK).

     The output path is ``input_bam`` with ".bam" replaced by "_RG.bam";
     the RGSM tag is the basename of ``input_bam`` with ".bam" replaced by
     "_RG". ``chromosome`` is accepted but not read by this function, so
     the log path ``<t_n_sample_name>.ReadGroups.log`` does not vary with it.
     """
     tagged_bam = input_bam.replace(".bam", "_RG.bam")
     # Keep only the part after the last "/" as the sample tag.
     sample_tag = input_bam.replace(".bam", "_RG").rsplit("/", 1)[-1]
     read_group_args = {
         "input": input_bam,
         "output": tagged_bam,
         "RGLB": self.lib_ID,
         "RGPL": self.pl_ID,
         "RGPU": self.pu_ID,
         "RGSM": sample_tag,
     }
     log_path = self.output_folder + t_n_sample_name + ".ReadGroups.log"
     gatk_docker(
         "gatk_add_read_groups",
         read_group_args,
         log_path,
         self.ram,
         self.docker_images_dict["gatk"],
     )
Ejemplo n.º 9
0
		def applyRecalibrator():
			"""Apply the base recalibration model to ``sorted_bam`` (GATK).

			Submits ``sorted_bam`` together with the ``BaseRecalibrator_metrics``
			table to the "gatk_apply_recalibrator" command, writing
			``BaseRecalibrator_bam`` and logging the run to
			``<sample>.ApplyBQSR.log`` under ``output_folder``.
			"""
			parameters_dict = {
				"input": sorted_bam,
				"output": BaseRecalibrator_bam,
				"bqsr": BaseRecalibrator_metrics,
				"use-original-qualities": "true",
				# This key was previously listed three times ("10", "20", "30").
				# A dict keeps only the last value for a repeated key, so "30"
				# is the only level that was ever passed on; the dead entries
				# are dropped to make the effective value explicit.
				# NOTE(review): if all three quantization levels are intended,
				# gatk_docker needs a way to repeat an argument - TODO confirm
				# its parameter format before changing this value.
				"static-quantized-quals": "30",
			}
			ApplyBQSR_log = output_folder + self.sample_name + ".ApplyBQSR.log"
			gatk_docker("gatk_apply_recalibrator", parameters_dict,
						ApplyBQSR_log, self.ram, self.docker_images_dict["gatk"])
Ejemplo n.º 10
0
 def markDuplicates(input_bam):
     """Mark duplicate reads in ``input_bam`` (GATK).

     The output is written next to the input as
     ``<input stem>.MarkDuplicates.bam``, metrics go to
     ``markDuplicates_metrics`` and the run is logged to
     ``<sample>.MarkDuplicates.log`` in ``self.output_folder``.
     """
     bam_ext = ".bam"
     if input_bam.endswith(bam_ext):
         # Swap only the trailing extension. A plain replace("bam", ...)
         # would also rewrite any "bam" inside directory or sample names
         # and produce a path in a directory that does not exist.
         output_bam = input_bam[:-len(bam_ext)] + ".MarkDuplicates.bam"
     else:
         # No ".bam" extension to swap: append the suffix so the output
         # path can never be identical to the input path.
         output_bam = input_bam + ".MarkDuplicates.bam"

     parameters_dict = {
         "input": input_bam,
         "output": output_bam,
         "mark_dupl_metrics": markDuplicates_metrics,
         "optical_duplicate_pixel_dist": "2500",
         "assume_sort_order": "queryname",
         "clear_DT": "false",
         "add_pg_tag_to_reads": "false",
     }
     markDuplicates_log = self.output_folder + self.sample_name + ".MarkDuplicates.log"
     gatk_docker("gatk_mark_duplicates", parameters_dict,
                 markDuplicates_log, self.ram,
                 self.docker_images_dict["gatk"])
Ejemplo n.º 11
0
 def addReadGroups():
     """Add read groups to ``unsorted_bam`` (GATK), announcing the step.

     Prints a three-line start banner, then submits the
     "gatk_add_read_groups" command using the image registered under the
     "broadinstitute/gatk" key. Output is written to ``ReadGroups_bam`` and
     the run is logged to ``<sample>.ReadGroups.log`` in
     ``self.output_folder``.
     """
     # Blank line, banner text, blank line - one print call per line.
     for banner_line in ("", "STARTING ADDING READGROUPS", ""):
         print(banner_line)

     read_group_args = {
         "input": unsorted_bam,
         "output": ReadGroups_bam,
         "RGLB": self.lib_ID,
         "RGPL": self.pl_ID,
         "RGPU": self.pu_ID,
         "RGSM": self.sample_name,
     }
     log_path = self.output_folder + self.sample_name + ".ReadGroups.log"
     gatk_docker(
         "gatk_add_read_groups",
         read_group_args,
         log_path,
         self.ram,
         self.docker_images_dict["broadinstitute/gatk"],
     )