def constructor(self) -> None:
    """Declare the inputs, steps and outputs of this VarDict germline workflow.

    Step wiring, as registered below:

    * ``vardict`` -> ``annotate``
    * ``annotate`` -> ``compressvcf`` -> ``tabixvcf``
    * ``annotate`` -> ``splitnormalisevcf`` -> ``trim`` -> ``filterpass``

    Outputs:
        variants: the ``tabixvcf`` step output.
        out: the ``filterpass`` step output.
    """
    # Mandatory inputs.
    self.input("bam", BamBai)
    self.input("intervals", Bed)
    self.input("sample_name", String)
    self.input("header_lines", File)
    self.input("reference", FastaWithDict)

    # VarDict options; only the allele-frequency threshold has a default.
    self.input("allele_freq_threshold", Float, default=0.05)
    self.input("min_mapping_qual", Int(optional=True))
    self.input("filter", String(optional=True))
    self.input("no_sv_call", Boolean(optional=True))

    # Fixed settings handed to VarDict (column indices refer to the intervals
    # file -- presumably chrom/start/end of a BED; confirm against the tool).
    fixed_vardict_settings = {
        "vcfFormat": True,
        "chromColumn": 1,
        "regStartCol": 2,
        "geneEndCol": 3,
        "threads": 4,
    }
    vardict_tool = VarDictGermline_1_6_0(
        intervals=self.intervals,
        bam=self.bam,
        reference=self.reference,
        # The same sample name and threshold feed both the caller and its
        # var2vcf counterpart arguments.
        sampleName=self.sample_name,
        var2vcfSampleName=self.sample_name,
        alleleFreqThreshold=self.allele_freq_threshold,
        var2vcfAlleleFreqThreshold=self.allele_freq_threshold,
        **fixed_vardict_settings,
        minMappingQual=self.min_mapping_qual,
        filter=self.filter,
        noStructuralVariants=self.no_sv_call,
    )
    self.step("vardict", vardict_tool)

    annotate_tool = BcfToolsAnnotate_1_5(
        vcf=self.vardict.out, headerLines=self.header_lines
    )
    self.step("annotate", annotate_tool)

    # Branch 1: compress and index the annotated VCF.
    self.step("compressvcf", BGZipLatest(file=self.annotate.out, stdout=True))
    self.step("tabixvcf", TabixLatest(inp=self.compressvcf.out))

    # Branch 2: split/normalise, trim, then run vcftools on the annotated VCF.
    split_tool = SplitMultiAllele(vcf=self.annotate.out, reference=self.reference)
    self.step("splitnormalisevcf", split_tool)
    self.step("trim", TrimIUPAC_0_0_5(vcf=self.splitnormalisevcf.out))

    vcftools_flags = {
        "removeFileteredAll": True,
        "recode": True,
        "recodeINFOAll": True,
    }
    self.step(
        "filterpass",
        VcfToolsvcftoolsLatest(vcf=self.trim.out, **vcftools_flags),
    )

    self.output("variants", source=self.tabixvcf.out)
    self.output("out", source=self.filterpass.out)
def constructor(self) -> None:
    """Declare the inputs, steps and outputs of this GATK4 germline workflow.

    Step wiring, as registered below:

    ``split_bam`` -> ``base_recalibrator`` -> ``apply_bqsr`` ->
    ``haplotype_caller`` -> ``split_multi_allele``

    ``apply_bqsr`` reads the BAM from ``split_bam`` and the recalibration
    table from ``base_recalibrator``.

    Every connection names the upstream output explicitly (``.out``) instead
    of passing the step node itself, so the wiring does not depend on a step
    having exactly one output.

    Outputs:
        out: the ``split_multi_allele`` step output.
    """
    self.input("bam", BamBai)
    self.input(
        "intervals",
        Bed(optional=True),
        doc="This optional interval supports processing by regions. If this input resolves "
        "to null, then GATK will process the whole genome per each tool's spec",
    )
    self.input("reference", FastaWithDict)

    # Known-sites resources handed to the base recalibrator; dbSNP is also
    # passed to the haplotype caller.
    self.input("snps_dbsnp", VcfTabix)
    self.input("snps_1000gp", VcfTabix)
    self.input("known_indels", VcfTabix)
    self.input("mills_indels", VcfTabix)

    self.step(
        "split_bam",
        gatk4.Gatk4SplitReads_4_1_3(bam=self.bam, intervals=self.intervals),
    )

    self.step(
        "base_recalibrator",
        gatk4.Gatk4BaseRecalibrator_4_1_3(
            bam=self.split_bam.out,
            intervals=self.intervals,
            reference=self.reference,
            knownSites=[
                self.snps_dbsnp,
                self.snps_1000gp,
                self.known_indels,
                self.mills_indels,
            ],
        ),
    )

    self.step(
        "apply_bqsr",
        gatk4.Gatk4ApplyBqsr_4_1_3(
            bam=self.split_bam.out,
            intervals=self.intervals,
            recalFile=self.base_recalibrator.out,
            reference=self.reference,
        ),
    )

    self.step(
        "haplotype_caller",
        gatk4.Gatk4HaplotypeCaller_4_1_3(
            inputRead=self.apply_bqsr.out,
            intervals=self.intervals,
            reference=self.reference,
            dbsnp=self.snps_dbsnp,
        ),
    )

    self.step(
        "split_multi_allele",
        SplitMultiAllele(
            reference=self.reference,
            # Explicit ``.out``: a bare step reference is ambiguous if the
            # caller step exposes more than one output.
            vcf=self.haplotype_caller.out,
        ),
    )

    self.output("out", source=self.split_multi_allele.out)
def constructor(self) -> None:
    """Declare the inputs, steps and outputs of this Manta + Strelka germline workflow.

    Step wiring, as registered below:

    * ``manta`` and ``strelka`` both read the input BAM directly.
    * ``strelka.variants`` -> ``splitnormalisevcf`` -> ``filterpass``

    Outputs:
        sv: ``manta.diploidSV``.
        variants: ``strelka.variants``.
        out: the ``filterpass`` step output.
    """
    self.input("bam", BamBai)
    self.input("reference", FastaWithDict)

    # Optional inputs.
    self.input("intervals", BedTabix(optional=True))
    self.input("is_exome", Boolean(optional=True))
    self.input("manta_config", File(optional=True))
    self.input("strelka_config", File(optional=True))

    manta_tool = Manta_1_5_0(
        bam=self.bam,
        reference=self.reference,
        callRegions=self.intervals,
        exome=self.is_exome,
        config=self.manta_config,
    )
    self.step("manta", manta_tool)

    # NOTE(review): no Manta output is wired into Strelka here -- confirm
    # that this is intentional.
    strelka_tool = StrelkaGermline_2_9_10(
        bam=self.bam,
        reference=self.reference,
        callRegions=self.intervals,
        exome=self.is_exome,
        config=self.strelka_config,
    )
    self.step("strelka", strelka_tool)

    # Normalise and filter "PASS" variants.
    split_tool = SplitMultiAllele(
        vcf=self.strelka.variants.as_type(CompressedVcf),
        reference=self.reference,
    )
    self.step("splitnormalisevcf", split_tool)

    vcftools_flags = {
        "removeFileteredAll": True,
        "recode": True,
        "recodeINFOAll": True,
    }
    self.step(
        "filterpass",
        VcfToolsvcftoolsLatest(vcf=self.splitnormalisevcf.out, **vcftools_flags),
    )

    self.output("sv", source=self.manta.diploidSV)
    self.output("variants", source=self.strelka.variants)
    self.output("out", source=self.filterpass.out)
def constructor(self) -> None:
    """Declare the inputs, steps and outputs of this VarDict somatic workflow.

    Step wiring, as registered below:

    * ``vardict`` -> ``annotate``
    * ``annotate`` -> ``compressvcf`` -> ``tabixvcf``
    * ``annotate`` -> ``splitnormalisevcf`` -> ``trim`` -> ``filterpass``

    Outputs:
        variants: the ``tabixvcf`` step output.
        out: the ``filterpass`` step output.
    """
    # Paired-sample inputs.
    self.input("normal_bam", BamBai)
    self.input("tumor_bam", BamBai)
    self.input("normal_name", String)
    self.input("tumor_name", String)

    self.input("intervals", Bed)
    self.input("header_lines", File)
    self.input("reference", FastaWithDict)

    # VarDict options; the third positional argument is the input's default.
    self.input("allele_freq_threshold", Float(), 0.05)
    self.input("minMappingQual", Int(optional=True))
    self.input("filter", String(optional=True))

    # Fixed settings handed to VarDict (column indices refer to the intervals
    # file -- presumably chrom/start/end of a BED; confirm against the tool).
    fixed_vardict_settings = {
        "vcfFormat": True,
        "chromColumn": 1,
        "regStartCol": 2,
        "geneEndCol": 3,
        "threads": 4,
    }
    vardict_tool = VarDictSomatic_1_6_0(
        normalBam=self.normal_bam,
        tumorBam=self.tumor_bam,
        intervals=self.intervals,
        reference=self.reference,
        normalName=self.normal_name,
        tumorName=self.tumor_name,
        alleleFreqThreshold=self.allele_freq_threshold,
        **fixed_vardict_settings,
        minMappingQual=self.minMappingQual,
        filter=self.filter,
    )
    self.step("vardict", vardict_tool)

    annotate_tool = BcfToolsAnnotate_1_5(
        vcf=self.vardict.out, headerLines=self.header_lines
    )
    self.step("annotate", annotate_tool)

    # Branch 1: compress and index the annotated VCF.
    self.step("compressvcf", BGZipLatest(file=self.annotate.out, stdout=True))
    self.step("tabixvcf", TabixLatest(inp=self.compressvcf.out))

    # Branch 2: split/normalise, trim and filter the annotated VCF.
    split_tool = SplitMultiAllele(vcf=self.annotate.out, reference=self.reference)
    self.step("splitnormalisevcf", split_tool)
    self.step("trim", TrimIUPAC_0_0_5(vcf=self.splitnormalisevcf.out))
    self.step("filterpass", FilterVardictSomaticVcf(vcf=self.trim.out))

    self.output("variants", source=self.tabixvcf.out)
    self.output("out", source=self.filterpass.out)
def constructor(self) -> None:
    """Declare the inputs, steps and outputs of this VarDict somatic workflow.

    Step wiring, as registered below:

    ``vardict`` -> ``annotate`` -> ``split_multi_allele`` -> ``trim``

    Outputs:
        vardict_variants: the ``vardict`` step output.
        out: the ``trim`` step output.
    """
    # Paired-sample inputs.
    self.input("normal_bam", BamBai)
    self.input("tumor_bam", BamBai)
    self.input("normal_name", String)
    self.input("tumor_name", String)

    self.input("intervals", Bed)
    # The third positional argument is the input's default value.
    self.input("allele_freq_threshold", Float(), 0.05)
    self.input("header_lines", File)
    self.input("reference", FastaWithDict)

    # Fixed settings handed to VarDict (column indices refer to the intervals
    # file -- presumably chrom/start/end of a BED; confirm against the tool).
    fixed_vardict_settings = {
        "chromNamesAreNumbers": True,
        "vcfFormat": True,
        "chromColumn": 1,
        "regStartCol": 2,
        "geneEndCol": 3,
    }
    vardict_tool = VarDictSomatic_1_6_0(
        normalBam=self.normal_bam,
        tumorBam=self.tumor_bam,
        intervals=self.intervals,
        reference=self.reference,
        normalName=self.normal_name,
        tumorName=self.tumor_name,
        alleleFreqThreshold=self.allele_freq_threshold,
        **fixed_vardict_settings,
    )
    self.step("vardict", vardict_tool)

    annotate_tool = BcfToolsAnnotate_1_5(
        file=self.vardict.out, headerLines=self.header_lines
    )
    self.step("annotate", annotate_tool)

    split_tool = SplitMultiAllele(reference=self.reference, vcf=self.annotate.out)
    self.step("split_multi_allele", split_tool)

    self.step("trim", TrimIUPAC_0_0_5(vcf=self.split_multi_allele.out))

    self.output("vardict_variants", source=self.vardict.out)
    self.output("out", source=self.trim.out)
def constructor(self) -> None:
    """Declare the inputs, steps and outputs of this Manta + Strelka germline workflow.

    Step wiring, as registered below:

    * ``manta.candidateSmallIndels`` feeds ``strelka`` as ``indelCandidates``.
    * ``strelka.variants`` -> ``bcfview`` -> ``split_multi_allele``

    Outputs:
        diploid: ``manta.diploidSV``.
        variants: ``strelka.variants``.
        out: the ``split_multi_allele`` step output.
    """
    self.input("bam", BamBai)
    self.input("reference", FastaWithDict)
    self.input("intervals", BedTabix(optional=True))
    self.input("is_exome", Boolean(optional=True))

    manta_tool = Manta_1_5_0(
        bam=self.bam,
        reference=self.reference,
        callRegions=self.intervals,
        exome=self.is_exome,
    )
    self.step("manta", manta_tool)

    strelka_tool = StrelkaGermline_2_9_10(
        bam=self.bam,
        reference=self.reference,
        indelCandidates=self.manta.candidateSmallIndels,
        callRegions=self.intervals,
        exome=self.is_exome,
    )
    self.step("strelka", strelka_tool)

    # bcftools view is given a single-entry filter list: "PASS".
    accepted_filters = ["PASS"]
    view_tool = BcfToolsView_1_5(
        file=self.strelka.variants, applyFilters=accepted_filters
    )
    self.step("bcfview", view_tool)

    split_tool = SplitMultiAllele(vcf=self.bcfview.out, reference=self.reference)
    self.step("split_multi_allele", split_tool)

    self.output("diploid", source=self.manta.diploidSV)
    self.output("variants", source=self.strelka.variants)
    self.output("out", source=self.split_multi_allele.out)
def constructor(self) -> None:
    """Declare the inputs, steps and outputs of this VarDict germline workflow.

    Step wiring, as registered below:

    ``vardict`` -> ``annotate`` -> ``split_multi_allele`` -> ``trim``

    Inputs:
        bam, intervals, sample_name, header_lines, reference: required.
        allele_freq_threshold: defaults to ``0.05``; the value is passed to
            both ``alleleFreqThreshold`` and ``var2vcfAlleleFreqThreshold``.

    Outputs:
        vardict_variants: the ``vardict`` step output.
        out: the ``trim`` step output.
    """
    self.input("bam", BamBai)
    self.input("intervals", Bed)
    self.input("sample_name", String)
    # Default is 0.05, not 0.5: the previous 0.5 default would make VarDict
    # skip any call whose allele frequency is below 50% (see the VarDict ``-f``
    # option), which is almost certainly a misplaced decimal point.
    self.input("allele_freq_threshold", Float, default=0.05)
    self.input("header_lines", File)
    self.input("reference", FastaWithDict)

    self.step(
        "vardict",
        VarDictGermline_1_6_0(
            intervals=self.intervals,
            bam=self.bam,
            reference=self.reference,
            # The same sample name and threshold feed both the caller and
            # its var2vcf counterpart arguments.
            sampleName=self.sample_name,
            var2vcfSampleName=self.sample_name,
            alleleFreqThreshold=self.allele_freq_threshold,
            var2vcfAlleleFreqThreshold=self.allele_freq_threshold,
            chromNamesAreNumbers=True,
            vcfFormat=True,
            # Column indices into the intervals file -- presumably the
            # chrom/start/end columns of a BED; confirm against the tool.
            chromColumn=1,
            regStartCol=2,
            geneEndCol=3,
        ),
    )

    self.step(
        "annotate",
        BcfToolsAnnotate_1_5(file=self.vardict.out, headerLines=self.header_lines),
    )

    self.step(
        "split_multi_allele",
        SplitMultiAllele(vcf=self.annotate.out, reference=self.reference),
    )

    self.step("trim", TrimIUPAC_0_0_5(vcf=self.split_multi_allele.out))

    self.output("vardict_variants", source=self.vardict.out)
    self.output("out", source=self.trim.out)
def constructor(self) -> None:
    """Declare the inputs, steps and outputs of this GATK4 germline workflow.

    Step wiring, as registered below:

    ``split_bam`` -> ``haplotype_caller`` -> ``uncompressvcf`` ->
    ``splitnormalisevcf``

    Outputs:
        variants: ``haplotype_caller.out``.
        out_bam: ``haplotype_caller.bam``.
        out: the ``splitnormalisevcf`` step output.
    """
    self.input("bam", BamBai)
    self.input(
        "intervals",
        Bed(optional=True),
        doc="This optional interval supports processing by regions. If this input resolves "
        "to null, then GATK will process the whole genome per each tool's spec",
    )
    self.input("reference", FastaWithDict)
    self.input("snps_dbsnp", VcfTabix)

    split_reads_tool = gatk4.Gatk4SplitReads_4_1_3(
        bam=self.bam, intervals=self.intervals
    )
    self.step("split_bam", split_reads_tool)

    caller_tool = gatk4.Gatk4HaplotypeCaller_4_1_3(
        inputRead=self.split_bam.out,
        intervals=self.intervals,
        reference=self.reference,
        dbsnp=self.snps_dbsnp,
        pairHmmImplementation="LOGLESS_CACHING",
    )
    self.step("haplotype_caller", caller_tool)

    # The caller's VCF goes through UncompressArchive before normalisation.
    self.step("uncompressvcf", UncompressArchive(file=self.haplotype_caller.out))

    split_tool = SplitMultiAllele(
        vcf=self.uncompressvcf.out, reference=self.reference
    )
    self.step("splitnormalisevcf", split_tool)

    self.output("variants", source=self.haplotype_caller.out)
    self.output("out_bam", source=self.haplotype_caller.bam)
    self.output("out", source=self.splitnormalisevcf.out)
def constructor(self) -> None:
    """Declare the inputs, steps and outputs of this GATK4 somatic (Mutect2) workflow.

    The four recalibration steps are first registered without connections
    (``ignore_missing=True``) and are wired afterwards, one sample at a time.

    Step wiring once connected:

    * ``base_recalibrator_<sample>`` -> ``apply_bqsr_<sample>`` for
      ``normal`` and ``tumor``
    * both ``apply_bqsr_*`` outputs -> ``mutect2`` -> ``split_multi_allele``

    Outputs:
        out: the ``split_multi_allele`` step output.
    """
    self.input("normal_bam", BamBai)
    self.input("tumor_bam", BamBai)
    self.input("normal_name", str)
    self.input("tumor_name", str)
    self.input(
        "intervals",
        Bed(optional=True),
        doc="This optional interval supports processing by regions. If this input resolves "
        "to null, then GATK will process the whole genome per each tool's spec",
    )
    self.input("reference", FastaWithDict)
    self.input("snps_dbsnp", VcfTabix)
    self.input("snps_1000gp", VcfTabix)
    self.input("known_indels", VcfTabix)
    self.input("mills_indels", VcfTabix)

    # Register the unconnected steps: recalibrators first, then ApplyBQSR.
    for step_id in ("base_recalibrator_normal", "base_recalibrator_tumor"):
        self.step(step_id, gatk4.Gatk4BaseRecalibrator_4_0(), ignore_missing=True)
    for step_id in ("apply_bqsr_normal", "apply_bqsr_tumor"):
        self.step(step_id, gatk4.Gatk4ApplyBqsr_4_0(), ignore_missing=True)

    # S1: BaseRecalibrator(s)
    known_site_inputs = (
        self.snps_dbsnp,
        self.snps_1000gp,
        self.known_indels,
        self.mills_indels,
    )
    per_sample_steps = (
        (self.normal_bam, self.base_recalibrator_normal, self.apply_bqsr_normal),
        (self.tumor_bam, self.base_recalibrator_tumor, self.apply_bqsr_tumor),
    )
    for sample_bam, recal_step, bqsr_step in per_sample_steps:
        recal_step["bam"] = sample_bam
        recal_step["intervals"] = self.intervals
        recal_step["reference"] = self.reference
        # A fresh list per step, so the two steps never share one list object.
        recal_step["knownSites"] = list(known_site_inputs)

        bqsr_step["recalFile"] = recal_step.out
        bqsr_step["bam"] = sample_bam
        bqsr_step["intervals"] = self.intervals
        bqsr_step["reference"] = self.reference

    mutect_tool = gatk4.GatkMutect2_4_0(
        normal=self.apply_bqsr_normal.out,
        tumor=self.apply_bqsr_tumor.out,
        normalName=self.normal_name,
        tumorName=self.tumor_name,
        intervals=self.intervals,
        reference=self.reference,
    )
    self.step("mutect2", mutect_tool)

    split_tool = SplitMultiAllele(reference=self.reference, vcf=self.mutect2.out)
    self.step("split_multi_allele", split_tool)

    self.output("out", source=self.split_multi_allele.out)
def constructor(self) -> None:
    """Declare the inputs, steps and outputs of this Manta + Strelka somatic workflow.

    Step wiring, as registered below:

    * ``manta.candidateSmallIndels`` feeds ``strelka`` as ``indelCandidates``.
    * ``strelka.snvs`` + ``strelka.indels`` -> ``concatvcf`` -> ``sortvcf``
      -> ``splitnormalisevcf`` -> ``extractaddp`` -> ``filterpass``

    Outputs:
        tumor_sv: ``manta.somaticSV``.
        normal_sv: ``manta.diploidSV``.
        variants: the ``sortvcf`` step output.
        out: the ``filterpass`` step output.
    """
    self.input("normal_bam", BamBai)
    self.input("tumor_bam", BamBai)
    self.input("reference", FastaWithDict)

    # Optional inputs.
    self.input("intervals", BedTabix(optional=True))
    self.input("is_exome", Boolean(optional=True))
    self.input("manta_config", File(optional=True))
    self.input("strelka_config", File(optional=True))

    # Manta takes the normal sample as ``bam`` and the tumor as ``tumorBam``.
    manta_tool = Manta_1_5_0(
        bam=self.normal_bam,
        tumorBam=self.tumor_bam,
        reference=self.reference,
        callRegions=self.intervals,
        exome=self.is_exome,
        config=self.manta_config,
    )
    self.step("manta", manta_tool)

    strelka_tool = StrelkaSomatic_2_9_10(
        indelCandidates=self.manta.candidateSmallIndels,
        normalBam=self.normal_bam,
        tumorBam=self.tumor_bam,
        reference=self.reference,
        callRegions=self.intervals,
        exome=self.is_exome,
        config=self.strelka_config,
    )
    self.step("strelka", strelka_tool)

    # The SNV and indel VCFs are supplied as both header and content sources.
    concat_tool = ConcatStrelkaSomaticVcf(
        headerVcfs=[self.strelka.snvs, self.strelka.indels],
        contentVcfs=[self.strelka.snvs, self.strelka.indels],
    )
    self.step("concatvcf", concat_tool)

    self.step("sortvcf", BcfToolsSort_1_9(vcf=self.concatvcf.out))

    split_tool = SplitMultiAllele(vcf=self.sortvcf.out, reference=self.reference)
    self.step("splitnormalisevcf", split_tool)

    self.step(
        "extractaddp",
        ExtractStrelkaSomaticADDP_0_1_1(vcf=self.splitnormalisevcf.out),
    )

    vcftools_flags = {
        "removeFileteredAll": True,
        "recode": True,
        "recodeINFOAll": True,
    }
    self.step(
        "filterpass",
        VcfToolsvcftoolsLatest(vcf=self.extractaddp.out, **vcftools_flags),
    )

    self.output("tumor_sv", source=self.manta.somaticSV)
    self.output("normal_sv", source=self.manta.diploidSV)
    self.output("variants", source=self.sortvcf.out)
    self.output("out", source=self.filterpass.out)
def constructor(self) -> None:
    """Declare the inputs, steps and outputs of this GATK4 Mutect2 somatic workflow.

    Step wiring, as registered below:

    * ``normal_split_bam`` and ``tumor_split_bam`` (each built by
      ``self.process_subpipeline``) -> ``mutect2``
    * ``mutect2.f1f2r_out`` -> ``learnorientationmodel``
    * ``tumor_split_bam`` -> ``getpileupsummaries`` -> ``calculatecontamination``
    * ``mutect2``, ``calculatecontamination`` and ``learnorientationmodel``
      -> ``filtermutect2calls`` -> ``uncompressvcf`` -> ``splitnormalisevcf``
      -> ``filterpass``

    Outputs:
        variants: the ``filtermutect2calls`` step output.
        out_bam: ``mutect2.bam``.
        out: the ``filterpass`` step output.
    """
    self.input("normal_bam", BamBai)
    self.input("tumor_bam", BamBai)
    self.input("normal_name", String(optional=True))
    self.input(
        "intervals",
        Bed(optional=True),
        doc="This optional intervals file supports processing by regions. If this file resolves "
        "to null, then GATK will process the whole genome per each tool's spec",
    )
    self.input("reference", FastaWithDict)
    self.input("gnomad", VcfTabix)
    self.input("panel_of_normals", VcfTabix(optional=True))

    # Split the normal and tumor BAMs through the same sub-pipeline.
    normal_split = self.process_subpipeline(
        bam=self.normal_bam, intervals=self.intervals
    )
    self.step("normal_split_bam", normal_split)

    tumor_split = self.process_subpipeline(
        bam=self.tumor_bam, intervals=self.intervals
    )
    self.step("tumor_split_bam", tumor_split)

    # Variant calling, followed by the read-orientation model.
    mutect_tool = gatk4.GatkMutect2_4_1_3(
        normalBams=[self.normal_split_bam.out],
        tumorBams=[self.tumor_split_bam.out],
        normalSample=self.normal_name,
        intervals=self.intervals,
        reference=self.reference,
        germlineResource=self.gnomad,
        panelOfNormals=self.panel_of_normals,
    )
    self.step("mutect2", mutect_tool)

    orientation_tool = gatk4.Gatk4LearnReadOrientationModelLatest(
        f1r2CountsFiles=self.mutect2.f1f2r_out,
    )
    self.step("learnorientationmodel", orientation_tool)

    # Contamination and segmentation, computed from the tumor pileups.
    pileup_tool = gatk4.Gatk4GetPileUpSummariesLatest(
        bam=self.tumor_split_bam.out,
        sites=self.gnomad,
        intervals=self.intervals,
    )
    self.step("getpileupsummaries", pileup_tool)

    contamination_tool = gatk4.Gatk4CalculateContaminationLatest(
        pileupTable=self.getpileupsummaries.out,
    )
    self.step("calculatecontamination", contamination_tool)

    filter_calls_tool = gatk4.Gatk4FilterMutectCallsLatest(
        vcf=self.mutect2.out,
        reference=self.reference,
        segmentationFile=self.calculatecontamination.segOut,
        contaminationTable=self.calculatecontamination.contOut,
        readOrientationModel=self.learnorientationmodel.out,
        statsFile=self.mutect2.stats,
    )
    self.step("filtermutect2calls", filter_calls_tool)

    # Normalise and filter "PASS" variants.
    self.step("uncompressvcf", UncompressArchive(file=self.filtermutect2calls.out))

    split_tool = SplitMultiAllele(
        vcf=self.uncompressvcf.out, reference=self.reference
    )
    self.step("splitnormalisevcf", split_tool)

    vcftools_flags = {
        "removeFileteredAll": True,
        "recode": True,
        "recodeINFOAll": True,
    }
    self.step(
        "filterpass",
        VcfToolsvcftoolsLatest(vcf=self.splitnormalisevcf.out, **vcftools_flags),
    )

    self.output("variants", source=self.filtermutect2calls.out)
    self.output("out_bam", source=self.mutect2.bam)
    self.output("out", source=self.filterpass.out)