def constructor(self):
        """Declare the inputs, steps and outputs of this workflow.

        Steps are registered in dependency order. ``vardict`` feeds
        ``annotate``, and the annotated VCF is then consumed by two chains:

        * ``compressvcf`` -> ``tabixvcf``, exposed as the ``variants`` output
        * ``splitnormalisevcf`` -> ``trim`` -> ``filterpass``, exposed as ``out``
        """
        # --- required inputs ---
        self.input("bam", BamBai)
        self.input("intervals", Bed)
        self.input("sample_name", String)
        self.input("header_lines", File)
        self.input("reference", FastaWithDict)

        # --- vardict options (only the threshold carries a default) ---
        self.input("allele_freq_threshold", Float, default=0.05)
        self.input("min_mapping_qual", Int(optional=True))
        self.input("filter", String(optional=True))
        self.input("no_sv_call", Boolean(optional=True))

        # The sample name and the allele frequency threshold are each passed
        # twice: once to the plain argument and once to its var2vcf twin.
        vardict_tool = VarDictGermline_1_6_0(
            intervals=self.intervals,
            bam=self.bam,
            reference=self.reference,
            sampleName=self.sample_name,
            var2vcfSampleName=self.sample_name,
            alleleFreqThreshold=self.allele_freq_threshold,
            var2vcfAlleleFreqThreshold=self.allele_freq_threshold,
            vcfFormat=True,
            chromColumn=1,
            regStartCol=2,
            geneEndCol=3,
            threads=4,
            minMappingQual=self.min_mapping_qual,
            filter=self.filter,
            noStructuralVariants=self.no_sv_call,
        )
        self.step("vardict", vardict_tool)

        annotate_tool = BcfToolsAnnotate_1_5(
            vcf=self.vardict.out, headerLines=self.header_lines
        )
        self.step("annotate", annotate_tool)

        # chain 1: compressvcf -> tabixvcf
        compress_tool = BGZipLatest(file=self.annotate.out, stdout=True)
        self.step("compressvcf", compress_tool)
        tabix_tool = TabixLatest(inp=self.compressvcf.out)
        self.step("tabixvcf", tabix_tool)

        # chain 2: splitnormalisevcf -> trim -> filterpass
        split_tool = SplitMultiAllele(
            vcf=self.annotate.out, reference=self.reference
        )
        self.step("splitnormalisevcf", split_tool)
        trim_tool = TrimIUPAC_0_0_5(vcf=self.splitnormalisevcf.out)
        self.step("trim", trim_tool)
        filter_tool = VcfToolsvcftoolsLatest(
            vcf=self.trim.out,
            removeFileteredAll=True,
            recode=True,
            recodeINFOAll=True,
        )
        self.step("filterpass", filter_tool)

        # --- outputs ---
        self.output("variants", source=self.tabixvcf.out)
        self.output("out", source=self.filterpass.out)
# Example no. 2
# 0
    def constructor(self):
        """Declare the inputs, steps and outputs of this workflow.

        Step chain: ``split_bam`` -> ``base_recalibrator`` -> ``apply_bqsr``
        -> ``haplotype_caller`` -> ``split_multi_allele``. The ``out`` of the
        last step is the workflow's only output.

        Every connection names the upstream output (``.out``) explicitly
        rather than passing the bare step node, so the wiring does not depend
        on how a bare step reference is resolved when a tool exposes more
        than one output.
        """
        self.input("bam", BamBai)
        self.input(
            "intervals",
            Bed(optional=True),
            doc="This optional interval supports processing by regions. If this input resolves "
            "to null, then GATK will process the whole genome per each tool's spec",
        )

        self.input("reference", FastaWithDict)

        # Known-sites resources; all four go to the base recalibrator and
        # snps_dbsnp is additionally handed to the haplotype caller.
        self.input("snps_dbsnp", VcfTabix)
        self.input("snps_1000gp", VcfTabix)
        self.input("known_indels", VcfTabix)
        self.input("mills_indels", VcfTabix)

        self.step(
            "split_bam",
            gatk4.Gatk4SplitReads_4_1_3(bam=self.bam, intervals=self.intervals),
        )

        self.step(
            "base_recalibrator",
            gatk4.Gatk4BaseRecalibrator_4_1_3(
                bam=self.split_bam.out,
                intervals=self.intervals,
                reference=self.reference,
                knownSites=[
                    self.snps_dbsnp,
                    self.snps_1000gp,
                    self.known_indels,
                    self.mills_indels,
                ],
            ),
        )
        # apply_bqsr reads the same split BAM as the recalibrator, plus the
        # recalibrator's output as its recalFile.
        self.step(
            "apply_bqsr",
            gatk4.Gatk4ApplyBqsr_4_1_3(
                bam=self.split_bam.out,
                intervals=self.intervals,
                recalFile=self.base_recalibrator.out,
                reference=self.reference,
            ),
        )
        self.step(
            "haplotype_caller",
            gatk4.Gatk4HaplotypeCaller_4_1_3(
                inputRead=self.apply_bqsr.out,
                intervals=self.intervals,
                reference=self.reference,
                dbsnp=self.snps_dbsnp,
            ),
        )
        self.step(
            "split_multi_allele",
            SplitMultiAllele(
                reference=self.reference, vcf=self.haplotype_caller.out
            ),
        )

        self.output("out", source=self.split_multi_allele.out)
    def constructor(self):
        """Declare the inputs, steps and outputs of this workflow.

        ``manta`` and ``strelka`` are both fed the same BAM, reference,
        call regions and exome flag, each with its own optional config.
        The ``strelka`` variants then go through ``splitnormalisevcf`` and
        ``filterpass``.

        Outputs: ``sv`` (manta ``diploidSV``), ``variants`` (strelka
        ``variants``) and ``out`` (``filterpass`` result).
        """
        # required
        self.input("bam", BamBai)
        self.input("reference", FastaWithDict)

        # optional
        self.input("intervals", BedTabix(optional=True))
        self.input("is_exome", Boolean(optional=True))
        self.input("manta_config", File(optional=True))
        self.input("strelka_config", File(optional=True))

        manta_tool = Manta_1_5_0(
            bam=self.bam,
            reference=self.reference,
            callRegions=self.intervals,
            exome=self.is_exome,
            config=self.manta_config,
        )
        self.step("manta", manta_tool)

        strelka_tool = StrelkaGermline_2_9_10(
            bam=self.bam,
            reference=self.reference,
            callRegions=self.intervals,
            exome=self.is_exome,
            config=self.strelka_config,
        )
        self.step("strelka", strelka_tool)

        # normalise and filter "PASS" variants
        # The strelka variants are presented as CompressedVcf via as_type()
        # before being handed to SplitMultiAllele.
        strelka_vcf = self.strelka.variants.as_type(CompressedVcf)
        split_tool = SplitMultiAllele(vcf=strelka_vcf, reference=self.reference)
        self.step("splitnormalisevcf", split_tool)

        pass_filter_tool = VcfToolsvcftoolsLatest(
            vcf=self.splitnormalisevcf.out,
            removeFileteredAll=True,
            recode=True,
            recodeINFOAll=True,
        )
        self.step("filterpass", pass_filter_tool)

        self.output("sv", source=self.manta.diploidSV)
        self.output("variants", source=self.strelka.variants)
        self.output("out", source=self.filterpass.out)
    def constructor(self):
        """Declare the inputs, steps and outputs of this workflow.

        ``vardict`` is run on the normal/tumor BAM pair and its result is
        passed to ``annotate``. The annotated VCF is consumed by two chains:

        * ``compressvcf`` -> ``tabixvcf``, exposed as the ``variants`` output
        * ``splitnormalisevcf`` -> ``trim`` -> ``filterpass``, exposed as ``out``
        """
        # --- required inputs ---
        self.input("normal_bam", BamBai)
        self.input("tumor_bam", BamBai)
        self.input("normal_name", String)
        self.input("tumor_name", String)
        self.input("intervals", Bed)
        self.input("header_lines", File)
        self.input("reference", FastaWithDict)

        # --- vardict options ---
        # 0.05 is passed positionally as the third argument of self.input().
        self.input("allele_freq_threshold", Float(), 0.05)
        self.input("minMappingQual", Int(optional=True))
        self.input("filter", String(optional=True))

        vardict_tool = VarDictSomatic_1_6_0(
            normalBam=self.normal_bam,
            tumorBam=self.tumor_bam,
            intervals=self.intervals,
            reference=self.reference,
            normalName=self.normal_name,
            tumorName=self.tumor_name,
            alleleFreqThreshold=self.allele_freq_threshold,
            vcfFormat=True,
            chromColumn=1,
            regStartCol=2,
            geneEndCol=3,
            threads=4,
            minMappingQual=self.minMappingQual,
            filter=self.filter,
        )
        self.step("vardict", vardict_tool)

        annotate_tool = BcfToolsAnnotate_1_5(
            vcf=self.vardict.out, headerLines=self.header_lines
        )
        self.step("annotate", annotate_tool)

        # chain 1: compressvcf -> tabixvcf
        compress_tool = BGZipLatest(file=self.annotate.out, stdout=True)
        self.step("compressvcf", compress_tool)
        tabix_tool = TabixLatest(inp=self.compressvcf.out)
        self.step("tabixvcf", tabix_tool)

        # chain 2: splitnormalisevcf -> trim -> filterpass
        split_tool = SplitMultiAllele(
            vcf=self.annotate.out, reference=self.reference
        )
        self.step("splitnormalisevcf", split_tool)
        trim_tool = TrimIUPAC_0_0_5(vcf=self.splitnormalisevcf.out)
        self.step("trim", trim_tool)
        somatic_filter_tool = FilterVardictSomaticVcf(vcf=self.trim.out)
        self.step("filterpass", somatic_filter_tool)

        # --- outputs ---
        self.output("variants", source=self.tabixvcf.out)
        self.output("out", source=self.filterpass.out)
    def constructor(self):
        """Declare the inputs, steps and outputs of this workflow.

        Step chain: ``vardict`` -> ``annotate`` -> ``split_multi_allele``
        -> ``trim``.

        Outputs: ``vardict_variants`` (the ``vardict`` step's ``out``) and
        ``out`` (the ``trim`` step's ``out``).
        """
        # sample BAMs and their names
        self.input("normal_bam", BamBai)
        self.input("tumor_bam", BamBai)

        self.input("normal_name", String)
        self.input("tumor_name", String)

        self.input("intervals", Bed)

        # 0.05 is passed positionally as the third argument of self.input().
        self.input("allele_freq_threshold", Float(), 0.05)
        self.input("header_lines", File)

        self.input("reference", FastaWithDict)

        vardict_tool = VarDictSomatic_1_6_0(
            normalBam=self.normal_bam,
            tumorBam=self.tumor_bam,
            intervals=self.intervals,
            reference=self.reference,
            normalName=self.normal_name,
            tumorName=self.tumor_name,
            alleleFreqThreshold=self.allele_freq_threshold,
            chromNamesAreNumbers=True,
            vcfFormat=True,
            chromColumn=1,
            regStartCol=2,
            geneEndCol=3,
        )
        self.step("vardict", vardict_tool)

        # This block hands the VCF to the annotate tool through ``file=``.
        annotate_tool = BcfToolsAnnotate_1_5(
            file=self.vardict.out, headerLines=self.header_lines
        )
        self.step("annotate", annotate_tool)

        split_tool = SplitMultiAllele(
            reference=self.reference, vcf=self.annotate.out
        )
        self.step("split_multi_allele", split_tool)

        trim_tool = TrimIUPAC_0_0_5(vcf=self.split_multi_allele.out)
        self.step("trim", trim_tool)

        self.output("vardict_variants", source=self.vardict.out)
        self.output("out", source=self.trim.out)
# Example no. 6
# 0
    def constructor(self):
        """Declare the inputs, steps and outputs of this workflow.

        ``manta`` runs first; its ``candidateSmallIndels`` output is passed
        to ``strelka`` as ``indelCandidates``. The strelka variants then go
        through ``bcfview`` (with ``applyFilters=["PASS"]``) and
        ``split_multi_allele``.

        Outputs: ``diploid`` (manta ``diploidSV``), ``variants`` (strelka
        ``variants``) and ``out`` (``split_multi_allele`` result).
        """
        self.input("bam", BamBai)
        self.input("reference", FastaWithDict)
        self.input("intervals", BedTabix(optional=True))
        self.input("is_exome", Boolean(optional=True))

        manta_tool = Manta_1_5_0(
            bam=self.bam,
            reference=self.reference,
            callRegions=self.intervals,
            exome=self.is_exome,
        )
        self.step("manta", manta_tool)

        strelka_tool = StrelkaGermline_2_9_10(
            bam=self.bam,
            reference=self.reference,
            indelCandidates=self.manta.candidateSmallIndels,
            callRegions=self.intervals,
            exome=self.is_exome,
        )
        self.step("strelka", strelka_tool)

        view_tool = BcfToolsView_1_5(
            file=self.strelka.variants, applyFilters=["PASS"]
        )
        self.step("bcfview", view_tool)

        split_tool = SplitMultiAllele(
            vcf=self.bcfview.out, reference=self.reference
        )
        self.step("split_multi_allele", split_tool)

        self.output("diploid", source=self.manta.diploidSV)
        self.output("variants", source=self.strelka.variants)
        self.output("out", source=self.split_multi_allele.out)
# Example no. 7
# 0
    def constructor(self):
        """Declare the inputs, steps and outputs of this workflow.

        Step chain: ``vardict`` -> ``annotate`` -> ``split_multi_allele``
        -> ``trim``.

        Outputs: ``vardict_variants`` (the ``vardict`` step's ``out``) and
        ``out`` (the ``trim`` step's ``out``).
        """
        self.input("bam", BamBai)
        self.input("intervals", Bed)

        self.input("sample_name", String)
        # NOTE(review): the default here is 0.5 -- confirm this is intended.
        self.input("allele_freq_threshold", Float, default=0.5)
        self.input("header_lines", File)

        self.input("reference", FastaWithDict)

        # The sample name and the allele frequency threshold are each passed
        # twice: once to the plain argument and once to its var2vcf twin.
        vardict_tool = VarDictGermline_1_6_0(
            intervals=self.intervals,
            bam=self.bam,
            reference=self.reference,
            sampleName=self.sample_name,
            var2vcfSampleName=self.sample_name,
            alleleFreqThreshold=self.allele_freq_threshold,
            var2vcfAlleleFreqThreshold=self.allele_freq_threshold,
            chromNamesAreNumbers=True,
            vcfFormat=True,
            chromColumn=1,
            regStartCol=2,
            geneEndCol=3,
        )
        self.step("vardict", vardict_tool)

        # This block hands the VCF to the annotate tool through ``file=``.
        annotate_tool = BcfToolsAnnotate_1_5(
            file=self.vardict.out, headerLines=self.header_lines
        )
        self.step("annotate", annotate_tool)

        split_tool = SplitMultiAllele(
            vcf=self.annotate.out, reference=self.reference
        )
        self.step("split_multi_allele", split_tool)

        trim_tool = TrimIUPAC_0_0_5(vcf=self.split_multi_allele.out)
        self.step("trim", trim_tool)

        self.output("vardict_variants", source=self.vardict.out)
        self.output("out", source=self.trim.out)
    def constructor(self):
        """Declare the inputs, steps and outputs of this workflow.

        Step chain: ``split_bam`` -> ``haplotype_caller`` ->
        ``uncompressvcf`` -> ``splitnormalisevcf``.

        Outputs: ``variants`` and ``out_bam`` (the haplotype caller's ``out``
        and ``bam``) and ``out`` (the ``splitnormalisevcf`` result).
        """
        self.input("bam", BamBai)

        intervals_doc = (
            "This optional interval supports processing by regions. If this input resolves "
            "to null, then GATK will process the whole genome per each tool's spec"
        )
        self.input("intervals", Bed(optional=True), doc=intervals_doc)

        self.input("reference", FastaWithDict)
        self.input("snps_dbsnp", VcfTabix)

        split_reads_tool = gatk4.Gatk4SplitReads_4_1_3(
            bam=self.bam, intervals=self.intervals
        )
        self.step("split_bam", split_reads_tool)

        caller_tool = gatk4.Gatk4HaplotypeCaller_4_1_3(
            inputRead=self.split_bam.out,
            intervals=self.intervals,
            reference=self.reference,
            dbsnp=self.snps_dbsnp,
            pairHmmImplementation="LOGLESS_CACHING",
        )
        self.step("haplotype_caller", caller_tool)

        # The caller's VCF passes through UncompressArchive before it is
        # handed to SplitMultiAllele.
        uncompress_tool = UncompressArchive(file=self.haplotype_caller.out)
        self.step("uncompressvcf", uncompress_tool)

        split_tool = SplitMultiAllele(
            vcf=self.uncompressvcf.out, reference=self.reference
        )
        self.step("splitnormalisevcf", split_tool)

        self.output("variants", source=self.haplotype_caller.out)
        self.output("out_bam", source=self.haplotype_caller.bam)
        self.output("out", source=self.splitnormalisevcf.out)
    def constructor(self):
        """Declare the inputs, steps and outputs of this workflow.

        The four recalibration steps (base recalibrator and apply-BQSR, for
        normal and tumor) are first registered without any connections,
        using ``ignore_missing=True``. Their inputs are then assigned
        item-by-item in a loop that applies the same wiring to the normal
        and the tumor BAM. ``mutect2`` consumes both apply-BQSR outputs and
        ``split_multi_allele`` produces the single ``out`` output.
        """
        self.input("normal_bam", BamBai)
        self.input("tumor_bam", BamBai)

        self.input("normal_name", str)
        self.input("tumor_name", str)

        intervals_doc = (
            "This optional interval supports processing by regions. If this input resolves "
            "to null, then GATK will process the whole genome per each tool's spec"
        )
        self.input("intervals", Bed(optional=True), doc=intervals_doc)
        self.input("reference", FastaWithDict)

        self.input("snps_dbsnp", VcfTabix)
        self.input("snps_1000gp", VcfTabix)
        self.input("known_indels", VcfTabix)
        self.input("mills_indels", VcfTabix)

        # Register the unconnected steps; each tool is instantiated right
        # before its own registration.
        unwired_steps = (
            ("base_recalibrator_normal", gatk4.Gatk4BaseRecalibrator_4_0),
            ("base_recalibrator_tumor", gatk4.Gatk4BaseRecalibrator_4_0),
            ("apply_bqsr_normal", gatk4.Gatk4ApplyBqsr_4_0),
            ("apply_bqsr_tumor", gatk4.Gatk4ApplyBqsr_4_0),
        )
        for step_id, tool_class in unwired_steps:
            self.step(step_id, tool_class(), ignore_missing=True)

        # S1: BaseRecalibrator(s)

        # (source BAM, recalibrator step, apply-BQSR step) per sample
        sample_wiring = [
            (
                self.normal_bam,
                self.base_recalibrator_normal,
                self.apply_bqsr_normal,
            ),
            (
                self.tumor_bam,
                self.base_recalibrator_tumor,
                self.apply_bqsr_tumor,
            ),
        ]
        for source_bam, recal_step, bqsr_step in sample_wiring:
            recal_step["bam"] = source_bam
            recal_step["intervals"] = self.intervals
            recal_step["reference"] = self.reference
            recal_step["knownSites"] = [
                self.snps_dbsnp,
                self.snps_1000gp,
                self.known_indels,
                self.mills_indels,
            ]

            bqsr_step["recalFile"] = recal_step.out
            bqsr_step["bam"] = source_bam
            bqsr_step["intervals"] = self.intervals
            bqsr_step["reference"] = self.reference

        mutect_tool = gatk4.GatkMutect2_4_0(
            normal=self.apply_bqsr_normal.out,
            tumor=self.apply_bqsr_tumor.out,
            normalName=self.normal_name,
            tumorName=self.tumor_name,
            intervals=self.intervals,
            reference=self.reference,
        )
        self.step("mutect2", mutect_tool)

        split_tool = SplitMultiAllele(
            reference=self.reference, vcf=self.mutect2.out
        )
        self.step("split_multi_allele", split_tool)

        self.output("out", source=self.split_multi_allele.out)
    def constructor(self):
        """Declare the inputs, steps and outputs of this workflow.

        ``manta`` runs on the normal/tumor pair and its
        ``candidateSmallIndels`` output is passed to ``strelka``. The strelka
        ``snvs`` and ``indels`` are then processed by the chain
        ``concatvcf`` -> ``sortvcf`` -> ``splitnormalisevcf`` ->
        ``extractaddp`` -> ``filterpass``.

        Outputs: ``tumor_sv`` / ``normal_sv`` (manta ``somaticSV`` /
        ``diploidSV``), ``variants`` (``sortvcf`` result) and ``out``
        (``filterpass`` result).
        """
        # required
        self.input("normal_bam", BamBai)
        self.input("tumor_bam", BamBai)
        self.input("reference", FastaWithDict)

        # optional
        self.input("intervals", BedTabix(optional=True))
        self.input("is_exome", Boolean(optional=True))
        self.input("manta_config", File(optional=True))
        self.input("strelka_config", File(optional=True))

        # The normal BAM goes in as manta's ``bam``, the tumor as ``tumorBam``.
        manta_tool = Manta_1_5_0(
            bam=self.normal_bam,
            tumorBam=self.tumor_bam,
            reference=self.reference,
            callRegions=self.intervals,
            exome=self.is_exome,
            config=self.manta_config,
        )
        self.step("manta", manta_tool)

        strelka_tool = StrelkaSomatic_2_9_10(
            indelCandidates=self.manta.candidateSmallIndels,
            normalBam=self.normal_bam,
            tumorBam=self.tumor_bam,
            reference=self.reference,
            callRegions=self.intervals,
            exome=self.is_exome,
            config=self.strelka_config,
        )
        self.step("strelka", strelka_tool)

        # The same [snvs, indels] pair is supplied as both the header VCFs
        # and the content VCFs.
        concat_tool = ConcatStrelkaSomaticVcf(
            headerVcfs=[self.strelka.snvs, self.strelka.indels],
            contentVcfs=[self.strelka.snvs, self.strelka.indels],
        )
        self.step("concatvcf", concat_tool)

        sort_tool = BcfToolsSort_1_9(vcf=self.concatvcf.out)
        self.step("sortvcf", sort_tool)

        split_tool = SplitMultiAllele(
            vcf=self.sortvcf.out, reference=self.reference
        )
        self.step("splitnormalisevcf", split_tool)

        extract_tool = ExtractStrelkaSomaticADDP_0_1_1(
            vcf=self.splitnormalisevcf.out
        )
        self.step("extractaddp", extract_tool)

        pass_filter_tool = VcfToolsvcftoolsLatest(
            vcf=self.extractaddp.out,
            removeFileteredAll=True,
            recode=True,
            recodeINFOAll=True,
        )
        self.step("filterpass", pass_filter_tool)

        self.output("tumor_sv", source=self.manta.somaticSV)
        self.output("normal_sv", source=self.manta.diploidSV)
        self.output("variants", source=self.sortvcf.out)
        self.output("out", source=self.filterpass.out)
    def constructor(self):
        """Declare the inputs, steps and outputs of this workflow.

        Both BAMs are first passed through ``self.process_subpipeline``
        (``normal_split_bam`` / ``tumor_split_bam``). ``mutect2`` runs on the
        two results. Its outputs, together with ``learnorientationmodel``
        and the ``getpileupsummaries`` -> ``calculatecontamination`` chain,
        feed ``filtermutect2calls``. The filtered calls then go through
        ``uncompressvcf`` -> ``splitnormalisevcf`` -> ``filterpass``.

        Outputs: ``variants`` (``filtermutect2calls`` result), ``out_bam``
        (mutect2 ``bam``) and ``out`` (``filterpass`` result).
        """
        self.input("normal_bam", BamBai)
        self.input("tumor_bam", BamBai)
        self.input("normal_name", String(optional=True))

        intervals_doc = (
            "This optional intervals file supports processing by regions. If this file resolves "
            "to null, then GATK will process the whole genome per each tool's spec"
        )
        self.input("intervals", Bed(optional=True), doc=intervals_doc)

        self.input("reference", FastaWithDict)
        self.input("gnomad", VcfTabix)
        self.input("panel_of_normals", VcfTabix(optional=True))

        # split normal and tumor bam
        normal_split = self.process_subpipeline(
            bam=self.normal_bam, intervals=self.intervals
        )
        self.step("normal_split_bam", normal_split)
        tumor_split = self.process_subpipeline(
            bam=self.tumor_bam, intervals=self.intervals
        )
        self.step("tumor_split_bam", tumor_split)

        # variant calling + learn read orientation model
        # Each split BAM is wrapped in a one-element list for mutect2.
        mutect_tool = gatk4.GatkMutect2_4_1_3(
            normalBams=[self.normal_split_bam.out],
            tumorBams=[self.tumor_split_bam.out],
            normalSample=self.normal_name,
            intervals=self.intervals,
            reference=self.reference,
            germlineResource=self.gnomad,
            panelOfNormals=self.panel_of_normals,
        )
        self.step("mutect2", mutect_tool)

        orientation_tool = gatk4.Gatk4LearnReadOrientationModelLatest(
            f1r2CountsFiles=self.mutect2.f1f2r_out
        )
        self.step("learnorientationmodel", orientation_tool)

        # calculate contamination and segmentation
        pileup_tool = gatk4.Gatk4GetPileUpSummariesLatest(
            bam=self.tumor_split_bam.out,
            sites=self.gnomad,
            intervals=self.intervals,
        )
        self.step("getpileupsummaries", pileup_tool)

        contamination_tool = gatk4.Gatk4CalculateContaminationLatest(
            pileupTable=self.getpileupsummaries.out
        )
        self.step("calculatecontamination", contamination_tool)

        filter_calls_tool = gatk4.Gatk4FilterMutectCallsLatest(
            vcf=self.mutect2.out,
            reference=self.reference,
            segmentationFile=self.calculatecontamination.segOut,
            contaminationTable=self.calculatecontamination.contOut,
            readOrientationModel=self.learnorientationmodel.out,
            statsFile=self.mutect2.stats,
        )
        self.step("filtermutect2calls", filter_calls_tool)

        # normalise and filter "PASS" variants
        uncompress_tool = UncompressArchive(file=self.filtermutect2calls.out)
        self.step("uncompressvcf", uncompress_tool)

        split_tool = SplitMultiAllele(
            vcf=self.uncompressvcf.out, reference=self.reference
        )
        self.step("splitnormalisevcf", split_tool)

        pass_filter_tool = VcfToolsvcftoolsLatest(
            vcf=self.splitnormalisevcf.out,
            removeFileteredAll=True,
            recode=True,
            recodeINFOAll=True,
        )
        self.step("filterpass", pass_filter_tool)

        self.output("variants", source=self.filtermutect2calls.out)
        self.output("out_bam", source=self.mutect2.bam)
        self.output("out", source=self.filterpass.out)