コード例 #1
0
    def constructor(self):
        """Declare this workflow's inputs, steps and outputs.

        Wiring, exactly as written below:

        * ``manta`` receives the normal/tumor pair, the reference, the
          optional call regions and the exome flag.
        * ``strelka`` receives the same inputs plus ``configStrelka`` and the
          ``candidateSmallIndels`` output of ``manta``.
        * The ``snvs`` and ``indels`` outputs of ``strelka`` are each passed
          through ``BcfToolsNorm`` and then ``BcfToolsIndex``.
        """
        # --- inputs ---------------------------------------------------------
        # NOTE: despite the "Bam" in their ids, both alignment inputs are
        # declared with the CramCrai type.
        for alignment_id in ("normalBam", "tumorBam"):
            self.input(alignment_id, CramCrai)

        self.input("reference", FastaFai)
        self.input("callRegions", BedTabix(optional=True))
        self.input("exome", Boolean(optional=True), default=False)
        self.input("configStrelka", File(optional=True))

        # --- steps ----------------------------------------------------------
        # Each step is registered before a later step reads its outputs via
        # ``self.<step id>``, so the order of the statements below matters.
        manta_tool = Manta(
            bam=self.normalBam,
            tumorBam=self.tumorBam,
            reference=self.reference,
            callRegions=self.callRegions,
            exome=self.exome,
        )
        self.step("manta", manta_tool)

        strelka_tool = Strelka(
            indelCandidates=self.manta.candidateSmallIndels,
            normalBam=self.normalBam,
            tumorBam=self.tumorBam,
            reference=self.reference,
            callRegions=self.callRegions,
            exome=self.exome,
            config=self.configStrelka,
        )
        self.step("strelka", strelka_tool)

        # SNV branch: normalise, then index.
        snv_norm = BcfToolsNorm(
            vcf=self.strelka.snvs,
            reference=self.reference,
        )
        self.step("normaliseSNVs", snv_norm)
        snv_index = BcfToolsIndex(vcf=self.normaliseSNVs.out)
        self.step("indexSNVs", snv_index)

        # INDEL branch: same two steps applied to the indel calls.
        indel_norm = BcfToolsNorm(
            vcf=self.strelka.indels,
            reference=self.reference,
        )
        self.step("normaliseINDELs", indel_norm)
        indel_index = BcfToolsIndex(vcf=self.normaliseINDELs.out)
        self.step("indexINDELs", indel_index)

        # --- outputs --------------------------------------------------------
        # Three outputs come straight from manta, two from the index steps.
        self.output("diploid", source=self.manta.diploidSV)
        self.output("candIndels", source=self.manta.candidateSmallIndels)
        self.output("indels", source=self.indexINDELs.out)
        self.output("snvs", source=self.indexSNVs.out)
        self.output("somaticSVs", source=self.manta.somaticSVs)
コード例 #2
0
    def constructor(self):
        """Declare this workflow's inputs, steps and outputs.

        A single ``strelka2pass`` step is built from the tool returned by
        ``self.getStrelka2Tool()``. Its ``snvs`` and ``indels`` outputs are
        each passed through ``BcfToolsNorm`` and then ``BcfToolsIndex``, and
        the two indexed results are exposed as the workflow outputs.
        """
        # --- inputs ---------------------------------------------------------
        # The alignment type is supplied by the class via
        # getStrelka2InputType(); it is queried once per input.
        for alignment_id in ("normalBam", "tumorBam"):
            self.input(alignment_id, self.getStrelka2InputType())

        self.input("reference", FastaFai)
        self.input("callRegions", BedTabix(optional=True))
        self.input("exome", Boolean(optional=True), default=False)
        self.input("configStrelka", File(optional=True))

        # Arrays of indexed VCFs passed to the caller below.
        # NOTE: an alternative wiring that declared a "strelkaIndels" input
        # and used it as ``indelCandidates`` is intentionally left disabled.
        self.input("indelCandidates", Array(VcfTabix))
        self.input("strelkaSNVs", Array(VcfTabix))

        # --- steps ----------------------------------------------------------
        strelka_cls = self.getStrelka2Tool()
        second_pass = strelka_cls(
            indelCandidates=self.indelCandidates,
            # the strelkaSNVs input is passed as the ``forcedgt`` argument
            forcedgt=self.strelkaSNVs,
            normalBam=self.normalBam,
            tumorBam=self.tumorBam,
            reference=self.reference,
            callRegions=self.callRegions,
            exome=self.exome,
            config=self.configStrelka,
        )
        self.step("strelka2pass", second_pass)

        # SNV branch: normalise, then index.
        snv_norm = BcfToolsNorm(
            vcf=self.strelka2pass.snvs,
            reference=self.reference,
        )
        self.step("normaliseSNVs", snv_norm)
        snv_index = BcfToolsIndex(vcf=self.normaliseSNVs.out)
        self.step("indexSNVs", snv_index)

        # INDEL branch: same two steps applied to the indel calls.
        indel_norm = BcfToolsNorm(
            vcf=self.strelka2pass.indels,
            reference=self.reference,
        )
        self.step("normaliseINDELs", indel_norm)
        indel_index = BcfToolsIndex(vcf=self.normaliseINDELs.out)
        self.step("indexINDELs", indel_index)

        # --- outputs --------------------------------------------------------
        self.output("indels", source=self.indexINDELs.out)
        self.output("snvs", source=self.indexSNVs.out)
コード例 #3
0
    def constructor(self):
        """Declare this workflow's inputs, steps and outputs.

        Step graph, exactly as wired below:

        * ``createCallRegions`` is built from the reference and ``regionSize``.
        * ``mutect2`` is scattered over ``intervals``, which is fed by
          ``createCallRegions.regions``.
        * ``concat`` -> ``indexUnfiltered`` handle the ``mutect2.out`` results.
        * ``learn`` consumes ``mutect2.f1f2r_out``; ``mergeMutect2`` consumes
          ``mutect2.stats``.
        * ``pileup`` -> ``contamination`` run on the tumor bams, with
          ``biallelicSites`` used for both ``sites`` and ``intervals``.
        * ``filtering`` combines all of the above; its result goes through
          ``normalise`` -> ``indexFiltered`` and is exposed as output ``out``.
        """
        # Input descriptions. These strings are passed to the workflow as-is;
        # they are only wrapped here, their content is kept verbatim.
        normal_bams_doc = (
            "The bams that make up the normal sample. Generally Mutect will "
            "expect one bam per sample, but as long as the sample ids in the "
            "bam header are set appropriatly, multiple bams per sample will "
            "work"
        )
        tumor_bams_doc = (
            "The bams that contain the tumour samples. Generally Mutect will "
            "expect one bam per sample, but as long as the sample ids in the "
            "bam header are set appropriatly, multiple bams per sample will "
            "work"
        )
        normal_name_doc = (
            "The sample id of the normal sample. This id will be used to "
            "distingiush reads from this sample from all other samples. "
            "This id needs to tbe the one set in the bam header"
        )
        biallelic_doc = (
            "A vcf of common biallalic sites from a population. "
            "This will be used to estimate sample contamination."
        )
        reference_doc = (
            "A fasta and dict indexed reference, which needs to be the "
            "reference, the bams were aligned to."
        )
        region_size_doc = (
            "The size of the regions over which to parallelise the analysis. "
            "This should be adjusted, if there are lots of samples or a very "
            "high sequencing depth. default: 10M bp"
        )
        pon_doc = (
            "The panel of normals, which summarises the technical and "
            "biological sites of errors. Its usually a good idea to generate "
            "this for your own cohort, but GATK suggests around 30 normals, "
            "so their panel is usually a good idea."
        )
        germline_doc = (
            "Vcf of germline variants. GATK provides this as well, but it "
            "can easily substituted with the newst gnomad etc vcf."
        )

        # --- inputs ---------------------------------------------------------
        # Normal and tumor alignments are separate inputs (each may hold
        # several bams) because some tools only work with the tumor bams.
        self.input(
            "normalBams",
            Array(self.getMutect2InputType()),
            doc=normal_bams_doc,
        )
        self.input(
            "tumorBams",
            Array(self.getMutect2InputType()),
            doc=tumor_bams_doc,
        )

        # The normal sample's name is required too; it has to be the name
        # used inside the bams.
        self.input("normalName", String, doc=normal_name_doc)

        self.input("biallelicSites", VcfTabix, doc=biallelic_doc)
        self.input("reference", FastaWithDict, doc=reference_doc)
        self.input("regionSize", int, default=10000000, doc=region_size_doc)
        self.input("panelOfNormals", VcfTabix, doc=pon_doc)
        self.input("germlineResource", VcfTabix, doc=germline_doc)

        # --- steps ----------------------------------------------------------
        # Each step is registered before a later step reads its outputs via
        # ``self.<step id>``, so the order of the statements below matters.
        region_tool = CreateCallRegions(
            reference=self.reference,
            regionSize=self.regionSize,
            equalize=True,
        )
        self.step("createCallRegions", region_tool)

        mutect_cls = self.getMutect2Tool()
        mutect_tool = mutect_cls(
            tumorBams=self.tumorBams,
            normalBams=self.normalBams,
            normalSample=self.normalName,
            intervals=self.createCallRegions.regions,
            reference=self.reference,
            panelOfNormals=self.panelOfNormals,
            germlineResource=self.germlineResource,
        )
        # Scattered over the ``intervals`` argument.
        self.step("mutect2", mutect_tool, scatter="intervals")

        concat_tool = BcfToolsConcat(vcf=self.mutect2.out)
        self.step("concat", concat_tool)
        unfiltered_index = BcfToolsIndex(vcf=self.concat.out)
        self.step("indexUnfiltered", unfiltered_index)

        orientation_tool = LearnReadOrientationModel(
            f1r2CountsFiles=self.mutect2.f1f2r_out,
        )
        self.step("learn", orientation_tool)

        stats_tool = MergeMutectStats(statsFiles=self.mutect2.stats)
        self.step("mergeMutect2", stats_tool)

        pileup_cls = self.getPileUpTool()
        pileup_tool = pileup_cls(
            bam=self.tumorBams,
            # the same input is used for both ``sites`` and ``intervals``
            sites=self.biallelicSites,
            intervals=self.biallelicSites,
            reference=self.reference,
        )
        self.step("pileup", pileup_tool)

        contamination_tool = CalculateContamination(
            pileupTable=self.pileup.out,
        )
        self.step("contamination", contamination_tool)

        filter_tool = FilterMutectCalls(
            vcf=self.indexUnfiltered.out,
            reference=self.reference,
            segmentationFile=self.contamination.segOut,
            contaminationTable=self.contamination.contOut,
            readOrientationModel=self.learn.out,
            statsFile=self.mergeMutect2.out,
        )
        self.step("filtering", filter_tool)

        norm_tool = BcfToolsNorm(
            vcf=self.filtering.out,
            reference=self.reference,
        )
        self.step("normalise", norm_tool)
        filtered_index = BcfToolsIndex(vcf=self.normalise.out)
        self.step("indexFiltered", filtered_index)

        # --- outputs --------------------------------------------------------
        self.output("out", source=self.indexFiltered.out)
コード例 #4
0
    def constructor(self):
        """Declare this workflow's inputs, steps and outputs.

        Step graph, exactly as wired below:

        * ``createCallRegions`` is built from the reference and ``regionSize``.
        * ``mutect2`` is scattered over ``intervals``, which is fed by
          ``createCallRegions.regions``.
        * ``concat`` -> ``indexUnfiltered`` handle the ``mutect2.out`` results.
        * ``learn`` consumes ``mutect2.f1f2r_out``; ``mergeMutect2`` consumes
          ``mutect2.stats``.
        * ``pileup`` -> ``contamination`` run on the tumor bams, with
          ``biallelicSites`` used for both ``sites`` and ``intervals``.
        * ``filtering`` combines all of the above; its result goes through
          ``normalise`` -> ``indexFiltered`` and is exposed as output ``out``.
        """
        # --- inputs ---------------------------------------------------------
        # Normal and tumor alignments are separate inputs (each may hold
        # several bams) because some tools only work with the tumor bams.
        for bam_array_id in ("normalBams", "tumorBams"):
            self.input(bam_array_id, Array(BamBai))

        # The normal sample's name is required too; it has to be the name
        # used inside the bams.
        self.input("normalName", String)

        self.input("biallelicSites", VcfTabix)
        self.input("reference", FastaWithDict)
        self.input("regionSize", int, default=10000000)
        self.input("panelOfNormals", VcfTabix)
        self.input("germlineResource", VcfTabix)

        # --- steps ----------------------------------------------------------
        # Each step is registered before a later step reads its outputs via
        # ``self.<step id>``, so the order of the statements below matters.
        region_tool = CreateCallRegions(
            reference=self.reference,
            regionSize=self.regionSize,
            equalize=True,
        )
        self.step("createCallRegions", region_tool)

        mutect_tool = Mutect2(
            tumorBams=self.tumorBams,
            normalBams=self.normalBams,
            normalSample=self.normalName,
            intervals=self.createCallRegions.regions,
            reference=self.reference,
            panelOfNormals=self.panelOfNormals,
            germlineResource=self.germlineResource,
        )
        # Scattered over the ``intervals`` argument.
        self.step("mutect2", mutect_tool, scatter="intervals")

        concat_tool = BcfToolsConcat(vcf=self.mutect2.out)
        self.step("concat", concat_tool)
        unfiltered_index = BcfToolsIndex(vcf=self.concat.out)
        self.step("indexUnfiltered", unfiltered_index)

        orientation_tool = LearnReadOrientationModel(
            f1r2CountsFiles=self.mutect2.f1f2r_out,
        )
        self.step("learn", orientation_tool)

        stats_tool = MergeMutectStats(statsFiles=self.mutect2.stats)
        self.step("mergeMutect2", stats_tool)

        pileup_tool = GetPileUpSummaries(
            bam=self.tumorBams,
            # the same input is used for both ``sites`` and ``intervals``
            sites=self.biallelicSites,
            intervals=self.biallelicSites,
            reference=self.reference,
        )
        self.step("pileup", pileup_tool)

        contamination_tool = CalculateContamination(
            pileupTable=self.pileup.out,
        )
        self.step("contamination", contamination_tool)

        filter_tool = FilterMutectCalls(
            vcf=self.indexUnfiltered.out,
            reference=self.reference,
            segmentationFile=self.contamination.segOut,
            contaminationTable=self.contamination.contOut,
            readOrientationModel=self.learn.out,
            statsFile=self.mergeMutect2.out,
        )
        self.step("filtering", filter_tool)

        norm_tool = BcfToolsNorm(
            vcf=self.filtering.out,
            reference=self.reference,
        )
        self.step("normalise", norm_tool)
        filtered_index = BcfToolsIndex(vcf=self.normalise.out)
        self.step("indexFiltered", filtered_index)

        # --- outputs --------------------------------------------------------
        self.output("out", source=self.indexFiltered.out)