Ejemplo n.º 1
0
 def tumor_normal_inputs():
     """Build the ToolInputs for a tumor/normal BAM pair and their sample names.

     Returns:
         list: ToolInputs ordered tumor, tumorName, normal, normalName.
     """

     def _bam_and_name(role, bam_position):
         # One "-I" BAM input plus the matching "-<role>" sample-name input.
         reads = ToolInput(
             role,
             BamBai(),
             position=bam_position,
             prefix="-I",
             doc="BAM/SAM/CRAM file containing reads",
         )
         sample = ToolInput(
             f"{role}Name",
             String(),
             position=6,
             prefix=f"-{role}",
             doc=f"BAM sample name of {role}. "
             "May be URL-encoded as output by GetSampleName with -encode.",
         )
         return [reads, sample]

     # The tumor BAM is declared at position 6, the normal BAM at position 5.
     return _bam_and_name("tumor", 6) + _bam_and_name("normal", 5)
Ejemplo n.º 2
0
 def inputs(self):
     """List the inputs: unmapped BAM, aligned BAM(s), reference and output name.

     Returns:
         list: the four ToolInputs declared here followed by
         ``self.additional_args``.
     """
     unmapped = ToolInput(
         "ubam",
         BamBai(),
         prefix="--UNMAPPED_BAM",
         prefix_applies_to_all_elements=True,
         doc="Original SAM or BAM file of unmapped reads, which must be in queryname order.",
         position=10,
     )
     aligned = ToolInput(
         "bam",
         Array(BamBai()),
         prefix="--ALIGNED_BAM",
         prefix_applies_to_all_elements=True,
         doc="SAM or BAM file(s) with alignment data.",
         position=10,
     )
     reference = ToolInput(
         "reference",
         FastaWithDict(optional=True),
         prefix="--REFERENCE_SEQUENCE",
         position=10,
         doc="Reference sequence file.",
     )
     merged_name = ToolInput(
         "outputFilename",
         Filename(extension=".bam"),
         position=10,
         prefix="--OUTPUT",
         doc="Merged SAM or BAM file to write to.",
     )
     return [unmapped, aligned, reference, merged_name, *self.additional_args]
    def tests(self) -> Optional[List[TTestCase]]:
        """Declare the single "basic" test case, fed from remote test data.

        Returns:
            A one-element list holding the "basic" TTestCase.
        """
        bucket = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics"
        germline_dir = f"{bucket}/wgsgermline_data"
        somatic_dir = f"{bucket}/wgssomatic_data"

        workflow_inputs = {
            # NOTE(review): the second normal read file ends in "_R21" while the
            # tumor pair below uses "_R1"/"_R2" -- confirm the remote file name.
            "normal_inputs": [
                [
                    f"{somatic_dir}/NA24385-BRCA1_R1.fastq.gz",
                    f"{somatic_dir}/NA24385-BRCA1_R21.fastq.gz",
                ]
            ],
            "normal_name": "NA24385-BRCA1",
            "tumor_inputs": [
                [
                    f"{somatic_dir}/NA12878-NA24385-mixture-BRCA1_R1.fastq.gz",
                    f"{somatic_dir}/NA12878-NA24385-mixture-BRCA1_R2.fastq.gz",
                ]
            ],
            "tumor_name": "NA12878-NA24385-mixture",
            "reference": f"{germline_dir}/Homo_sapiens_assembly38.chr17.fasta",
            "gridss_blacklist": f"{somatic_dir}/consensusBlacklist.hg38.chr17.bed",
            "gnomad": f"{somatic_dir}/af-only-gnomad.hg38.BRCA1.vcf.gz",
            "gatk_intervals": [f"{germline_dir}/BRCA1.hg38.bed"],
            "known_indels": f"{germline_dir}/Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz",
            "mills_indels": f"{germline_dir}/Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz",
            "snps_1000gp": f"{germline_dir}/1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz",
            "snps_dbsnp": f"{germline_dir}/Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz",
            "cutadapt_adapters": f"{germline_dir}/contaminant_list.txt",
        }

        # Expected outputs are concatenated in declaration order:
        # BAMs, performance summaries, then FastQC reports.
        expected_outputs = (
            BamBai.basic_test("out_normal_bam", 3265300, 49500)
            + BamBai.basic_test("out_tumor_bam", 3341700, 49000)
            + TextFile.basic_test(
                "out_normal_performance_summary",
                950,
                md5="e3205735e5fe8c900f05050f8ed73f19",
            )
            + TextFile.basic_test(
                "out_tumor_performance_summary",
                950,
                md5="122bfa2ece90c0f030015feba4ba7d84",
            )
            + FastqGzPair.basic_test("out_normal_fastqc_reports", 881300)
            + FastqGzPair.basic_test("out_tumor_fastqc_reports", 874900)
        )

        return [
            TTestCase(name="basic", input=workflow_inputs, output=expected_outputs)
        ]
Ejemplo n.º 4
0
 def tests(self):
     """Declare the "basic" and "minimal" test cases over the same remote BAM.

     Returns:
         list: two TTestCase objects whose input values are identical; only
         the expected output differs.
     """
     data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     def _fresh_inputs():
         # A new dict per test case, so the two cases never share one object.
         return {
             "bams": [
                 f"{data_url}/NA12878-BRCA1.sorted.bam",
             ],
             "maxRecordsInRam": 5000000,
             "createIndex": True,
             "mergeSamFiles_useThreading": True,
             "mergeSamFiles_validationStringency": "SILENT",
         }

     basic_case = TTestCase(
         name="basic",
         input=_fresh_inputs(),
         output=BamBai.basic_test(
             "out",
             2829000,
             3780,
             f"{data_url}/NA12878-BRCA1.markduped.bam.flagstat",
         ),
     )
     minimal_case = TTestCase(
         name="minimal",
         input=_fresh_inputs(),
         output=self.minimal_test(),
     )
     return [basic_case, minimal_case]
Ejemplo n.º 5
0
 def inputs(self):
     """Assemble the inputs: inherited ones, the BAM, two output names, extras.

     Returns:
         list: parent-class inputs, then "bam", "outputFilename" and
         "outputHistogram", then the class-level ``additional_args``.
     """

     def _bam_derived(suffix, extension):
         # File name built from the "bam" input with its extension removed.
         return Filename(
             prefix=InputSelector("bam", remove_file_extension=True),
             extension=extension,
             suffix=suffix,
         )

     tool_inputs = list(super(Gatk4CollectInsertSizeMetricsBase, self).inputs())
     tool_inputs.append(
         ToolInput(
             "bam",
             BamBai(optional=False),
             prefix="-I",
             doc="Input SAM or BAM file.  Required.",
             position=10,
         )
     )
     tool_inputs.append(
         ToolInput(
             "outputFilename",
             _bam_derived(".metrics", ".txt"),
             prefix="-O",
             doc="File to write the output to.  Required.",
         )
     )
     tool_inputs.append(
         ToolInput(
             "outputHistogram",
             _bam_derived(".histogram", ".pdf"),
             prefix="-H",
             doc="File to write insert size Histogram chart to.  Required. ",
         )
     )
     tool_inputs.extend(Gatk4CollectInsertSizeMetricsBase.additional_args)
     return tool_inputs
Ejemplo n.º 6
0
 def inputs(self):
     """Assemble the inputs: inherited ones, BAM, reference, prefix, intervals.

     Returns:
         list: parent-class inputs, the four ToolInputs declared here, then
         ``self.additional_args``.
     """
     tool_inputs = list(super(Gatk4DepthOfCoverageBase, self).inputs())

     reads = ToolInput(
         "bam",
         BamBai(),
         prefix="-I",
         doc="The SAM/BAM/CRAM file containing reads.",
         # The index is presented with the ".bai" -> "^.bai" mapping.
         secondaries_present_as={".bai": "^.bai"},
     )
     reference = ToolInput(
         "reference", FastaWithDict(), prefix="-R", doc="Reference sequence"
     )
     out_prefix = ToolInput(
         "outputPrefix",
         String(),
         prefix="-O",
         doc="An output file created by the walker. Will overwrite contents if file exists",
     )
     intervals = ToolInput(
         "intervals",
         Array(Bed),
         prefix="--intervals",
         doc="-L (BASE) One or more genomic intervals over which to operate",
         prefix_applies_to_all_elements=True,
     )

     tool_inputs.extend([reads, reference, out_prefix, intervals])
     tool_inputs.extend(self.additional_args)
     return tool_inputs
Ejemplo n.º 7
0
 def inputs(self):
     """List the inputs: BAMs, reference, two output names and three options.

     Returns:
         list: seven ToolInputs in declaration order (bams, reference,
         outputFilename, assemblyFilename, threads, blacklist, tmpdir).
     """
     bams = ToolInput("bams", Array(BamBai()), position=10)
     reference = ToolInput(
         "reference", FastaWithDict(), position=1, prefix="--reference"
     )
     vcf_name = ToolInput(
         "outputFilename",
         Filename(suffix=".svs", extension=".vcf"),
         position=2,
         prefix="--output",
     )
     assembly_name = ToolInput(
         "assemblyFilename",
         Filename(suffix=".assembled", extension=".bam"),
         position=3,
         prefix="--assembly",
     )
     # Thread count defaults to a CpuSelector.
     threads = ToolInput(
         "threads", Int(optional=True), default=CpuSelector(), prefix="--threads"
     )
     blacklist = ToolInput(
         "blacklist", Bed(optional=True), position=4, prefix="--blacklist"
     )
     workdir = ToolInput(
         "tmpdir", String(optional=True), default="./TMP", prefix="--workingdir"
     )
     return [bams, reference, vcf_name, assembly_name, threads, blacklist, workdir]
Ejemplo n.º 8
0
 def inputs(self):
     """Assemble the inputs: inherited and shared ones, then the tool's own.

     Returns:
         list: parent-class inputs, the class-level ``additional_args``, then
         "bam", "sites", "intervals" and "pileupTableOut".
     """
     tool_inputs = [
         *super().inputs(),
         *Gatk4GetPileUpSummariesBase.additional_args,
     ]
     tool_inputs.append(
         ToolInput(
             "bam",
             Array(BamBai()),
             prefix="-I",
             prefix_applies_to_all_elements=True,
             doc="The SAM/BAM/CRAM file containing reads.",
             position=0,
         )
     )
     tool_inputs.append(
         ToolInput(
             "sites",
             VcfTabix(),
             prefix="-V",
             doc="sites of common biallelic variants",
         )
     )
     tool_inputs.append(
         ToolInput(
             "intervals",
             VcfTabix(optional=True),
             prefix="--intervals",
             doc="-L (BASE) One or more genomic intervals over which to operate",
         )
     )
     tool_inputs.append(
         ToolInput(
             "pileupTableOut", Filename(extension=".txt"), position=1, prefix="-O"
         )
     )
     return tool_inputs
Ejemplo n.º 9
0
 def tests(self):
     """Declare the "basic" test case using files under the test-data path.

     Returns:
         list: a single TTestCase expecting an "out" BAM and a "metrics" text
         file.
     """

     def _germline_file(filename):
         # Join the test-data path, "wgsgermline_data" and the file name.
         return os.path.join(
             BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
         )

     case_inputs = {
         "bam": [_germline_file("NA12878-BRCA1.merged.bam")],
         "javaOptions": ["-Xmx6G"],
         "maxRecordsInRam": 5000000,
         "createIndex": True,
         "tmpDir": "./tmp",
     }
     expected_bam = BamBai.basic_test(
         "out",
         2829000,
         3780,
         _germline_file("NA12878-BRCA1.markduped.bam.flagstat"),
     )
     expected_metrics = TextFile.basic_test(
         "metrics",
         3700,
         "NA12878-BRCA1\t193\t9468\t164\t193\t46\t7\t1\t0.003137\t7465518",
         112,
     )
     return [
         TTestCase(
             name="basic",
             input=case_inputs,
             output=expected_bam + expected_metrics,
         )
     ]
 def inputs(self) -> List[ToolInput]:
     """List the inputs: intervals, output name, BAM, reference, shared args.

     Every ToolInput declared here disables shell quoting.

     Returns:
         The four ToolInputs declared here followed by the class-level
         ``vardict_inputs`` and ``var2vcf_inputs``.
     """
     intervals = ToolInput("intervals", Bed(), position=2, shell_quote=False)
     # The output file name is passed with ">" as its prefix.
     redirect_target = ToolInput(
         "outputFilename",
         Filename(extension=".vcf", suffix=".vardict"),
         prefix=">",
         position=6,
         shell_quote=False,
     )
     reads = ToolInput(
         "bam",
         BamBai(),
         prefix="-b",
         position=1,
         shell_quote=False,
         doc="The indexed BAM file",
     )
     reference = ToolInput(
         "reference",
         FastaFai(),
         prefix="-G",
         position=1,
         shell_quote=False,
         doc="The reference fasta. Should be indexed (.fai). "
         "Defaults to: /ngs/reference_data/genomes/Hsapiens/hg19/seq/hg19.fa",
     )
     return [
         intervals,
         redirect_target,
         reads,
         reference,
         *VarDictGermlineBase.vardict_inputs,
         *VarDictGermlineBase.var2vcf_inputs,
     ]
Ejemplo n.º 11
0
 def tests(self):
     """Declare the "basic" test case using files under the test-data path.

     Returns:
         list: a single TTestCase expecting an "out" BAM.
     """

     def _germline_file(filename):
         # Join the test-data path, "wgsgermline_data" and the file name.
         return os.path.join(
             BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
         )

     case_inputs = {
         "bams": [_germline_file("NA12878-BRCA1.sorted.bam")],
         "createIndex": True,
         "validationStringency": "SILENT",
         "javaOptions": ["-Xmx6G"],
         "maxRecordsInRam": 5000000,
         "tmpDir": "./tmp",
         "useThreading": True,
     }
     expected = BamBai.basic_test(
         "out",
         2826968,
         49688,
         _germline_file("NA12878-BRCA1.bam.flagstat"),
         "963a51f7feed5b829319b947961b8a3e",
         "231c10d0e43766170f5a7cd1b8a6d14e",
     )
     return [TTestCase(name="basic", input=case_inputs, output=expected)]
Ejemplo n.º 12
0
 def inputs(self) -> List[ToolInput]:
     """List the inputs: version, BAM, output folder, genome folder and extras.

     Returns:
         The four ToolInputs declared here followed by
         ``self.additional_hygea_args``.
     """
     version = ToolInput("piscesVersion", String())
     reads = ToolInput(
         "inputBam",
         BamBai(),
         prefix="-b",
         position=4,
         shell_quote=False,
         doc="Input BAM file",
     )
     out_folder = ToolInput(
         "outputDir",
         String(),
         prefix="--outfolder",
         position=4,
         shell_quote=False,
         doc="Output Folder",
     )
     genome_folder = ToolInput(
         "referenceFolder",
         Directory(),
         prefix="--genomefolders",
         position=5,
         shell_quote=False,
         doc="Folder containing reference genome files",
     )
     return [
         version,
         reads,
         out_folder,
         genome_folder,
         *self.additional_hygea_args,
     ]
Ejemplo n.º 13
0
 def tests(self):
     """Declare the "basic" test case using files under the test-data path.

     Returns:
         list: a single TTestCase expecting an "out" BAM.
     """

     def _germline_file(filename):
         # Join the test-data path, "wgsgermline_data" and the file name.
         return os.path.join(
             BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
         )

     case_inputs = {
         "bam": _germline_file("NA12878-BRCA1.recalibrated.bam"),
         "intervals": _germline_file("BRCA1.hg38.bed"),
         "javaOptions": ["-Xmx3G"],
         "outputFilename": ".",
     }
     expected = BamBai.basic_test(
         "out",
         2600900,
         21300,
         _germline_file("NA12878-BRCA1.split.flagstat"),
     )
     return [TTestCase(name="basic", input=case_inputs, output=expected)]
Ejemplo n.º 14
0
 def tests(self):
     """Declare the "basic" test case, fed from remote test data.

     Returns:
         list: a single TTestCase expecting an "out" VCF and a "bam" output.
     """
     data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     case_inputs = {
         "inputRead": f"{data_url}/NA12878-BRCA1.split.bam",
         "reference": f"{data_url}/Homo_sapiens_assembly38.chr17.fasta",
         "intervals": f"{data_url}/BRCA1.hg38.bed",
         "dbsnp": f"{data_url}/Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz",
         "javaOptions": ["-Xmx6G"],
         "pairHmmImplementation": "LOGLESS_CACHING",
     }
     expected_vcf = VcfTabix.basic_test(
         "out",
         12800,
         270,
         214,
         ["GATKCommandLine"],
         "0224e24e5fc27286ee90c8d3c63373a7",
     )
     expected_bam = BamBai.basic_test(
         "bam",
         596698,
         21272,
         f"{data_url}/NA12878-BRCA1.haplotyped.flagstat",
         "d83b4c0d8eab24a3be1cc6af4f827753",
         "b4bb4028b8679a3a635e3ad87126a097",
     )
     return [
         TTestCase(
             name="basic",
             input=case_inputs,
             output=expected_vcf + expected_bam,
         )
     ]
Ejemplo n.º 15
0
 def tests(self):
     """Declare the "basic" test case, fed from remote test data.

     Returns:
         list: a single TTestCase expecting an "out" BAM.
     """
     data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     case_inputs = {
         "bams": [
             f"{data_url}/NA12878-BRCA1.sorted.bam",
         ],
         "createIndex": True,
         "validationStringency": "SILENT",
         "javaOptions": ["-Xmx6G"],
         "maxRecordsInRam": 5000000,
         "tmpDir": "./tmp",
         "useThreading": True,
     }
     expected = BamBai.basic_test(
         "out",
         2826968,
         49688,
         f"{data_url}/NA12878-BRCA1.bam.flagstat",
         "963a51f7feed5b829319b947961b8a3e",
         "231c10d0e43766170f5a7cd1b8a6d14e",
     )
     return [TTestCase(name="basic", input=case_inputs, output=expected)]
Ejemplo n.º 16
0
 def inputs(self):
     """Assemble the inputs: inherited ones, then BAM, reference and options.

     Returns:
         list: parent-class inputs, then "bam", "reference", "outputFilename",
         "recalFile" and "intervals", then ``self.additional_args``.
     """
     tool_inputs = list(super(Gatk4ApplyBqsrBase, self).inputs())

     reads = ToolInput(
         "bam",
         BamBai(),
         prefix="-I",
         doc="The SAM/BAM/CRAM file containing reads.",
         # The index is presented with the ".bai" -> "^.bai" mapping.
         secondaries_present_as={".bai": "^.bai"},
         position=10,
     )
     reference = ToolInput(
         "reference", FastaWithDict(), prefix="-R", doc="Reference sequence"
     )
     out_name = ToolInput(
         "outputFilename",
         Filename(extension=".bam"),
         prefix="-O",
         doc="Write output to this file",
     )
     recal_table = ToolInput(
         "recalFile",
         Tsv(optional=True),
         prefix="--bqsr-recal-file",
         doc="Input recalibration table for BQSR",
     )
     intervals = ToolInput(
         "intervals",
         Bed(optional=True),
         prefix="--intervals",
         doc="-L (BASE) One or more genomic intervals over which to operate",
     )

     tool_inputs.extend([reads, reference, out_name, recal_table, intervals])
     tool_inputs.extend(self.additional_args)
     return tool_inputs
Ejemplo n.º 17
0
 def inputs(self):
     """List the inputs: BAM, reference, output prefix and two interval files.

     Returns:
         list: the five ToolInputs declared here followed by
         ``self.additional_args``.
     """
     reads = ToolInput(
         "bam",
         BamBai(),
         prefix="-I",
         doc="Input file containing sequence  data (BAM or CRAM)",
         # The index is presented with the ".bai" -> "^.bai" mapping.
         secondaries_present_as={".bai": "^.bai"},
         position=10,
     )
     reference = ToolInput(
         "reference", FastaWithDict(), prefix="-R", doc="Reference sequence file"
     )
     out_prefix = ToolInput(
         "outputPrefix",
         String(),
         prefix="-o",
         doc="An output file created by the walker. Will overwrite contents if file exists",
     )
     include = ToolInput(
         "intervals",
         File(optional=True),
         prefix="-L",
         doc="One or more genomic intervals over which to operate",
     )
     exclude = ToolInput(
         "excludeIntervals",
         File(optional=True),
         prefix="--excludeIntervals",
         doc="One or more genomic intervals to exclude from processing",
     )
     return [reads, reference, out_prefix, include, exclude, *self.additional_args]
Ejemplo n.º 18
0
 def tests(self):
     """Declare the "basic" test case, fed from remote test data.

     Returns:
         list: a single TTestCase expecting an "out" BAM and a "metrics" text
         file.
     """
     data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     case_inputs = {
         "bam": [f"{data_url}/NA12878-BRCA1.merged.bam"],
         "javaOptions": ["-Xmx6G"],
         "maxRecordsInRam": 5000000,
         "createIndex": True,
         "tmpDir": "./tmp",
     }
     expected_bam = BamBai.basic_test(
         "out",
         2829000,
         3780,
         f"{data_url}/NA12878-BRCA1.markduped.bam.flagstat",
     )
     expected_metrics = TextFile.basic_test(
         "metrics",
         3700,
         "NA12878-BRCA1\t193\t9468\t164\t193\t46\t7\t1\t0.003137\t7465518",
         112,
     )
     return [
         TTestCase(
             name="basic",
             input=case_inputs,
             output=expected_bam + expected_metrics,
         )
     ]
Ejemplo n.º 19
0
 def tests(self):
     """Declare the "basic" and "minimal" test cases over the same remote data.

     Returns:
         list: two TTestCase objects whose input values are identical; only
         the expected output differs.
     """
     data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     def _fresh_inputs():
         # A new dict per test case, so the two cases never share one object.
         return {
             "bam": f"{data_url}/NA12878-BRCA1.markduped.bam",
             "reference": f"{data_url}/Homo_sapiens_assembly38.chr17.fasta",
             "recalFile": f"{data_url}/NA12878-BRCA1.markduped.table",
             "intervals": f"{data_url}/BRCA1.hg38.bed",
         }

     basic_case = TTestCase(
         name="basic",
         input=_fresh_inputs(),
         output=BamBai.basic_test(
             "out",
             2600000,
             21000,
             f"{data_url}/NA12878-BRCA1.recalibrated.flagstat",
         ),
     )
     minimal_case = TTestCase(
         name="minimal",
         input=_fresh_inputs(),
         output=self.minimal_test(),
     )
     return [basic_case, minimal_case]
Ejemplo n.º 20
0
 def inputs(self):
     """Assemble the inputs: inherited ones, BAMs, sample name and output name.

     Returns:
         list: parent-class inputs, then "bams", "sampleName" and
         "outputFilename", then ``self.additional_args``.
     """
     tool_inputs = list(super().inputs())

     bams = ToolInput(
         "bams",
         Array(BamBai()),
         prefix="-I",
         prefix_applies_to_all_elements=True,
         doc="The SAM/BAM file to sort.",
         position=10,
     )
     sample_name = ToolInput(
         "sampleName",
         String(optional=True),
         doc="Used for naming purposes only",
     )
     # The output name takes the "sampleName" input as its prefix.
     merged_name = ToolInput(
         "outputFilename",
         Filename(
             prefix=InputSelector("sampleName"),
             suffix=".merged",
             extension=".bam",
         ),
         position=10,
         prefix="-O",
         doc="SAM/BAM file to write merged result to",
     )

     tool_inputs.extend([bams, sample_name, merged_name])
     tool_inputs.extend(self.additional_args)
     return tool_inputs
Ejemplo n.º 21
0
 def outputs(self):
     """List the outputs: variant file, stats file, tarball and optional BAM.

     Returns:
         list: four ToolOutputs named "out", "stats", "f1f2r_out" and "bam".
     """
     variants = ToolOutput(
         "out",
         VcfTabix,
         glob=InputSelector("outputFilename"),
         doc="To determine type",
     )
     # The stats glob is the "outputFilename" input with ".stats" appended.
     stats = ToolOutput(
         "stats",
         TextFile(extension=".stats"),
         glob=InputSelector("outputFilename") + ".stats",
         doc="To determine type",
     )
     tarball = ToolOutput(
         "f1f2r_out",
         TarFileGz,
         glob=InputSelector("f1r2TarGz_outputFilename"),
         doc="To determine type",
     )
     haplotype_bam = ToolOutput(
         "bam",
         BamBai(optional=True),
         glob=InputSelector("outputBamName"),
         doc="File to which assembled haplotypes should be written",
         secondaries_present_as={".bai": "^.bai"},
     )
     return [variants, stats, tarball, haplotype_bam]
Ejemplo n.º 22
0
    def constructor(self):
        """Declare the workflow: four inputs, a merge step, a mark-duplicates step.

        The "out" output is sourced from the markDuplicates step.
        """
        # Workflow inputs.
        self.input("bams", Array(BamBai()))
        self.input("createIndex", Boolean, default=True)
        self.input("maxRecordsInRam", Int, default=5000000)
        self.input("sampleName", String(optional=True))

        # Step 1: merge the input BAMs.
        merge_tool = Gatk4MergeSamFiles_4_1_2(
            bams=self.bams,
            useThreading=True,
            createIndex=self.createIndex,
            maxRecordsInRam=self.maxRecordsInRam,
            validationStringency="SILENT",
            sampleName=self.sampleName,
        )
        self.step("mergeSamFiles", merge_tool)

        # Step 2: mark duplicates on the merge step's output, reusing the
        # same createIndex / maxRecordsInRam inputs.
        dedup_tool = Gatk4MarkDuplicates_4_1_2(
            bam=self.mergeSamFiles.out,
            createIndex=self.createIndex,
            maxRecordsInRam=self.maxRecordsInRam,
        )
        self.step("markDuplicates", dedup_tool)

        self.output("out", source=self.markDuplicates.out)
Ejemplo n.º 23
0
 def inputs(self):
     """List the inputs: the BAM plus the output and metrics file names.

     Returns:
         list: "bam", "outputFilename" and "metricsFilename", followed by the
         parent-class inputs and ``self.additional_args``.
     """
     return [
         ToolInput(
             "bam",
             BamBai(),
             prefix="-I",
             position=10,
             # The index is presented with the ".bai" -> "^.bai" mapping.
             secondaries_present_as={".bai": "^.bai"},
             doc="One or more input SAM or BAM files to analyze. Must be coordinate sorted.",
         ),
         ToolInput(
             "outputFilename",
             Filename(extension=".bam"),
             position=10,
             prefix="-O",
             # -O names the ".bam" of marked records, so it carries that doc.
             doc="The output file to write marked records to.",
         ),
         ToolInput(
             "metricsFilename",
             Filename(extension=".metrics.txt"),
             position=10,
             prefix="-M",
             # -M names the ".metrics.txt" file, so it carries the metrics doc.
             doc="File to write duplication metrics to",
         ),
         *super(Gatk4MarkDuplicatesBase, self).inputs(),
         *self.additional_args,
     ]
Ejemplo n.º 24
0
 def inputs(self):
     """Assemble the inputs: inherited and shared ones, then the tool's own.

     Returns:
         list: parent-class inputs, the class-level ``additional_args``, then
         "bam", "sampleName", "sites", "intervals", "pileupTableOut" and
         "reference".
     """
     tool_inputs = [
         *super().inputs(),
         *Gatk4GetPileUpSummariesBase.additional_args,
     ]

     reads = ToolInput(
         "bam",
         Array(BamBai()),
         prefix="-I",
         prefix_applies_to_all_elements=True,
         doc="The SAM/BAM/CRAM file containing reads.",
         position=0,
     )
     sample_name = ToolInput(
         "sampleName", String(optional=True), doc="Used for naming purposes"
     )
     sites = ToolInput(
         "sites",
         VcfTabix(),
         prefix="-V",
         doc="sites of common biallelic variants",
     )
     intervals = ToolInput(
         "intervals",
         Bed(optional=True),
         prefix="--intervals",
         doc="-L (BASE) One or more genomic intervals over which to operate",
     )

     # Output-name prefix: FirstOperator over the "sampleName" input and the
     # literal "generated", passed through FilterNullOperator and joined
     # with ".".
     # A second, intervals-derived component used to be sketched here:
     # If(
     #     IsDefined(InputSelector("intervals")),
     #     InputSelector("intervals", remove_file_extension=True),
     #     "",
     # ),
     name_component = FirstOperator([InputSelector("sampleName"), "generated"])
     table_prefix = JoinOperator(FilterNullOperator([name_component]), ".")
     pileup_table = ToolInput(
         "pileupTableOut",
         Filename(prefix=table_prefix, extension=".txt"),
         position=1,
         prefix="-O",
     )

     reference = ToolInput(
         "reference",
         FastaWithDict(optional=True),
         prefix="-R",
         doc="reference to use when decoding CRAMS",
     )

     tool_inputs.extend(
         [reads, sample_name, sites, intervals, pileup_table, reference]
     )
     return tool_inputs
Ejemplo n.º 25
0
 def inputs(self):
     """Assemble the inputs: inherited and shared ones, then the tool's own.

     Returns:
         list: parent-class inputs, the class-level ``additional_args``, then
         "bam", "knownSites", "reference", "outputFilename" and "intervals".
     """
     # Long help texts are kept in locals so the ToolInput calls stay readable.
     known_sites_doc = (
         "**One or more databases of known polymorphic sites used to exclude "
         "regions around known polymorphisms from analysis.** "
         "This algorithm treats every reference mismatch as an indication of error. However, real "
         "genetic variation is expected to mismatch the reference, so it is critical that a "
         "database of known polymorphic sites is given to the tool in order to skip over those sites. "
         "This tool accepts any number of Feature-containing files (VCF, BCF, BED, etc.) for use as "
         "this database. For users wishing to exclude an interval list of known variation simply "
         "use -XL my.interval.list to skip over processing those sites. Please note however "
         "that the statistics reported by the tool will not accurately reflected those sites "
         "skipped by the -XL argument."
     )
     output_table_doc = (
         "**The output recalibration table filename to create.** "
         "After the header, data records occur one per line until the end of the file. The first "
         "several items on a line are the values of the individual covariates and will change "
         "depending on which covariates were specified at runtime. The last three items are the "
         "data- that is, number of observations for this combination of covariates, number of "
         "reference mismatches, and the raw empirical quality score calculated by phred-scaling "
         "the mismatch rate. Use '/dev/stdout' to print to standard out."
     )

     tool_inputs = [
         *super(Gatk4BaseRecalibratorBase, self).inputs(),
         *Gatk4BaseRecalibratorBase.additional_args,
     ]
     tool_inputs.append(
         ToolInput(
             "bam",
             BamBai(),
             position=6,
             prefix="-I",
             doc="BAM/SAM/CRAM file containing reads",
             secondaries_present_as={".bai": "^.bai"},
         )
     )
     tool_inputs.append(
         ToolInput(
             "knownSites",
             Array(VcfTabix()),
             prefix="--known-sites",
             position=28,
             prefix_applies_to_all_elements=True,
             doc=known_sites_doc,
         )
     )
     tool_inputs.append(
         ToolInput(
             "reference",
             FastaWithDict(),
             position=5,
             prefix="-R",
             doc="Reference sequence file",
         )
     )
     tool_inputs.append(
         ToolInput(
             "outputFilename",
             # The table name takes the "bam" input as its prefix.
             Filename(prefix=InputSelector("bam"), extension=".table"),
             position=8,
             prefix="-O",
             doc=output_table_doc,
         )
     )
     tool_inputs.append(
         ToolInput(
             "intervals",
             Bed(optional=True),
             prefix="--intervals",
             doc="-L (BASE) One or more genomic intervals over which to operate",
         )
     )
     return tool_inputs
Ejemplo n.º 26
0
 def outputs(self):
     """List the single "out" BAM output, globbed from "outputFilename".

     Returns:
         list: one ToolOutput.
     """
     produced_bam = ToolOutput(
         "out",
         BamBai(),
         glob=InputSelector("outputFilename"),
         # The index is presented with the ".bai" -> "^.bai" mapping.
         secondaries_present_as={".bai": "^.bai"},
     )
     return [produced_bam]
Ejemplo n.º 27
0
 def outputs(self) -> List[ToolOutput]:
     """List the outputs: the "vcf" file and the "assembly" BAM.

     Returns:
         Two ToolOutputs, globbed from the "outputFilename" and
         "assemblyFilename" inputs respectively.
     """
     variants = ToolOutput("vcf", Vcf(), glob=InputSelector("outputFilename"))
     assembly = ToolOutput(
         "assembly",
         BamBai(),
         glob=InputSelector("assemblyFilename"),
         # The index is presented with the ".bai" -> "^.bai" mapping.
         secondaries_present_as={".bai": "^.bai"},
     )
     return [variants, assembly]
Ejemplo n.º 28
0
 def outputs(self):
     """List the single "out" BAM output, globbed from "outputFilename".

     Returns:
         list: one ToolOutput.
     """
     extracted_bam = ToolOutput(
         "out",
         BamBai(),
         glob=InputSelector("outputFilename"),
         doc="BAM to write extracted reads to",
         # The index is presented with the ".bai" -> "^.bai" mapping.
         secondaries_present_as={".bai": "^.bai"},
     )
     return [extracted_bam]
Ejemplo n.º 29
0
 def inputs(self):
     """Assemble the inputs: inherited and optional ones, then the tool's own.

     Returns:
         list: parent-class inputs, the class-level ``optional_args``, then
         "inputRead", "reference", "outputFilename", "dbsnp", "intervals" and
         "outputBamName".
     """

     def _named_after_reads(extension):
         # File name built from "inputRead" with its extension removed.
         return Filename(
             prefix=InputSelector("inputRead", remove_file_extension=True),
             extension=extension,
         )

     tool_inputs = [
         *super(Gatk4HaplotypeCallerBase, self).inputs(),
         *Gatk4HaplotypeCallerBase.optional_args,
     ]
     tool_inputs.append(
         ToolInput(
             "inputRead",
             BamBai(),
             doc="BAM/SAM/CRAM file containing reads",
             prefix="--input",
             secondaries_present_as={".bai": "^.bai"},
         )
     )
     tool_inputs.append(
         ToolInput(
             "reference",
             FastaWithDict(),
             position=5,
             prefix="--reference",
             doc="Reference sequence file",
         )
     )
     tool_inputs.append(
         ToolInput(
             "outputFilename",
             _named_after_reads(".vcf.gz"),
             position=8,
             prefix="--output",
             doc="File to which variants should be written",
         )
     )
     tool_inputs.append(
         ToolInput(
             "dbsnp",
             VcfTabix(optional=True),
             position=7,
             prefix="--dbsnp",
             doc="(Also: -D) A dbSNP VCF file.",
         )
     )
     tool_inputs.append(
         ToolInput(
             "intervals",
             Bed(optional=True),
             prefix="--intervals",
             doc="-L (BASE) One or more genomic intervals over which to operate",
         )
     )
     tool_inputs.append(
         ToolInput(
             "outputBamName",
             _named_after_reads(".bam"),
             position=8,
             prefix="-bamout",
             doc="File to which assembled haplotypes should be written",
         )
     )
     return tool_inputs
Ejemplo n.º 30
0
 def outputs(self):
     """List the single "out" BAM output, globbed from "outputFilename".

     Returns:
         list: one ToolOutput.
     """
     split_bam = ToolOutput(
         "out",
         BamBai(),
         glob=InputSelector("outputFilename"),
         doc="BAM file with reads split at N CIGAR elements and CIGAR strings updated.",
         # The index is presented with the ".bai" -> "^.bai" mapping.
         secondaries_present_as={".bai": "^.bai"},
     )
     return [split_bam]