Esempio n. 1
0
 def tests(self):
     """Return the test cases declared for this tool.

     Two cases are produced, "basic" and "minimal". Both receive the same
     pair of remote FASTQ reads and a thread count of 1; they differ only in
     the expected outputs they are checked against.
     """
     remote_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     def build_input():
         # A fresh dict (and list) per case so the cases share no mutable state.
         read_one = f"{remote_dir}/NA12878-BRCA1_R1.fastq.gz"
         read_two = f"{remote_dir}/NA12878-BRCA1_R2.fastq.gz"
         return {"reads": [read_one, read_two], "threads": 1}

     # Expected outputs of the "basic" case: checks on the "out" pair followed
     # by an array-wrapped check on "datafile".
     pair_checks = FastqGzPair.basic_test("out", 824000, 408000, 416000)
     datafile_checks = Array.array_wrapper([TextFile.basic_test("datafile", 81000)])

     basic_case = TTestCase(
         name="basic",
         input=build_input(),
         output=pair_checks + datafile_checks,
     )
     minimal_case = TTestCase(
         name="minimal",
         input=build_input(),
         output=self.minimal_test(),
     )
     return [basic_case, minimal_case]
Esempio n. 2
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     The case is fed a remote merged BAM plus Java/tmp options and expects
     checks on the "out" and "metrics" outputs.
     """
     remote_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     case_input = {
         "bam": [f"{remote_dir}/NA12878-BRCA1.merged.bam"],
         "javaOptions": ["-Xmx6G"],
         "maxRecordsInRam": 5000000,
         "createIndex": True,
         "tmpDir": "./tmp",
     }

     # Checks for the "out" output, referencing the remote flagstat file.
     bam_checks = BamBai.basic_test(
         "out",
         2829000,
         3780,
         f"{remote_dir}/NA12878-BRCA1.markduped.bam.flagstat",
     )
     # Checks for the "metrics" text output.
     metrics_checks = TextFile.basic_test(
         "metrics",
         3700,
         "NA12878-BRCA1\t193\t9468\t164\t193\t46\t7\t1\t0.003137\t7465518",
         112,
     )

     basic_case = TTestCase(
         name="basic",
         input=case_input,
         output=bam_checks + metrics_checks,
     )
     return [basic_case]
Esempio n. 3
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     Inputs are remote positions, reference and BAM files together with four
     scalar options; the "out" text output is checked.
     """
     remote_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     def remote(name):
         # Resolve a file name against the remote test-data directory.
         return f"{remote_dir}/{name}"

     case_input = {
         "positions": remote("NA12878-BRCA1.sorted.uncompressed.stdout"),
         "reference": remote("Homo_sapiens_assembly38.chr17.fasta"),
         "bam": remote("NA12878-BRCA1.markduped.bam"),
         "countOrphans": True,
         "noBAQ": True,
         "maxDepth": 10000,
         "minBQ": 0,
     }
     expected = TextFile.basic_test(
         "out",
         19900,
         "chr17\t43044391\tG\t19\tA,A,,A.a,,A,,A..,,a\tDJCJ:FHDDBJBBJJIDDB",
         187,
         "53c3e03c20730ff45411087444379b1b",
     )
     return [TTestCase(name="basic", input=case_input, output=expected)]
Esempio n. 4
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     The expected content of the "out" output is read from a local metrics
     file with its first five lines dropped; the "outHistogram" output is
     only checked for a file size of at least 15600.
     """
     header_lines = 5
     metrics_path = os.path.join(
         BioinformaticsTool.test_data_path(),
         "wgsgermline_data",
         "NA12878-BRCA1.markduped.metrics.txt",
     )
     # The leading header lines change with every run (time, etc), so they
     # are skipped before the remainder is captured for comparison.
     with open(metrics_path, "r") as handle:
         for _ in range(header_lines):
             next(handle)
         expected_content = handle.read()

     bam_path = os.path.join(
         BioinformaticsTool.test_data_path(),
         "wgsgermline_data",
         "NA12878-BRCA1.markduped.bam",
     )
     out_checks = TextFile.basic_test("out", 7260, expected_content, 905)
     histogram_check = TTestExpectedOutput(
         tag="outHistogram",
         preprocessor=TTestPreprocessor.FileSize,
         operator=operator.ge,
         expected_value=15600,
     )

     basic_case = TTestCase(
         name="basic",
         input={"bam": bam_path, "javaOptions": ["-Xmx6G"]},
         output=out_checks + [histogram_check],
     )
     return [basic_case]
Esempio n. 5
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     The case is fed a local merged BAM plus Java/tmp options and expects
     checks on the "out" and "metrics" outputs.
     """

     def local(name):
         # Resolve a file name inside the local "wgsgermline_data" directory.
         return os.path.join(
             BioinformaticsTool.test_data_path(), "wgsgermline_data", name
         )

     case_input = {
         "bam": [local("NA12878-BRCA1.merged.bam")],
         "javaOptions": ["-Xmx6G"],
         "maxRecordsInRam": 5000000,
         "createIndex": True,
         "tmpDir": "./tmp",
     }

     # Checks for the "out" output, referencing the local flagstat file.
     bam_checks = BamBai.basic_test(
         "out",
         2829000,
         3780,
         local("NA12878-BRCA1.markduped.bam.flagstat"),
     )
     # Checks for the "metrics" text output.
     metrics_checks = TextFile.basic_test(
         "metrics",
         3700,
         "NA12878-BRCA1\t193\t9468\t164\t193\t46\t7\t1\t0.003137\t7465518",
         112,
     )

     basic_case = TTestCase(
         name="basic",
         input=case_input,
         output=bam_checks + metrics_checks,
     )
     return [basic_case]
Esempio n. 6
0
 def tests(self):
     """Return the "basic" and "minimal" test cases for this tool.

     Both cases take the same local reference FASTA as their only input; the
     "basic" case checks the "out" text output, the "minimal" case uses the
     result of ``self.minimal_test()``.
     """

     def build_input():
         # Built anew for every case so the cases do not share a dict.
         reference = os.path.join(
             BioinformaticsTool.test_data_path(),
             "wgsgermline_data",
             "Homo_sapiens_assembly38.chr17.fasta",
         )
         return {"reference": reference}

     basic_case = TTestCase(
         name="basic",
         input=build_input(),
         output=TextFile.basic_test("out", 15, "chr17\t83257441\n", 1),
     )
     minimal_case = TTestCase(
         name="minimal",
         input=build_input(),
         output=self.minimal_test(),
     )
     return [basic_case, minimal_case]
Esempio n. 7
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     The input BAM is remote, while the expected content of the "out" output
     is read from a local metrics file with its first five lines dropped.
     The "outHistogram" output is only checked for a file size of at least
     15600.
     """
     remote_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"
     header_lines = 5
     metrics_path = os.path.join(
         BioinformaticsTool.test_data_path(),
         "NA12878-BRCA1.markduped.metrics.txt",
     )
     # The leading header lines change with every run (time, etc), so they
     # are skipped before the remainder is captured for comparison.
     with open(metrics_path, "r") as handle:
         for _ in range(header_lines):
             next(handle)
         expected_content = handle.read()

     case_input = {
         "bam": f"{remote_dir}/NA12878-BRCA1.markduped.bam",
         "javaOptions": ["-Xmx6G"],
     }
     out_checks = TextFile.basic_test("out", 7260, expected_content, 905)
     histogram_check = TTestExpectedOutput(
         tag="outHistogram",
         preprocessor=TTestPreprocessor.FileSize,
         operator=operator.ge,
         expected_value=15600,
     )

     basic_case = TTestCase(
         name="basic",
         input=case_input,
         output=out_checks + [histogram_check],
     )
     return [basic_case]
Esempio n. 8
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     Four remote report files and a ``genome`` flag are supplied as input;
     the "out" output is checked for minimum size, line count, md5 and
     against a remote expected CSV file.
     """
     remote_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     def remote(name):
         # Resolve a file name against the remote test-data directory.
         return f"{remote_dir}/{name}"

     case_input = {
         "flagstat": remote("NA12878-BRCA1.markduped.bam.flagstat"),
         "collectInsertSizeMetrics": remote("NA12878-BRCA1.markduped.metrics.txt"),
         "coverage": remote("NA12878-BRCA1.genomeCoverageBed.stdout"),
         "rmdupFlagstat": remote("NA12878-BRCA1.markduped.bam.bam.flagstat"),
         "genome": True,
     }
     expected = TextFile.basic_test(
         tag="out",
         min_size=948,
         line_count=2,
         md5="575354942cfb8d0367725f9020181443",
         expected_file_path=remote("NA12878-BRCA1_performance_summary.csv"),
     )
     return [TTestCase(name="basic", input=case_input, output=expected)]
Esempio n. 9
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     The case takes a local BAM ("inputBam") and a local genome file
     ("genome") and checks the "out" text output.
     """

     def local(name):
         # Resolve a file name inside the local "wgsgermline_data" directory.
         return os.path.join(
             BioinformaticsTool.test_data_path(), "wgsgermline_data", name
         )

     case_input = {
         "inputBam": local("NA12878-BRCA1.markduped.bam.bam"),
         "genome": local("NA12878-BRCA1.genome_file.txt"),
     }
     expected = TextFile.basic_test(
         "out",
         7432,
         "chr17\t0\t83144233\t83257441\t0.99864",
         220,
         "f2007353bbd18f0a04eae9499d7c6a91",
     )
     return [TTestCase(name="basic", input=case_input, output=expected)]
Esempio n. 10
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     Inputs are local positions, reference and BAM files together with four
     scalar options; the "out" text output is checked.
     """

     def local(name):
         # Resolve a file name inside the local "wgsgermline_data" directory.
         return os.path.join(
             BioinformaticsTool.test_data_path(), "wgsgermline_data", name
         )

     case_input = {
         "positions": local("NA12878-BRCA1.sorted.uncompressed.stdout"),
         "reference": local("Homo_sapiens_assembly38.chr17.fasta"),
         "bam": local("NA12878-BRCA1.markduped.bam"),
         "countOrphans": True,
         "noBAQ": True,
         "maxDepth": 10000,
         "minBQ": 0,
     }
     expected = TextFile.basic_test(
         "out",
         19900,
         "chr17\t43044391\tG\t19\tA,A,,A.a,,A,,A..,,a\tDJCJ:FHDDBJBBJJIDDB",
         187,
         "53c3e03c20730ff45411087444379b1b",
     )
     return [TTestCase(name="basic", input=case_input, output=expected)]
Esempio n. 11
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     The case takes a remote pileup table and a segmentation output file
     name, and checks the "contOut" and "segOut" text outputs.
     """
     parent_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics"
     somatic_data = f"{parent_dir}/wgssomatic_data"

     case_input = {
         "javaOptions": ["-Xmx6G"],
         "pileupTable": f"{somatic_data}/generated.txt",
         "segmentationFileOut": "generated.txt.mutect2_segments",
     }

     contamination_checks = TextFile.basic_test(
         "contOut",
         59,
         "sample\tcontamination\terror\nNA12878-NA24385-mixture\t0.0\t0.0",
     )
     segment_checks = TextFile.basic_test(
         "segOut",
         125,
         "contig\tstart\tend\tminor_allele_fraction\nchr17\t43045941\t43098543\t0.28541019662496847",
     )

     basic_case = TTestCase(
         name="basic",
         input=case_input,
         output=contamination_checks + segment_checks,
     )
     return [basic_case]
Esempio n. 12
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     Inputs are a local BAM, reference, four known-sites VCFs, an intervals
     BED file and Java options; the "out" text output is checked.
     """

     def local(name):
         # Resolve a file name inside the local "wgsgermline_data" directory.
         return os.path.join(
             BioinformaticsTool.test_data_path(), "wgsgermline_data", name
         )

     bam_path = local("NA12878-BRCA1.markduped.bam")
     reference_path = local("Homo_sapiens_assembly38.chr17.fasta")
     known_site_names = (
         "Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz",
         "Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz",
         "Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz",
         "1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz",
     )
     known_sites = [local(site) for site in known_site_names]
     intervals_path = local("BRCA1.hg38.bed")

     case_input = {
         "bam": bam_path,
         "reference": reference_path,
         "knownSites": known_sites,
         "intervals": intervals_path,
         "javaOptions": ["-Xmx12G"],
     }
     expected = TextFile.basic_test("out", 1131758, "#:GATKReport.v1.1:5", 10376)
     return [TTestCase(name="basic", input=case_input, output=expected)]
Esempio n. 13
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     The only input is a remote BAM; the "out" text output is checked.
     """
     remote_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     case_input = {"bam": f"{remote_dir}/NA12878-BRCA1.markduped.bam"}
     expected = TextFile.basic_test(
         "out",
         410,
         "19486 + 0 in total (QC-passed reads + QC-failed reads)",
         13,
         "ddbcfe52e60b925d222fb8bc1517a7a0",
     )
     return [TTestCase(name="basic", input=case_input, output=expected)]
Esempio n. 14
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     The case takes a remote BAM ("inputBam") and a remote genome file
     ("genome") and checks the "out" text output.
     """
     remote_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     input_bam = f"{remote_dir}/NA12878-BRCA1.markduped.bam.bam"
     genome_file = f"{remote_dir}/NA12878-BRCA1.genome_file.txt"
     expected = TextFile.basic_test(
         "out",
         7432,
         "chr17\t0\t83144233\t83257441\t0.99864",
         220,
         "f2007353bbd18f0a04eae9499d7c6a91",
     )

     basic_case = TTestCase(
         name="basic",
         input={"inputBam": input_bam, "genome": genome_file},
         output=expected,
     )
     return [basic_case]
 def tests(self):
     """Return the "basic" and "minimal" test cases for this tool.

     Both cases take the same remote reference FASTA as their only input; the
     "basic" case checks the "out" text output, the "minimal" case uses the
     result of ``self.minimal_test()``.
     """
     remote_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     def build_input():
         # Built anew for every case so the cases do not share a dict.
         return {"reference": f"{remote_dir}/Homo_sapiens_assembly38.chr17.fasta"}

     basic_case = TTestCase(
         name="basic",
         input=build_input(),
         output=TextFile.basic_test("out", 15, "chr17\t83257441\n", 1),
     )
     minimal_case = TTestCase(
         name="minimal",
         input=build_input(),
         output=self.minimal_test(),
     )
     return [basic_case, minimal_case]
Esempio n. 16
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     The only input is a local BAM; the "out" text output is checked.
     """
     bam_path = os.path.join(
         BioinformaticsTool.test_data_path(),
         "wgsgermline_data",
         "NA12878-BRCA1.markduped.bam",
     )
     expected = TextFile.basic_test(
         "out",
         410,
         "19486 + 0 in total (QC-passed reads + QC-failed reads)",
         13,
         "ddbcfe52e60b925d222fb8bc1517a7a0",
     )
     return [TTestCase(name="basic", input={"bam": bam_path}, output=expected)]
Esempio n. 17
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     Four local report files and a ``genome`` flag are supplied as input;
     the "out" output is checked for minimum size, line count, md5 and
     against a local expected CSV file.
     """

     def local(name):
         # Resolve a file name inside the local "wgsgermline_data" directory.
         return os.path.join(
             BioinformaticsTool.test_data_path(), "wgsgermline_data", name
         )

     case_input = {
         "flagstat": local("NA12878-BRCA1.markduped.bam.flagstat"),
         "collectInsertSizeMetrics": local("NA12878-BRCA1.markduped.metrics.txt"),
         "coverage": local("NA12878-BRCA1.genomeCoverageBed.stdout"),
         "rmdupFlagstat": local("NA12878-BRCA1.markduped.bam.bam.flagstat"),
         "genome": True,
     }
     expected = TextFile.basic_test(
         tag="out",
         min_size=948,
         line_count=2,
         md5="575354942cfb8d0367725f9020181443",
         expected_file_path=local("NA12878-BRCA1_performance_summary.csv"),
     )
     return [TTestCase(name="basic", input=case_input, output=expected)]
Esempio n. 18
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     Inputs are a remote somatic BAM (as a one-element list), a remote sites
     VCF, a remote intervals BED file and Java options; the "out" text
     output is checked with an md5.
     """
     parent_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics"
     germline_data = f"{parent_dir}/wgsgermline_data"
     somatic_data = f"{parent_dir}/wgssomatic_data"

     bam_files = [
         f"{somatic_data}/NA12878-NA24385-mixture.markduped.recalibrated.bam"
     ]
     case_input = {
         "javaOptions": ["-Xmx48G"],
         "bam": bam_files,
         "sites": f"{somatic_data}/af-only-gnomad.hg38.BRCA1.vcf.gz",
         "intervals": f"{germline_data}/BRCA1.hg38.bed",
     }
     expected = TextFile.basic_test(
         "out",
         2592,
         md5="54672b8b13d46aaef25c56351c82a3f4",
     )
     return [TTestCase(name="basic", input=case_input, output=expected)]
Esempio n. 19
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     Inputs are a remote BAM, reference, four known-sites VCFs, an intervals
     BED file and Java options; the "out" text output is checked.
     """
     remote_dir = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     def remote(name):
         # Resolve a file name against the remote test-data directory.
         return f"{remote_dir}/{name}"

     known_site_names = (
         "Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz",
         "Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz",
         "Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz",
         "1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz",
     )
     case_input = {
         "bam": remote("NA12878-BRCA1.markduped.bam"),
         "reference": remote("Homo_sapiens_assembly38.chr17.fasta"),
         "knownSites": [remote(site) for site in known_site_names],
         "intervals": remote("BRCA1.hg38.bed"),
         "javaOptions": ["-Xmx12G"],
     }
     expected = TextFile.basic_test("out", 1131758, "#:GATKReport.v1.1:5", 10376)
     return [TTestCase(name="basic", input=case_input, output=expected)]