def tests(self):
     """Return the single "basic" test case for this tool.

     The "vcf" and "reference" inputs are files in the ``wgsgermline_data``
     folder under ``BioinformaticsTool.test_data_path()``; the "out" output
     is checked with ``Vcf.basic_test``.
     """

     def local(filename):
         # Path of one file in the local "wgsgermline_data" test-data folder.
         return os.path.join(
             BioinformaticsTool.test_data_path(),
             "wgsgermline_data",
             filename,
         )

     inputs = {
         "vcf": local("NA12878-BRCA1.haplotype_uncompressed.stdout"),
         "reference": local("Homo_sapiens_assembly38.chr17.fasta"),
     }
     # NOTE(review): the positional values look like (output tag, size,
     # line count, expected text, md5) -- confirm against Vcf.basic_test.
     expected = Vcf.basic_test(
         "out",
         51462,
         221,
         ["GATKCommandLine"],
         "5e48624cb5ef379a7d6d39cec44bc856",
     )
     return [TTestCase(name="basic", input=inputs, output=expected)]
 def tests(self):
     """Return the single "basic" test case for this tool.

     The "bam", "reference" and "vcf" inputs are URLs below the remote
     ``wgsgermline_data`` folder; the remaining inputs are literal option
     values. The "out" output is checked with ``Vcf.basic_test``.
     """
     data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     def remote(filename):
         # URL of one file inside the remote data folder.
         return f"{data_url}/{filename}"

     inputs = {
         "bam": remote("NA12878-BRCA1.markduped.bam"),
         "reference": remote("Homo_sapiens_assembly38.chr17.fasta"),
         "vcf": remote("NA12878-BRCA1.sorted.uncompressed.stdout"),
         "samtoolsmpileup_countOrphans": True,
         "samtoolsmpileup_noBAQ": True,
         "samtoolsmpileup_maxDepth": 10000,
         "samtoolsmpileup_minBQ": 0,
         "addbamstats_type": "germline",
     }
     expected = Vcf.basic_test(
         "out",
         69225,
         230,
         ["GATKCommandLine"],
         "db09c6c37c52771bd058e32d5c6b94c1",
     )
     return [TTestCase(name="basic", input=inputs, output=expected)]
Esempio n. 3
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     "inputVcf" and "mpileup" are files in the ``wgsgermline_data`` folder
     under ``BioinformaticsTool.test_data_path()``; "type" is the literal
     "germline". The "out" output is checked with ``Vcf.basic_test``.
     """

     def local(filename):
         # Path of one file in the local "wgsgermline_data" test-data folder.
         return os.path.join(
             BioinformaticsTool.test_data_path(),
             "wgsgermline_data",
             filename,
         )

     inputs = {
         "inputVcf": local("NA12878-BRCA1.sorted.uncompressed.stdout"),
         "mpileup": local("NA12878-BRCA1.mpileup.stdout"),
         "type": "germline",
     }
     expected = Vcf.basic_test(
         "out",
         69225,
         230,
         ["GATKCommandLine"],
         "db09c6c37c52771bd058e32d5c6b94c1",
     )
     return [TTestCase(name="basic", input=inputs, output=expected)]
 def tests(self):
     """Return the single "basic" test case for this tool.

     The bam and vcf inputs are URLs below the remote ``wgssomatic_data``
     folder and the reference is below ``wgsgermline_data``; the sample ids
     are literals. The "out" output is checked with ``Vcf.basic_test``.
     """
     root = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics"
     germline_dir = f"{root}/wgsgermline_data"
     somatic_dir = f"{root}/wgssomatic_data"

     inputs = {
         "normal_id": "NA24385-BRCA1",
         "tumor_id": "NA12878-NA24385-mixture",
         "normal_bam": f"{somatic_dir}/NA24385-BRCA1.markduped.bam",
         "tumor_bam": f"{somatic_dir}/NA12878-NA24385-mixture.markduped.bam",
         "reference": f"{germline_dir}/Homo_sapiens_assembly38.chr17.fasta",
         "vcf": f"{somatic_dir}/uncompressed.stdout",
     }
     expected = Vcf.basic_test(
         "out",
         44094,
         156,
         ["GATKCommandLine"],
         "5fc0e861893e0a23f974808265a6917e",
     )
     return [TTestCase(name="basic", input=inputs, output=expected)]
    def tests(self) -> Optional[List[TTestCase]]:
        """Return the "brca1" test case for this workflow.

        Every input file is a URL below the remote ``petermac_testdata``
        folder; "sample_name" is the literal "NA12878". The expected output
        concatenates, in order, the checks for "out_variants_bamstats",
        "out_variants_gatk_split", "out_bam", "out_performance_summary" and
        an array of two ``ZipFile`` checks on "out_fastqc_reports".

        Returns:
            A one-element list holding the ``TTestCase``.
        """
        bioinf_base = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics"
        chr17 = f"{bioinf_base}/petermac_testdata"

        inputs = {
            "sample_name": "NA12878",
            "reference": f"{chr17}/Homo_sapiens_assembly38.chr17.fasta",
            # One list of fastq files, wrapped in an outer list.
            "fastqs": [
                [
                    f"{chr17}/NA12878-BRCA1_R1.fastq.gz",
                    f"{chr17}/NA12878-BRCA1_R2.fastq.gz",
                ]
            ],
            "gatk_intervals": [f"{chr17}/BRCA1.hg38.bed"],
            "known_indels": f"{chr17}/Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz",
            "mills_indels": f"{chr17}/Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz",
            "snps_1000gp": f"{chr17}/1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz",
            "snps_dbsnp": f"{chr17}/Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz",
            "cutadapt_adapters": f"{chr17}/contaminant_list.txt",
        }

        # The per-output checks are combined with "+" in the original order.
        expected = (
            Vcf.basic_test("out_variants_bamstats", 51300, 230)
            + Vcf.basic_test("out_variants_gatk_split", 51300, 221)
            + BamBai.basic_test("out_bam", 2822000, 49600)
            + TextFile.basic_test(
                "out_performance_summary",
                948,
                md5="575354942cfb8d0367725f9020181443",
            )
            + Array.array_wrapper(
                [
                    ZipFile.basic_test("out_fastqc_reports", 408000),
                    ZipFile.basic_test("out_fastqc_reports", 416000),
                ]
            )
        )
        return [TTestCase(name="brca1", input=inputs, output=expected)]
Esempio n. 6
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     The "bam", "reference" and "vcf" inputs are files in the
     ``wgsgermline_data`` folder under
     ``BioinformaticsTool.test_data_path()``; the remaining inputs are
     literal option values. The "out" output is checked with
     ``Vcf.basic_test``.
     """

     def local(filename):
         # Path of one file in the local "wgsgermline_data" test-data folder.
         return os.path.join(
             BioinformaticsTool.test_data_path(),
             "wgsgermline_data",
             filename,
         )

     inputs = {
         "bam": local("NA12878-BRCA1.markduped.bam"),
         "reference": local("Homo_sapiens_assembly38.chr17.fasta"),
         "vcf": local("NA12878-BRCA1.sorted.uncompressed.stdout"),
         "samtoolsmpileup_countOrphans": True,
         "samtoolsmpileup_noBAQ": True,
         "samtoolsmpileup_maxDepth": 10000,
         "samtoolsmpileup_minBQ": 0,
         "addbamstats_type": "germline",
     }
     expected = Vcf.basic_test(
         "out",
         69225,
         230,
         ["GATKCommandLine"],
         "db09c6c37c52771bd058e32d5c6b94c1",
     )
     return [TTestCase(name="basic", input=inputs, output=expected)]
Esempio n. 7
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     File inputs are URLs below the remote ``wgssomatic_data`` and
     ``wgsgermline_data`` folders; the rest are literal option values. The
     expected output concatenates, in order, the checks for "out",
     "variants" and "out_bam".
     """
     root = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics"
     germline_dir = f"{root}/wgsgermline_data"
     somatic_dir = f"{root}/wgssomatic_data"

     inputs = {
         "normal_bam": f"{somatic_dir}/NA24385-BRCA1.markduped.recalibrated.bam",
         "tumor_bam": f"{somatic_dir}/NA12878-NA24385-mixture.markduped.recalibrated.bam",
         "reference": f"{germline_dir}/Homo_sapiens_assembly38.chr17.fasta",
         "gnomad": f"{somatic_dir}/af-only-gnomad.hg38.BRCA1.vcf.gz",
         "intervals": f"{germline_dir}/BRCA1.hg38.bed",
         "normal_name": "NA24385-BRCA1",
         "filterpass_removeFileteredAll": True,
         "filterpass_recode": True,
         "filterpass_recodeINFOAll": True,
         "output_bam_name": "mutect2.bam",
     }

     # Build each output check separately, then join them in the same order.
     out_check = Vcf.basic_test(
         "out",
         33000,
         147,
         ["GATKCommandLine"],
         "c083775bc8c49397fb65ec12cd435688",
     )
     variants_check = VcfTabix.basic_test(
         "variants",
         13000,
         260,
         182,
         ["GATKCommandLine"],
         "6cfd70dda8599a270978868166ab6545",
     )
     bam_check = BamBai.basic_test(
         "out_bam",
         813200,
         21200,
         f"{somatic_dir}/somatic_variant_caller.flagstat",
     )
     return [
         TTestCase(
             name="basic",
             input=inputs,
             output=out_check + variants_check + bam_check,
         ),
     ]
Esempio n. 8
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     "vcfs" is a one-element list holding a URL below the remote
     ``wgsgermline_data`` folder and "javaOptions" is ``["-Xmx6G"]``. The
     "out" output is checked with ``Vcf.basic_test``.
     """
     data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"
     norm_vcf = f"{data_url}/NA12878-BRCA1.norm.vcf"

     inputs = {
         "javaOptions": ["-Xmx6G"],
         "vcfs": [norm_vcf],
     }
     expected = Vcf.basic_test(
         "out",
         51615,
         221,
         ["GATKCommandLine"],
         "b7acb0a9900713cc7da7aeed5160c971",
     )
     return [TTestCase(name="basic", input=inputs, output=expected)]
Esempio n. 9
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     "inputVcf" and "mpileup" are URLs below the remote
     ``wgsgermline_data`` folder; "type" is the literal "germline". The
     "out" output is checked with ``Vcf.basic_test``.
     """
     data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     def remote(filename):
         # URL of one file inside the remote data folder.
         return f"{data_url}/{filename}"

     inputs = {
         "inputVcf": remote("NA12878-BRCA1.sorted.uncompressed.stdout"),
         "mpileup": remote("NA12878-BRCA1.mpileup.stdout"),
         "type": "germline",
     }
     expected = Vcf.basic_test(
         "out",
         69225,
         230,
         ["GATKCommandLine"],
         "db09c6c37c52771bd058e32d5c6b94c1",
     )
     return [TTestCase(name="basic", input=inputs, output=expected)]
Esempio n. 10
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     The "bam", "intervals", "reference" and "snps_dbsnp" inputs are files
     in the ``wgsgermline_data`` folder under
     ``BioinformaticsTool.test_data_path()``;
     "haplotype_caller_pairHmmImplementation" is the literal
     "LOGLESS_CACHING". The "out" output is checked with ``Vcf.basic_test``.
     """

     def local(filename):
         # Path of one file in the local "wgsgermline_data" test-data folder.
         return os.path.join(
             BioinformaticsTool.test_data_path(),
             "wgsgermline_data",
             filename,
         )

     inputs = {
         "bam": local("NA12878-BRCA1.recalibrated.bam"),
         "intervals": local("BRCA1.hg38.bed"),
         "reference": local("Homo_sapiens_assembly38.chr17.fasta"),
         "snps_dbsnp": local("Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz"),
         "haplotype_caller_pairHmmImplementation": "LOGLESS_CACHING",
     }
     expected = Vcf.basic_test(
         "out",
         51000,
         221,
         ["GATKCommandLine"],
         "5e48624cb5ef379a7d6d39cec44bc856",
     )
     return [TTestCase(name="basic", input=inputs, output=expected)]
Esempio n. 11
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     "vcf" is a URL below the remote ``wgssomatic_data`` folder and the
     three remaining inputs are set to ``True``. The "out" output is
     checked with ``Vcf.basic_test``.
     """
     root = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics"
     somatic_dir = f"{root}/wgssomatic_data"

     inputs = {
         "vcf": f"{somatic_dir}/stdout.norm.vcf",
         "removeFileteredAll": True,
         "recode": True,
         "recodeINFOAll": True,
     }
     expected = Vcf.basic_test(
         "out",
         34393,
         147,
         ["GATKCommandLine"],
         "c083775bc8c49397fb65ec12cd435688",
     )
     return [TTestCase(name="basic", input=inputs, output=expected)]
Esempio n. 12
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     "vcfs" is a one-element list holding a file path in the
     ``wgsgermline_data`` folder under
     ``BioinformaticsTool.test_data_path()`` and "javaOptions" is
     ``["-Xmx6G"]``. The "out" output is checked with ``Vcf.basic_test``.
     """
     java_options = ["-Xmx6G"]
     norm_vcf = os.path.join(
         BioinformaticsTool.test_data_path(),
         "wgsgermline_data",
         "NA12878-BRCA1.norm.vcf",
     )

     inputs = {
         "javaOptions": java_options,
         "vcfs": [norm_vcf],
     }
     expected = Vcf.basic_test(
         "out",
         51615,
         221,
         ["GATKCommandLine"],
         "b7acb0a9900713cc7da7aeed5160c971",
     )
     return [TTestCase(name="basic", input=inputs, output=expected)]
Esempio n. 13
0
 def tests(self):
     """Return the single "basic" test case for this tool.

     The "bam", "intervals", "reference" and "snps_dbsnp" inputs are URLs
     below the remote ``wgsgermline_data`` folder;
     "haplotype_caller_pairHmmImplementation" is the literal
     "LOGLESS_CACHING". The "out" output is checked with ``Vcf.basic_test``.
     """
     data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

     def remote(filename):
         # URL of one file inside the remote data folder.
         return f"{data_url}/{filename}"

     inputs = {
         "bam": remote("NA12878-BRCA1.recalibrated.bam"),
         "intervals": remote("BRCA1.hg38.bed"),
         "reference": remote("Homo_sapiens_assembly38.chr17.fasta"),
         "snps_dbsnp": remote("Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz"),
         "haplotype_caller_pairHmmImplementation": "LOGLESS_CACHING",
     }
     expected = Vcf.basic_test(
         "out",
         51000,
         221,
         ["GATKCommandLine"],
         "5e48624cb5ef379a7d6d39cec44bc856",
     )
     return [TTestCase(name="basic", input=inputs, output=expected)]
Esempio n. 14
0
    def tests(self) -> Optional[List[TTestCase]]:
        """Return the "basic" test case for this workflow.

        Every input file is a URL below the remote ``petermac_testdata``
        folder; the sample names are literals. The expected output
        concatenates, in order, the checks for the normal and tumor bams,
        the two performance summaries, "out_variants_gatk", "out_variants",
        an array holding one "out_variants_split" check, and the normal and
        tumor fastqc report outputs.

        Returns:
            A one-element list holding the ``TTestCase``.
        """
        bioinf_base = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics"
        chr17 = f"{bioinf_base}/petermac_testdata"

        inputs = {
            # R1/R2 fastq files for the normal sample, following the same
            # naming as the tumor files below.
            "normal_inputs": [
                [
                    f"{chr17}/NA24385-BRCA1_R1.fastq.gz",
                    f"{chr17}/NA24385-BRCA1_R2.fastq.gz",
                ]
            ],
            "normal_name": "NA24385-BRCA1",
            "tumor_inputs": [
                [
                    f"{chr17}/NA12878-NA24385-mixture-BRCA1_R1.fastq.gz",
                    f"{chr17}/NA12878-NA24385-mixture-BRCA1_R2.fastq.gz",
                ]
            ],
            "tumor_name": "NA12878-NA24385-mixture",
            "reference": f"{chr17}/Homo_sapiens_assembly38.chr17.fasta",
            "gridss_blacklist": f"{chr17}/consensusBlacklist.hg38.chr17.bed",
            "gnomad": f"{chr17}/af-only-gnomad.hg38.BRCA1.vcf.gz",
            "gatk_intervals": [f"{chr17}/BRCA1.hg38.bed"],
            "known_indels": f"{chr17}/Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz",
            "mills_indels": f"{chr17}/Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz",
            "snps_1000gp": f"{chr17}/1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz",
            "snps_dbsnp": f"{chr17}/Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz",
            "cutadapt_adapters": f"{chr17}/contaminant_list.txt",
        }

        # The per-output checks are combined with "+" in the original order.
        # NOTE(review): the fastqc report outputs are checked with
        # FastqGzPair.basic_test here -- confirm that is the intended type.
        expected = (
            BamBai.basic_test("out_normal_bam", 3265300, 49500)
            + BamBai.basic_test("out_tumor_bam", 3341700, 49000)
            + TextFile.basic_test(
                "out_normal_performance_summary",
                950,
                md5="e3205735e5fe8c900f05050f8ed73f19",
            )
            + TextFile.basic_test(
                "out_tumor_performance_summary",
                950,
                md5="122bfa2ece90c0f030015feba4ba7d84",
            )
            + CompressedVcf.basic_test(
                "out_variants_gatk",
                9040,
                147,
                ["GATKCommandLine"],
                "a2e4f96c451754ef8cba80494ed98a70",
            )
            + Vcf.basic_test(
                "out_variants",
                44090,
                156,
                ["GATKCommandLine"],
                "5fc0e861893e0a23f974808265a6917e",
            )
            + Array.array_wrapper(
                [
                    Vcf.basic_test(
                        "out_variants_split",
                        34390,
                        147,
                        ["GATKCommandLine"],
                        "c083775bc8c49397fb65ec12cd435688",
                    )
                ]
            )
            + FastqGzPair.basic_test(
                "out_normal_fastqc_reports", 441500, 439800
            )
            + FastqGzPair.basic_test(
                "out_tumor_fastqc_reports", 434900, 440000
            )
        )
        return [TTestCase(name="basic", input=inputs, output=expected)]