def tests(self) -> Optional[List[TTestCase]]:
    """Return the single "basic" test case for this workflow.

    Every file input is a remote URL below the ``wgsgermline_data`` or
    ``wgssomatic_data`` folder of the janis-test-data store.  The expected
    output is the concatenation of the ``basic_test`` expectations for the
    normal/tumor BAMs, the two performance summaries and the two
    ``*_fastqc_reports`` outputs.
    """
    bucket = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics"
    germline = f"{bucket}/wgsgermline_data"
    somatic = f"{bucket}/wgssomatic_data"

    # NOTE(review): the second normal read file ends in "_R21" while the
    # tumor pair uses "_R2" -- confirm the remote object really has this name.
    normal_reads = [
        f"{somatic}/NA24385-BRCA1_R1.fastq.gz",
        f"{somatic}/NA24385-BRCA1_R21.fastq.gz",
    ]
    tumor_reads = [
        f"{somatic}/NA12878-NA24385-mixture-BRCA1_R1.fastq.gz",
        f"{somatic}/NA12878-NA24385-mixture-BRCA1_R2.fastq.gz",
    ]

    workflow_inputs = {
        "normal_inputs": [normal_reads],
        "normal_name": "NA24385-BRCA1",
        "tumor_inputs": [tumor_reads],
        "tumor_name": "NA12878-NA24385-mixture",
        "reference": f"{germline}/Homo_sapiens_assembly38.chr17.fasta",
        "gridss_blacklist": f"{somatic}/consensusBlacklist.hg38.chr17.bed",
        "gnomad": f"{somatic}/af-only-gnomad.hg38.BRCA1.vcf.gz",
        "gatk_intervals": [f"{germline}/BRCA1.hg38.bed"],
        "known_indels": f"{germline}/Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz",
        "mills_indels": f"{germline}/Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz",
        "snps_1000gp": f"{germline}/1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz",
        "snps_dbsnp": f"{germline}/Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz",
        "cutadapt_adapters": f"{germline}/contaminant_list.txt",
    }

    expected = (
        BamBai.basic_test("out_normal_bam", 3265300, 49500)
        + BamBai.basic_test("out_tumor_bam", 3341700, 49000)
        + TextFile.basic_test(
            "out_normal_performance_summary",
            950,
            md5="e3205735e5fe8c900f05050f8ed73f19",
        )
        + TextFile.basic_test(
            "out_tumor_performance_summary",
            950,
            md5="122bfa2ece90c0f030015feba4ba7d84",
        )
        + FastqGzPair.basic_test("out_normal_fastqc_reports", 881300)
        + FastqGzPair.basic_test("out_tumor_fastqc_reports", 874900)
    )

    return [TTestCase(name="basic", input=workflow_inputs, output=expected)]
def tests(self):
    """Return the single "basic" test case, using remote test data.

    The case feeds one merged BAM URL plus a few tool options and expects
    the "out" BAM/index pair and the "metrics" text file to satisfy the
    checks built by ``BamBai.basic_test`` and ``TextFile.basic_test``.
    """
    data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

    tool_inputs = {
        "bam": [f"{data_url}/NA12878-BRCA1.merged.bam"],
        "javaOptions": ["-Xmx6G"],
        "maxRecordsInRam": 5000000,
        "createIndex": True,
        "tmpDir": "./tmp",
    }

    bam_checks = BamBai.basic_test(
        "out",
        2829000,
        3780,
        f"{data_url}/NA12878-BRCA1.markduped.bam.flagstat",
    )
    # The third argument is a tab-separated line passed verbatim to
    # TextFile.basic_test.
    metrics_checks = TextFile.basic_test(
        "metrics",
        3700,
        "NA12878-BRCA1\t193\t9468\t164\t193\t46\t7\t1\t0.003137\t7465518",
        112,
    )

    return [
        TTestCase(
            name="basic",
            input=tool_inputs,
            output=bam_checks + metrics_checks,
        )
    ]
def tests(self):
    """Return the "basic" and "minimal" test cases, using remote test data.

    Both cases receive the same four file inputs (bam, reference, recalFile,
    intervals).  "basic" checks the "out" BAM/index pair via
    ``BamBai.basic_test``; "minimal" uses ``self.minimal_test()``.
    """
    data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

    filenames = {
        "bam": "NA12878-BRCA1.markduped.bam",
        "reference": "Homo_sapiens_assembly38.chr17.fasta",
        "recalFile": "NA12878-BRCA1.markduped.table",
        "intervals": "BRCA1.hg38.bed",
    }

    def build_inputs():
        # A fresh dict per test case, so the cases never share a mutable object.
        return {key: f"{data_url}/{fname}" for key, fname in filenames.items()}

    basic_case = TTestCase(
        name="basic",
        input=build_inputs(),
        output=BamBai.basic_test(
            "out",
            2600000,
            21000,
            f"{data_url}/NA12878-BRCA1.recalibrated.flagstat",
        ),
    )
    minimal_case = TTestCase(
        name="minimal",
        input=build_inputs(),
        output=self.minimal_test(),
    )
    return [basic_case, minimal_case]
def tests(self):
    """Return the single "basic" test case, using remote test data.

    One sorted BAM URL is supplied under "bams" together with tool options;
    the "out" BAM/index pair is checked with ``BamBai.basic_test``.
    """
    data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

    tool_inputs = {
        "bams": [f"{data_url}/NA12878-BRCA1.sorted.bam"],
        "createIndex": True,
        "validationStringency": "SILENT",
        "javaOptions": ["-Xmx6G"],
        "maxRecordsInRam": 5000000,
        "tmpDir": "./tmp",
        "useThreading": True,
    }

    # NOTE(review): the two trailing hex strings look like md5 digests
    # expected by BamBai.basic_test -- confirm against its signature.
    expected = BamBai.basic_test(
        "out",
        2826968,
        49688,
        f"{data_url}/NA12878-BRCA1.bam.flagstat",
        "963a51f7feed5b829319b947961b8a3e",
        "231c10d0e43766170f5a7cd1b8a6d14e",
    )

    return [TTestCase(name="basic", input=tool_inputs, output=expected)]
def tests(self):
    """Return the single "basic" test case, using local test data.

    File paths are resolved below ``BioinformaticsTool.test_data_path()`` in
    the "wgsgermline_data" folder.  The "out" BAM/index pair and the
    "metrics" text file are checked.
    """

    def germline_file(filename):
        # Looked up on every call, exactly as the inline os.path.join calls did.
        return os.path.join(
            BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
        )

    tool_inputs = {
        "bam": [germline_file("NA12878-BRCA1.merged.bam")],
        "javaOptions": ["-Xmx6G"],
        "maxRecordsInRam": 5000000,
        "createIndex": True,
        "tmpDir": "./tmp",
    }

    expected = BamBai.basic_test(
        "out",
        2829000,
        3780,
        germline_file("NA12878-BRCA1.markduped.bam.flagstat"),
    ) + TextFile.basic_test(
        "metrics",
        3700,
        "NA12878-BRCA1\t193\t9468\t164\t193\t46\t7\t1\t0.003137\t7465518",
        112,
    )

    return [TTestCase(name="basic", input=tool_inputs, output=expected)]
def tests(self):
    """Return the "basic" and "minimal" test cases, using remote test data.

    Both cases use identical inputs (one sorted BAM URL plus options).
    "basic" checks the "out" BAM/index pair via ``BamBai.basic_test``;
    "minimal" uses ``self.minimal_test()``.
    """
    data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

    def build_inputs():
        # Built anew for each case so the cases do not share mutable objects.
        return {
            "bams": [f"{data_url}/NA12878-BRCA1.sorted.bam"],
            "maxRecordsInRam": 5000000,
            "createIndex": True,
            "mergeSamFiles_useThreading": True,
            "mergeSamFiles_validationStringency": "SILENT",
        }

    cases = []
    cases.append(
        TTestCase(
            name="basic",
            input=build_inputs(),
            output=BamBai.basic_test(
                "out",
                2829000,
                3780,
                f"{data_url}/NA12878-BRCA1.markduped.bam.flagstat",
            ),
        )
    )
    cases.append(
        TTestCase(
            name="minimal",
            input=build_inputs(),
            output=self.minimal_test(),
        )
    )
    return cases
def tests(self):
    """Return the single "basic" test case, using local test data.

    Paths are resolved below ``BioinformaticsTool.test_data_path()`` in the
    "wgsgermline_data" folder.  The "out" BAM/index pair is checked with
    ``BamBai.basic_test``.
    """

    def germline_file(filename):
        # Resolves the data root on each call, like the inline joins did.
        return os.path.join(
            BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
        )

    tool_inputs = {
        "bams": [germline_file("NA12878-BRCA1.sorted.bam")],
        "createIndex": True,
        "validationStringency": "SILENT",
        "javaOptions": ["-Xmx6G"],
        "maxRecordsInRam": 5000000,
        "tmpDir": "./tmp",
        "useThreading": True,
    }

    expected = BamBai.basic_test(
        "out",
        2826968,
        49688,
        germline_file("NA12878-BRCA1.bam.flagstat"),
        "963a51f7feed5b829319b947961b8a3e",
        "231c10d0e43766170f5a7cd1b8a6d14e",
    )

    return [TTestCase(name="basic", input=tool_inputs, output=expected)]
def tests(self):
    """Return the single "basic" test case, using local test data.

    A recalibrated BAM and an intervals BED file (both below
    ``BioinformaticsTool.test_data_path()``/"wgsgermline_data") are the file
    inputs; the "out" BAM/index pair is checked with ``BamBai.basic_test``.
    """

    def germline_file(filename):
        # One test_data_path() lookup per file, matching the inline joins.
        return os.path.join(
            BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
        )

    tool_inputs = {
        "bam": germline_file("NA12878-BRCA1.recalibrated.bam"),
        "intervals": germline_file("BRCA1.hg38.bed"),
        "javaOptions": ["-Xmx3G"],
        "outputFilename": ".",
    }

    expected = BamBai.basic_test(
        "out",
        2600900,
        21300,
        germline_file("NA12878-BRCA1.split.flagstat"),
    )

    return [TTestCase(name="basic", input=tool_inputs, output=expected)]
def tests(self):
    """Return the single "basic" test case, using remote test data.

    Four remote files (inputRead, reference, intervals, dbsnp) plus two
    options are supplied.  The expected output combines the
    ``VcfTabix.basic_test`` checks for "out" with the ``BamBai.basic_test``
    checks for "bam".
    """
    data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

    remote_files = {
        "inputRead": "NA12878-BRCA1.split.bam",
        "reference": "Homo_sapiens_assembly38.chr17.fasta",
        "intervals": "BRCA1.hg38.bed",
        "dbsnp": "Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz",
    }
    tool_inputs = {key: f"{data_url}/{fname}" for key, fname in remote_files.items()}
    # Option keys are added after the file keys, keeping the original order.
    tool_inputs["javaOptions"] = ["-Xmx6G"]
    tool_inputs["pairHmmImplementation"] = "LOGLESS_CACHING"

    expected = VcfTabix.basic_test(
        "out",
        12800,
        270,
        214,
        ["GATKCommandLine"],
        "0224e24e5fc27286ee90c8d3c63373a7",
    ) + BamBai.basic_test(
        "bam",
        596698,
        21272,
        f"{data_url}/NA12878-BRCA1.haplotyped.flagstat",
        "d83b4c0d8eab24a3be1cc6af4f827753",
        "b4bb4028b8679a3a635e3ad87126a097",
    )

    return [TTestCase(name="basic", input=tool_inputs, output=expected)]
def tests(self):
    """Return the single "basic" test case, using local test data.

    Four files below ``BioinformaticsTool.test_data_path()``/
    "wgsgermline_data" (inputRead, reference, intervals, dbsnp) plus two
    options are supplied.  The expected output combines the
    ``VcfTabix.basic_test`` checks for "out" with the ``BamBai.basic_test``
    checks for "bam".
    """

    def germline_file(filename):
        # Resolves the data root on each call, like the inline joins did.
        return os.path.join(
            BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
        )

    tool_inputs = {
        "inputRead": germline_file("NA12878-BRCA1.split.bam"),
        "reference": germline_file("Homo_sapiens_assembly38.chr17.fasta"),
        "intervals": germline_file("BRCA1.hg38.bed"),
        "dbsnp": germline_file("Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz"),
        "javaOptions": ["-Xmx6G"],
        "pairHmmImplementation": "LOGLESS_CACHING",
    }

    expected = VcfTabix.basic_test(
        "out",
        12800,
        270,
        214,
        ["GATKCommandLine"],
        "0224e24e5fc27286ee90c8d3c63373a7",
    ) + BamBai.basic_test(
        "bam",
        596698,
        21272,
        germline_file("NA12878-BRCA1.haplotyped.flagstat"),
        "d83b4c0d8eab24a3be1cc6af4f827753",
        "b4bb4028b8679a3a635e3ad87126a097",
    )

    return [TTestCase(name="basic", input=tool_inputs, output=expected)]
def tests(self):
    """Return the "basic" and "minimal" test cases, using local test data.

    Both cases use identical inputs: one sorted BAM below
    ``BioinformaticsTool.test_data_path()``/"wgsgermline_data" plus options.
    "basic" checks the "out" BAM/index pair via ``BamBai.basic_test``;
    "minimal" uses ``self.minimal_test()``.
    """

    def germline_file(filename):
        # One test_data_path() lookup per file, matching the inline joins.
        return os.path.join(
            BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
        )

    def build_inputs():
        # Built anew for each case so the cases do not share mutable objects.
        return {
            "bams": [germline_file("NA12878-BRCA1.sorted.bam")],
            "maxRecordsInRam": 5000000,
            "createIndex": True,
            "mergeSamFiles_useThreading": True,
            "mergeSamFiles_validationStringency": "SILENT",
        }

    basic_case = TTestCase(
        name="basic",
        input=build_inputs(),
        output=BamBai.basic_test(
            "out",
            2829000,
            3780,
            germline_file("NA12878-BRCA1.markduped.bam.flagstat"),
        ),
    )
    minimal_case = TTestCase(
        name="minimal",
        input=build_inputs(),
        output=self.minimal_test(),
    )
    return [basic_case, minimal_case]
def tests(self):
    """Return the single "basic" test case, using local test data.

    All seven inputs are files below
    ``BioinformaticsTool.test_data_path()``/"wgsgermline_data".  The "out"
    BAM/index pair is checked with ``BamBai.basic_test``.
    """
    # Input name -> file name; dict order is the order of the input mapping.
    filenames = {
        "bam": "NA12878-BRCA1.markduped.bam",
        "reference": "Homo_sapiens_assembly38.chr17.fasta",
        "snps_dbsnp": "Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz",
        "snps_1000gp": "1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz",
        "known_indels": "Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz",
        "mills_indels": "Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz",
        "intervals": "BRCA1.hg38.bed",
    }
    tool_inputs = {
        key: os.path.join(
            BioinformaticsTool.test_data_path(), "wgsgermline_data", fname
        )
        for key, fname in filenames.items()
    }

    flagstat_path = os.path.join(
        BioinformaticsTool.test_data_path(),
        "wgsgermline_data",
        "NA12878-BRCA1.recalibrated.flagstat",
    )
    expected = BamBai.basic_test("out", 2600000, 21000, flagstat_path)

    return [TTestCase(name="basic", input=tool_inputs, output=expected)]
def tests(self):
    """Return the "basic" and "minimal" test cases, using local test data.

    Both cases use identical inputs: one BAM below
    ``BioinformaticsTool.test_data_path()``/"wgsgermline_data" plus sort
    options.  "basic" checks the "out" BAM/index pair via
    ``BamBai.basic_test``; "minimal" uses ``self.minimal_test()``.
    """

    def germline_file(filename):
        # Resolves the data root on each call, like the inline joins did.
        return os.path.join(
            BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
        )

    def build_inputs():
        # A fresh dict (and javaOptions list) per test case.
        return {
            "bam": germline_file("NA12878-BRCA1.bam"),
            "sortOrder": "coordinate",
            "createIndex": True,
            "maxRecordsInRam": 5000000,
            "tmpDir": "./tmp",
            "validationStringency": "SILENT",
            "javaOptions": ["-Xmx6G"],
        }

    basic_case = TTestCase(
        name="basic",
        input=build_inputs(),
        output=BamBai.basic_test(
            "out",
            2826980,
            49688,
            germline_file("NA12878-BRCA1.bam.flagstat"),
            "15eb0f8168b42e8ce3ab8b9bc9199e3c",
            "a9042025f29f7a08e5f56ce8d11469a1",
        ),
    )
    minimal_case = TTestCase(
        name="minimal",
        input=build_inputs(),
        output=self.minimal_test(),
    )
    return [basic_case, minimal_case]
def tests(self):
    """Return the single "basic" test case, using remote test data.

    A recalibrated BAM URL and an intervals BED URL are the file inputs;
    the "out" BAM/index pair is checked with ``BamBai.basic_test``.
    """
    data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

    tool_inputs = {
        "bam": f"{data_url}/NA12878-BRCA1.recalibrated.bam",
        "intervals": f"{data_url}/BRCA1.hg38.bed",
        "javaOptions": ["-Xmx3G"],
        "outputFilename": ".",
    }
    flagstat_url = f"{data_url}/NA12878-BRCA1.split.flagstat"
    expected = BamBai.basic_test("out", 2600900, 21300, flagstat_url)

    return [TTestCase(name="basic", input=tool_inputs, output=expected)]
def tests(self) -> Optional[List[TTestCase]]:
    """Return the single "brca1" test case for this workflow.

    Every file input is a remote URL below the ``petermac_testdata`` folder
    of the janis-test-data store.  The expected output is the concatenation
    of the ``basic_test`` expectations for the two VCF outputs, the BAM, the
    performance summary and the wrapped pair of "out_fastqc_reports"
    zip-file checks.
    """
    bioinf_base = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics"
    # The former unused ``hg38`` local was dropped; only this folder is read.
    chr17 = f"{bioinf_base}/petermac_testdata"

    return [
        TTestCase(
            name="brca1",
            input={
                "sample_name": "NA12878",
                "reference": f"{chr17}/Homo_sapiens_assembly38.chr17.fasta",
                "fastqs": [
                    [
                        f"{chr17}/NA12878-BRCA1_R1.fastq.gz",
                        f"{chr17}/NA12878-BRCA1_R2.fastq.gz",
                    ]
                ],
                "gatk_intervals": [f"{chr17}/BRCA1.hg38.bed"],
                "known_indels": f"{chr17}/Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz",
                "mills_indels": f"{chr17}/Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz",
                "snps_1000gp": f"{chr17}/1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz",
                "snps_dbsnp": f"{chr17}/Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz",
                "cutadapt_adapters": f"{chr17}/contaminant_list.txt",
            },
            output=Vcf.basic_test("out_variants_bamstats", 51300, 230)
            + Vcf.basic_test("out_variants_gatk_split", 51300, 221)
            + BamBai.basic_test("out_bam", 2822000, 49600)
            + TextFile.basic_test(
                "out_performance_summary",
                948,
                md5="575354942cfb8d0367725f9020181443",
            )
            # Two checks share the "out_fastqc_reports" tag and are wrapped
            # together by Array.array_wrapper.
            + Array.array_wrapper(
                [
                    ZipFile.basic_test("out_fastqc_reports", 408000),
                    ZipFile.basic_test("out_fastqc_reports", 416000),
                ]
            ),
        )
    ]
def tests(self):
    """Return the single "basic" test case, using remote test data.

    Normal/tumor BAMs and the gnomad file come from ``wgssomatic_data``;
    the reference and intervals come from ``wgsgermline_data``.  The
    expected output concatenates the checks for "out" (``Vcf``), "variants"
    (``VcfTabix``) and "out_bam" (``BamBai``).
    """
    bucket = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics"
    germline = f"{bucket}/wgsgermline_data"
    somatic = f"{bucket}/wgssomatic_data"

    tool_inputs = {
        "normal_bam": f"{somatic}/NA24385-BRCA1.markduped.recalibrated.bam",
        "tumor_bam": f"{somatic}/NA12878-NA24385-mixture.markduped.recalibrated.bam",
        "reference": f"{germline}/Homo_sapiens_assembly38.chr17.fasta",
        "gnomad": f"{somatic}/af-only-gnomad.hg38.BRCA1.vcf.gz",
        "intervals": f"{germline}/BRCA1.hg38.bed",
        "normal_name": "NA24385-BRCA1",
        # NOTE(review): "Filetered" is kept verbatim; presumably it mirrors
        # the step's declared input name -- verify before renaming.
        "filterpass_removeFileteredAll": True,
        "filterpass_recode": True,
        "filterpass_recodeINFOAll": True,
        "output_bam_name": "mutect2.bam",
    }

    expected = (
        Vcf.basic_test(
            "out",
            33000,
            147,
            ["GATKCommandLine"],
            "c083775bc8c49397fb65ec12cd435688",
        )
        + VcfTabix.basic_test(
            "variants",
            13000,
            260,
            182,
            ["GATKCommandLine"],
            "6cfd70dda8599a270978868166ab6545",
        )
        + BamBai.basic_test(
            "out_bam",
            813200,
            21200,
            f"{somatic}/somatic_variant_caller.flagstat",
        )
    )

    return [TTestCase(name="basic", input=tool_inputs, output=expected)]
def tests(self):
    """Return the single "basic" test case, using remote test data.

    All seven inputs are remote files in the ``wgsgermline_data`` folder.
    The "out" BAM/index pair is checked with ``BamBai.basic_test``.
    """
    data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

    # Input name -> remote file name; dict order is the order of the inputs.
    remote_files = {
        "bam": "NA12878-BRCA1.markduped.bam",
        "reference": "Homo_sapiens_assembly38.chr17.fasta",
        "snps_dbsnp": "Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz",
        "snps_1000gp": "1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz",
        "known_indels": "Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz",
        "mills_indels": "Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz",
        "intervals": "BRCA1.hg38.bed",
    }
    tool_inputs = {key: f"{data_url}/{fname}" for key, fname in remote_files.items()}

    expected = BamBai.basic_test(
        "out",
        2600000,
        21000,
        f"{data_url}/NA12878-BRCA1.recalibrated.flagstat",
    )

    return [TTestCase(name="basic", input=tool_inputs, output=expected)]
def tests(self):
    """Return the "basic" and "minimal" test cases, using remote test data.

    Both cases use identical inputs (one BAM URL plus sort options).
    "basic" checks the "out" BAM/index pair via ``BamBai.basic_test``;
    "minimal" uses ``self.minimal_test()``.
    """
    data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

    def build_inputs():
        # A fresh dict (and javaOptions list) per test case.
        return {
            "bam": f"{data_url}/NA12878-BRCA1.bam",
            "sortOrder": "coordinate",
            "createIndex": True,
            "maxRecordsInRam": 5000000,
            "tmpDir": "./tmp",
            "validationStringency": "SILENT",
            "javaOptions": ["-Xmx6G"],
        }

    cases = []
    cases.append(
        TTestCase(
            name="basic",
            input=build_inputs(),
            output=BamBai.basic_test(
                "out",
                2826980,
                49688,
                f"{data_url}/NA12878-BRCA1.bam.flagstat",
                "15eb0f8168b42e8ce3ab8b9bc9199e3c",
                "a9042025f29f7a08e5f56ce8d11469a1",
            ),
        )
    )
    cases.append(
        TTestCase(
            name="minimal",
            input=build_inputs(),
            output=self.minimal_test(),
        )
    )
    return cases
def tests(self):
    """Return the "basic" and "minimal" test cases, using local test data.

    Both cases receive the same four file inputs (bam, reference, recalFile,
    intervals), resolved below
    ``BioinformaticsTool.test_data_path()``/"wgsgermline_data".  "basic"
    checks the "out" BAM/index pair via ``BamBai.basic_test``; "minimal"
    uses ``self.minimal_test()``.
    """

    def germline_file(filename):
        # One test_data_path() lookup per file, matching the inline joins.
        return os.path.join(
            BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
        )

    filenames = {
        "bam": "NA12878-BRCA1.markduped.bam",
        "reference": "Homo_sapiens_assembly38.chr17.fasta",
        "recalFile": "NA12878-BRCA1.markduped.table",
        "intervals": "BRCA1.hg38.bed",
    }

    def build_inputs():
        # A fresh dict per test case, so the cases never share a mutable object.
        return {key: germline_file(fname) for key, fname in filenames.items()}

    basic_case = TTestCase(
        name="basic",
        input=build_inputs(),
        output=BamBai.basic_test(
            "out",
            2600000,
            21000,
            germline_file("NA12878-BRCA1.recalibrated.flagstat"),
        ),
    )
    minimal_case = TTestCase(
        name="minimal",
        input=build_inputs(),
        output=self.minimal_test(),
    )
    return [basic_case, minimal_case]
def tests(self) -> Optional[List[TTestCase]]:
    """Return the single "basic" test case for this workflow.

    Every file input is a remote URL below the ``petermac_testdata`` folder
    of the janis-test-data store.  The expected output is the concatenation
    of the ``basic_test`` expectations for the normal/tumor BAMs, the two
    performance summaries, the three variant outputs and the two
    ``*_fastqc_reports`` outputs.
    """
    bucket = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics"
    data_url = f"{bucket}/petermac_testdata"

    def remote(filename):
        # Builds the full URL of one file in the test-data folder.
        return f"{data_url}/{filename}"

    # NOTE(review): the second normal read file ends in "_R21" while the
    # tumor pair uses "_R2" -- confirm the remote object really has this name.
    normal_reads = [
        remote("NA24385-BRCA1_R1.fastq.gz"),
        remote("NA24385-BRCA1_R21.fastq.gz"),
    ]
    tumor_reads = [
        remote("NA12878-NA24385-mixture-BRCA1_R1.fastq.gz"),
        remote("NA12878-NA24385-mixture-BRCA1_R2.fastq.gz"),
    ]

    workflow_inputs = {
        "normal_inputs": [normal_reads],
        "normal_name": "NA24385-BRCA1",
        "tumor_inputs": [tumor_reads],
        "tumor_name": "NA12878-NA24385-mixture",
        "reference": remote("Homo_sapiens_assembly38.chr17.fasta"),
        "gridss_blacklist": remote("consensusBlacklist.hg38.chr17.bed"),
        "gnomad": remote("af-only-gnomad.hg38.BRCA1.vcf.gz"),
        "gatk_intervals": [remote("BRCA1.hg38.bed")],
        "known_indels": remote("Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz"),
        "mills_indels": remote("Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz"),
        "snps_1000gp": remote("1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz"),
        "snps_dbsnp": remote("Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz"),
        "cutadapt_adapters": remote("contaminant_list.txt"),
    }

    expected = (
        BamBai.basic_test("out_normal_bam", 3265300, 49500)
        + BamBai.basic_test("out_tumor_bam", 3341700, 49000)
        + TextFile.basic_test(
            "out_normal_performance_summary",
            950,
            md5="e3205735e5fe8c900f05050f8ed73f19",
        )
        + TextFile.basic_test(
            "out_tumor_performance_summary",
            950,
            md5="122bfa2ece90c0f030015feba4ba7d84",
        )
        + CompressedVcf.basic_test(
            "out_variants_gatk",
            9040,
            147,
            ["GATKCommandLine"],
            "a2e4f96c451754ef8cba80494ed98a70",
        )
        + Vcf.basic_test(
            "out_variants",
            44090,
            156,
            ["GATKCommandLine"],
            "5fc0e861893e0a23f974808265a6917e",
        )
        # The single "out_variants_split" check is wrapped by
        # Array.array_wrapper before being concatenated.
        + Array.array_wrapper(
            [
                Vcf.basic_test(
                    "out_variants_split",
                    34390,
                    147,
                    ["GATKCommandLine"],
                    "c083775bc8c49397fb65ec12cd435688",
                )
            ]
        )
        + FastqGzPair.basic_test("out_normal_fastqc_reports", 441500, 439800)
        + FastqGzPair.basic_test("out_tumor_fastqc_reports", 434900, 440000)
    )

    return [TTestCase(name="basic", input=workflow_inputs, output=expected)]