def tests(self):
    """Return the test cases for this tool: a checked "basic" run and a "minimal" run.

    Both cases use the same two remote ``.fastq.gz`` URLs under ``reads`` and
    ``threads`` set to 1; they differ only in the expected output.
    """
    base_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

    def paired_reads_input():
        # Built fresh on every call so the two test cases never share a dict.
        return {
            "reads": [
                f"{base_url}/NA12878-BRCA1_R1.fastq.gz",
                f"{base_url}/NA12878-BRCA1_R2.fastq.gz",
            ],
            "threads": 1,
        }

    basic_case = TTestCase(
        name="basic",
        input=paired_reads_input(),
        # Expectations for "out" followed by an array-wrapped "datafile" check.
        output=FastqGzPair.basic_test("out", 824000, 408000, 416000)
        + Array.array_wrapper([TextFile.basic_test("datafile", 81000)]),
    )
    minimal_case = TTestCase(
        name="minimal",
        input=paired_reads_input(),
        output=self.minimal_test(),
    )
    return [basic_case, minimal_case]
def tests(self):
    """Return the single "basic" test case, driven by remote test data.

    The expected output combines the ``BamBai`` checks for ``out`` with the
    ``TextFile`` checks for ``metrics``.
    """
    data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

    job_input = {
        "bam": [f"{data_url}/NA12878-BRCA1.merged.bam"],
        "javaOptions": ["-Xmx6G"],
        "maxRecordsInRam": 5000000,
        "createIndex": True,
        "tmpDir": "./tmp",
    }

    # The last argument points at a remote flagstat file for the "out" checks.
    bam_expectations = BamBai.basic_test(
        "out",
        2829000,
        3780,
        f"{data_url}/NA12878-BRCA1.markduped.bam.flagstat",
    )
    metrics_expectations = TextFile.basic_test(
        "metrics",
        3700,
        "NA12878-BRCA1\t193\t9468\t164\t193\t46\t7\t1\t0.003137\t7465518",
        112,
    )

    return [
        TTestCase(
            name="basic",
            input=job_input,
            output=bam_expectations + metrics_expectations,
        )
    ]
def tests(self):
    """Return the single "basic" test case, with all file inputs given as remote URLs."""
    data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

    job_input = {
        "positions": f"{data_url}/NA12878-BRCA1.sorted.uncompressed.stdout",
        "reference": f"{data_url}/Homo_sapiens_assembly38.chr17.fasta",
        "bam": f"{data_url}/NA12878-BRCA1.markduped.bam",
        "countOrphans": True,
        "noBAQ": True,
        "maxDepth": 10000,
        "minBQ": 0,
    }

    # Checks applied to the "out" text output; the last argument is an MD5-style digest.
    out_expectations = TextFile.basic_test(
        "out",
        19900,
        "chr17\t43044391\tG\t19\tA,A,,A.a,,A,,A..,,a\tDJCJ:FHDDBJBBJJIDDB",
        187,
        "53c3e03c20730ff45411087444379b1b",
    )

    return [TTestCase(name="basic", input=job_input, output=out_expectations)]
def tests(self):
    """Return the single "basic" test case, using local test data files.

    The expected text for ``out`` is read from a stored metrics file with its
    first five lines dropped. ``outHistogram`` is only checked for a minimum
    file size.
    """

    def fixture(filename):
        # Resolve a file inside the local "wgsgermline_data" test-data folder.
        return os.path.join(
            BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
        )

    # The first 5 lines are headers that vary between runs (time, etc.),
    # so they are skipped before capturing the comparison text.
    with open(fixture("NA12878-BRCA1.markduped.metrics.txt"), "r") as metrics_file:
        for _ in range(5):
            next(metrics_file)
        expected_content = metrics_file.read()

    histogram_expectation = TTestExpectedOutput(
        tag="outHistogram",
        preprocessor=TTestPreprocessor.FileSize,
        operator=operator.ge,
        expected_value=15600,
    )

    return [
        TTestCase(
            name="basic",
            input={
                "bam": fixture("NA12878-BRCA1.markduped.bam"),
                "javaOptions": ["-Xmx6G"],
            },
            output=TextFile.basic_test("out", 7260, expected_content, 905)
            + [histogram_expectation],
        )
    ]
def tests(self):
    """Return the single "basic" test case, using local test data files.

    The expected output combines the ``BamBai`` checks for ``out`` with the
    ``TextFile`` checks for ``metrics``.
    """

    def fixture(filename):
        # Resolve a file inside the local "wgsgermline_data" test-data folder.
        return os.path.join(
            BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
        )

    job_input = {
        "bam": [fixture("NA12878-BRCA1.merged.bam")],
        "javaOptions": ["-Xmx6G"],
        "maxRecordsInRam": 5000000,
        "createIndex": True,
        "tmpDir": "./tmp",
    }

    # The last argument points at a stored flagstat file for the "out" checks.
    bam_expectations = BamBai.basic_test(
        "out",
        2829000,
        3780,
        fixture("NA12878-BRCA1.markduped.bam.flagstat"),
    )
    metrics_expectations = TextFile.basic_test(
        "metrics",
        3700,
        "NA12878-BRCA1\t193\t9468\t164\t193\t46\t7\t1\t0.003137\t7465518",
        112,
    )

    return [
        TTestCase(
            name="basic",
            input=job_input,
            output=bam_expectations + metrics_expectations,
        )
    ]
def tests(self):
    """Return a checked "basic" case and a "minimal" case for the same local reference.

    Both cases pass the same local FASTA path as ``reference``; only the
    expected output differs.
    """

    def reference_input():
        # A new dict (and a new path lookup) per case, so the cases share nothing.
        return {
            "reference": os.path.join(
                BioinformaticsTool.test_data_path(),
                "wgsgermline_data",
                "Homo_sapiens_assembly38.chr17.fasta",
            ),
        }

    basic_case = TTestCase(
        name="basic",
        input=reference_input(),
        output=TextFile.basic_test("out", 15, "chr17\t83257441\n", 1),
    )
    minimal_case = TTestCase(
        name="minimal",
        input=reference_input(),
        output=self.minimal_test(),
    )
    return [basic_case, minimal_case]
def tests(self):
    """Return the single "basic" test case: remote BAM input, local expected metrics.

    The expected text for ``out`` is read from a local metrics file with its
    first five lines dropped. ``outHistogram`` is only checked for a minimum
    file size.
    """
    data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

    # NOTE(review): this path has no "wgsgermline_data" component even though
    # the remote data lives under a folder of that name. Confirm the file
    # really sits directly under test_data_path().
    metrics_path = os.path.join(
        BioinformaticsTool.test_data_path(),
        "NA12878-BRCA1.markduped.metrics.txt",
    )

    # The first 5 lines are headers that vary between runs (time, etc.),
    # so they are skipped before capturing the comparison text.
    with open(metrics_path, "r") as metrics_file:
        for _ in range(5):
            next(metrics_file)
        expected_content = metrics_file.read()

    histogram_expectation = TTestExpectedOutput(
        tag="outHistogram",
        preprocessor=TTestPreprocessor.FileSize,
        operator=operator.ge,
        expected_value=15600,
    )

    return [
        TTestCase(
            name="basic",
            input={
                "bam": f"{data_url}/NA12878-BRCA1.markduped.bam",
                "javaOptions": ["-Xmx6G"],
            },
            output=TextFile.basic_test("out", 7260, expected_content, 905)
            + [histogram_expectation],
        )
    ]
def tests(self):
    """Return the single "basic" test case, with inputs and expected file given as remote URLs."""
    data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

    job_input = {
        "flagstat": f"{data_url}/NA12878-BRCA1.markduped.bam.flagstat",
        "collectInsertSizeMetrics": f"{data_url}/NA12878-BRCA1.markduped.metrics.txt",
        "coverage": f"{data_url}/NA12878-BRCA1.genomeCoverageBed.stdout",
        "rmdupFlagstat": f"{data_url}/NA12878-BRCA1.markduped.bam.bam.flagstat",
        "genome": True,
    }

    # "out" is checked for size, line count and digest, and against a
    # reference CSV at the given remote path.
    summary_expectations = TextFile.basic_test(
        tag="out",
        min_size=948,
        line_count=2,
        md5="575354942cfb8d0367725f9020181443",
        expected_file_path=f"{data_url}/NA12878-BRCA1_performance_summary.csv",
    )

    return [TTestCase(name="basic", input=job_input, output=summary_expectations)]
def tests(self):
    """Return the single "basic" test case, using local test data files."""

    def fixture(filename):
        # Resolve a file inside the local "wgsgermline_data" test-data folder.
        return os.path.join(
            BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
        )

    job_input = {
        "inputBam": fixture("NA12878-BRCA1.markduped.bam.bam"),
        "genome": fixture("NA12878-BRCA1.genome_file.txt"),
    }

    # Checks applied to the "out" text output; the last argument is an MD5-style digest.
    out_expectations = TextFile.basic_test(
        "out",
        7432,
        "chr17\t0\t83144233\t83257441\t0.99864",
        220,
        "f2007353bbd18f0a04eae9499d7c6a91",
    )

    return [TTestCase(name="basic", input=job_input, output=out_expectations)]
def tests(self):
    """Return the single "basic" test case, with all file inputs taken from local test data."""

    def fixture(filename):
        # Resolve a file inside the local "wgsgermline_data" test-data folder.
        return os.path.join(
            BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
        )

    job_input = {
        "positions": fixture("NA12878-BRCA1.sorted.uncompressed.stdout"),
        "reference": fixture("Homo_sapiens_assembly38.chr17.fasta"),
        "bam": fixture("NA12878-BRCA1.markduped.bam"),
        "countOrphans": True,
        "noBAQ": True,
        "maxDepth": 10000,
        "minBQ": 0,
    }

    # Checks applied to the "out" text output; the last argument is an MD5-style digest.
    out_expectations = TextFile.basic_test(
        "out",
        19900,
        "chr17\t43044391\tG\t19\tA,A,,A.a,,A,,A..,,a\tDJCJ:FHDDBJBBJJIDDB",
        187,
        "53c3e03c20730ff45411087444379b1b",
    )

    return [TTestCase(name="basic", input=job_input, output=out_expectations)]
def tests(self):
    """Return the single "basic" test case, reading its pileup table from remote somatic data.

    Two text outputs are checked: ``contOut`` and ``segOut``.
    """
    root_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics"
    somatic_url = f"{root_url}/wgssomatic_data"

    job_input = {
        "javaOptions": ["-Xmx6G"],
        "pileupTable": f"{somatic_url}/generated.txt",
        "segmentationFileOut": "generated.txt.mutect2_segments",
    }

    contamination_expectations = TextFile.basic_test(
        "contOut",
        59,
        "sample\tcontamination\terror\nNA12878-NA24385-mixture\t0.0\t0.0",
    )
    segment_expectations = TextFile.basic_test(
        "segOut",
        125,
        "contig\tstart\tend\tminor_allele_fraction\nchr17\t43045941\t43098543\t0.28541019662496847",
    )

    return [
        TTestCase(
            name="basic",
            input=job_input,
            output=contamination_expectations + segment_expectations,
        ),
    ]
def tests(self):
    """Return the single "basic" test case, with all file inputs taken from local test data.

    ``knownSites`` is a list of four VCF paths; ``intervals`` is a BED path.
    """

    def fixture(filename):
        # Resolve a file inside the local "wgsgermline_data" test-data folder.
        return os.path.join(
            BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
        )

    known_site_names = (
        "Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz",
        "Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz",
        "Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz",
        "1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz",
    )

    job_input = {
        "bam": fixture("NA12878-BRCA1.markduped.bam"),
        "reference": fixture("Homo_sapiens_assembly38.chr17.fasta"),
        "knownSites": [fixture(vcf_name) for vcf_name in known_site_names],
        "intervals": fixture("BRCA1.hg38.bed"),
        "javaOptions": ["-Xmx12G"],
    }

    out_expectations = TextFile.basic_test(
        "out", 1131758, "#:GATKReport.v1.1:5", 10376
    )

    return [TTestCase(name="basic", input=job_input, output=out_expectations)]
def tests(self):
    """Return the single "basic" test case, taking its BAM input from a remote URL."""
    data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

    job_input = {
        "bam": f"{data_url}/NA12878-BRCA1.markduped.bam",
    }

    # Checks applied to the "out" text output; the last argument is an MD5-style digest.
    out_expectations = TextFile.basic_test(
        "out",
        410,
        "19486 + 0 in total (QC-passed reads + QC-failed reads)",
        13,
        "ddbcfe52e60b925d222fb8bc1517a7a0",
    )

    return [TTestCase(name="basic", input=job_input, output=out_expectations)]
def tests(self):
    """Return the single "basic" test case, with both file inputs given as remote URLs."""
    data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

    job_input = {
        "inputBam": f"{data_url}/NA12878-BRCA1.markduped.bam.bam",
        "genome": f"{data_url}/NA12878-BRCA1.genome_file.txt",
    }

    # Checks applied to the "out" text output; the last argument is an MD5-style digest.
    out_expectations = TextFile.basic_test(
        "out",
        7432,
        "chr17\t0\t83144233\t83257441\t0.99864",
        220,
        "f2007353bbd18f0a04eae9499d7c6a91",
    )

    return [TTestCase(name="basic", input=job_input, output=out_expectations)]
def tests(self):
    """Return a checked "basic" case and a "minimal" case for the same remote reference.

    Both cases pass the same remote FASTA URL as ``reference``; only the
    expected output differs.
    """
    data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

    def reference_input():
        # A new dict per case, so the two cases never share a mutable object.
        return {
            "reference": f"{data_url}/Homo_sapiens_assembly38.chr17.fasta",
        }

    basic_case = TTestCase(
        name="basic",
        input=reference_input(),
        output=TextFile.basic_test("out", 15, "chr17\t83257441\n", 1),
    )
    minimal_case = TTestCase(
        name="minimal",
        input=reference_input(),
        output=self.minimal_test(),
    )
    return [basic_case, minimal_case]
def tests(self):
    """Return the single "basic" test case, taking its BAM input from local test data."""
    bam_path = os.path.join(
        BioinformaticsTool.test_data_path(),
        "wgsgermline_data",
        "NA12878-BRCA1.markduped.bam",
    )

    # Checks applied to the "out" text output; the last argument is an MD5-style digest.
    out_expectations = TextFile.basic_test(
        "out",
        410,
        "19486 + 0 in total (QC-passed reads + QC-failed reads)",
        13,
        "ddbcfe52e60b925d222fb8bc1517a7a0",
    )

    return [
        TTestCase(
            name="basic",
            input={"bam": bam_path},
            output=out_expectations,
        )
    ]
def tests(self):
    """Return the single "basic" test case, with inputs and expected file taken from local test data."""

    def fixture(filename):
        # Resolve a file inside the local "wgsgermline_data" test-data folder.
        return os.path.join(
            BioinformaticsTool.test_data_path(), "wgsgermline_data", filename
        )

    job_input = {
        "flagstat": fixture("NA12878-BRCA1.markduped.bam.flagstat"),
        "collectInsertSizeMetrics": fixture("NA12878-BRCA1.markduped.metrics.txt"),
        "coverage": fixture("NA12878-BRCA1.genomeCoverageBed.stdout"),
        "rmdupFlagstat": fixture("NA12878-BRCA1.markduped.bam.bam.flagstat"),
        "genome": True,
    }

    # "out" is checked for size, line count and digest, and against a
    # stored reference CSV.
    summary_expectations = TextFile.basic_test(
        tag="out",
        min_size=948,
        line_count=2,
        md5="575354942cfb8d0367725f9020181443",
        expected_file_path=fixture("NA12878-BRCA1_performance_summary.csv"),
    )

    return [TTestCase(name="basic", input=job_input, output=summary_expectations)]
def tests(self):
    """Return the single "basic" test case, mixing remote somatic and germline test data.

    The BAM and ``sites`` inputs come from the somatic folder, ``intervals``
    from the germline folder. ``out`` is checked with a size value and a digest.
    """
    root_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics"
    germline_url = f"{root_url}/wgsgermline_data"
    somatic_url = f"{root_url}/wgssomatic_data"

    job_input = {
        "javaOptions": ["-Xmx48G"],
        "bam": [
            f"{somatic_url}/NA12878-NA24385-mixture.markduped.recalibrated.bam"
        ],
        "sites": f"{somatic_url}/af-only-gnomad.hg38.BRCA1.vcf.gz",
        "intervals": f"{germline_url}/BRCA1.hg38.bed",
    }

    out_expectations = TextFile.basic_test(
        "out",
        2592,
        md5="54672b8b13d46aaef25c56351c82a3f4",
    )

    return [
        TTestCase(name="basic", input=job_input, output=out_expectations),
    ]
def tests(self):
    """Return the single "basic" test case, with all file inputs given as remote URLs.

    ``knownSites`` is a list of four VCF URLs; ``intervals`` is a BED URL.
    """
    data_url = "https://swift.rc.nectar.org.au/v1/AUTH_4df6e734a509497692be237549bbe9af/janis-test-data/bioinformatics/wgsgermline_data"

    known_site_names = (
        "Homo_sapiens_assembly38.known_indels.BRCA1.vcf.gz",
        "Homo_sapiens_assembly38.dbsnp138.BRCA1.vcf.gz",
        "Mills_and_1000G_gold_standard.indels.hg38.BRCA1.vcf.gz",
        "1000G_phase1.snps.high_confidence.hg38.BRCA1.vcf.gz",
    )

    job_input = {
        "bam": f"{data_url}/NA12878-BRCA1.markduped.bam",
        "reference": f"{data_url}/Homo_sapiens_assembly38.chr17.fasta",
        "knownSites": [f"{data_url}/{vcf_name}" for vcf_name in known_site_names],
        "intervals": f"{data_url}/BRCA1.hg38.bed",
        "javaOptions": ["-Xmx12G"],
    }

    out_expectations = TextFile.basic_test(
        "out", 1131758, "#:GATKReport.v1.1:5", 10376
    )

    return [TTestCase(name="basic", input=job_input, output=out_expectations)]