def test_run_worflow_one_maf(self):
    """ Test that the workflow works correctly when run with a single maf """
    # TODO: method name has a typo ("worflow"); renaming would change the
    # public test identifier, so it is kept as-is for now.
    maf_dir = DATA_SETS['Proj_08390_G']['MAF_DIR']
    facets_dir = DATA_SETS['Proj_08390_G']['FACETS_DIR']
    self.input = {
        "is_impact": True,
        "argos_version_string": "2.x",
        "analysis_gene_cna_filename": "Proj_08390_G.gene.cna.txt",
        "analysis_mutations_filename": "Proj_08390_G.muts.maf",
        "analysis_mutations_share_filename": "Proj_08390_G.muts.share.maf",
        "analysis_segment_cna_filename": "Proj_08390_G.seg.cna.txt",
        "analysis_sv_filename": "Proj_08390_G.svs.maf",
        "helix_filter_version": "20.06.1",
        "IMPACT_gene_list": {"class": "File", "path": IMPACT_FILE},
        "targets_list": {"path": DATA_SETS['Proj_08390_G']["targets_list"], "class": "File"},
        "mutation_maf_files": [
            {"path": os.path.join(maf_dir, "Sample1.Sample2.muts.maf"), "class": "File"}
        ],
        "mutation_svs_maf_files": [
            {"path": os.path.join(maf_dir, "Sample1.Sample2.svs.pass.vep.maf"), "class": "File"}
        ],
        "facets_hisens_cncf_files": [
            {"path": os.path.join(facets_dir, "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens.cncf.txt"), "class": "File"}
        ],
        "facets_hisens_seg_files": [
            {"path": os.path.join(facets_dir, "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens.seg"), "class": "File"}
        ],
    }
    output_json, output_dir = self.run_cwl()
    # expected contents of the output analysis dir, pinned by size + sha1
    analysis_items = [
        OFile(name='Proj_08390_G.gene.cna.txt', size=87905, hash='7cc89d24556de93b9a409812317581e67e5df494'),
        OFile(name='Proj_08390_G.muts.maf', size=33243, hash='2c8904927a917d6e935ef207582d995680574d16'),
        OFile(name='Proj_08390_G.muts.share.maf', size=7462, hash='b5af4e0fcd89fecabf8095aa3d7690e5edb8dca1'),
        OFile(name='Proj_08390_G.seg.cna.txt', size=1632, hash='f0ebb82c34b6530447fa1e70b6dedcc039840d61'),
        OFile(name='Proj_08390_G.svs.maf', size=23603, hash='df420706bb5b772a79317843c0a01a3c88a9571d'),
    ]
    expected_output = {
        'analysis_dir': ODir(name='analysis', items=analysis_items, dir=output_dir)
    }
    self.assertCWLDictEqual(output_json, expected_output)
    # the filtered analysis maf should hold exactly 22 mutations
    self.assertNumMutations(os.path.join(output_dir, 'analysis', 'Proj_08390_G.muts.maf'), 22)
def test_large_maf_file(self):
    """ Test that a giant maf file with tons of variants gets filtered as expected """
    maf_filter_dir = os.path.join(DATA_SETS['Proj_08390_G']['MAF_FILTER_DIR'], "Proj_08390_G")
    input_maf = os.path.join(maf_filter_dir, "Proj_08390_G.muts.maf")
    self.input = {
        "maf_file": {"class": "File", "path": input_maf},
        "argos_version_string": "2.x",
        "is_impact": True,
        "analysis_mutations_filename": "Proj_08390_G.muts.maf",
        "cbio_mutation_data_filename": 'data_mutations_extended.txt'
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        'analysis_mutations_file': OFile(name='Proj_08390_G.muts.maf', size=2386906, hash='4ef341ab4280140f9be15e65a0258a4170ff651d', dir=output_dir),
        'cbio_mutation_data_file': OFile(name='data_mutations_extended.txt', size=278458, hash='af36cf815820fdf41f1401578138b5cbd551a217', dir=output_dir),
        'rejected_file': OFile(name='rejected.muts.maf', size=1047796463, hash='345953da2c7cb801fa08368260469cf7c153055f', dir=output_dir)
    }
    self.assertCWLDictEqual(output_json, expected_output)
    analysis_maf = expected_output['analysis_mutations_file']['path']
    cbio_maf = expected_output['cbio_mutation_data_file']['path']
    self.assertNumMutations(analysis_maf, 1662)
    self.assertNumMutations(cbio_maf, 1139)
    # validate output mutation file contents against the reference fixtures
    self.assertCompareMutFiles(
        analysis_maf,
        os.path.join(maf_filter_dir, "analyst_file.txt"),
        muts_only=True, compare_len=True)
    self.assertCompareMutFiles(
        cbio_maf,
        os.path.join(maf_filter_dir, "portal_file.txt"),
        muts_only=True, compare_len=True)
def test_filter_a_maf_file(self):
    """ Test that a filtered maf file comes out as expected """
    input_maf = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.muts.maf")
    # sanity-check the unfiltered input before running the workflow
    self.assertNumMutations(input_maf, 12514)
    self.input = {
        "maf_file": {"class": "File", "path": input_maf},
        "argos_version_string": ARGOS_VERSION_STRING,
        "is_impact": True,
        "analysis_mutations_filename": "Proj_08390_G.muts.maf",
        "cbio_mutation_data_filename": 'data_mutations_extended.txt'
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        'analysis_mutations_file': OFile(name='Proj_08390_G.muts.maf', size=28079, hash='24421ab8d1a39a71f48eecbb0dd167d5d9f5c529', dir=output_dir),
        'cbio_mutation_data_file': OFile(name='data_mutations_extended.txt', size=4534, hash='6131494536ce956d741c820378e7e2ce1c714403', dir=output_dir),
        'rejected_file': OFile(name='rejected.muts.maf', size=18627626, hash='a06789623715703c5006db6876ecb58b8498f938', dir=output_dir),
    }
    self.assertCWLDictEqual(output_json, expected_output)
    # reference output paths through expected_output, consistent with the
    # other maf filter tests, instead of re-building them with os.path.join
    self.assertNumMutations(
        expected_output['analysis_mutations_file']['path'], 22)
    # validate output mutation file contents
    self.assertCompareMutFiles(
        expected_output['analysis_mutations_file']['path'],
        os.path.join(DATA_SETS['Proj_08390_G']['MAF_FILTER_DIR'], "Sample1", "analyst_file.txt"),
        muts_only=True, compare_len=True)
    self.assertCompareMutFiles(
        expected_output['cbio_mutation_data_file']['path'],
        os.path.join(DATA_SETS['Proj_08390_G']['MAF_FILTER_DIR'], "Sample1", "portal_file.txt"),
        muts_only=True, compare_len=True)
def test_maf_filter_argos_3_2_0(self):
    """ Test the maf filter script results when used with argos_version_string 3.2.0 """
    input_maf = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.muts.maf")
    # confirm the unfiltered input mutation count first
    self.assertNumMutations(input_maf, 12514)
    self.input = {
        "maf_file": {"class": "File", "path": input_maf},
        "argos_version_string": "3.2.0",
        "is_impact": True,
        "analysis_mutations_filename": "Proj_08390_G.muts.maf",
        "cbio_mutation_data_filename": 'data_mutations_extended.txt'
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        'analysis_mutations_file': OFile(name='Proj_08390_G.muts.maf', size=28081, hash='fd78842c9410e7e622dee270ec9c0e7628811f18', dir=output_dir),
        'cbio_mutation_data_file': OFile(name='data_mutations_extended.txt', size=4536, hash='47e716eabbfda3408b2d9a08b9bb432b2cb8fce8', dir=output_dir),
        'rejected_file': OFile(name='rejected.muts.maf', size=18627626, hash='a06789623715703c5006db6876ecb58b8498f938', dir=output_dir)
    }
    self.assertCWLDictEqual(output_json, expected_output)
    analysis_maf = expected_output['analysis_mutations_file']['path']
    cbio_maf = expected_output['cbio_mutation_data_file']['path']
    self.assertNumMutations(analysis_maf, 22)
    # validate output mutation file contents against the Sample1 fixtures
    self.assertCompareMutFiles(
        analysis_maf,
        os.path.join(DATA_SETS['Proj_08390_G']['MAF_FILTER_DIR'], "Sample1", "analyst_file.txt"),
        muts_only=True, compare_len=True)
    self.assertCompareMutFiles(
        cbio_maf,
        os.path.join(DATA_SETS['Proj_08390_G']['MAF_FILTER_DIR'], "Sample1", "portal_file.txt"),
        muts_only=True, compare_len=True)
def test_run_copy_number_two_files(self):
    """ Test that Facets geneLevel copy number analysis step runs as expected with two input files """
    self.maxDiff = None
    facets_dir = DATA_SETS['Proj_08390_G']['FACETS_DIR']
    cncf_filenames = [
        "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens.cncf.txt",
        "Sample9.rg.md.abra.printreads__Sample10.rg.md.abra.printreads_hisens.cncf.txt",
    ]
    self.input = {
        "portal_CNA_file": "data_CNA.txt",
        "targets_list": {
            "class": "File",
            "path": DATA_SETS['Proj_08390_G']['targets_list'],
        },
        "hisens_cncfs": [
            {"class": "File", "path": os.path.join(facets_dir, filename)}
            for filename in cncf_filenames
        ],
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        'output_cna_file': OFile(name='data_CNA.txt', size=143118, hash='6dfa53b8a0fad1156060476bcf445d959f0e6eb2', dir=output_dir),
        'output_cna_ascna_file': OFile(name='data_CNA.ascna.txt', size=8658, hash='3953e55b3db85b69209982211c53b9d8f049dc01', dir=output_dir),
        'output_cna_scna_file': OFile(name='data_CNA.scna.txt', size=6937, hash='9ddcee42cce0d49aec5745303be480b6c4ef0fe8', dir=output_dir),
    }
    self.assertCWLDictEqual(output_json, expected_output)
def test_annotate_demo1(self):
    """ Test case for running Facets maf annotation on a smaller demo maf file """
    demo_maf = os.path.join(self.DATA_SETS['demo']['MAF_DIR'], "Sample1.Sample2.muts.maf")
    demo_rds = os.path.join(self.DATA_SETS['demo']['FACETS_DIR'], "Sample1_hisens.rds")
    self.input = {
        "maf_file": {"path": demo_maf, "class": "File"},
        "facets_rds": {"path": demo_rds, "class": "File"},
        "output_filename": "Sample1_hisens.ccf.maf"
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        'failed': False,
        'output_file': OFile(name='Sample1_hisens.ccf.maf', hash='8cd487056bd86177d19d3dd0fe072747d31fd9b7', size=60230, dir=output_dir),
        'stderr_txt': OFile(name='annotate_maf_stderr.txt', dir=output_dir),
        'stdout_txt': OFile(name='annotate_maf_stdout.txt', dir=output_dir),
    }
    # log file size/checksum are not stable across runs; match on basename only
    strip_related_keys = [
        ('basename', 'annotate_maf_stderr.txt', ['size', 'checksum']),
        ('basename', 'annotate_maf_stdout.txt', ['size', 'checksum'])
    ]
    self.assertCWLDictEqual(output_json, expected_output, related_keys=strip_related_keys)
    # the annotated maf should contain exactly 41 records
    annotated_maf = os.path.join(output_dir, 'Sample1_hisens.ccf.maf')
    table_reader = TableReader(annotated_maf)
    # NOTE(review): comment_lines / get_fieldnames results are never asserted;
    # kept in case TableReader relies on the access order — TODO confirm and drop
    comments = table_reader.comment_lines
    fieldnames = table_reader.get_fieldnames()
    records = [rec for rec in table_reader.read()]
    self.assertEqual(len(records), 41)
def test_tmb_workflow3(self):
    """ Test case with a single real maf file """
    pair_maf = os.path.join(self.DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.muts.maf")
    self.input = {
        "assay_coverage": '1000',
        "pairs": [{
            "pair_maf": {"path": pair_maf, "class": "File"},
            "pair_id": "Sample1-T.Sample1-N",
            "tumor_id": "Sample1-T",
            "normal_id": "Sample1-N"
        }]
    }
    output_json, output_dir = self.run_cwl()
    expected_pair = {
        "pair_id": "Sample1-T.Sample1-N",
        "tumor_id": "Sample1-T",
        "normal_id": "Sample1-N",
        "tmb_maf": OFile(name="Sample1-T.Sample1-N.tmb.maf", size=519440, hash="809c3c1ac3bb750aebf22ee2f95a5ebafd41e98f", dir=output_dir),
        "tmb_tsv": OFile(name="Sample1-T.Sample1-N.tmb.tsv", size=42, hash="8156b9e7a0602ddd7710f002ef9385237a82c5d0", dir=output_dir)
    }
    expected_output = {'pairs': [expected_pair]}
    self.assertCWLDictEqual(output_json, expected_output)
    # confirm the TMB score written for the tumor sample
    tmb_rows = self.read_table(expected_output["pairs"][0]["tmb_tsv"]["path"])
    self.assertEqual(tmb_rows, [['CMO_TMB_SCORE', 'SampleID'], ['475000.0', 'Sample1-T']])
def test_concat_simple_file_with_header(self): """ Test that a single file with no header comes out looking as expected """ # make a dummy file with some lines input_lines = ["#header", "foo", "bar", "baz"] input_file = os.path.join(self.tmpdir, "input.txt") with open(input_file, "w") as fout: for line in input_lines: fout.write(line + '\n') self.input = {"input_files": [{"class": "File", "path": input_file}]} output_json, output_dir = self.run_cwl() # check the contents of the concatenated file; should be the same as the input output_file = os.path.join(output_dir, 'output.txt') with open(output_file) as fin: output_lines = [line.strip() for line in fin] self.assertEqual(output_lines, ["foo", "bar", "baz"]) expected_output = { 'output_file': OFile(name='output.txt', hash='0562f08aef399135936d6fb4eb0cc7bc1890d5b4', size=12, dir=output_dir) } self.assertCWLDictEqual(output_json, expected_output)
def test_reduce_sig_figs(self):
    """ Test that significant figures are reduced correctly """
    # write a one-column table of float values under a 'seg.mean' header
    input_file = os.path.join(self.tmpdir, "input.txt")
    with open(input_file, "w") as fout:
        fout.write('\n'.join(["seg.mean", "3.141592", "2.718281828"]) + '\n')
    self.input = {"input_file": {"class": "File", "path": input_file}}
    output_json, output_dir = self.run_cwl()
    expected_output = {
        'output_file': OFile(name="output.txt", size=26, hash="d9f5ec4a9aa27a69ee64edb97eb10d6db65c7ad7", dir=output_dir)
    }
    self.assertCWLDictEqual(output_json, expected_output)
    # each value should be rounded to 4 decimal places in the output
    with open(expected_output['output_file']['path']) as fin:
        rows = list(csv.DictReader(fin))
    self.assertEqual(len(rows), 2)
    self.assertDictEqual(rows[0], OrderedDict([('seg.mean', '3.1416')]))
    self.assertDictEqual(rows[1], OrderedDict([('seg.mean', '2.7183')]))
def test_update_caselist_1(self):
    """ Test simple case list update """
    self.input = {
        "case_list": {"class": "File", "path": self.input_file},
        "sample_ids": ["Sample3", "Sample4"],
        "output_filename": "cases_all.txt"
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        "output_file": OFile(name='cases_all.txt', size=208, hash='59aa4b6b6695b7adfd4493390edd4038808a018f', dir=output_dir)
    }
    self.assertCWLDictEqual(output_json, expected_output)
    # the new sample ids should be appended to the tab-separated case list
    expected_text = """case_list_category: all_cases_in_study
stable_id: pi_123_all
case_list_name: All Tumors
case_list_description: All tumor samples
cancer_study_identifier: pi_123
case_list_ids: Sample1\tSample2\tSample3\tSample4
"""
    with open(os.path.join(output_dir, 'cases_all.txt')) as fin:
        self.assertEqual(fin.read(), expected_text)
def test_put_one_file1_in_dir(self):
    """ Test that one file is put in the dir correctly """
    file1 = self.mkstemp(prefix="1.")
    self.input = {
        "output_directory_name": "foo",
        "files": [{"class": "File", "path": file1}],
    }
    output_json, output_dir = self.run_cwl()
    # the empty temp file should appear inside the new 'foo' directory;
    # hash is the well-known sha1 of zero bytes
    expected_output = {
        "directory": ODir(name='foo', dir=output_dir, items=[
            OFile(name=os.path.basename(file1), size=0, hash='da39a3ee5e6b4b0d3255bfef95601890afd80709')
        ])
    }
    self.assertCWLDictEqual(output_json, expected_output)
def test_fusion_filter1(self):
    """ Run the fusion filter CWL on a portal fusions file against the known
    fusions list and check the resulting data_fusions.txt output file. """
    fusion_file = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.svs.pass.vep.portal.txt")
    self.input = {
        "fusions_file": {"class": "File", "path": fusion_file},
        "output_filename": "data_fusions.txt",
        "known_fusions_file": {"class": "File", "path": KNOWN_FUSIONS_FILE}
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        "output_file": OFile(name='data_fusions.txt', size=99, hash='c16f763b248813fcdde76f7486f1ddc4e9856038', dir=output_dir)
    }
    self.assertCWLDictEqual(output_json, expected_output)
def test_update_caselist_2(self):
    """ Test update with no files passed """
    self.input = {
        "case_list": {"class": "File", "path": self.input_file},
        "sample_ids": [],
        "output_filename": "cases_all.txt"
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        "output_file": OFile(name='cases_all.txt', size=192, hash='f1ad64f51beac01759ae690b2f787fe3978e8882', dir=output_dir)
    }
    self.assertCWLDictEqual(output_json, expected_output)
    # with no sample ids to add, the case list should be unchanged
    expected_text = """case_list_category: all_cases_in_study
stable_id: pi_123_all
case_list_name: All Tumors
case_list_description: All tumor samples
cancer_study_identifier: pi_123
case_list_ids: Sample1\tSample2
"""
    with open(os.path.join(output_dir, 'cases_all.txt')) as fin:
        self.assertEqual(fin.read(), expected_text)
def test_snp_pileup1(self):
    """ Run the snp-pileup CWL on a tumor/normal bam pair against the Facets
    SNPs vcf and check the gzipped pileup output file. """
    self.input = {
        "snps_vcf": {"path": FACETS_SNPS_VCF, "class": "File"},
        # the smallest pair of bam files in the test dataset
        "normal_bam": {"path": os.path.join(DATA_SETS['Proj_08390_G']['BAM_DIR'], "Sample23.rg.md.abra.printreads.bam"), "class": "File"},
        "tumor_bam": {"path": os.path.join(DATA_SETS['Proj_08390_G']['BAM_DIR'], "Sample24.rg.md.abra.printreads.bam"), "class": "File"},
        "output_prefix": "Sample24.Sample23"
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        'output_file': OFile(name="Sample24.Sample23.snp_pileup.gz", size=34851004, hash="755a8b64f45c819b4e2c481e64bf2fe36d1f5361", dir=output_dir)
    }
    self.maxDiff = None
    self.assertCWLDictEqual(output_json, expected_output)
def test_fusion_to_sv(self):
    """ Test fusion to sv conversion """
    self.maxDiff = None
    fusion_file = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.svs.pass.vep.portal.txt")
    self.input = {
        "fusion_file": {"class": "File", "path": fusion_file},
        "output_filename": "data_SV.txt"
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        'output_file': OFile(
            name='data_SV.txt',
            size=1103,
            hash='02fda70b7838931321544f6797de4782adaf1a46',
            dir=output_dir)
    }
    # NOTE(review): 'report.html' does not appear in expected_output; this
    # strip entry looks copied from another test — TODO confirm it is needed
    strip_related_keys = [
        ('basename', 'report.html', ['size', 'checksum'])
    ]
    self.assertCWLDictEqual(output_json, expected_output, related_keys=strip_related_keys)
def test_concat1(self):
    """ Test concat when original file has a comment line """
    # write a dummy input table that starts with a comment
    input_file = os.path.join(self.tmpdir, "input.txt")
    with open(input_file, "w") as fout:
        fout.write('\n'.join(["# comment here", "HEADER", "foo", "bar", "baz"]) + '\n')
    self.input = {
        "input_files": [{"class": "File", "path": input_file}],
        "comment_label": "comment_label",
        "comment_value": "comment_value"
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        'output_file': OFile(name='output.txt', size=64, hash='14ee1247f314dba1e3c28aa8aec9ff7b137a1f41', dir=output_dir)
    }
    self.assertCWLDictEqual(output_json, expected_output)
    # the original comment is kept and the new labeled comment is appended to it
    self.assertFileLinesEqual(expected_output['output_file']['path'], [
        '# comment here',
        '#comment_label: comment_value',
        "HEADER",
        'foo',
        'bar',
        'baz',
    ])
def test_concat_0(self):
    """ Test concat when no comments are present in the original file """
    # write a dummy input table with no comment lines
    input_file = os.path.join(self.tmpdir, "input.txt")
    with open(input_file, "w") as fout:
        fout.write('\n'.join(["HEADER", "foo", "bar", "baz"]) + '\n')
    self.input = {
        "input_files": [{"class": "File", "path": input_file}],
        "comment_label": "comment_label",
        "comment_value": "comment_value"
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        'output_file': OFile(name='output.txt', size=49, hash='7cef8f6de47289a55de99de77563beb3fa371deb', dir=output_dir)
    }
    self.assertCWLDictEqual(output_json, expected_output)
    # only the new labeled comment should precede the original content
    self.assertFileLinesEqual(
        expected_output['output_file']['path'],
        ['#comment_label: comment_value', "HEADER", 'foo', 'bar', 'baz'])
def test_concat_three_files_with_comments(self):
    """ Test that a three files with headers are concatenated correctly
    Use three this time to clearly show the ordering of the output iteration
    """
    # write three dummy input files, each with a comment and a header line
    file_specs = [
        ("input1.txt", ["#comment1", "header1", "foo1", "bar1", "baz1"]),
        ("input2.txt", ["#comment2", "header2", "foo2", "bar2", "baz2"]),
        ("input3.txt", ["#comment3", "header3", "foo3", "bar3", "baz3"]),
    ]
    input_files = []
    for filename, lines in file_specs:
        filepath = os.path.join(self.tmpdir, filename)
        with open(filepath, "w") as fout:
            fout.write('\n'.join(lines) + '\n')
        input_files.append({"class": "File", "path": filepath})
    self.input = {"input_files": input_files}
    output_json, output_dir = self.run_cwl()
    # only the first file's header survives; comments are stripped, and the
    # data lines follow in input-file order
    output_file = os.path.join(output_dir, 'output.txt')
    with open(output_file) as fin:
        output_lines = [line.strip() for line in fin]
    expected_output_lines = [
        "header1",
        "foo1", "bar1", "baz1",
        "foo2", "bar2", "baz2",
        "foo3", "bar3", "baz3",
    ]
    self.assertEqual(output_lines, expected_output_lines)
    # TODO: update this once the above ^^^ passes
    expected_output = {
        'output_file': OFile(name='output.txt', hash='b115b7b40aa8a2e08e30a55abf60d742e05e62b4', size=53, dir=output_dir)
    }
    self.assertCWLDictEqual(output_json, expected_output)
def test_tmb_workflow2(self):
    """ Test case for using a single input pair maf """
    self.input = {
        "assay_coverage": '1000',
        "pairs": [{
            "pair_maf": {"path": self.maf1, "class": "File"},
            "pair_id": "Sample1-T.Sample1-N",
            "tumor_id": "Sample1-T",
            "normal_id": "Sample1-N"
        }]
    }
    output_json, output_dir = self.run_cwl()
    expected_pair = {
        "pair_id": "Sample1-T.Sample1-N",
        "tumor_id": "Sample1-T",
        "normal_id": "Sample1-N",
        "tmb_maf": OFile(name="Sample1-T.Sample1-N.tmb.maf", size=352, hash="b019b5b3c6aba861371c135fe47520c969fab5ae", dir=output_dir),
        "tmb_tsv": OFile(name="Sample1-T.Sample1-N.tmb.tsv", size=40, hash="d6a57cfb5e3001697875e5b5bfae206e0f7f2310", dir=output_dir)
    }
    expected_output = {'pairs': [expected_pair]}
    self.assertCWLDictEqual(output_json, expected_output)
    # confirm the TMB score written for the tumor sample
    tmb_rows = self.read_table(expected_output["pairs"][0]["tmb_tsv"]["path"])
    self.assertEqual(tmb_rows, [['CMO_TMB_SCORE', 'SampleID'], ['7000.0', 'Sample1-T']])
def test_msi_workflow_demo1(self):
    """ Test case for running the MSI workflow on single sample """
    bam_dir = self.DATA_SETS['demo']['BAM_DIR']
    self.input = {
        "threads": "16",
        "microsatellites_file": {"class": "File", "path": MICROSATELLITES_LIST},
        "pairs": [{
            "pair_id": "Sample1.Sample2",
            "tumor_id": "Sample1",
            "normal_id": "Sample2"
        }],
        "normal_bam_files": [{"path": os.path.join(bam_dir, "Sample2.bam"), "class": "File"}],
        "tumor_bam_files": [{"path": os.path.join(bam_dir, "Sample1.bam"), "class": "File"}]
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        'pairs': [{
            "pair_id": "Sample1.Sample2",
            "tumor_id": "Sample1",
            "normal_id": "Sample2",
            "msi_tsv": OFile(name='Sample1.Sample2.msi.tsv', hash="92576a9be4d6a36c67b26d16fdc4134b0d1b9cd9", size=54, dir=output_dir)
        }]
    }
    self.assertCWLDictEqual(output_json, expected_output)
    # verify the MSI score/status reported for the tumor sample
    msi_rows = self.read_table(os.path.join(output_dir, 'Sample1.Sample2.msi.tsv'))
    self.assertEqual(msi_rows, [
        ['MSI_SCORE', 'MSI_STATUS', 'SAMPLE_ID'],
        ['20.90', 'Instable', 'Sample1'],
    ])
def test_add_impact_0(self):
    """ Test IMPACT CWL with tiny dataset """
    # build a tiny maf and a tiny IMPACT gene list in the temp dir
    maf_lines = [
        ['# comment 1'],
        ['# comment 2'],
        ['Hugo_Symbol'],
        ['SUFU'],
        ['GOT1'],
        ['BRCA'],
    ]
    impact_lines = [
        ['BRCA', 'IMPACT468'],
        ['SUFU', 'IMPACT468'],
        ['SUFU', 'IMPACT505'],
    ]
    input_maf = self.write_table(tmpdir=self.tmpdir, filename='input.maf', lines=maf_lines)
    impact_file = self.write_table(tmpdir=self.tmpdir, filename='impact.txt', lines=impact_lines)
    self.input = {
        "input_file": {"class": "File", "path": input_maf},
        "output_filename": 'output.maf',
        "IMPACT_file": {"class": "File", "path": impact_file},
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        'IMPACT_col_added_file': OFile(name='output.maf', hash='5c61f3977dad29ebc74966e8fc40a0278f9aab12', size=126, dir=output_dir)
    }
    self.assertCWLDictEqual(output_json, expected_output)
    # each gene should be flagged with IMPACT membership and assay list
    self.assertMutFileContains(
        filepath=expected_output['IMPACT_col_added_file']['path'],
        expected_comments=['# comment 1', '# comment 2'],
        expected_mutations=[
            {'Hugo_Symbol': 'SUFU', 'is_in_impact': 'True', 'impact_assays': 'IMPACT468,IMPACT505'},
            {'Hugo_Symbol': 'GOT1', 'is_in_impact': 'False', 'impact_assays': '.'},
            {'Hugo_Symbol': 'BRCA', 'is_in_impact': 'True', 'impact_assays': 'IMPACT468'},
        ],
        identical=True)
def test_run_facets_annotation_wrapper(self):
    """ Test case for running Facets maf annotation """
    maf_path = os.path.join(self.DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.muts.maf")
    rds_path = os.path.join(
        self.DATA_SETS['Proj_08390_G']['FACETS_SUITE_DIR'], "Sample1_hisens.rds")
    self.input = {
        "maf_file": {"path": maf_path, "class": "File"},
        "facets_rds": {"path": rds_path, "class": "File"},
        "output_filename": "Sample1_hisens.ccf.maf"
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        'failed': False,
        'output_file': OFile(name='Sample1_hisens.ccf.maf', hash='7e478a8a44d27735f26e368989c672ed6ef5d52a', size=19217199, dir=output_dir),
        'stderr_txt': OFile(name='annotate_maf_stderr.txt', dir=output_dir),
        'stdout_txt': OFile(name='annotate_maf_stdout.txt', dir=output_dir),
    }
    # log file size/checksum vary between runs; match on basename only
    strip_related_keys = [
        ('basename', 'annotate_maf_stderr.txt', ['size', 'checksum']),
        ('basename', 'annotate_maf_stdout.txt', ['size', 'checksum'])
    ]
    self.assertCWLDictEqual(output_json, expected_output, related_keys=strip_related_keys)
def test_example_workflow(self):
    """ Test case for the example workflow """
    self.maxDiff = None
    self.input = {
        'value': "ABC",
        "samples": [
            {"sample_id": "1"},
            {"sample_id": "2"},
        ]
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        "output_file": OFile(name='output.concat.tsv', hash='d4297dfdad25ac92ffae2ce61c6cfe12c4089c28', size=27, dir=output_dir),
        'env': OFile(name='env.txt', dir=output_dir)
    }
    # env.txt contents vary; match on basename only
    strip_related_keys = [
        ('basename', 'env.txt', ['size', 'checksum']),
    ]
    self.assertCWLDictEqual(output_json, expected_output, related_keys=strip_related_keys)
    # every sample row should carry the shared value
    with open(os.path.join(output_dir, "output.concat.tsv")) as f:
        lines = [l.strip() for l in f]
    self.assertEqual(lines, [
        'SampleID\tValue',
        '1\tABC',
        '2\tABC',
    ])
def test_filter_maf_file_impact_false(self):
    """ Test that a filtered maf file comes out as expected """
    self.maxDiff = None
    input_maf = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.muts.maf")
    # sanity-check the unfiltered input mutation count
    self.assertNumMutations(input_maf, 12514)
    self.input = {
        "maf_file": {"class": "File", "path": input_maf},
        "argos_version_string": ARGOS_VERSION_STRING,
        "is_impact": False,
        "analysis_mutations_filename": "Proj_08390_G.muts.maf",
        "cbio_mutation_data_filename": 'data_mutations_extended.txt'
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        'analysis_mutations_file': OFile(name='Proj_08390_G.muts.maf', size=24524, hash='9fb9d43c71e546750ddec6aea2313dda28547b3a', dir=output_dir),
        'cbio_mutation_data_file': OFile(name='data_mutations_extended.txt', size=3931, hash='15ca06249511c32c32e058c246a757ec8df11d83', dir=output_dir),
        'rejected_file': OFile(name='rejected.muts.maf', size=18790398, hash='e7441703699e82cef500d9557bfcbd3464ce8eab', dir=output_dir)
    }
    self.assertCWLDictEqual(output_json, expected_output)
    # with is_impact disabled, fewer mutations survive filtering
    self.assertNumMutations(
        expected_output['analysis_mutations_file']['path'], 18)
def test_msi_workflow2(self):
    """ Test case for running the MSI workflow on single sample """
    bam_dir = self.DATA_SETS['Proj_08390_G']['BAM_DIR']
    self.input = {
        "microsatellites_file": {"class": "File", "path": MICROSATELLITES_LIST},
        "pairs": [{
            "pair_id": "Sample1-T.Sample1-N",
            "tumor_id": "Sample1-T",
            "normal_id": "Sample1-N"
        }],
        "normal_bam_files": [{"path": os.path.join(bam_dir, "Sample23.rg.md.abra.printreads.bam"), "class": "File"}],
        "tumor_bam_files": [{"path": os.path.join(bam_dir, "Sample24.rg.md.abra.printreads.bam"), "class": "File"}]
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        "pairs": [{
            "pair_id": "Sample1-T.Sample1-N",
            "tumor_id": "Sample1-T",
            "normal_id": "Sample1-N",
            "msi_tsv": OFile(name='Sample1-T.Sample1-N.msi.tsv', hash="bc132f6ab9b779d7cba51e7ddfa82af724134f03", size=56, dir=output_dir)
        }]
    }
    self.assertCWLDictEqual(output_json, expected_output)
    # verify the MSI score/status reported for the tumor sample
    msi_rows = self.read_table(os.path.join(output_dir, 'Sample1-T.Sample1-N.msi.tsv'))
    self.assertEqual(msi_rows, [
        ['MSI_SCORE', 'MSI_STATUS', 'SAMPLE_ID'],
        ['21.97', 'Instable', 'Sample1-T'],
    ])
def test_1(self):
    """ Split a fillout maf into called (data_mutations_extended.txt) and
    uncalled (data_mutations_uncalled.txt) mutation files and check that the
    two partitions account for all input mutations. """
    maf_file = os.path.join(DATA_SETS["Fillout01"]["OUTPUT_DIR"], "output.maf")
    # sanity-check the input: 475 mutations total
    # (comment lines from load_mutations are not needed; use `_`)
    _, mutations = self.load_mutations(maf_file, strip=True)
    self.assertEqual(len(mutations), 475)
    self.input = {"input_file": {"class": "File", "path": maf_file}}
    output_json, output_dir = self.run_cwl()
    output_data_mutations_extended = os.path.join(
        output_dir, 'data_mutations_extended.txt')
    output_data_mutations_uncalled = os.path.join(
        output_dir, 'data_mutations_uncalled.txt')
    expected_output = {
        'called_file': OFile(name='data_mutations_extended.txt', dir=output_dir, hash='e7430656d9fcbce36fa57eb92460db57742168ae', size=347254),
        'uncalled_file': OFile(name='data_mutations_uncalled.txt', dir=output_dir, hash='58129786cc299011202eb078734b3ff513d54081', size=287883),
    }
    self.maxDiff = None
    self.assertCWLDictEqual(output_json, expected_output)
    # 253 called + 222 uncalled == the 475 input mutations
    _, mutations = self.load_mutations(output_data_mutations_extended, strip=True)
    self.assertEqual(len(mutations), 253)
    _, mutations = self.load_mutations(output_data_mutations_uncalled, strip=True)
    self.assertEqual(len(mutations), 222)
def test_add_impact_1(self):
    """ Test that a maf file with is_in_IMPACT column comes out as expected """
    self.maxDiff = None
    input_maf = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.muts.maf")
    self.input = {
        "input_file": {"class": "File", "path": input_maf},
        "output_filename": 'output.maf',
        "IMPACT_file": {"class": "File", "path": IMPACT_FILE},
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        'IMPACT_col_added_file': OFile(name="output.maf", size=15629589, hash="1397fade2f877c2bcfca791407e328c5c48e6ff0", dir=output_dir)
    }
    self.assertCWLDictEqual(output_json, expected_output)
    # validate output mutation file contents
    with open(expected_output['IMPACT_col_added_file']['path']) as fin:
        output_maf_lines = len(fin.readlines())
    self.assertEqual(output_maf_lines, 12518)
    # input comments are not needed here; only the mutation records are compared
    _, input_mutations = self.load_mutations(input_maf)
    output_comments, output_mutations = self.load_mutations(
        expected_output['IMPACT_col_added_file']['path'])
    true_count = [row['is_in_impact'] for row in output_mutations].count('True')
    false_count = [row['is_in_impact'] for row in output_mutations].count('False')
    # use assertEqual (not assertTrue on a comparison) so failures show the
    # actual counts in the error message
    self.assertEqual(true_count, 8367)
    self.assertEqual(false_count, 4147)
    # check that its got two extra columns in the output
    self.assertEqual(len(input_mutations[1]) + 2, len(output_mutations[1]))
def test_filter_maf_file_cols(self):
    """ Filter columns in a tiny demo maf file """
    # keep the comments; foo_value column should be removed in output
    maf_lines = [
        ['# comment 1'],
        ['# comment 2'],
        ['Hugo_Symbol', 'foo_value'],
        ['SUFU', '1'],
        ['GOT1', '2'],
    ]
    # run the script in a temporary directory
    input_maf_file = self.write_table(tmpdir=self.tmpdir, filename='input.maf', lines=maf_lines)
    self.input = {
        "input_file": {"class": "File", "path": input_maf_file},
        "output_filename": "output.maf"
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        'output_file': OFile(name='output.maf', hash="e55f7bdaa146f37b48d6c920ed27184e394ef1e6", size=46, dir=output_dir)
    }
    self.assertCWLDictEqual(output_json, expected_output)
    output_path = expected_output['output_file']['path']
    # validate number of lines output
    with open(output_path) as fin:
        self.assertEqual(len(fin.readlines()), 5)
    # validate file contents: comments preserved, only Hugo_Symbol remains
    self.assertMutFileContains(
        filepath=output_path,
        expected_comments=['# comment 1', '# comment 2'],
        expected_mutations=[
            {'Hugo_Symbol': 'SUFU'},
            {'Hugo_Symbol': 'GOT1'},
        ],
        identical=True)
def test_put_two_files_in_dir(self):
    """ Test that two files are put in the dir correctly """
    tmp_files = [self.mkstemp(prefix="1."), self.mkstemp(prefix="2.")]
    # create input data
    self.input = {
        "output_directory_name": "foo",
        "files": [{"class": "File", "path": path} for path in tmp_files]
    }
    output_json, output_dir = self.run_cwl()
    # both empty temp files should land in the 'foo' directory; the hash is
    # the well-known sha1 of zero bytes
    expected_output = {
        "directory": ODir(name="foo", dir=output_dir, items=[
            OFile(name=os.path.basename(path), size=0, hash='da39a3ee5e6b4b0d3255bfef95601890afd80709')
            for path in tmp_files
        ])
    }
    self.assertCWLDictEqual(output_json, expected_output)
def test_concat2(self):
    """ Test concat when multiple files have comments """
    # write two dummy input files, each starting with its own comment line
    file_specs = [
        ("input1.txt", ["# comment 1 here", "HEADER", "foo1", "bar1"]),
        ("input2.txt", ["# comment 2 here", "HEADER", "foo2", "bar2"]),
    ]
    input_files = []
    for filename, lines in file_specs:
        filepath = os.path.join(self.tmpdir, filename)
        with open(filepath, "w") as fout:
            fout.write('\n'.join(lines) + '\n')
        input_files.append({"class": "File", "path": filepath})
    self.input = {
        "input_files": input_files,
        "comment_label": "comment_label",
        "comment_value": "comment_value"
    }
    output_json, output_dir = self.run_cwl()
    expected_output = {
        'output_file': OFile(name='output.txt', size=91, hash='5dbce16f9bfef135d6b8288b16350351a33998f3', dir=output_dir)
    }
    self.assertCWLDictEqual(output_json, expected_output)
    # all original comments are kept, the labeled comment is appended, one
    # HEADER survives, and the data lines follow in input-file order
    self.assertFileLinesEqual(expected_output['output_file']['path'], [
        '# comment 1 here',
        '# comment 2 here',
        '#comment_label: comment_value',
        "HEADER",
        'foo1',
        'bar1',
        'foo2',
        'bar2',
    ])