def test_add_impact_1(self): """ Test that a maf file with is_in_IMPACT column comes out as expected """ self.maxDiff = None input_maf = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.muts.maf") with TemporaryDirectory() as tmpdir: input_json = { "input_file": { "class": "File", "path": input_maf }, "output_filename": 'output.maf', "IMPACT_file": { "class": "File", "path": IMPACT_FILE }, } output_json, output_dir = run_cwl(testcase=self, tmpdir=tmpdir, input_json=input_json, cwl_file=cwl_file) expected_output = { 'IMPACT_col_added_file': { 'location': 'file://' + os.path.join(output_dir, 'output.maf'), 'basename': 'output.maf', 'class': 'File', 'checksum': 'sha1$1397fade2f877c2bcfca791407e328c5c48e6ff0', 'size': 15629589, 'path': os.path.join(output_dir, 'output.maf') } } self.assertDictEqual(output_json, expected_output) # validate output mutation file contents with open(output_json['IMPACT_col_added_file']['path']) as fin: output_maf_lines = len(fin.readlines()) self.assertEqual(output_maf_lines, 12518) input_comments, input_mutations = load_mutations(input_maf) output_comments, output_mutations = load_mutations( output_json['IMPACT_col_added_file']['path']) true_count = [row['is_in_impact'] for row in output_mutations].count('True') false_count = [row['is_in_impact'] for row in output_mutations].count('False') self.assertTrue(true_count == 8367) self.assertTrue(false_count == 4147) # check that its got two extra columns in the output self.assertTrue( len(input_mutations[1]) + 2 == len(output_mutations[1]))
def test_add_af(self): """ Test IMPACT CWL with tiny dataset """ maf_lines = [ ['# comment 1'], ['# comment 2'], ['Hugo_Symbol', 't_depth', 't_alt_count'], ['SUFU', '100', '75'], ['GOT1', '100', '1'], ['SOX9', '100', '0'], ] with TemporaryDirectory() as tmpdir: input_maf = write_table(tmpdir=tmpdir, filename='input.maf', lines=maf_lines) input_json = { "input_file": { "class": "File", "path": input_maf }, "output_filename": 'output.maf', } output_json, output_dir = run_cwl(testcase=self, tmpdir=tmpdir, input_json=input_json, cwl_file=cwl_file) expected_output = { 'output_file': { 'location': 'file://' + os.path.join(output_dir, 'output.maf'), 'basename': 'output.maf', 'class': 'File', 'checksum': 'sha1$39de59ad5d736db692504012ce86d3395685112e', 'size': 109, 'path': os.path.join(output_dir, 'output.maf') } } self.assertDictEqual(output_json, expected_output) comments, mutations = load_mutations( output_json['output_file']['path']) expected_comments = ['# comment 1', '# comment 2'] self.assertEqual(comments, expected_comments) expected_mutations = [{ 'Hugo_Symbol': 'SUFU', 't_depth': '100', 't_alt_count': '75', 't_af': '0.75' }, { 'Hugo_Symbol': 'GOT1', 't_depth': '100', 't_alt_count': '1', 't_af': '0.01' }, { 'Hugo_Symbol': 'SOX9', 't_depth': '100', 't_alt_count': '0', 't_af': '0.0' }] self.assertEqual(mutations, expected_mutations)
def test_add_impact_0(self): """ Test IMPACT CWL with tiny dataset """ maf_lines = [['# comment 1'], ['# comment 2'], ['Hugo_Symbol'], ['SUFU'], ['GOT1'], ['BRCA']] impact_lines = [['BRCA', 'IMPACT468'], ['SUFU', 'IMPACT468'], ['SUFU', 'IMPACT505']] with TemporaryDirectory() as tmpdir: input_maf = write_table(tmpdir=tmpdir, filename='input.maf', lines=maf_lines) impact_file = write_table(tmpdir=tmpdir, filename='impact.txt', lines=impact_lines) input_json = { "input_file": { "class": "File", "path": input_maf }, "output_filename": 'output.maf', "IMPACT_file": { "class": "File", "path": impact_file }, } output_json, output_dir = run_cwl(testcase=self, tmpdir=tmpdir, input_json=input_json, cwl_file=cwl_file) expected_output = { 'IMPACT_col_added_file': { 'location': 'file://' + os.path.join(output_dir, 'output.maf'), 'basename': 'output.maf', 'class': 'File', 'checksum': 'sha1$5c61f3977dad29ebc74966e8fc40a0278f9aab12', 'size': 126, 'path': os.path.join(output_dir, 'output.maf') } } self.assertDictEqual(output_json, expected_output) comments, mutations = load_mutations( output_json['IMPACT_col_added_file']['path']) expected_comments = ['# comment 1', '# comment 2'] self.assertEqual(comments, expected_comments) expected_mutations = [{ 'Hugo_Symbol': 'SUFU', 'is_in_impact': 'True', 'impact_assays': 'IMPACT468,IMPACT505' }, { 'Hugo_Symbol': 'GOT1', 'is_in_impact': 'False', 'impact_assays': '.' }, { 'Hugo_Symbol': 'BRCA', 'is_in_impact': 'True', 'impact_assays': 'IMPACT468' }] self.assertEqual(mutations, expected_mutations)
def test_run_worflow_two_mafs(self): """ Test that the workflow works correctly when run with two maf files """ self.skipTest("Fix jenkins error") data_clinical_file = os.path.join( DATA_SETS['Proj_08390_G']['INPUTS_DIR'], "Proj_08390_G_sample_data_clinical.txt") sample_summary_file = os.path.join(DATA_SETS['Proj_08390_G']['QC_DIR'], "Proj_08390_G_SampleSummary.txt") self.input = { "project_id": "Proj_08390_G", "project_name": "Proj_08390_G", "project_short_name": "Proj_08390_G", "project_description": "project", "project_pi": "Dr. Jones", "request_pi": "Dr. Franklin", "is_impact": True, "argos_version_string": "2.x", "cancer_type": "MEL", "cancer_study_identifier": 'Proj_08390_G', "analysis_gene_cna_filename": "Proj_08390_G.gene.cna.txt", "analysis_mutations_filename": "Proj_08390_G.muts.maf", "analysis_mutations_share_filename": "Proj_08390_G.muts.share.maf", "analysis_segment_cna_filename": "Proj_08390_G.seg.cna.txt", "analysis_sv_filename": "Proj_08390_G.svs.maf", "cbio_meta_cna_segments_filename": "Proj_08390_G_meta_cna_hg19_seg.txt", "cbio_segment_data_filename": "Proj_08390_G_data_cna_hg19.seg", "helix_filter_version": "20.06.1", 'IMPACT_gene_list': { "path": IMPACT_FILE, "class": "File" }, "data_clinical_file": { "path": data_clinical_file, "class": "File" }, "sample_summary_file": { "path": sample_summary_file, "class": "File" }, "targets_list": { "path": DATA_SETS['Proj_08390_G']["targets_list"], "class": "File" }, "known_fusions_file": { "path": KNOWN_FUSIONS_FILE, "class": "File" }, "mutation_maf_files": [{ "path": os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.muts.maf"), "class": "File" }, { "path": os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample4.Sample3.muts.maf"), "class": "File" }], "mutation_svs_txt_files": [{ "path": os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.svs.pass.vep.portal.txt"), "class": "File" }, { "path": os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample4.Sample3.svs.pass.vep.portal.txt"), "class": "File" }], "mutation_svs_maf_files": [{ "path": os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.svs.pass.vep.maf"), "class": "File" }, { "path": os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample4.Sample3.svs.pass.vep.maf"), "class": "File" }], "facets_hisens_cncf_files": [{ "path": os.path.join( DATA_SETS['Proj_08390_G']['FACETS_DIR'], "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens.cncf.txt" ), "class": "File" }, { "path": os.path.join( DATA_SETS['Proj_08390_G']['FACETS_DIR'], "Sample3.rg.md.abra.printreads__Sample4.rg.md.abra.printreads_hisens.cncf.txt" ), "class": "File" }], "facets_hisens_seg_files": [{ "path": os.path.join( DATA_SETS['Proj_08390_G']['FACETS_DIR'], "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens.seg" ), "class": "File" }, { "path": os.path.join( DATA_SETS['Proj_08390_G']['FACETS_DIR'], "Sample3.rg.md.abra.printreads__Sample4.rg.md.abra.printreads_hisens.seg" ), "class": "File" }] } output_json, output_dir = self.run_cwl() self.maxDiff = None expected_output = { 'analysis_dir': ODir(name='analysis', items=[ OFile(name='Proj_08390_G.gene.cna.txt', size=173982, hash='ab17d587ad5ae0a87fd6c6d4dd2d5d1701208ce9'), OFile(name='Proj_08390_G.muts.maf', size=54458, hash='d4352ee2b702877b84db2b632972ccad2441f3e0'), OFile(name='Proj_08390_G.muts.share.maf', size=10956, hash='086ce6517eae68e47160c8740c5f00d7c3454110'), OFile(name='Proj_08390_G.seg.cna.txt', size=3191, hash='f6a77b280c047a7e2082e3a09e8138f861790d3a'), OFile(name='Proj_08390_G.svs.maf', size=35595, hash='5c2a63fc01980550108e58079a8b689d53c97d8c') ], dir=output_dir), 'portal_dir': ODir( name='portal', items=[ OFile(name='meta_clinical_sample.txt', size=140, hash='4c567d81c3b17a76c324fd3e2f73793a6e804f65'), OFile(name='data_clinical_patient.txt', size=643, hash='9417dcabddd6ab2cbe98167bccd9b9e4fa182562'), OFile(name='data_clinical_sample.txt', size=7592, hash='2a0c59593fa7726743b2fe46db9d955dbc625453'), OFile(name='meta_study.txt', size=152, hash='2b0a5fd1a97329adf7c3b1596c84cd6567059a95'), OFile(name='meta_clinical_patient.txt', size=142, hash='9cdc9a7e44a230c012f48b0236bdcf0bbc7de67f'), OFile(name='meta_CNA.txt', size=270, hash='a9bf16f6a0490b19e611e8814b85f7bf1d52417a'), OFile(name='meta_fusions.txt', size=227, hash='77649e888bafc6a4ed61261d1c46d2f238e1c32b'), OFile(name='meta_mutations_extended.txt', size=253, hash='fd04fcd0129b35bb8b8aaef57b2efa16b8f42e1d'), OFile(name='Proj_08390_G_meta_cna_hg19_seg.txt', size=200, hash='59b54d3cd81acdd9fc21df1dc05a71cebfbfe11e'), OFile(name='data_CNA.txt', size=6784, hash='09b4d944e50ea9d0e7567e04ce55b0f21d281255'), OFile(name='data_CNA.ascna.txt', size=8789, hash='d93ffe83137d9a77e2420b40ab3a2e0a1a5ad069'), OFile(name='data_mutations_extended.txt', size=7539, hash='43469aa0f9125d3dca6217ee02641638c3a92e24'), OFile(name='Proj_08390_G_data_cna_hg19.seg', size=3191, hash='f6a77b280c047a7e2082e3a09e8138f861790d3a'), OFile(name='data_fusions.txt', size=99, hash='c16f763b248813fcdde76f7486f1ddc4e9856038'), ODir( name='case_lists', items=[ OFile( name='cases_all.txt', size=616, hash='b9e43289cec5603b0886b5e8507c8d019387c125' ), OFile( name='cases_cnaseq.txt', size=696, hash='b87e2da8dce0fddbadec348efe2986519b2a794b' ), OFile( name='cases_cna.txt', size=628, hash='053481a8299e9430117f8e45e081aa7ec21033a6' ), OFile( name='cases_sequenced.txt', size=641, hash='ef9f5aef03c2527bf576470168660557ca1c7cc9' ) ]) ], dir=output_dir) } self.assertCWLDictEqual(output_json, expected_output) comments, mutations = load_mutations( os.path.join(output_dir, 'analysis', 'Proj_08390_G.muts.maf')) self.assertEqual(len(mutations), 34) comments, mutations = load_mutations( os.path.join(output_dir, 'portal', 'data_mutations_extended.txt')) self.assertEqual(len(mutations), 27) # TODO: fix the Facets column headers so that this passes # load the data_CNA.txt file path = os.path.join(output_dir, 'portal/data_CNA.txt' ) # renamed from the data_CNA.scna.txt file ... with open(path) as f: header = next(f) header_parts = header.split() expected_header_parts = [ 'Hugo_Symbol', 's_C_VJ7F47_P001_d', 's_C_X50T9Y_P001_d' ] self.assertEqual(header_parts, expected_header_parts) path = os.path.join(output_dir, 'portal/data_CNA.ascna.txt') with open(path) as f: header = next(f) header_parts = header.split() expected_header_parts = [ 'Hugo_Symbol', 's_C_VJ7F47_P001_d', 's_C_X50T9Y_P001_d' ] self.assertEqual(header_parts, expected_header_parts)
def test_run_worflow_one_maf(self): """ Test that the workflow works correctly when run with a single maf """ self.skipTest("Fix jenkins error") data_clinical_file = os.path.join( DATA_SETS['Proj_08390_G']['INPUTS_DIR'], "Proj_08390_G_sample_data_clinical.txt") sample_summary_file = os.path.join(DATA_SETS['Proj_08390_G']['QC_DIR'], "Proj_08390_G_SampleSummary.txt") self.input = { "project_id": "Proj_08390_G", "project_name": "Proj_08390_G", "project_short_name": "Proj_08390_G", "project_description": "project", "project_pi": "Dr. Jones", "request_pi": "Dr. Franklin", "is_impact": True, "argos_version_string": "2.x", "cancer_type": "MEL", "cancer_study_identifier": 'Proj_08390_G', "analysis_gene_cna_filename": "Proj_08390_G.gene.cna.txt", "analysis_mutations_filename": "Proj_08390_G.muts.maf", "analysis_mutations_share_filename": "Proj_08390_G.muts.share.maf", "analysis_segment_cna_filename": "Proj_08390_G.seg.cna.txt", "analysis_sv_filename": "Proj_08390_G.svs.maf", "cbio_meta_cna_segments_filename": "Proj_08390_G_meta_cna_hg19_seg.txt", "cbio_segment_data_filename": "Proj_08390_G_data_cna_hg19.seg", "helix_filter_version": "20.06.1", 'IMPACT_gene_list': { "path": IMPACT_FILE, "class": "File" }, "data_clinical_file": { "path": data_clinical_file, "class": "File" }, "sample_summary_file": { "path": sample_summary_file, "class": "File" }, "targets_list": { "path": DATA_SETS['Proj_08390_G']["targets_list"], "class": "File" }, "known_fusions_file": { "path": KNOWN_FUSIONS_FILE, "class": "File" }, "mutation_maf_files": [{ "path": os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.muts.maf"), "class": "File" }], "mutation_svs_txt_files": [{ "path": os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.svs.pass.vep.portal.txt"), "class": "File" }], "mutation_svs_maf_files": [{ "path": os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.svs.pass.vep.maf"), "class": "File" }], "facets_hisens_cncf_files": [{ "path": os.path.join( DATA_SETS['Proj_08390_G']['FACETS_DIR'], "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens.cncf.txt" ), "class": "File" }], "facets_hisens_seg_files": [{ "path": os.path.join( DATA_SETS['Proj_08390_G']['FACETS_DIR'], "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens.seg" ), "class": "File" }], } output_json, output_dir = self.run_cwl() expected_output = { 'analysis_dir': ODir(name='analysis', items=[ OFile(name='Proj_08390_G.gene.cna.txt', size=87905, hash='7cc89d24556de93b9a409812317581e67e5df494'), OFile(name='Proj_08390_G.muts.maf', size=33243, hash='2c8904927a917d6e935ef207582d995680574d16'), OFile(name='Proj_08390_G.muts.share.maf', size=7462, hash='b5af4e0fcd89fecabf8095aa3d7690e5edb8dca1'), OFile(name='Proj_08390_G.seg.cna.txt', size=1632, hash='f0ebb82c34b6530447fa1e70b6dedcc039840d61'), OFile(name='Proj_08390_G.svs.maf', size=23603, hash='df420706bb5b772a79317843c0a01a3c88a9571d') ], dir=output_dir), 'portal_dir': ODir( name='portal', items=[ OFile(name='meta_clinical_sample.txt', size=140, hash='4c567d81c3b17a76c324fd3e2f73793a6e804f65'), OFile(name='data_clinical_patient.txt', size=643, hash='9417dcabddd6ab2cbe98167bccd9b9e4fa182562'), OFile(name='data_clinical_sample.txt', size=7592, hash='2a0c59593fa7726743b2fe46db9d955dbc625453'), OFile(name='meta_study.txt', size=152, hash='2b0a5fd1a97329adf7c3b1596c84cd6567059a95'), OFile(name='meta_clinical_patient.txt', size=142, hash='9cdc9a7e44a230c012f48b0236bdcf0bbc7de67f'), OFile(name='meta_CNA.txt', size=270, hash='a9bf16f6a0490b19e611e8814b85f7bf1d52417a'), OFile(name='meta_fusions.txt', size=227, hash='77649e888bafc6a4ed61261d1c46d2f238e1c32b'), OFile(name='meta_mutations_extended.txt', size=253, hash='fd04fcd0129b35bb8b8aaef57b2efa16b8f42e1d'), OFile(name='Proj_08390_G_meta_cna_hg19_seg.txt', size=200, hash='59b54d3cd81acdd9fc21df1dc05a71cebfbfe11e'), OFile(name='data_CNA.txt', size=5365, hash='931d82412733d7f93dd4117cd955f35e5dcbacc1'), OFile(name='data_CNA.ascna.txt', size=6164, hash='452d5ddef12a44693d5a98a05f5d300801734cfe'), OFile(name='data_mutations_extended.txt', size=5106, hash='e713516cf04750a3e3f1ef932b1c7202d4b75bf2'), OFile(name='Proj_08390_G_data_cna_hg19.seg', size=1632, hash='f0ebb82c34b6530447fa1e70b6dedcc039840d61'), OFile(name='data_fusions.txt', size=99, hash='c16f763b248813fcdde76f7486f1ddc4e9856038'), ODir( name='case_lists', items=[ OFile( name='cases_all.txt', size=616, hash='b9e43289cec5603b0886b5e8507c8d019387c125' ), OFile( name='cases_cnaseq.txt', size=696, hash='b87e2da8dce0fddbadec348efe2986519b2a794b' ), OFile( name='cases_cna.txt', size=628, hash='053481a8299e9430117f8e45e081aa7ec21033a6' ), OFile( name='cases_sequenced.txt', size=641, hash='ef9f5aef03c2527bf576470168660557ca1c7cc9' ) ]) ], dir=output_dir) } self.assertCWLDictEqual(output_json, expected_output) comments, mutations = load_mutations( os.path.join(output_dir, 'analysis', 'Proj_08390_G.muts.maf')) self.assertEqual(len(mutations), 22) comments, mutations = load_mutations( os.path.join(output_dir, 'portal', 'data_mutations_extended.txt')) self.assertEqual(len(mutations), 17) # load the data_CNA.txt file path = os.path.join(output_dir, 'portal/data_CNA.txt' ) # renamed from the data_CNA.scna.txt file ... with open(path) as f: header = next(f) header_parts = header.split() expected_header_parts = ['Hugo_Symbol', 's_C_VJ7F47_P001_d'] self.assertEqual(header_parts, expected_header_parts) path = os.path.join(output_dir, 'portal/data_CNA.ascna.txt') with open(path) as f: header = next(f) header_parts = header.split() expected_header_parts = ['Hugo_Symbol', 's_C_VJ7F47_P001_d'] self.assertEqual(header_parts, expected_header_parts)
def test_run_worflow_mixed_mafs(self): """ Test that the workflow works correctly when run with a mix of Argos muts.maf files and Facets Suite annotated maf files The Facets Suite maf files have extra columns that need to be retained in the output """ input_json = { "is_impact": True, "argos_version_string": "2.x", "analysis_gene_cna_filename": "Proj_08390_G.gene.cna.txt", "analysis_mutations_filename": "Proj_08390_G.muts.maf", "analysis_mutations_share_filename": "Proj_08390_G.muts.share.maf", "analysis_segment_cna_filename": "Proj_08390_G.seg.cna.txt", "analysis_sv_filename": "Proj_08390_G.svs.maf", "helix_filter_version": "20.06.1", "IMPACT_gene_list": { "class": "File", "path": IMPACT_FILE }, "targets_list": { "path": DATA_SETS['Proj_08390_G']["targets_list"], "class": "File" }, "mutation_maf_files": [{ "path": os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.muts.maf"), "class": "File" }, { "path": os.path.join(DATA_SETS['Proj_08390_G']['FACETS_SUITE_DIR'], "Sample4.Sample3_hisens.ccf.portal.maf"), "class": "File" }], "mutation_svs_maf_files": [{ "path": os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.svs.pass.vep.maf"), "class": "File" }, { "path": os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample4.Sample3.svs.pass.vep.maf"), "class": "File" }], "facets_hisens_cncf_files": [{ "path": os.path.join( DATA_SETS['Proj_08390_G']['FACETS_DIR'], "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens.cncf.txt" ), "class": "File" }, { "path": os.path.join( DATA_SETS['Proj_08390_G']['FACETS_DIR'], "Sample3.rg.md.abra.printreads__Sample4.rg.md.abra.printreads_hisens.cncf.txt" ), "class": "File" }], "facets_hisens_seg_files": [{ "path": os.path.join( DATA_SETS['Proj_08390_G']['FACETS_DIR'], "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens.seg" ), "class": "File" }, { "path": os.path.join( DATA_SETS['Proj_08390_G']['FACETS_DIR'], "Sample3.rg.md.abra.printreads__Sample4.rg.md.abra.printreads_hisens.seg" ), "class": "File" }] } with TemporaryDirectory() as tmpdir: output_json, output_dir = run_cwl(testcase=self, tmpdir=tmpdir, input_json=input_json, cwl_file=cwl_file) expected_output = { 'analysis_dir': { 'class': 'Directory', 'basename': 'analysis', 'location': 'file://' + os.path.join(output_dir, 'analysis'), 'path': os.path.join(output_dir, 'analysis'), 'listing': [{ 'location': 'file://' + os.path.join( output_dir, 'analysis/Proj_08390_G.gene.cna.txt'), 'basename': 'Proj_08390_G.gene.cna.txt', 'class': 'File', 'checksum': 'sha1$ab17d587ad5ae0a87fd6c6d4dd2d5d1701208ce9', 'size': 173982, 'path': os.path.join(output_dir, 'analysis/Proj_08390_G.gene.cna.txt') }, { 'location': 'file://' + os.path.join( output_dir, 'analysis/Proj_08390_G.muts.maf'), 'basename': 'Proj_08390_G.muts.maf', 'class': 'File', 'checksum': 'sha1$66a87cb8cc2eea31f490852d468bedd958c4ecc5', 'size': 59915, 'path': os.path.join(output_dir, 'analysis/Proj_08390_G.muts.maf') }, { 'location': 'file://' + os.path.join(output_dir, 'analysis/Proj_08390_G.muts.share.maf'), 'basename': 'Proj_08390_G.muts.share.maf', 'class': 'File', 'checksum': 'sha1$cbaa23bb848978cde135efd3870db8f35b3f2861', 'size': 10729, 'path': os.path.join(output_dir, 'analysis/Proj_08390_G.muts.share.maf') }, { 'location': 'file://' + os.path.join( output_dir, 'analysis/Proj_08390_G.seg.cna.txt'), 'basename': 'Proj_08390_G.seg.cna.txt', 'class': 'File', 'checksum': 'sha1$f6a77b280c047a7e2082e3a09e8138f861790d3a', 'size': 3191, 'path': os.path.join(output_dir, 'analysis/Proj_08390_G.seg.cna.txt') }, { 'location': 'file://' + os.path.join( output_dir, 'analysis/Proj_08390_G.svs.maf'), 'basename': 'Proj_08390_G.svs.maf', 'class': 'File', 'checksum': 'sha1$5c2a63fc01980550108e58079a8b689d53c97d8c', 'size': 35595, 'path': os.path.join(output_dir, 'analysis/Proj_08390_G.svs.maf') }] } } self.maxDiff = None self.assertDictEqual(output_json, expected_output) comments, mutations = load_mutations( os.path.join(output_dir, 'analysis', 'Proj_08390_G.muts.maf')) self.assertEqual(len(mutations), 34) colnames = mutations[0].keys() some_required_colnames = [ "ASCN.TOTAL_COPY_NUMBER", "ASCN.MINOR_COPY_NUMBER", "ASCN.EXPECTED_ALT_COPIES", "ASCN.CCF_EXPECTED_COPIES", "ASCN.CCF_EXPECTED_COPIES_LOWER", "ASCN.CCF_EXPECTED_COPIES_UPPER", "ASCN.ASCN_METHOD", "ASCN.ASCN_INTEGER_COPY_NUMBER" ] for colname in some_required_colnames: self.assertTrue( colname in colnames, "Column label {} not present in the mutation file. Missing columns: {}" .format(colname, [ c for c in some_required_colnames if c not in colnames ])) self.assertEqual(mutations[0]['t_af'], '0.42953020134228187') self.assertEqual(mutations[0]['is_in_impact'], 'True') self.assertEqual(mutations[0]['impact_assays'], 'IMPACT341,IMPACT410,IMPACT468,IMPACT505')
def test_run_worflow_one_maf(self): """ Test that the workflow works correctly when run with a single maf """ input_json = { "is_impact": True, "argos_version_string": "2.x", "analysis_gene_cna_filename": "Proj_08390_G.gene.cna.txt", "analysis_mutations_filename": "Proj_08390_G.muts.maf", "analysis_mutations_share_filename": "Proj_08390_G.muts.share.maf", "analysis_segment_cna_filename": "Proj_08390_G.seg.cna.txt", "analysis_sv_filename": "Proj_08390_G.svs.maf", "helix_filter_version": "20.06.1", "IMPACT_gene_list": { "class": "File", "path": IMPACT_FILE }, "targets_list": { "path": DATA_SETS['Proj_08390_G']["targets_list"], "class": "File" }, "mutation_maf_files": [{ "path": os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.muts.maf"), "class": "File" }], "mutation_svs_maf_files": [{ "path": os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.svs.pass.vep.maf"), "class": "File" }], "facets_hisens_cncf_files": [{ "path": os.path.join( DATA_SETS['Proj_08390_G']['FACETS_DIR'], "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens.cncf.txt" ), "class": "File" }], "facets_hisens_seg_files": [{ "path": os.path.join( DATA_SETS['Proj_08390_G']['FACETS_DIR'], "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens.seg" ), "class": "File" }], } with TemporaryDirectory() as tmpdir: output_json, output_dir = run_cwl(testcase=self, tmpdir=tmpdir, input_json=input_json, cwl_file=cwl_file) expected_output = { 'analysis_dir': { 'class': 'Directory', 'basename': 'analysis', 'location': 'file://' + os.path.join(output_dir, 'analysis'), 'path': os.path.join(output_dir, 'analysis'), 'listing': [{ 'location': 'file://' + os.path.join( output_dir, 'analysis/Proj_08390_G.gene.cna.txt'), 'basename': 'Proj_08390_G.gene.cna.txt', 'class': 'File', 'checksum': 'sha1$7cc89d24556de93b9a409812317581e67e5df494', 'size': 87905, 'path': os.path.join(output_dir, 'analysis/Proj_08390_G.gene.cna.txt') }, { 'location': 'file://' + os.path.join( output_dir, 'analysis/Proj_08390_G.muts.maf'), 'basename': 'Proj_08390_G.muts.maf', 'class': 'File', 'checksum': 'sha1$2c8904927a917d6e935ef207582d995680574d16', 'size': 33243, 'path': os.path.join(output_dir, 'analysis/Proj_08390_G.muts.maf') }, { 'location': 'file://' + os.path.join(output_dir, 'analysis/Proj_08390_G.muts.share.maf'), 'basename': 'Proj_08390_G.muts.share.maf', 'class': 'File', 'checksum': 'sha1$b5af4e0fcd89fecabf8095aa3d7690e5edb8dca1', 'size': 7462, 'path': os.path.join(output_dir, 'analysis/Proj_08390_G.muts.share.maf') }, { 'location': 'file://' + os.path.join( output_dir, 'analysis/Proj_08390_G.seg.cna.txt'), 'basename': 'Proj_08390_G.seg.cna.txt', 'class': 'File', 'checksum': 'sha1$f0ebb82c34b6530447fa1e70b6dedcc039840d61', 'size': 1632, 'path': os.path.join(output_dir, 'analysis/Proj_08390_G.seg.cna.txt') }, { 'location': 'file://' + os.path.join( output_dir, 'analysis/Proj_08390_G.svs.maf'), 'basename': 'Proj_08390_G.svs.maf', 'class': 'File', 'checksum': 'sha1$df420706bb5b772a79317843c0a01a3c88a9571d', 'size': 23603, 'path': os.path.join(output_dir, 'analysis/Proj_08390_G.svs.maf') }] } } self.maxDiff = None self.assertDictEqual(output_json, expected_output) comments, mutations = load_mutations( os.path.join(output_dir, 'analysis', 'Proj_08390_G.muts.maf')) self.assertEqual(len(mutations), 22)
def test_run_worflow_two_mafs(self): """ Test that the workflow works correctly when run with two maf files """ input_json = { "is_impact": True, "argos_version_string": "2.x", "analysis_gene_cna_filename": "Proj_08390_G.gene.cna.txt", "analysis_mutations_filename": "Proj_08390_G.muts.maf", "analysis_mutations_share_filename": "Proj_08390_G.muts.share.maf", "analysis_segment_cna_filename": "Proj_08390_G.seg.cna.txt", "analysis_sv_filename": "Proj_08390_G.svs.maf", "helix_filter_version": "20.06.1", "IMPACT_gene_list": { "class": "File", "path": IMPACT_FILE }, "targets_list": { "path": DATA_SETS['Proj_08390_G']["targets_list"], "class": "File" }, "mutation_maf_files": [{ "path": os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.muts.maf"), "class": "File" }, { "path": os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample4.Sample3.muts.maf"), "class": "File" }], "mutation_svs_maf_files": [{ "path": os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.svs.pass.vep.maf"), "class": "File" }, { "path": os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample4.Sample3.svs.pass.vep.maf"), "class": "File" }], "facets_hisens_cncf_files": [{ "path": os.path.join( DATA_SETS['Proj_08390_G']['FACETS_DIR'], "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens.cncf.txt" ), "class": "File" }, { "path": os.path.join( DATA_SETS['Proj_08390_G']['FACETS_DIR'], "Sample3.rg.md.abra.printreads__Sample4.rg.md.abra.printreads_hisens.cncf.txt" ), "class": "File" }], "facets_hisens_seg_files": [{ "path": os.path.join( DATA_SETS['Proj_08390_G']['FACETS_DIR'], "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens.seg" ), "class": "File" }, { "path": os.path.join( DATA_SETS['Proj_08390_G']['FACETS_DIR'], "Sample3.rg.md.abra.printreads__Sample4.rg.md.abra.printreads_hisens.seg" ), "class": "File" }] } with TemporaryDirectory() as tmpdir: output_json, output_dir = run_cwl(testcase=self, tmpdir=tmpdir, input_json=input_json, cwl_file=cwl_file) expected_output = { 'analysis_dir': { 'class': 'Directory', 'basename': 'analysis', 'location': 'file://' + os.path.join(output_dir, 'analysis'), 'path': os.path.join(output_dir, 'analysis'), 'listing': [{ 'location': 'file://' + os.path.join( output_dir, 'analysis/Proj_08390_G.gene.cna.txt'), 'basename': 'Proj_08390_G.gene.cna.txt', 'class': 'File', 'checksum': 'sha1$ab17d587ad5ae0a87fd6c6d4dd2d5d1701208ce9', 'size': 173982, 'path': os.path.join(output_dir, 'analysis/Proj_08390_G.gene.cna.txt') }, { 'location': 'file://' + os.path.join( output_dir, 'analysis/Proj_08390_G.muts.maf'), 'basename': 'Proj_08390_G.muts.maf', 'class': 'File', 'checksum': 'sha1$d4352ee2b702877b84db2b632972ccad2441f3e0', 'size': 54458, 'path': os.path.join(output_dir, 'analysis/Proj_08390_G.muts.maf') }, { 'location': 'file://' + os.path.join(output_dir, 'analysis/Proj_08390_G.muts.share.maf'), 'basename': 'Proj_08390_G.muts.share.maf', 'class': 'File', 'checksum': 'sha1$086ce6517eae68e47160c8740c5f00d7c3454110', 'size': 10956, 'path': os.path.join(output_dir, 'analysis/Proj_08390_G.muts.share.maf') }, { 'location': 'file://' + os.path.join( output_dir, 'analysis/Proj_08390_G.seg.cna.txt'), 'basename': 'Proj_08390_G.seg.cna.txt', 'class': 'File', 'checksum': 'sha1$f6a77b280c047a7e2082e3a09e8138f861790d3a', 'size': 3191, 'path': os.path.join(output_dir, 'analysis/Proj_08390_G.seg.cna.txt') }, { 'location': 'file://' + os.path.join( output_dir, 'analysis/Proj_08390_G.svs.maf'), 'basename': 'Proj_08390_G.svs.maf', 'class': 'File', 'checksum': 'sha1$5c2a63fc01980550108e58079a8b689d53c97d8c', 'size': 35595, 'path': os.path.join(output_dir, 'analysis/Proj_08390_G.svs.maf') }] } } self.maxDiff = None self.assertDictEqual(output_json, expected_output) comments, mutations = load_mutations( os.path.join(output_dir, 'analysis', 'Proj_08390_G.muts.maf')) self.assertEqual(len(mutations), 34)