def test_concat_simple_file_with_header(self):
    """
    Test that a single file whose first line is a '#' comment has that
    comment line stripped and the remaining lines pass through unchanged
    """
    with TemporaryDirectory() as tmpdir:
        # make a dummy input file with a comment line followed by data lines
        input_lines = ["#header", "foo", "bar", "baz"]
        input_file = os.path.join(tmpdir, "input.txt")
        with open(input_file, "w") as fout:
            for line in input_lines:
                fout.write(line + '\n')
        input_json = {
            "input_files": [{
                "class": "File",
                "path": input_file
            }]
        }
        input_json_file = os.path.join(tmpdir, "input.json")
        with open(input_json_file, "w") as input_json_file_data:
            json.dump(input_json, input_json_file_data)
        output_dir = os.path.join(tmpdir, "output")
        tmp_dir = os.path.join(tmpdir, "tmp")
        cache_dir = os.path.join(tmpdir, "cache")
        command = [
            "cwl-runner", *CWL_ARGS,
            "--outdir", output_dir,
            "--tmpdir-prefix", tmp_dir,
            "--cachedir", cache_dir,
            cwl_file, input_json_file
        ]
        returncode, proc_stdout, proc_stderr = run_command(command)
        if returncode != 0:
            # surface the runner's error output to aid debugging on failure
            print(proc_stderr)
        self.assertEqual(returncode, 0)
        output_json = json.loads(proc_stdout)
        # check the contents of the concatenated file; the '#header' comment
        # line should be gone and only the data lines remain
        output_file = output_json['output_file']['path']
        with open(output_file) as fin:
            output_lines = [line.strip() for line in fin]
        self.assertEqual(output_lines, ["foo", "bar", "baz"])
        expected_output = {
            'output_file': {
                'location': 'file://' + os.path.join(output_dir, 'output.txt'),
                'basename': 'output.txt',
                'class': 'File',
                'checksum': 'sha1$0562f08aef399135936d6fb4eb0cc7bc1890d5b4',
                'size': 12,
                'path': os.path.join(output_dir, 'output.txt')
            }
        }
        self.assertDictEqual(output_json, expected_output)
def test_concat_two_mixed_files(self):
    """
    Test that two files, one with a comment and one without, are concatenated correctly
    """
    with TemporaryDirectory() as tmpdir:
        # write the two dummy input files; only the second starts with a comment
        lines_one = ["header1", "foo1", "bar1", "baz1"]
        lines_two = ["#comment2", "header2", "foo2", "bar2", "baz2"]
        file_one = os.path.join(tmpdir, "input1.txt")
        file_two = os.path.join(tmpdir, "input2.txt")
        for path, file_lines in ((file_one, lines_one), (file_two, lines_two)):
            with open(path, "w") as handle:
                handle.writelines(line + '\n' for line in file_lines)
        input_json = {
            "input_files": [
                {"class": "File", "path": file_one},
                {"class": "File", "path": file_two},
            ]
        }
        input_json_file = os.path.join(tmpdir, "input.json")
        with open(input_json_file, "w") as handle:
            json.dump(input_json, handle)
        output_dir = os.path.join(tmpdir, "output")
        tmp_dir = os.path.join(tmpdir, "tmp")
        cache_dir = os.path.join(tmpdir, "cache")
        command = [
            "cwl-runner", *CWL_ARGS,
            "--outdir", output_dir,
            "--tmpdir-prefix", tmp_dir,
            "--cachedir", cache_dir,
            cwl_file, input_json_file
        ]
        returncode, proc_stdout, proc_stderr = run_command(command)
        if returncode != 0:
            print(proc_stderr)
        self.assertEqual(returncode, 0)
        output_json = json.loads(proc_stdout)
        # the concatenated output keeps only the first file's header line;
        # the second file's comment and header are dropped
        output_file = output_json['output_file']['path']
        with open(output_file) as fin:
            output_lines = [line.strip() for line in fin]
        self.assertEqual(
            output_lines,
            ["header1", "foo1", "bar1", "baz1", "foo2", "bar2", "baz2"])
        expected_output = {
            'output_file': {
                'location': 'file://' + os.path.join(output_dir, 'output.txt'),
                'basename': 'output.txt',
                'class': 'File',
                'checksum': 'sha1$acaa1f09ca0678b8b7c136ce776c04efb6890f6a',
                'size': 38,
                'path': os.path.join(output_dir, 'output.txt')
            }
        }
        self.assertDictEqual(output_json, expected_output)
def test_concat_three_files_with_comments(self):
    """
    Test that a three files with headers are concatenated correctly
    Use three this time to clearly show the ordering of the output iteration
    """
    with TemporaryDirectory() as tmpdir:
        # build three dummy input files, each starting with a comment then a header
        all_input_lines = [
            ["#comment1", "header1", "foo1", "bar1", "baz1"],
            ["#comment2", "header2", "foo2", "bar2", "baz2"],
            ["#comment3", "header3", "foo3", "bar3", "baz3"],
        ]
        input_paths = []
        for index, file_lines in enumerate(all_input_lines, start=1):
            path = os.path.join(tmpdir, "input{}.txt".format(index))
            with open(path, "w") as handle:
                handle.writelines(line + '\n' for line in file_lines)
            input_paths.append(path)
        input_json = {
            "input_files": [{"class": "File", "path": path} for path in input_paths]
        }
        input_json_file = os.path.join(tmpdir, "input.json")
        with open(input_json_file, "w") as handle:
            json.dump(input_json, handle)
        output_dir = os.path.join(tmpdir, "output")
        tmp_dir = os.path.join(tmpdir, "tmp")
        cache_dir = os.path.join(tmpdir, "cache")
        command = [
            "cwl-runner", *CWL_ARGS,
            "--outdir", output_dir,
            "--tmpdir-prefix", tmp_dir,
            "--cachedir", cache_dir,
            cwl_file, input_json_file
        ]
        returncode, proc_stdout, proc_stderr = run_command(command)
        if returncode != 0:
            print(proc_stderr)
        self.assertEqual(returncode, 0)
        output_json = json.loads(proc_stdout)
        # only the first file's header should survive; all comment lines and
        # the later headers are dropped, data lines keep file order
        output_file = output_json['output_file']['path']
        with open(output_file) as fin:
            output_lines = [line.strip() for line in fin]
        expected_output_lines = [
            "header1", "foo1", "bar1", "baz1",
            "foo2", "bar2", "baz2",
            "foo3", "bar3", "baz3",
        ]
        self.assertEqual(output_lines, expected_output_lines)
        # TODO: update this once the above ^^^ passes
        expected_output = {
            'output_file': {
                'location': 'file://' + os.path.join(output_dir, 'output.txt'),
                'basename': 'output.txt',
                'class': 'File',
                'checksum': 'sha1$b115b7b40aa8a2e08e30a55abf60d742e05e62b4',
                'size': 53,
                'path': os.path.join(output_dir, 'output.txt')
            }
        }
        self.assertDictEqual(output_json, expected_output)
def test_concat1(self):
    """
    Test concat when original file has a comment line
    """
    with TemporaryDirectory() as tmpdir:
        # dummy input file: a comment, a header, then some data rows
        dummy_lines = ["# comment here", "HEADER", "foo", "bar", "baz"]
        input_file = os.path.join(tmpdir, "input.txt")
        with open(input_file, "w") as handle:
            handle.writelines(line + '\n' for line in dummy_lines)
        input_json = {
            "input_files": [{"class": "File", "path": input_file}],
            "comment_label": "comment_label",
            "comment_value": "comment_value"
        }
        input_json_file = os.path.join(tmpdir, "input.json")
        with open(input_json_file, "w") as handle:
            json.dump(input_json, handle)
        output_dir = os.path.join(tmpdir, "output")
        tmp_dir = os.path.join(tmpdir, "tmp")
        cache_dir = os.path.join(tmpdir, "cache")
        command = [
            "cwl-runner", *CWL_ARGS,
            "--outdir", output_dir,
            "--tmpdir-prefix", tmp_dir,
            "--cachedir", cache_dir,
            cwl_file, input_json_file
        ]
        returncode, proc_stdout, proc_stderr = run_command(command)
        if returncode != 0:
            print(proc_stderr)
        self.assertEqual(returncode, 0)
        output_json = json.loads(proc_stdout)
        # the original comment is kept and a new '#label: value' comment is
        # inserted before the header
        output_file = output_json['output_file']['path']
        with open(output_file) as fin:
            output_lines = [line.strip() for line in fin]
        expected_lines = [
            '# comment here',
            '#comment_label: comment_value',
            "HEADER",
            'foo',
            'bar',
            'baz'
        ]
        self.assertEqual(output_lines, expected_lines)
        expected_output = {
            'output_file': {
                'location': 'file://' + os.path.join(output_dir, 'output.txt'),
                'basename': 'output.txt',
                'class': 'File',
                'checksum': 'sha1$14ee1247f314dba1e3c28aa8aec9ff7b137a1f41',
                'size': 64,
                'path': os.path.join(output_dir, 'output.txt')
            }
        }
        self.assertDictEqual(output_json, expected_output)
def test_concat_two_files_with_headers(self):
    """
    Test that two files sharing the same header line are concatenated
    with the header emitted only once
    """
    with TemporaryDirectory() as tmpdir:
        # make two dummy input files with an identical header line
        input_lines1 = ["header", "foo1", "bar1", "baz1"]
        input_file1 = os.path.join(tmpdir, "input1.txt")
        with open(input_file1, "w") as fout:
            for line in input_lines1:
                fout.write(line + '\n')
        input_lines2 = ["header", "foo2", "bar2", "baz2"]
        input_file2 = os.path.join(tmpdir, "input2.txt")
        with open(input_file2, "w") as fout:
            for line in input_lines2:
                fout.write(line + '\n')
        input_json = {
            "input_files": [
                {"class": "File", "path": input_file1},
                {"class": "File", "path": input_file2},
            ]
        }
        input_json_file = os.path.join(tmpdir, "input.json")
        with open(input_json_file, "w") as input_json_file_data:
            json.dump(input_json, input_json_file_data)
        output_dir = os.path.join(tmpdir, "output")
        tmp_dir = os.path.join(tmpdir, "tmp")
        cache_dir = os.path.join(tmpdir, "cache")
        command = [
            "cwl-runner", *CWL_ARGS,
            "--outdir", output_dir,
            "--tmpdir-prefix", tmp_dir,
            "--cachedir", cache_dir,
            cwl_file, input_json_file
        ]
        returncode, proc_stdout, proc_stderr = run_command(command)
        if returncode != 0:
            # surface the runner's error output to aid debugging on failure
            print(proc_stderr)
        self.assertEqual(returncode, 0)
        output_json = json.loads(proc_stdout)
        # check the contents of the concatenated file; the shared header
        # appears once, followed by the data lines of both files in order
        output_file = output_json['output_file']['path']
        with open(output_file) as fin:
            output_lines = [line.strip() for line in fin]
        self.assertEqual(
            output_lines,
            ["header", "foo1", "bar1", "baz1", "foo2", "bar2", "baz2"])
        expected_output = {
            'output_file': {
                'location': 'file://' + os.path.join(output_dir, 'output.txt'),
                'basename': 'output.txt',
                'class': 'File',
                'checksum': 'sha1$34c3af14e6d21e295f22c77ed5e837b60501bae7',
                'size': 37,
                'path': os.path.join(output_dir, 'output.txt')
            }
        }
        self.assertDictEqual(output_json, expected_output)
def test_concat2(self):
    """
    Test concat when multiple files have comments
    """
    with TemporaryDirectory() as tmpdir:
        # two dummy input files, each starting with its own comment line
        lines_one = ["# comment 1 here", "HEADER", "foo1", "bar1"]
        lines_two = ["# comment 2 here", "HEADER", "foo2", "bar2"]
        file_one = os.path.join(tmpdir, "input1.txt")
        file_two = os.path.join(tmpdir, "input2.txt")
        for path, file_lines in ((file_one, lines_one), (file_two, lines_two)):
            with open(path, "w") as handle:
                handle.writelines(line + '\n' for line in file_lines)
        input_json = {
            "input_files": [
                {"class": "File", "path": file_one},
                {"class": "File", "path": file_two},
            ],
            "comment_label": "comment_label",
            "comment_value": "comment_value"
        }
        input_json_file = os.path.join(tmpdir, "input.json")
        with open(input_json_file, "w") as handle:
            json.dump(input_json, handle)
        output_dir = os.path.join(tmpdir, "output")
        tmp_dir = os.path.join(tmpdir, "tmp")
        cache_dir = os.path.join(tmpdir, "cache")
        command = [
            "cwl-runner", *CWL_ARGS,
            "--outdir", output_dir,
            "--tmpdir-prefix", tmp_dir,
            "--cachedir", cache_dir,
            cwl_file, input_json_file
        ]
        returncode, proc_stdout, proc_stderr = run_command(command)
        if returncode != 0:
            print(proc_stderr)
        self.assertEqual(returncode, 0)
        output_json = json.loads(proc_stdout)
        # expected layout: all original comments first, then the new
        # '#label: value' comment, then a single header and the data rows
        output_file = output_json['output_file']['path']
        with open(output_file) as fin:
            output_lines = [line.strip() for line in fin]
        expected_lines = [
            '# comment 1 here',
            '# comment 2 here',
            '#comment_label: comment_value',
            "HEADER",
            'foo1',
            'bar1',
            'foo2',
            'bar2'
        ]
        self.maxDiff = None
        self.assertEqual(output_lines, expected_lines)
        expected_output = {
            'output_file': {
                'location': 'file://' + os.path.join(output_dir, 'output.txt'),
                'basename': 'output.txt',
                'class': 'File',
                'checksum': 'sha1$5dbce16f9bfef135d6b8288b16350351a33998f3',
                'size': 91,
                'path': os.path.join(output_dir, 'output.txt')
            }
        }
        self.assertDictEqual(output_json, expected_output)
def test_facets_workflow(self):
    """
    Test that the facets workflow runs end to end on a single tumor/normal
    pair and emits the expected set of output files
    """
    # the smallest pair of input files (maf + snp pileup) in the test dataset
    pair_maf = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample24.Sample23.muts.maf")
    snp_pileup = os.path.join(DATA_SETS['Proj_08390_G']['SNP_PILEUP_DIR'], "Sample24.Sample23.snp_pileup.gz")
    input_json = {
        "pairs": [
            {
                "pair_maf": {"path": pair_maf, "class": "File"},
                "snp_pileup": {"path": snp_pileup, "class": "File"},
                "pair_id": "Sample24.Sample23",
                "normal_id": "Sample23",
                "tumor_id": "Sample24"
            }
        ]
    }
    with TemporaryDirectory() as tmpdir:
        input_json_file = os.path.join(tmpdir, "input.json")
        with open(input_json_file, "w") as json_out:
            json.dump(input_json, json_out)
        output_dir = os.path.join(tmpdir, "output")
        tmp_dir = os.path.join(tmpdir, "tmp")
        cache_dir = os.path.join(tmpdir, "cache")
        command = [
            "cwl-runner", *CWL_ARGS,
            "--outdir", output_dir,
            "--tmpdir-prefix", tmp_dir,
            "--cachedir", cache_dir,
            cwl_file, input_json_file
        ]
        returncode, proc_stdout, proc_stderr = run_command(command)
        if returncode != 0:
            # surface the runner's error output to aid debugging on failure
            print(proc_stderr)
        self.assertEqual(returncode, 0)
        output_json = json.loads(proc_stdout)
        self.maxDiff = None

        # all per-pair outputs land in this subdirectory
        pair_dir = os.path.join(output_dir, 'facets/Sample24.Sample23')

        def file_entry(filename, checksum, size):
            # build the CWL File description dict for one output file
            path = os.path.join(pair_dir, filename)
            return {
                'location': 'file://' + path,
                'basename': filename,
                'class': 'File',
                'checksum': checksum,
                'size': size,
                'path': path
            }

        # expected checksum and size for every file the workflow produces
        portal_maf = file_entry('Sample24.Sample23_hisens.ccf.portal.maf', 'sha1$d91a8e15c66429b09f1b7db41bc38bdfa0b84c64', 11996607)
        arm_level = file_entry('Sample24.arm_level.txt', 'sha1$df37c54ae4969257e436a7a7a595c42ef19ecbb5', 1824)
        facets_txt = file_entry('Sample24.txt', 'sha1$a0fb3df832efc18a66a8a54e5609666da5f4d7d7', 529)
        gene_level = file_entry('Sample24.gene_level.txt', 'sha1$4e916a52458151007486bf536acfff539fdc2ecc', 148195)
        hisens_cncf = file_entry('Sample24_hisens.cncf.txt', 'sha1$db9131a33889a1cac82e3bd6b3f0e5e182c65105', 5238)
        hisens_rds = file_entry('Sample24_hisens.rds', 'sha1$6bfd6c7f29c49ec8ef538dd468a3b4626b05bda2', 213986)
        hisens_seg = file_entry('Sample24_hisens.seg', 'sha1$652f9c6d0245af49bac6ca67a089af7d4e46801b', 1897)
        purity_rds = file_entry('Sample24_purity.rds', 'sha1$dd8b967f84b191ff76214c6110db8d0e65f6514c', 213356)
        purity_seg = file_entry('Sample24_purity.seg', 'sha1$591e6d8b432e1e910fe4fb4b1814508131f960c9', 1285)
        qc_txt = file_entry('Sample24.qc.txt', 'sha1$d4a36726a5fcb7b268aae02d97ce4e382e42d9f6', 1339)

        expected_output = {
            'annotated_maf': [portal_maf],
            'arm_level_txt': [arm_level],
            'facets_txt': [facets_txt],
            'failed_pairs': [],
            'gene_level_txt': [gene_level],
            'hisens_rds': [hisens_rds],
            'output_dir': {
                'basename': 'facets',
                'class': 'Directory',
                'listing': [
                    {
                        'basename': 'Sample24.Sample23',
                        'class': 'Directory',
                        # listing order matters for the dict comparison below
                        'listing': [
                            portal_maf,
                            arm_level,
                            facets_txt,
                            gene_level,
                            hisens_cncf,
                            hisens_rds,
                            hisens_seg,
                            purity_rds,
                            purity_seg,
                            qc_txt
                        ],
                        'location': 'file://' + pair_dir,
                        'path': pair_dir
                    }
                ],
                'location': 'file://' + os.path.join(output_dir, 'facets'),
                'path': os.path.join(output_dir, 'facets')
            },
            'hisens_seg': [hisens_seg],
            'purity_rds': [purity_rds],
            'purity_seg': [purity_seg],
            'qc_txt': [qc_txt],
            'hisens_cncf_txt': [hisens_cncf],
        }
        self.assertDictEqual(output_json, expected_output)
def test_run_facets_wrapper(self):
    """
    Test that the maf annotation step ('annotate_maf') runs on a maf plus a
    facets .rds file and emits the annotated maf and its stdout/stderr logs
    """
    input_maf = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.muts.maf")
    input_rds = os.path.join(DATA_SETS['Proj_08390_G']['FACETS_SUITE_DIR'], "Sample1_hisens.rds")
    input_json = {
        "maf_file": {"path": input_maf, "class": "File"},
        "facets_rds": {"path": input_rds, "class": "File"},
        "output_filename": "Sample1_hisens.ccf.maf"
    }
    with TemporaryDirectory() as tmpdir:
        input_json_file = os.path.join(tmpdir, "input.json")
        with open(input_json_file, "w") as json_out:
            json.dump(input_json, json_out)
        output_dir = os.path.join(tmpdir, "output")
        tmp_dir = os.path.join(tmpdir, "tmp")
        cache_dir = os.path.join(tmpdir, "cache")
        command = [
            "cwl-runner", *CWL_ARGS,
            "--outdir", output_dir,
            "--tmpdir-prefix", tmp_dir,
            "--cachedir", cache_dir,
            cwl_file, input_json_file
        ]
        returncode, proc_stdout, proc_stderr = run_command(command)
        if returncode != 0:
            # surface the runner's error output to aid debugging on failure
            print(proc_stderr)
        self.assertEqual(returncode, 0)
        output_json = json.loads(proc_stdout)
        expected_output = {
            'failed_txt': None,
            'output_file': {
                'location': 'file://' + os.path.join(output_dir, 'Sample1_hisens.ccf.maf'),
                'basename': 'Sample1_hisens.ccf.maf',
                'class': 'File',
                'checksum': 'sha1$7e478a8a44d27735f26e368989c672ed6ef5d52a',
                'size': 19217199,
                'path': os.path.join(output_dir, 'Sample1_hisens.ccf.maf')
            },
            'stderr_txt': {
                'basename': 'annotate_maf_stderr.txt',
                'checksum': 'sha1$2e672f99c23a2d827c1d33e06377870cdd9c8090',
                'class': 'File',
                'location': 'file://' + os.path.join(output_dir, 'annotate_maf_stderr.txt'),
                'path': os.path.join(output_dir, 'annotate_maf_stderr.txt'),
                'size': 105
            },
            'stdout_txt': {
                'basename': 'annotate_maf_stdout.txt',
                # sha1 of the empty string: stdout is expected to be empty
                'checksum': 'sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709',
                'class': 'File',
                'location': 'file://' + os.path.join(output_dir, 'annotate_maf_stdout.txt'),
                'path': os.path.join(output_dir, 'annotate_maf_stdout.txt'),
                'size': 0
            }
        }
        self.maxDiff = None
        self.assertDictEqual(output_json, expected_output)
def test_run_copy_number_one_file(self):
    """
    Test that Facets geneLevel copy number analysis step runs as expected
    with a single input file, then sanity-check the headers of each output
    """
    with TemporaryDirectory() as tmpdir:
        input_json = {
            "output_cna_filename": "data_CNA.txt",
            "targets_list": {
                "class": "File",
                "path": DATA_SETS['Proj_08390_G']['targets_list'],
            },
            "hisens_cncfs": [
                {
                    "class": "File",
                    "path": os.path.join(
                        DATA_SETS['Proj_08390_G']['FACETS_DIR'],
                        "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens.cncf.txt")
                }
            ],
        }
        input_json_file = os.path.join(tmpdir, "input.json")
        with open(input_json_file, "w") as input_json_file_data:
            json.dump(input_json, input_json_file_data)
        # note: previously output_dir was redundantly assigned twice; once is enough
        output_dir = os.path.join(tmpdir, "output")
        tmp_dir = os.path.join(tmpdir, "tmp")
        cache_dir = os.path.join(tmpdir, "cache")
        command = [
            "cwl-runner", *CWL_ARGS,
            "--outdir", output_dir,
            "--tmpdir-prefix", tmp_dir,
            "--cachedir", cache_dir,
            cwl_file, input_json_file
        ]
        returncode, proc_stdout, proc_stderr = run_command(command)
        if returncode != 0:
            # surface the runner's error output to aid debugging on failure
            print(proc_stderr)
        self.assertEqual(returncode, 0)
        output_json = json.loads(proc_stdout)
        expected_output = {
            'output_cna_file': {
                'location': 'file://' + os.path.join(output_dir, "data_CNA.txt"),
                'basename': "data_CNA.txt",
                'class': 'File',
                'checksum': 'sha1$7cc89d24556de93b9a409812317581e67e5df494',
                'size': 87905,
                'path': os.path.join(output_dir, "data_CNA.txt")
            },
            'output_cna_ascna_file': {
                'location': 'file://' + os.path.join(output_dir, "data_CNA.ascna.txt"),
                'basename': "data_CNA.ascna.txt",
                'class': 'File',
                'checksum': 'sha1$452d5ddef12a44693d5a98a05f5d300801734cfe',
                'size': 6164,
                'path': os.path.join(output_dir, "data_CNA.ascna.txt")
            },
            'output_cna_scna_file': {
                'location': 'file://' + os.path.join(output_dir, "data_CNA.scna.txt"),
                'basename': "data_CNA.scna.txt",
                'class': 'File',
                'checksum': 'sha1$8bec923ab1d622b4cf38ae042ac2416725650aed',
                'size': 5463,
                'path': os.path.join(output_dir, "data_CNA.scna.txt")
            }
        }
        self.maxDiff = None
        self.assertDictEqual(output_json, expected_output)
        # check the header row of the data_CNA.txt file
        path = output_json['output_cna_file']['path']
        with open(path) as f:
            header = next(f)
        header_parts = header.split()
        expected_header_parts = [
            'Tumor_Sample_Barcode', 'Hugo_Symbol', 'tcn', 'lcn', 'cf',
            'tcn.em', 'lcn.em', 'cf.em', 'chr', 'seg.start', 'seg.end',
            'frac_elev_major_cn', 'Nprobes', 'WGD', 'purity', 'FACETS_CALL',
            'ccs_filter', 'review', 'FACETS_CNA'
        ]
        self.assertEqual(header_parts, expected_header_parts)
        # the ascna and scna tables share the same two-column header
        for output_key in ('output_cna_ascna_file', 'output_cna_scna_file'):
            path = output_json[output_key]['path']
            with open(path) as f:
                header = next(f)
            header_parts = header.split()
            expected_header_parts = ['Hugo_Symbol', 's_C_VJ7F47_P001_d']
            self.assertEqual(header_parts, expected_header_parts)
def test_run_copy_number_two_files(self):
    """
    Test that Facets geneLevel copy number analysis step runs as expected
    with two input files
    """
    with TemporaryDirectory() as tmpdir:
        # NOTE(review): this test passes the filename under "portal_CNA_file"
        # while the single-file test uses "output_cna_filename" — confirm
        # which input key the CWL actually declares
        input_json = {
            "portal_CNA_file": "data_CNA.txt",
            "targets_list": {
                "class": "File",
                "path": DATA_SETS['Proj_08390_G']['targets_list'],
            },
            "hisens_cncfs": [
                {
                    "class": "File",
                    "path": os.path.join(
                        DATA_SETS['Proj_08390_G']['FACETS_DIR'],
                        "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens.cncf.txt")
                },
                {
                    "class": "File",
                    "path": os.path.join(
                        DATA_SETS['Proj_08390_G']['FACETS_DIR'],
                        "Sample9.rg.md.abra.printreads__Sample10.rg.md.abra.printreads_hisens.cncf.txt")
                }
            ],
        }
        input_json_file = os.path.join(tmpdir, "input.json")
        with open(input_json_file, "w") as input_json_file_data:
            json.dump(input_json, input_json_file_data)
        # note: previously output_dir was redundantly assigned twice; once is enough
        output_dir = os.path.join(tmpdir, "output")
        tmp_dir = os.path.join(tmpdir, "tmp")
        cache_dir = os.path.join(tmpdir, "cache")
        command = [
            "cwl-runner", *CWL_ARGS,
            "--outdir", output_dir,
            "--tmpdir-prefix", tmp_dir,
            "--cachedir", cache_dir,
            cwl_file, input_json_file
        ]
        returncode, proc_stdout, proc_stderr = run_command(command)
        if returncode != 0:
            # surface the runner's error output to aid debugging on failure
            print(proc_stderr)
        self.assertEqual(returncode, 0)
        output_json = json.loads(proc_stdout)
        expected_output = {
            'output_cna_file': {
                'location': 'file://' + os.path.join(output_dir, "data_CNA.txt"),
                'basename': "data_CNA.txt",
                'class': 'File',
                'checksum': 'sha1$6dfa53b8a0fad1156060476bcf445d959f0e6eb2',
                'size': 143118,
                'path': os.path.join(output_dir, "data_CNA.txt")
            },
            'output_cna_ascna_file': {
                'location': 'file://' + os.path.join(output_dir, "data_CNA.ascna.txt"),
                'basename': "data_CNA.ascna.txt",
                'class': 'File',
                'checksum': 'sha1$3953e55b3db85b69209982211c53b9d8f049dc01',
                'size': 8658,
                'path': os.path.join(output_dir, "data_CNA.ascna.txt")
            },
            'output_cna_scna_file': {
                'location': 'file://' + os.path.join(output_dir, "data_CNA.scna.txt"),
                'basename': "data_CNA.scna.txt",
                'class': 'File',
                'checksum': 'sha1$9ddcee42cce0d49aec5745303be480b6c4ef0fe8',
                'size': 6937,
                'path': os.path.join(output_dir, "data_CNA.scna.txt")
            }
        }
        self.maxDiff = None
        self.assertDictEqual(output_json, expected_output)