class TestSnpPileupCWL(PlutoTestCase):
    """Test case for the snp-pileup-wrapper CWL."""
    cwl_file = CWLFile('snp-pileup-wrapper.cwl')

    def test_snp_pileup1(self):
        """Run snp-pileup on a tumor/normal bam pair and check the output pileup file."""
        # the smallest pair of bam files in the test dataset
        bam_dir = DATA_SETS['Proj_08390_G']['BAM_DIR']
        self.input = {
            "snps_vcf": {"path": FACETS_SNPS_VCF, "class": "File"},
            "normal_bam": {"path": os.path.join(bam_dir, "Sample23.rg.md.abra.printreads.bam"), "class": "File"},
            "tumor_bam": {"path": os.path.join(bam_dir, "Sample24.rg.md.abra.printreads.bam"), "class": "File"},
            "output_prefix": "Sample24.Sample23",
        }
        output_json, output_dir = self.run_cwl()
        expected_output = {
            'output_file': OFile(
                name="Sample24.Sample23.snp_pileup.gz",
                size=34851004,
                hash="755a8b64f45c819b4e2c481e64bf2fe36d1f5361",
                dir=output_dir),
        }
        self.maxDiff = None
        self.assertCWLDictEqual(output_json, expected_output)
class TestFusionFilter(PlutoTestCase):
    """Test case for the fusion_filter CWL."""
    cwl_file = CWLFile('fusion_filter.cwl')

    def test_fusion_filter1(self):
        """Filter a fusions table against the known-fusions file."""
        fusion_file = os.path.join(
            DATA_SETS['Proj_08390_G']['MAF_DIR'],
            "Sample1.Sample2.svs.pass.vep.portal.txt")
        self.input = {
            "fusions_file": {"class": "File", "path": fusion_file},
            "output_filename": "data_fusions.txt",
            "known_fusions_file": {"class": "File", "path": KNOWN_FUSIONS_FILE},
        }
        output_json, output_dir = self.run_cwl()
        expected_output = {
            "output_file": OFile(
                name='data_fusions.txt',
                size=99,
                hash='c16f763b248813fcdde76f7486f1ddc4e9856038',
                dir=output_dir),
        }
        self.assertCWLDictEqual(output_json, expected_output)
class TestFusionToSV(PlutoTestCase):
    """Test case for the fusion_to_sv CWL."""
    cwl_file = CWLFile('fusion_to_sv.cwl')

    def test_fusion_to_sv(self):
        """Test fusion to sv conversion"""
        fusion_file = os.path.join(
            DATA_SETS['Proj_08390_G']['MAF_DIR'],
            "Sample1.Sample2.svs.pass.vep.portal.txt")
        self.input = {
            "fusion_file": {"class": "File", "path": fusion_file},
            "output_filename": "data_SV.txt",
        }
        output_json, output_dir = self.run_cwl()
        expected_output = {
            'output_file': OFile(
                name='data_SV.txt',
                size=1103,
                hash='02fda70b7838931321544f6797de4782adaf1a46',
                dir=output_dir),
        }
        self.maxDiff = None
        # NOTE(review): no 'report.html' entry appears in expected_output, so
        # this strip rule looks like a copy-paste leftover with no effect here
        # — confirm and remove if so
        strip_related_keys = [
            ('basename', 'report.html', ['size', 'checksum']),
        ]
        self.assertCWLDictEqual(output_json, expected_output, related_keys=strip_related_keys)
class TestMaf2VcfGz(PlutoTestCase):
    """Test case for the maf2vcf_gz workflow CWL."""
    cwl_file = CWLFile('maf2vcf_gz_workflow.cwl')

    def test_convert_maf_to_vcf(self):
        """Convert a single input maf file into a .vcf.gz with a .tbi"""
        self.maxDiff = None
        self.input = {
            "maf_file": {
                "class": "File",
                "path": os.path.join(self.DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample4.Sample3.muts.maf"),
            },
            "ref_fasta": {
                "class": "File",
                "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA'],
            },
        }
        output_json, output_dir = self.run_cwl()
        # .gz output is not byte-stable, so size and checksum are dropped from
        # both the main file and its .tbi index before comparing
        main_entry = output_json['output_file']
        tbi_entry = main_entry['secondaryFiles'][0]
        for entry in (main_entry, tbi_entry):
            entry.pop('checksum')
            entry.pop('size')
        vcf_path = os.path.join(output_dir, 'variants.vcf.gz')
        tbi_path = os.path.join(output_dir, 'variants.vcf.gz.tbi')
        expected_output = {
            'output_file': {
                'location': 'file://' + vcf_path,
                'basename': 'variants.vcf.gz',
                'class': 'File',
                'secondaryFiles': [{
                    'basename': 'variants.vcf.gz.tbi',
                    'location': 'file://' + tbi_path,
                    'class': 'File',
                    'path': tbi_path,
                }],
                'path': vcf_path,
            }
        }
        self.assertCWLDictEqual(output_json, expected_output)
class TestMergeBed(PlutoTestCase):
    """Test case for the mergebed CWL."""
    cwl_file = CWLFile('mergebed.cwl')

    def test_merge_bed(self):
        """Merge two bed files; shared regions are not collapsed in the output."""
        lines1 = [
            '1\t118166398\t118166398\n',
            '5\t35876484\t35876484\n',
            '17\t11998935\t11998935\n',
            '20\t62321135\t62321135\n',
        ]
        lines2 = [
            '5\t35876484\t35876484\n',   # in common
            '7\t116418998\t116418998\n',
            '7\t151845367\t151845367\n',
            '17\t11998935\t11998935\n',  # in common
        ]
        bed1 = os.path.join(self.tmpdir, "1.bed")
        bed2 = os.path.join(self.tmpdir, "2.bed")
        with open(bed1, "w") as fout:
            fout.writelines(lines1)
        with open(bed2, "w") as fout:
            fout.writelines(lines2)
        self.input = {
            "bed_files": [
                {"class": "File", "path": bed1},
                {"class": "File", "path": bed2},
            ]
        }
        output_json, output_dir = self.run_cwl()
        merged_path = os.path.join(output_dir, 'merged.bed')
        expected_output = {
            'output_file': {
                'location': 'file://' + merged_path,
                'basename': 'merged.bed',
                'class': 'File',
                'checksum': 'sha1$638f9f3ef43802b8e372c3cef3848b16f2af1c66',
                'size': 149,
                'path': merged_path,
            }
        }
        self.assertCWLDictEqual(output_json, expected_output)
        with open(merged_path) as fin:
            lines = fin.readlines()
        # the merged output keeps the duplicate 17 entry — regions shared
        # between the inputs appear once per source file
        expected_lines = [
            '1\t118166398\t118166398\n',
            '5\t35876484\t35876484\n',
            '17\t11998935\t11998935\n',
            '20\t62321135\t62321135\n',
            '7\t116418998\t116418998\n',
            '7\t151845367\t151845367\n',
            '17\t11998935\t11998935\n',
        ]
        self.assertEqual(lines, expected_lines)
class TestExampleWorkflow(PlutoTestCase):
    """Test case for the example workflow CWL."""
    cwl_file = CWLFile('example_workflow.cwl')

    def test_example_workflow(self):
        """Test case for the example workflow"""
        self.maxDiff = None
        self.input = {
            'value': "ABC",
            "samples": [{"sample_id": "1"}, {"sample_id": "2"}],
        }
        output_json, output_dir = self.run_cwl()
        concat_path = os.path.join(output_dir, "output.concat.tsv")
        env_path = os.path.join(output_dir, "env.txt")
        expected_output = {
            "output_file": {
                "location": "file://" + concat_path,
                "basename": "output.concat.tsv",
                "class": "File",
                "checksum": "sha1$be6fb2e96f81c63a0b5fc6392a317ba3afbbca19",
                "size": 30,
                "path": concat_path,
            },
            'env': {
                'basename': 'env.txt',
                'class': 'File',
                'location': "file://" + env_path,
                'path': env_path,
            },
        }
        # env.txt checksum and size are excluded from the comparison
        output_json['env'].pop('checksum')
        output_json['env'].pop('size')
        self.assertDictEqual(output_json, expected_output)
        with open(output_json['output_file']['path']) as f:
            lines = [l.strip() for l in f]
        expected_lines = [
            'SampleID\tValue',
            '1\tABC',
            '2\tABC',
        ]
        self.assertEqual(lines, expected_lines)
class LsCWL(Operator):
    """Operator for the ls CWL.

    Builds the CWL input mapping only when a non-empty list of input files was
    supplied in the operator args; otherwise the input stays empty.
    """
    cwl_file = CWLFile('ls.cwl')

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.input = {}
        # .get() + truthiness replaces the original nested membership and
        # len(...) > 0 checks: identical for present non-empty lists, and it
        # also tolerates an explicit None value, which len() would crash on
        if self.args.get('input_files'):
            self.input = generate_input(self.args, list_File_keys=['input_files'])
class ExampleWorkflow(Operator):
    """Operator for the example workflow CWL."""
    cwl_file = CWLFile('example_workflow.cwl')

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.generate_input_data()

    def generate_input_data(self):
        """Build ``self.input`` for the CWL from the operator args.

        Expects ``self.args`` to contain 'value' and a 'sampleIDs' iterable;
        the args are deep-copied first so building the input cannot mutate the
        caller's data.
        """
        input_args = copy.deepcopy(self.args)
        self.input = {
            'value': input_args['value'],
            # one sample record per provided sample ID
            'samples': [{'sample_id': sample} for sample in input_args['sampleIDs']],
        }
class ConsensusMaf(Operator):
    """Operator for the consensus maf CWL.

    Maf files may arrive either as a pre-built File array ('maf_files_list')
    or as a plain list of paths ('maf_files'); the CWL input is generated
    accordingly.
    """
    cwl_file = CWLFile('consensus_maf.cwl')

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        if self.args.get('maf_files_list', False):
            # move the value under the 'maf_files' key expected by the CWL
            self.args['maf_files'] = self.args.pop('maf_files_list')
            self.input = generate_input(
                self.args,
                array_File_keys=['maf_files'])
        else:
            self.input = generate_input(
                self.args,
                list_File_keys=['maf_files'])
class TestAddMSIStatus(PlutoTestCase):
    """Test case for the add_msi_status CWL."""
    cwl_file = CWLFile('add_msi_status.cwl')

    def test_add_msi_status(self):
        """Test case for adding the MSI Status label to a table"""
        input_rows = [
            ['Total_Number_of_Sites', 'Number_of_Somatic_Sites', 'MSI_SCORE', 'SAMPLE_ID'],
            ['123', '987', '11', 'Sample1-T'],
            ['456', '654', '2', 'Sample2-T'],
            ['789', '321', '5', 'Sample3-T'],
        ]
        msi_file = self.write_table(self.tmpdir, filename="msi.tsv", lines=input_rows)
        self.input = {
            'input_filename': {"class": "File", "path": msi_file},
            'output_filename': 'output.tsv',
            'header': 'MSI_STATUS',
        }
        output_json, output_dir = self.run_cwl()
        output_path = os.path.join(output_dir, 'output.tsv')
        expected_output = {
            'output_file': {
                'location': 'file://' + output_path,
                'basename': 'output.tsv',
                'class': 'File',
                'checksum': 'sha1$ea9eb5b227f25bd03a50fbcec282259e89d176ac',
                'size': 168,
                'path': output_path,
            }
        }
        self.assertCWLDictEqual(output_json, expected_output)
        # each row should gain an MSI_STATUS label derived from its MSI_SCORE
        lines = self.read_table(output_path)
        expected_lines = [
            ['Total_Number_of_Sites', 'Number_of_Somatic_Sites', 'MSI_SCORE', 'SAMPLE_ID', 'MSI_STATUS'],
            ['123', '987', '11', 'Sample1-T', 'Instable'],
            ['456', '654', '2', 'Sample2-T', 'Stable'],
            ['789', '321', '5', 'Sample3-T', 'Indeterminate'],
        ]
        self.assertEqual(lines, expected_lines)
class ConcatTablesDirCWL(Operator):
    """Operator for the concat-tables_dir CWL.

    Input tables may arrive either as a pre-built File array
    ('input_files_list') or as a plain list of paths ('input_files'); the CWL
    input is generated accordingly.
    """
    cwl_file = CWLFile('concat-tables_dir.cwl')

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        if self.args.get('input_files_list', False):
            # move the value under the 'input_files' key expected by the CWL
            self.args['input_files'] = self.args.pop('input_files_list')
            self.input = generate_input(
                self.args,
                array_File_keys=['input_files'],
                bool_keys=['comments'])
        else:
            self.input = generate_input(
                self.args,
                list_File_keys=['input_files'],
                bool_keys=['comments'])
class TestExampleWorkflow(PlutoTestCase):
    """Test case for the example workflow CWL."""
    cwl_file = CWLFile('example_workflow.cwl')

    def test_example_workflow(self):
        """Test case for the example workflow"""
        self.maxDiff = None
        self.input = {
            'value': "ABC",
            "samples": [{"sample_id": "1"}, {"sample_id": "2"}],
        }
        output_json, output_dir = self.run_cwl()
        expected_output = {
            "output_file": OFile(
                name='output.concat.tsv',
                hash='d4297dfdad25ac92ffae2ce61c6cfe12c4089c28',
                size=27,
                dir=output_dir),
            'env': OFile(name='env.txt', dir=output_dir),
        }
        # env.txt size and checksum are excluded from the comparison
        strip_related_keys = [
            ('basename', 'env.txt', ['size', 'checksum']),
        ]
        self.assertCWLDictEqual(output_json, expected_output, related_keys=strip_related_keys)
        with open(os.path.join(output_dir, "output.concat.tsv")) as f:
            lines = [l.strip() for l in f]
        expected_lines = [
            'SampleID\tValue',
            '1\tABC',
            '2\tABC',
        ]
        self.assertEqual(lines, expected_lines)
class TestFilterUncalledMutations(PlutoTestCase):
    """Test case for the filterUncalledMutations CWL."""
    cwl_file = CWLFile('filterUncalledMutations.cwl')

    def test_1(self):
        """Split an input maf into called and uncalled mutation tables."""
        maf_file = os.path.join(DATA_SETS["Fillout01"]["OUTPUT_DIR"], "output.maf")
        # sanity-check the fixture before running the workflow
        comments, mutations = self.load_mutations(maf_file, strip=True)
        self.assertEqual(len(mutations), 475)
        self.input = {"input_file": {"class": "File", "path": maf_file}}
        output_json, output_dir = self.run_cwl()
        called_path = os.path.join(output_dir, 'data_mutations_extended.txt')
        uncalled_path = os.path.join(output_dir, 'data_mutations_uncalled.txt')
        expected_output = {
            'called_file': OFile(
                name='data_mutations_extended.txt',
                dir=output_dir,
                hash='e7430656d9fcbce36fa57eb92460db57742168ae',
                size=347254),
            'uncalled_file': OFile(
                name='data_mutations_uncalled.txt',
                dir=output_dir,
                hash='58129786cc299011202eb078734b3ff513d54081',
                size=287883),
        }
        self.maxDiff = None
        self.assertCWLDictEqual(output_json, expected_output)
        # 253 called + 222 uncalled account for all 475 input mutations
        comments, mutations = self.load_mutations(called_path, strip=True)
        self.assertEqual(len(mutations), 253)
        comments, mutations = self.load_mutations(uncalled_path, strip=True)
        self.assertEqual(len(mutations), 222)
class TestReplaceColname(PlutoTestCase):
    """Test case for the replace_colname CWL."""
    cwl_file = CWLFile('replace_colname.cwl')

    def setUp(self):
        # initialize the tmpdir and write the fixture table
        super().setUp()
        table_rows = [["A", "%", "C"], ["1", "2", "3"], ["foo", "bar", "baz"]]
        self.table = self.write_table(self.tmpdir, filename="table.tsv", lines=table_rows)

    def test_change_colnames(self):
        """Rename the '%' column header to 'pcnt'; data rows stay untouched."""
        self.maxDiff = None
        self.input = {
            "old_name": "%",
            "new_name": "pcnt",
            "input_file": {"class": "File", "path": self.table},
        }
        output_json, output_dir = self.run_cwl()
        output_path = os.path.join(output_dir, 'output.tsv')
        expected_output = {
            'output_file': {
                'location': 'file://' + output_path,
                'basename': 'output.tsv',
                'class': 'File',
                'checksum': 'sha1$dfe58e029a2c3262dbdf4270b0b5af6bf420589e',
                'size': 27,
                'path': output_path,
            }
        }
        self.assertCWLDictEqual(output_json, expected_output)
        lines = self.read_table(output_path)
        expected_lines = [['A', 'pcnt', 'C'], ['1', '2', '3'], ['foo', 'bar', 'baz']]
        self.assertEqual(lines, expected_lines)
class TestRunFacetsWrapperCWL(PlutoTestCase):
    """Test case for the run-facets-wrapper CWL."""
    cwl_file = CWLFile('run-facets-wrapper.cwl')

    def test_run_facets_wrapper(self):
        """Run the FACETS wrapper on a snp-pileup and check every output file."""
        self.input = {
            "snp_pileup": {
                "path": os.path.join(DATA_SETS['Proj_08390_G']['SNP_PILEUP_DIR'], "Sample24.Sample23.snp_pileup.gz"),
                "class": "File",
            },
            "sample_id": "Sample24",
        }
        output_json, output_dir = self.run_cwl()
        expected_output = {
            'failed': False,
            'arm_level_txt': OFile(name="Sample24.arm_level.txt", hash="df37c54ae4969257e436a7a7a595c42ef19ecbb5", size=1824, dir=output_dir),
            'gene_level_txt': OFile(name="Sample24.gene_level.txt", hash="4e916a52458151007486bf536acfff539fdc2ecc", size=148195, dir=output_dir),
            'hisens_rds': OFile(name="Sample24_hisens.rds", hash="6bfd6c7f29c49ec8ef538dd468a3b4626b05bda2", size=213986, dir=output_dir),
            'hisens_seg': OFile(name="Sample24_hisens.seg", hash="652f9c6d0245af49bac6ca67a089af7d4e46801b", size=1897, dir=output_dir),
            'hisens_png': OFile(name="Sample24_hisens.png", hash="6af56798d0d8e3b49c26ab9d0adc855c3c8a5a50", size=168166, dir=output_dir),
            'purity_rds': OFile(name="Sample24_purity.rds", hash="dd8b967f84b191ff76214c6110db8d0e65f6514c", size=213356, dir=output_dir),
            'purity_png': OFile(name="Sample24_purity.png", hash="7db765d900c8a431ab0325098b81eda2cd0780bf", size=164021, dir=output_dir),
            'purity_seg': OFile(name="Sample24_purity.seg", hash="591e6d8b432e1e910fe4fb4b1814508131f960c9", size=1285, dir=output_dir),
            'qc_txt': OFile(name="Sample24.qc.txt", hash="d4a36726a5fcb7b268aae02d97ce4e382e42d9f6", size=1339, dir=output_dir),
            'output_txt': OFile(name="Sample24.txt", hash="4769dc7b8d4b127383e1936c07cdba1e2e09aecb", size=480, dir=output_dir),
            "stderr_txt": OFile(name="facets_stderr.txt", dir=output_dir),
            "stdout_txt": OFile(name="facets_stdout.txt", dir=output_dir),
        }
        # size and checksum of the log files are not compared
        strip_related_keys = [
            ('basename', 'facets_stderr.txt', ['size', 'checksum']),
            ('basename', 'facets_stdout.txt', ['size', 'checksum']),
        ]
        self.assertCWLDictEqual(output_json, expected_output, related_keys=strip_related_keys)
        # spot-check the line counts of the two segment files
        with open(os.path.join(output_dir, 'Sample24_hisens.seg')) as fin:
            self.assertEqual(len(fin.readlines()), 37)
        with open(os.path.join(output_dir, 'Sample24_purity.seg')) as fin:
            self.assertEqual(len(fin.readlines()), 25)
class TestPasteCol(PlutoTestCase):
    """Test case for the paste-col CWL."""
    cwl_file = CWLFile('paste-col.cwl')

    def test_paste_col_1(self):
        """Paste a constant-value column onto every row of a table."""
        # make a dummy file with some lines
        input_file = os.path.join(self.tmpdir, "input.txt")
        with open(input_file, "w") as fout:
            for line in ["HEADER1", "foo1", "bar1"]:
                fout.write(line + '\n')
        self.input = {
            "input_file": {"class": "File", "path": input_file},
            "output_filename": "output.txt",
            "header": "HEADER2",
            "value": "foo2",
        }
        output_json, output_dir = self.run_cwl()
        expected_output = {
            'output_file': OFile(
                name="output.txt",
                size=36,
                hash="34753fd98b2355d54740f3fdfc6490262c15dd59",
                dir=output_dir),
        }
        self.assertCWLDictEqual(output_json, expected_output)
        with open(expected_output['output_file']['path']) as fin:
            output_lines = [line.strip() for line in fin]
        expected_lines = ['HEADER1\tHEADER2', 'foo1\tfoo2', 'bar1\tfoo2']
        self.assertEqual(output_lines, expected_lines)
class TestRunFacetsLegacyWrapperCWL(PlutoTestCase):
    """Test case for the run-facets-legacy-wrapper CWL."""
    cwl_file = CWLFile('run-facets-legacy-wrapper.cwl')

    def test_run_facets_wrapper(self):
        """Run the legacy FACETS wrapper on a snp-pileup and check every output file."""
        self.input = {
            "snp_pileup": {
                "path": os.path.join(DATA_SETS['Proj_08390_G']['SNP_PILEUP_DIR'], "Sample24.Sample23.snp_pileup.gz"),
                "class": "File",
            },
            "sample_id": "Sample24",
        }
        output_json, output_dir = self.run_cwl()
        expected_output = {
            'arm_level_txt': OFile(name="Sample24.arm_level.txt", hash="df37c54ae4969257e436a7a7a595c42ef19ecbb5", size=1824, dir=output_dir),
            'failed': False,
            'gene_level_txt': OFile(name="Sample24.gene_level.txt", hash="4e916a52458151007486bf536acfff539fdc2ecc", size=148195, dir=output_dir),
            'hisens_Rdata': OFile(name="Sample24_hisens.Rdata", hash="6cc3fd1fad17111e32c7c88b259a523092539181", size=214260, dir=output_dir),
            'hisens_seg': OFile(name="Sample24_hisens.seg", hash="652f9c6d0245af49bac6ca67a089af7d4e46801b", size=1897, dir=output_dir),
            'purity_Rdata': OFile(name="Sample24_purity.Rdata", hash="ca2c46ceebbb960a02fb960134ba1711983e71c8", size=213542, dir=output_dir),
            'purity_seg': OFile(name="Sample24_purity.seg", hash="591e6d8b432e1e910fe4fb4b1814508131f960c9", size=1285, dir=output_dir),
            'qc_txt': OFile(name="Sample24.qc.txt", hash="d4a36726a5fcb7b268aae02d97ce4e382e42d9f6", size=1339, dir=output_dir),
            'hisens_cncf_txt': OFile(name="Sample24_hisens.cncf.txt", hash="db9131a33889a1cac82e3bd6b3f0e5e182c65105", size=5238, dir=output_dir),
            'purity_cncf_txt': OFile(name="Sample24_purity.cncf.txt", hash="b331530e1e46b5ba1bdcedeb67f2aa82da6ebc5f", size=3630, dir=output_dir),
            'stderr_txt': OFile(name="facets_legacy_stderr.txt", dir=output_dir),
            'stdout_txt': OFile(name='facets_legacy_stdout.txt', dir=output_dir),
        }
        self.maxDiff = None
        # size and checksum of the log files are not compared
        strip_related_keys = [
            ('basename', 'facets_legacy_stdout.txt', ['size', 'checksum']),
            ('basename', 'facets_legacy_stderr.txt', ['size', 'checksum']),
        ]
        self.assertCWLDictEqual(output_json, expected_output, related_keys=strip_related_keys)
        # spot-check the line count of the hisens cncf table
        with open(os.path.join(output_dir, 'Sample24_hisens.cncf.txt')) as fin:
            self.assertEqual(len(fin.readlines()), 37)
class TestReplace(PlutoTestCase):
    """Test case for the replace CWL."""
    cwl_file = CWLFile('replace.cwl')

    def test_replace1(self):
        """Test that strings get replaced"""
        # make a dummy file with some lines
        input_file = os.path.join(self.tmpdir, "input.txt")
        with open(input_file, "w") as fout:
            for line in ["HEADER", "foo", "ILLOGICAL", "baz"]:
                fout.write(line + '\n')
        self.input = {
            "input_file": {"class": "File", "path": input_file},
        }
        output_json, output_dir = self.run_cwl()
        # check the output contents; the 'ILLOGICAL' line is replaced with 'NA'
        output_file = os.path.join(output_dir, 'output.txt')
        with open(output_file) as fin:
            output_lines = [line.strip() for line in fin]
        self.assertEqual(output_lines, ["HEADER", "foo", "NA", "baz"])
        expected_output = {
            'output_file': OFile(
                name="output.txt",
                size=18,
                hash="62255c8ee13b8ba6e01c7e17262a8ba1f174e5cb",
                dir=output_dir),
        }
        self.assertCWLDictEqual(output_json, expected_output)
class TestReduceSigFigs(PlutoTestCase):
    """Test case for the reduce_sig_figs CWL."""
    cwl_file = CWLFile('reduce_sig_figs.cwl')

    def test_reduce_sig_figs(self):
        """Test that significant figures are reduced correctly"""
        input_file = os.path.join(self.tmpdir, "input.txt")
        with open(input_file, "w") as fout:
            for line in ["seg.mean", "3.141592", "2.718281828"]:
                fout.write(line + '\n')
        self.input = {"input_file": {"class": "File", "path": input_file}}
        output_json, output_dir = self.run_cwl()
        expected_output = {
            'output_file': OFile(
                name="output.txt",
                size=26,
                hash="d9f5ec4a9aa27a69ee64edb97eb10d6db65c7ad7",
                dir=output_dir),
        }
        self.assertCWLDictEqual(output_json, expected_output)
        # the seg.mean values should come back rounded to 4 decimal places
        with open(expected_output['output_file']['path']) as fin:
            rows = list(csv.DictReader(fin))
        self.assertEqual(len(rows), 2)
        self.assertDictEqual(rows[0], OrderedDict([('seg.mean', '3.1416')]))
        self.assertDictEqual(rows[1], OrderedDict([('seg.mean', '2.7183')]))
class TestConsensusBed(PlutoTestCase):
    """Test case for the consensus_bed CWL; merges maf coordinates into a bed file."""
    cwl_file = CWLFile('consensus_bed.cwl')

    @staticmethod
    def _maf_row(hugo_symbol, entrez_id, chromosome, position):
        """Build one single-base maf row (Start_Position == End_Position)."""
        return OrderedDict([
            ('Hugo_Symbol', hugo_symbol),
            ('Entrez_Gene_Id', entrez_id),
            ('Center', 'mskcc.org'),
            ('NCBI_Build', 'GRCh37'),
            ('Chromosome', chromosome),
            ('Start_Position', position),
            ('End_Position', position),
        ])

    def _maf_lines(self, rows):
        """Render the comment lines, one header line, and one line per maf row."""
        lines = [comment[0] + '\n' for comment in self.comments]
        lines.append('\t'.join(rows[0].keys()) + '\n')
        for row in rows:
            lines.append('\t'.join(row.values()) + '\n')
        return lines

    def setUp(self):
        super().setUp()
        self.comments = [
            ['# comment 1'],
            ['# comment 2'],
        ]
        self.maf_row1 = self._maf_row('RTEL1', '51750', '20', '62321135')
        self.maf_row2 = self._maf_row('FAM46C', '54855', '1', '118166398')
        self.maf_row3 = self._maf_row('IL7R', '3575', '5', '35876484')
        self.maf_row4 = self._maf_row('KMT2C', '58508', '7', '151845367')
        self.maf_row5 = self._maf_row('MET', '4233', '7', '116418998')
        self.maf_row6 = self._maf_row('MAP2K4', '6416', '17', '11998935')
        # two maf files that share rows 1 and 2
        maf_lines1 = self._maf_lines([self.maf_row1, self.maf_row2, self.maf_row3, self.maf_row4])
        maf_lines2 = self._maf_lines([self.maf_row1, self.maf_row2, self.maf_row5, self.maf_row6])
        self.maf1 = os.path.join(self.tmpdir, "input1.maf")
        self.maf2 = os.path.join(self.tmpdir, "input2.maf")
        with open(self.maf1, "w") as fout:
            fout.writelines(maf_lines1)
        with open(self.maf2, "w") as fout:
            fout.writelines(maf_lines2)

    def test_consensus_bed_workflow(self):
        """Merge the coordinates of two maf files into a single bed file."""
        self.maxDiff = None
        self.input = {
            'maf_files': [
                {'class': 'File', 'path': self.maf1},
                {'class': 'File', 'path': self.maf2},
            ]
        }
        output_json, output_dir = self.run_cwl()
        merged_path = os.path.join(output_dir, 'merged.bed')
        expected_output = {
            'output_file': {
                'location': 'file://' + merged_path,
                'basename': 'merged.bed',
                'class': 'File',
                'checksum': 'sha1$f831dc91b70c02f10f69da2dae21de57d580b654',
                'size': 149,
                'path': merged_path,
            }
        }
        self.assertDictEqual(output_json, expected_output)
        with open(output_json['output_file']['path']) as f:
            lines = [l for l in f]
        expected_lines = [
            '1\t118166398\t118166398\n',
            '5\t35876484\t35876484\n',
            '7\t116418998\t116418998\n',
            '17\t11998935\t11998935\n',
            '20\t62321135\t62321135\n',
            '7\t151845367\t151845367\n',
            '20\t62321135\t62321135\n',
        ]
        # BUG FIX: this was assertDictEqual(lines, expected_lines) on two
        # *lists*, which always errors ("First argument is not a dictionary")
        # before comparing anything, so this expectation was never actually
        # exercised; assertEqual performs the intended list comparison.
        self.assertEqual(lines, expected_lines)
class TestSamplesFilloutIndexBatch(PlutoTestCase): cwl_file = CWLFile('samples_fillout_index_batch_workflow.cwl') def setUp(self): super().setUp() self.maxDiff = None self.runner_args[ 'use_cache'] = False # do not use cache for samples fillout workflow it breaks on split_vcf_to_mafs self.sample1_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'], 'Sample1.FillOutUnitTest01.muts.maf') self.sample2_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'], 'Sample2.FillOutUnitTest01.muts.maf') self.sample3_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'], 'Sample3.FillOutUnitTest01.muts.maf') self.sample4_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'], 'Sample4.FillOutUnitTest01.muts.maf') self.sample5_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'], 'Sample5.FillOutUnitTest01.muts.maf') self.sample1_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'], 'Sample1.UnitTest01.bam') self.sample2_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'], 'Sample2.UnitTest01.bam') self.sample3_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'], 'Sample3.UnitTest01.bam') self.sample4_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'], 'Sample4.UnitTest01.bam') self.sample5_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'], 'Sample5.UnitTest01.bam') def test_one_group(self): """ Test case for running the fillout workflow on a number of samples, each with a bam and maf """ sample_group1 = [ { "sample_id": "Sample1", "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2", "sample_type": "research", "prefilter": True, "maf_file": { "class": "File", "path": self.sample1_maf }, "bam_file": { "class": "File", "path": self.sample1_bam } }, { "sample_id": "Sample2", "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2", "sample_type": "research", "prefilter": True, "maf_file": { "class": "File", "path": self.sample2_maf }, "bam_file": { "class": "File", "path": self.sample2_bam } }, { "sample_id": "Sample3", "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2", "sample_type": "clinical", 
"prefilter": False, "maf_file": { "class": "File", "path": self.sample3_maf }, "bam_file": { "class": "File", "path": self.sample3_bam } }, { "sample_id": "Sample4", "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2", "sample_type": "clinical", "prefilter": False, "maf_file": { "class": "File", "path": self.sample4_maf }, "bam_file": { "class": "File", "path": self.sample4_bam } }, { "sample_id": "Sample5", "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2", "sample_type": "research", "prefilter": True, "maf_file": { "class": "File", "path": self.sample5_maf }, "bam_file": { "class": "File", "path": self.sample5_bam } }, ] self.input = { "sample_groups": [sample_group1], "fillout_output_fname": 'output.maf', "ref_fasta": { "class": "File", "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA'] }, } output_json, output_dir = self.run_cwl() output_file = os.path.join(output_dir, 'output.maf') filtered_output_path = os.path.join(output_dir, 'output.filtered.maf') portal_output_path = os.path.join(output_dir, 'data_mutations_extended.txt') uncalled_output_path = os.path.join(output_dir, 'data_mutations_uncalled.txt') expected_output = { 'output_file': OFile(name='output.maf', dir=output_dir), 'filtered_file': OFile(name='output.filtered.maf', dir=output_dir), 'portal_file': OFile(name='data_mutations_extended.txt', dir=output_dir), 'uncalled_file': OFile(name='data_mutations_uncalled.txt', dir=output_dir), } # file contents are inconsistent so strip some keys from the output dict strip_related_keys = [ ('basename', 'output.maf', ['size', 'checksum']), ('basename', 'output.filtered.maf', ['size', 'checksum']), ('basename', 'data_mutations_extended.txt', ['size', 'checksum']), ('basename', 'data_mutations_uncalled.txt', ['size', 'checksum']) ] self.assertCWLDictEqual(output_json, expected_output, related_keys=strip_related_keys) self.assertNumMutationsHash(output_file, 310, '18fafe6dd335cb62f515e0323e6b74b2') self.assertNumMutationsHash(filtered_output_path, 225, 
'450b97a2b93ed9421c141837f99240ce') self.assertNumMutationsHash(portal_output_path, 159, '52a95dcfaf0b767fe90f4115e11f3b0e') self.assertNumMutationsHash(uncalled_output_path, 66, '790f7faefb7b7c039fd48a8ede1cfe35') self.assertEqualNumMutations( [portal_output_path, uncalled_output_path], filtered_output_path) self.assertMutFieldContains( output_file, "Tumor_Sample_Barcode", ["Sample1", "Sample2", "Sample3", "Sample4", "Sample5"], containsAll=True) def test_two_groups(self): sample_group1 = [ { "sample_id": "Sample1", "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2", "sample_type": "research", "prefilter": True, "maf_file": { "class": "File", "path": self.sample1_maf }, "bam_file": { "class": "File", "path": self.sample1_bam } }, { "sample_id": "Sample2", "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2", "sample_type": "research", "prefilter": True, "maf_file": { "class": "File", "path": self.sample2_maf }, "bam_file": { "class": "File", "path": self.sample2_bam } }, { "sample_id": "Sample3", "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2", "sample_type": "clinical", "prefilter": False, "maf_file": { "class": "File", "path": self.sample3_maf }, "bam_file": { "class": "File", "path": self.sample3_bam } }, ] sample_group2 = [ { "sample_id": "Sample4", "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2", "sample_type": "clinical", "prefilter": False, "maf_file": { "class": "File", "path": self.sample4_maf }, "bam_file": { "class": "File", "path": self.sample4_bam } }, { "sample_id": "Sample5", "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2", "sample_type": "research", "prefilter": True, "maf_file": { "class": "File", "path": self.sample5_maf }, "bam_file": { "class": "File", "path": self.sample5_bam } }, ] self.input = { "sample_groups": [sample_group1, sample_group2], "fillout_output_fname": 'output.maf', "ref_fasta": { "class": "File", "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA'] }, } output_json, output_dir = self.run_cwl() output_file = os.path.join(output_dir, 
'output.maf') filtered_output_path = os.path.join(output_dir, 'output.filtered.maf') portal_output_path = os.path.join(output_dir, 'data_mutations_extended.txt') uncalled_output_path = os.path.join(output_dir, 'data_mutations_uncalled.txt') expected_output = { 'output_file': OFile(name='output.maf', dir=output_dir), 'filtered_file': OFile(name='output.filtered.maf', dir=output_dir), 'portal_file': OFile(name='data_mutations_extended.txt', dir=output_dir), 'uncalled_file': OFile(name='data_mutations_uncalled.txt', dir=output_dir), } # file contents are inconsistent so strip some keys from the output dict strip_related_keys = [ ('basename', 'output.maf', ['size', 'checksum']), ('basename', 'output.filtered.maf', ['size', 'checksum']), ('basename', 'data_mutations_extended.txt', ['size', 'checksum']), ('basename', 'data_mutations_uncalled.txt', ['size', 'checksum']) ] self.assertCWLDictEqual(output_json, expected_output, related_keys=strip_related_keys) self.assertNumMutationsHash(output_file, 235, '4e4c91ef129a853a35b86f7fa6f1268a') self.assertNumMutationsHash(filtered_output_path, 150, '8397a12302977db14e798a1b2e3ba151') self.assertNumMutationsHash(portal_output_path, 120, '9d171233ecd91f3518fee98b5948978d') self.assertNumMutationsHash(uncalled_output_path, 30, 'ae90ff0cc0d0d0ab08029553fdccf381') self.assertEqualNumMutations( [portal_output_path, uncalled_output_path], filtered_output_path) self.assertMutFieldContains( output_file, "Tumor_Sample_Barcode", ["Sample1", "Sample2", "Sample3", "Sample4", "Sample5"], containsAll=True) def test_three_groups(self): """ Three groups, one of which contains a single sample (singleton) """ sample_group1 = [ { "sample_id": "Sample1", "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2", "sample_type": "research", "prefilter": True, "maf_file": { "class": "File", "path": self.sample1_maf }, "bam_file": { "class": "File", "path": self.sample1_bam } }, { "sample_id": "Sample2", "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2", 
"sample_type": "research", "prefilter": True, "maf_file": { "class": "File", "path": self.sample2_maf }, "bam_file": { "class": "File", "path": self.sample2_bam } }, ] sample_group2 = [{ "sample_id": "Sample3", "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2", "sample_type": "clinical", "prefilter": False, "maf_file": { "class": "File", "path": self.sample3_maf }, "bam_file": { "class": "File", "path": self.sample3_bam } }, { "sample_id": "Sample4", "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2", "sample_type": "clinical", "prefilter": False, "maf_file": { "class": "File", "path": self.sample4_maf }, "bam_file": { "class": "File", "path": self.sample4_bam } }] # Singleton sample; no DMP clinical matches sample_group3 = [ { "sample_id": "Sample5", "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2", "sample_type": "research", "prefilter": True, "maf_file": { "class": "File", "path": self.sample5_maf }, "bam_file": { "class": "File", "path": self.sample5_bam } }, ] self.input = { "sample_groups": [sample_group1, sample_group2, sample_group3], "fillout_output_fname": 'output.maf', "ref_fasta": { "class": "File", "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA'] }, } output_json, output_dir = self.run_cwl() output_file = os.path.join(output_dir, 'output.maf') filtered_output_path = os.path.join(output_dir, 'output.filtered.maf') portal_output_path = os.path.join(output_dir, 'data_mutations_extended.txt') uncalled_output_path = os.path.join(output_dir, 'data_mutations_uncalled.txt') expected_output = { 'output_file': OFile(name='output.maf', dir=output_dir), 'filtered_file': OFile(name='output.filtered.maf', dir=output_dir), 'portal_file': OFile(name='data_mutations_extended.txt', dir=output_dir), 'uncalled_file': OFile(name='data_mutations_uncalled.txt', dir=output_dir), } # file contents are inconsistent so strip some keys from the output dict strip_related_keys = [ ('basename', 'output.maf', ['size', 'checksum']), ('basename', 'output.filtered.maf', ['size', 'checksum']), 
('basename', 'data_mutations_extended.txt', ['size', 'checksum']), ('basename', 'data_mutations_uncalled.txt', ['size', 'checksum']) ] self.assertCWLDictEqual(output_json, expected_output, related_keys=strip_related_keys) self.assertNumMutationsHash(output_file, 188, '2f4c5e5cb13430f456bbc41a0a93dc41') self.assertNumMutationsHash(filtered_output_path, 126, '3dda4952d2ae396079155b4bc8cc276f') self.assertNumMutationsHash(portal_output_path, 108, '37b87cea1d161efda602bef860eabdba') self.assertNumMutationsHash(uncalled_output_path, 18, 'cb601fb73ecf937db024351d69a441f1') self.assertEqualNumMutations( [portal_output_path, uncalled_output_path], filtered_output_path) self.assertMutFieldContains( output_file, "Tumor_Sample_Barcode", ["Sample1", "Sample2", "Sample3", "Sample4", "Sample5"], containsAll=True) def test_four_groups(self): """ Four groups, two of which contains a single sample (singleton) """ sample_group1 = [ { "sample_id": "Sample1", "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2", "sample_type": "research", "prefilter": True, "maf_file": { "class": "File", "path": self.sample1_maf }, "bam_file": { "class": "File", "path": self.sample1_bam } }, { "sample_id": "Sample2", "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2", "sample_type": "research", "prefilter": True, "maf_file": { "class": "File", "path": self.sample2_maf }, "bam_file": { "class": "File", "path": self.sample2_bam } }, ] sample_group2 = [ { "sample_id": "Sample3", "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2", "sample_type": "clinical", "prefilter": False, "maf_file": { "class": "File", "path": self.sample3_maf }, "bam_file": { "class": "File", "path": self.sample3_bam } }, ] sample_group3 = [ { "sample_id": "Sample5", "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2", "sample_type": "research", "prefilter": True, "maf_file": { "class": "File", "path": self.sample5_maf }, "bam_file": { "class": "File", "path": self.sample5_bam } }, ] sample_group4 = [{ "sample_id": "Sample4", "normal_id": 
"FROZENPOOLEDNORMAL_IMPACT505_V2", "sample_type": "clinical", "prefilter": False, "maf_file": { "class": "File", "path": self.sample4_maf }, "bam_file": { "class": "File", "path": self.sample4_bam } }] self.input = { "sample_groups": [sample_group1, sample_group2, sample_group3, sample_group4], "fillout_output_fname": 'output.maf', "ref_fasta": { "class": "File", "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA'] }, } output_json, output_dir = self.run_cwl() output_file = os.path.join(output_dir, 'output.maf') filtered_output_path = os.path.join(output_dir, 'output.filtered.maf') portal_output_path = os.path.join(output_dir, 'data_mutations_extended.txt') uncalled_output_path = os.path.join(output_dir, 'data_mutations_uncalled.txt') expected_output = { 'output_file': OFile(name='output.maf', dir=output_dir), 'filtered_file': OFile(name='output.filtered.maf', dir=output_dir), 'portal_file': OFile(name='data_mutations_extended.txt', dir=output_dir), 'uncalled_file': OFile(name='data_mutations_uncalled.txt', dir=output_dir), } # file contents are inconsistent so strip some keys from the output dict strip_related_keys = [ ('basename', 'output.maf', ['size', 'checksum']), ('basename', 'output.filtered.maf', ['size', 'checksum']), ('basename', 'data_mutations_extended.txt', ['size', 'checksum']), ('basename', 'data_mutations_uncalled.txt', ['size', 'checksum']) ] self.assertCWLDictEqual(output_json, expected_output, related_keys=strip_related_keys) self.assertNumMutationsHash( output_file, 157, 'ed7dcce977a13f360463e45f5a07154b') # , _print = True self.assertNumMutationsHash(filtered_output_path, 36, '5ea9c4b66287a100fc90e05619d52364') self.assertNumMutationsHash(portal_output_path, 36, 'ed7be9c6b425b526e167bdcf8c954637') self.assertNumMutationsHash(uncalled_output_path, 0, 'd751713988987e9331980363e24189ce') self.assertEqualNumMutations( [portal_output_path, uncalled_output_path], filtered_output_path) self.assertMutFieldContains( output_file, "Tumor_Sample_Barcode", 
["Sample1", "Sample2", "Sample3", "Sample4", "Sample5"], containsAll=True)
class EnvContainerCWL(Operator):
    """Operator wrapper for the env_container.cwl workflow."""
    cwl_file = CWLFile('env_container.cwl')

    def __init__(self, **kwargs):
        """Initialize the operator; this workflow takes no CWL inputs."""
        super().__init__(**kwargs)
        # the env container workflow requires no input values
        self.input = dict()
class TestMsiWorkflow(PlutoTestCase):
    """Tests for the msi_workflow.cwl MSI scoring pipeline."""
    cwl_file = CWLFile('msi_workflow.cwl')

    @staticmethod
    def _file(path):
        # build a CWL File entry for the input JSON
        return {"class": "File", "path": path}

    def test_msi_workflow_demo1(self):
        """
        Test case for running the MSI workflow on single sample
        """
        bam_dir = self.DATA_SETS['demo']['BAM_DIR']
        self.input = {
            "threads": "16",
            "microsatellites_file": self._file(MICROSATELLITES_LIST),
            "pairs": [{
                "pair_id": "Sample1.Sample2",
                "tumor_id": "Sample1",
                "normal_id": "Sample2"
            }],
            "normal_bam_files": [self._file(os.path.join(bam_dir, "Sample2.bam"))],
            "tumor_bam_files": [self._file(os.path.join(bam_dir, "Sample1.bam"))]
        }
        output_json, output_dir = self.run_cwl()
        expected_output = {
            'pairs': [{
                "pair_id": "Sample1.Sample2",
                "tumor_id": "Sample1",
                "normal_id": "Sample2",
                "msi_tsv": OFile(
                    name='Sample1.Sample2.msi.tsv',
                    hash="92576a9be4d6a36c67b26d16fdc4134b0d1b9cd9",
                    size=54,
                    dir=output_dir)
            }]
        }
        self.assertCWLDictEqual(output_json, expected_output)
        # spot-check the contents of the MSI score table
        lines = self.read_table(os.path.join(output_dir, 'Sample1.Sample2.msi.tsv'))
        self.assertEqual(
            lines,
            [['MSI_SCORE', 'MSI_STATUS', 'SAMPLE_ID'],
             ['20.90', 'Instable', 'Sample1']])

    def test_msi_workflow1(self):
        """
        Test case for running the MSI workflow on multiple samples
        """
        bam_dir = self.DATA_SETS['Proj_08390_G']['BAM_DIR']
        self.input = {
            "microsatellites_file": self._file(MICROSATELLITES_LIST),
            "pairs": [
                {
                    "pair_id": "Sample1-T.Sample1-N",
                    "tumor_id": "Sample1-T",
                    "normal_id": "Sample1-N"
                },
                {
                    "pair_id": "Sample2-T.Sample2-N",
                    "tumor_id": "Sample2-T",
                    "normal_id": "Sample2-N"
                }
            ],
            "normal_bam_files": [
                self._file(os.path.join(bam_dir, "Sample23.rg.md.abra.printreads.bam")),
                self._file(os.path.join(bam_dir, "Sample35.rg.md.abra.printreads.bam"))
            ],
            "tumor_bam_files": [
                self._file(os.path.join(bam_dir, "Sample24.rg.md.abra.printreads.bam")),
                self._file(os.path.join(bam_dir, "Sample36.rg.md.abra.printreads.bam"))
            ]
        }
        output_json, output_dir = self.run_cwl()
        expected_output = {
            "pairs": [
                {
                    "pair_id": "Sample1-T.Sample1-N",
                    "tumor_id": "Sample1-T",
                    "normal_id": "Sample1-N",
                    "msi_tsv": OFile(
                        name='Sample1-T.Sample1-N.msi.tsv',
                        hash="bc132f6ab9b779d7cba51e7ddfa82af724134f03",
                        size=56,
                        dir=output_dir)
                },
                {
                    "pair_id": "Sample2-T.Sample2-N",
                    "tumor_id": "Sample2-T",
                    "normal_id": "Sample2-N",
                    "msi_tsv": OFile(
                        name='Sample2-T.Sample2-N.msi.tsv',
                        hash="11fcf9459010aa5ea06e62e72155807c9723d45a",
                        size=56,
                        dir=output_dir)
                }
            ]
        }
        self.assertCWLDictEqual(output_json, expected_output)
        # check the per-pair MSI score tables
        lines = self.read_table(os.path.join(output_dir, 'Sample2-T.Sample2-N.msi.tsv'))
        self.assertEqual(
            lines,
            [['MSI_SCORE', 'MSI_STATUS', 'SAMPLE_ID'],
             ['40.14', 'Instable', 'Sample2-T']])
        lines = self.read_table(os.path.join(output_dir, 'Sample1-T.Sample1-N.msi.tsv'))
        self.assertEqual(
            lines,
            [['MSI_SCORE', 'MSI_STATUS', 'SAMPLE_ID'],
             ['21.97', 'Instable', 'Sample1-T']])

    def test_msi_workflow2(self):
        """
        Test case for running the MSI workflow on single sample
        """
        bam_dir = self.DATA_SETS['Proj_08390_G']['BAM_DIR']
        self.input = {
            "microsatellites_file": self._file(MICROSATELLITES_LIST),
            "pairs": [{
                "pair_id": "Sample1-T.Sample1-N",
                "tumor_id": "Sample1-T",
                "normal_id": "Sample1-N"
            }],
            "normal_bam_files": [
                self._file(os.path.join(bam_dir, "Sample23.rg.md.abra.printreads.bam"))
            ],
            "tumor_bam_files": [
                self._file(os.path.join(bam_dir, "Sample24.rg.md.abra.printreads.bam"))
            ]
        }
        output_json, output_dir = self.run_cwl()
        expected_output = {
            "pairs": [{
                "pair_id": "Sample1-T.Sample1-N",
                "tumor_id": "Sample1-T",
                "normal_id": "Sample1-N",
                "msi_tsv": OFile(
                    name='Sample1-T.Sample1-N.msi.tsv',
                    hash="bc132f6ab9b779d7cba51e7ddfa82af724134f03",
                    size=56,
                    dir=output_dir)
            }]
        }
        self.assertCWLDictEqual(output_json, expected_output)
        # spot-check the contents of the MSI score table
        lines = self.read_table(os.path.join(output_dir, 'Sample1-T.Sample1-N.msi.tsv'))
        self.assertEqual(
            lines,
            [['MSI_SCORE', 'MSI_STATUS', 'SAMPLE_ID'],
             ['21.97', 'Instable', 'Sample1-T']])
class TestConcatTablesDir(PlutoTestCase):
    """Tests for concat-tables_dir.cwl table concatenation."""
    cwl_file = CWLFile('concat-tables_dir.cwl')

    def _write_lines(self, filename, lines):
        # write the given lines to a new file under tmpdir; return its path
        path = os.path.join(self.tmpdir, filename)
        with open(path, "w") as fout:
            fout.write('\n'.join(lines) + '\n')
        return path

    def test_concat_two_tables(self):
        """
        Test that two files are concatenated correctly
        """
        self.skipTest("Assertion fails for output")
        # make dummy input tables with distinct headers
        input_file1 = self._write_lines("input1.txt", ["HEADER1", "foo1", "bar1"])
        input_file2 = self._write_lines("input2.txt", ["HEADER2", "foo2", "bar2"])
        self.input = {
            "input_files": [
                {"class": "File", "path": input_file1},
                {"class": "File", "path": input_file2}
            ],
            "output_filename": "output.txt"
        }
        output_json, output_dir = self.run_cwl()
        output_path = os.path.join(output_dir, 'output.txt')
        # concatenated table should merge both headers, padding missing cells with NA
        with open(output_path) as fin:
            output_lines = [line.strip() for line in fin]
        self.assertEqual(
            output_lines,
            ['HEADER1\tHEADER2', 'foo1\tNA', 'bar1\tNA', 'NA\tfoo2', 'NA\tbar2'])
        expected_output = {
            'output_file': {
                'location': 'file://' + output_path,
                'basename': 'output.txt',
                'class': 'File',
                'checksum': 'sha1$d92a4e707cb5dad2ec557edfe976680dfffc5f3f',
                'size': 53,
                'path': output_path
            }
        }
        self.assertCWLDictEqual(output_json, expected_output)

    def test_concat_one_tables(self):
        """
        Test that one file is returned correctly from the script
        """
        input_file1 = self._write_lines("input1.txt", ["HEADER1", "foo1", "bar1"])
        self.input = {
            "input_files": [
                {"class": "File", "path": input_file1}
            ],
            "output_filename": "output.txt"
        }
        output_json, output_dir = self.run_cwl()
        output_path = os.path.join(output_dir, 'output.txt')
        # a single input table should pass through unchanged
        with open(output_path) as fin:
            output_lines = [line.strip() for line in fin]
        self.assertEqual(output_lines, ['HEADER1', 'foo1', 'bar1'])
        expected_output = {
            'output_file': {
                'location': 'file://' + output_path,
                'basename': 'output.txt',
                'class': 'File',
                'checksum': 'sha1$2274c54c24a98e8235e34d78b700d04cb95f48dd',
                'size': 21,
                'path': output_path
            }
        }
        self.assertCWLDictEqual(output_json, expected_output)
class TestMafFilter(PlutoTestCase):
    """Tests for the maf_filter.cwl mutation filtering workflow."""
    cwl_file = CWLFile('maf_filter.cwl')

    @staticmethod
    def _filter_input(input_maf, argos_version_string, is_impact):
        # assemble the standard maf_filter CWL input JSON
        return {
            "maf_file": {"class": "File", "path": input_maf},
            "argos_version_string": argos_version_string,
            "is_impact": is_impact,
            "analysis_mutations_filename": "Proj_08390_G.muts.maf",
            "cbio_mutation_data_filename": 'data_mutations_extended.txt'
        }

    def test_filter_a_maf_file(self):
        """
        Test that a filtered maf file comes out as expected
        """
        input_maf = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.muts.maf")
        self.assertNumMutations(input_maf, 12514)
        self.input = self._filter_input(input_maf, ARGOS_VERSION_STRING, True)
        output_json, output_dir = self.run_cwl()
        expected_output = {
            'analysis_mutations_file': OFile(
                name='Proj_08390_G.muts.maf', size=28079,
                hash='24421ab8d1a39a71f48eecbb0dd167d5d9f5c529', dir=output_dir),
            'cbio_mutation_data_file': OFile(
                name='data_mutations_extended.txt', size=4534,
                hash='6131494536ce956d741c820378e7e2ce1c714403', dir=output_dir),
            'rejected_file': OFile(
                name='rejected.muts.maf', size=18627626,
                hash='a06789623715703c5006db6876ecb58b8498f938', dir=output_dir),
        }
        self.assertCWLDictEqual(output_json, expected_output)
        self.assertNumMutations(os.path.join(output_dir, "Proj_08390_G.muts.maf"), 22)
        # validate output mutation file contents
        self.assertCompareMutFiles(
            os.path.join(output_dir, "Proj_08390_G.muts.maf"),
            os.path.join(DATA_SETS['Proj_08390_G']['MAF_FILTER_DIR'], "Sample1", "analyst_file.txt"),
            muts_only=True,
            compare_len=True)
        self.assertCompareMutFiles(
            os.path.join(output_dir, 'data_mutations_extended.txt'),
            os.path.join(DATA_SETS['Proj_08390_G']['MAF_FILTER_DIR'], "Sample1", "portal_file.txt"),
            muts_only=True,
            compare_len=True)

    def test_maf_filter_argos_3_2_0(self):
        """
        Test the maf filter script results when used with argos_version_string 3.2.0
        """
        input_maf = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.muts.maf")
        self.assertNumMutations(input_maf, 12514)
        self.input = self._filter_input(input_maf, "3.2.0", True)
        output_json, output_dir = self.run_cwl()
        expected_output = {
            'analysis_mutations_file': OFile(
                name='Proj_08390_G.muts.maf', size=28081,
                hash='fd78842c9410e7e622dee270ec9c0e7628811f18', dir=output_dir),
            'cbio_mutation_data_file': OFile(
                name='data_mutations_extended.txt', size=4536,
                hash='47e716eabbfda3408b2d9a08b9bb432b2cb8fce8', dir=output_dir),
            'rejected_file': OFile(
                name='rejected.muts.maf', size=18627626,
                hash='a06789623715703c5006db6876ecb58b8498f938', dir=output_dir)
        }
        self.assertCWLDictEqual(output_json, expected_output)
        self.assertNumMutations(expected_output['analysis_mutations_file']['path'], 22)
        # validate output mutation file contents
        self.assertCompareMutFiles(
            expected_output['analysis_mutations_file']['path'],
            os.path.join(DATA_SETS['Proj_08390_G']['MAF_FILTER_DIR'], "Sample1", "analyst_file.txt"),
            muts_only=True,
            compare_len=True)
        self.assertCompareMutFiles(
            expected_output['cbio_mutation_data_file']['path'],
            os.path.join(DATA_SETS['Proj_08390_G']['MAF_FILTER_DIR'], "Sample1", "portal_file.txt"),
            muts_only=True,
            compare_len=True)

    def test_filter_maf_file_impact_false(self):
        """
        Test that a filtered maf file comes out as expected
        """
        self.maxDiff = None
        input_maf = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.muts.maf")
        self.assertNumMutations(input_maf, 12514)
        self.input = self._filter_input(input_maf, ARGOS_VERSION_STRING, False)
        output_json, output_dir = self.run_cwl()
        expected_output = {
            'analysis_mutations_file': OFile(
                name='Proj_08390_G.muts.maf', size=24524,
                hash='9fb9d43c71e546750ddec6aea2313dda28547b3a', dir=output_dir),
            'cbio_mutation_data_file': OFile(
                name='data_mutations_extended.txt', size=3931,
                hash='15ca06249511c32c32e058c246a757ec8df11d83', dir=output_dir),
            'rejected_file': OFile(
                name='rejected.muts.maf', size=18790398,
                hash='e7441703699e82cef500d9557bfcbd3464ce8eab', dir=output_dir)
        }
        self.assertCWLDictEqual(output_json, expected_output)
        self.assertNumMutations(expected_output['analysis_mutations_file']['path'], 18)

    def test_large_maf_file(self):
        """
        Test that a giant maf file with tons of variants gets filtered as expected
        """
        input_maf = os.path.join(
            DATA_SETS['Proj_08390_G']['MAF_FILTER_DIR'], "Proj_08390_G", "Proj_08390_G.muts.maf")
        self.input = self._filter_input(input_maf, "2.x", True)
        output_json, output_dir = self.run_cwl()
        expected_output = {
            'analysis_mutations_file': OFile(
                name='Proj_08390_G.muts.maf', size=2386906,
                hash='4ef341ab4280140f9be15e65a0258a4170ff651d', dir=output_dir),
            'cbio_mutation_data_file': OFile(
                name='data_mutations_extended.txt', size=278458,
                hash='af36cf815820fdf41f1401578138b5cbd551a217', dir=output_dir),
            'rejected_file': OFile(
                name='rejected.muts.maf', size=1047796463,
                hash='345953da2c7cb801fa08368260469cf7c153055f', dir=output_dir)
        }
        self.assertCWLDictEqual(output_json, expected_output)
        self.assertNumMutations(expected_output['analysis_mutations_file']['path'], 1662)
        self.assertNumMutations(expected_output['cbio_mutation_data_file']['path'], 1139)
        # validate output mutation file contents
        self.assertCompareMutFiles(
            expected_output['analysis_mutations_file']['path'],
            os.path.join(DATA_SETS['Proj_08390_G']['MAF_FILTER_DIR'], "Proj_08390_G", "analyst_file.txt"),
            muts_only=True,
            compare_len=True)
        self.assertCompareMutFiles(
            expected_output['cbio_mutation_data_file']['path'],
            os.path.join(DATA_SETS['Proj_08390_G']['MAF_FILTER_DIR'], "Proj_08390_G", "portal_file.txt"),
            muts_only=True,
            compare_len=True)
class TestSamplesFillout(PlutoTestCase):
    """Tests for samples_fillout_workflow.cwl using the Fillout01 custom dataset."""
    cwl_file = CWLFile('samples_fillout_workflow.cwl')

    @staticmethod
    def _fillout_sample(sample_id, sample_type):
        # build one entry for the "samples" input list from the Fillout01 test dataset
        return {
            "sample_id": sample_id,
            "normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
            "sample_type": sample_type,
            "maf_file": {
                "class": "File",
                "path": os.path.join(
                    DATA_SETS['Fillout01']['MAF_DIR'],
                    f'{sample_id}.FillOutUnitTest01.muts.maf')
            },
            "bam_file": {
                "class": "File",
                "path": os.path.join(
                    DATA_SETS['Fillout01']['BAM_DIR'],
                    f'{sample_id}.UnitTest01.bam')
            }
        }

    def _configure_runner(self):
        # do not use cache because it breaks for some reason
        self.runner_args['use_cache'] = False
        self.runner_args['debug'] = True
        self.runner_args['js_console'] = True

    def _run_and_check_files(self):
        # run the workflow, check that the four expected output files are present
        # (file contents are inconsistent so size/checksum are stripped),
        # then return the output file paths for content assertions
        output_json, output_dir = self.run_cwl()
        expected_output = {
            'output_file': OFile(name='output.maf', dir=output_dir),
            'filtered_file': OFile(name='output.filtered.maf', dir=output_dir),
            'portal_file': OFile(name='data_mutations_extended.txt', dir=output_dir),
            'uncalled_file': OFile(name='data_mutations_uncalled.txt', dir=output_dir),
        }
        strip_related_keys = [
            ('basename', 'output.maf', ['size', 'checksum']),
            ('basename', 'output.filtered.maf', ['size', 'checksum']),
            ('basename', 'data_mutations_extended.txt', ['size', 'checksum']),
            ('basename', 'data_mutations_uncalled.txt', ['size', 'checksum'])
        ]
        self.assertCWLDictEqual(output_json, expected_output, related_keys=strip_related_keys)
        return (
            os.path.join(output_dir, 'output.maf'),
            os.path.join(output_dir, 'output.filtered.maf'),
            os.path.join(output_dir, 'data_mutations_extended.txt'),
            os.path.join(output_dir, 'data_mutations_uncalled.txt'),
        )

    def test_Nick_testcase(self):
        """
        Test case using Nick's custom made maf and bam files for fillout testing
        This test cases uses the germline filter to exclude some mutations in the output
        Takes about 10min to run
        """
        self.maxDiff = None
        self._configure_runner()
        self.input = {
            "samples": [
                self._fillout_sample("Sample1", "research"),
                self._fillout_sample("Sample2", "research"),
                self._fillout_sample("Sample3", "clinical"),
                self._fillout_sample("Sample4", "clinical"),
                self._fillout_sample("Sample5", "research"),
            ],
            "ref_fasta": {
                "class": "File",
                "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA']
            }
        }
        output_path, filtered_output_path, portal_output_path, uncalled_output_path = \
            self._run_and_check_files()
        # all_effects field is variable and changes bytes and checksum
        # need to check number of variant outputs instead
        self.assertNumMutationsHash(output_path, 475, 'd041bc641d85761b60c6b7ef8606bab2')
        self.assertNumMutationsHash(filtered_output_path, 230, 'c9cde01507d1b2470057c5d120eaab68')
        self.assertNumMutationsHash(portal_output_path, 163, '8dd6f3af030a2eca3b5fa0698896361a')
        self.assertNumMutationsHash(uncalled_output_path, 67, 'a474b61268d2a4c25fd27cc2ccbbce96')
        self.assertEqualNumMutations(
            [portal_output_path, uncalled_output_path], filtered_output_path)

    def test_Nick_testcase_2(self):
        """
        Test case using Nick's custom made maf and bam files for fillout testing
        This test cases uses the germline filter to exclude some mutations in the output
        This test case uses only research samples
        Takes about 10min to run
        """
        self.maxDiff = None
        self._configure_runner()
        self.input = {
            "samples": [
                self._fillout_sample(sample_id, "research")
                for sample_id in ("Sample1", "Sample2", "Sample3", "Sample4", "Sample5")
            ],
            "ref_fasta": {
                "class": "File",
                "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA']
            }
        }
        output_path, filtered_output_path, portal_output_path, uncalled_output_path = \
            self._run_and_check_files()
        # all_effects field is variable and changes bytes and checksum
        # need to check number of variant outputs instead
        self.assertNumMutationsHash(output_path, 475, 'd041bc641d85761b60c6b7ef8606bab2')
        self.assertNumMutationsHash(filtered_output_path, 475, 'd041bc641d85761b60c6b7ef8606bab2')
        self.assertNumMutationsHash(portal_output_path, 408, '63969ef90cb7a4524ab9063b4889bbde')
        self.assertNumMutationsHash(uncalled_output_path, 67, 'a474b61268d2a4c25fd27cc2ccbbce96')
        self.assertEqualNumMutations(
            [portal_output_path, uncalled_output_path], filtered_output_path)
unit tests for the snp-pileup-wrapper.cwl file """ import os import sys import json import unittest from tempfile import TemporaryDirectory THIS_DIR = os.path.dirname(os.path.realpath(__file__)) PARENT_DIR = os.path.dirname(THIS_DIR) sys.path.insert(0, PARENT_DIR) from pluto.tools import run_command, CWLFile from pluto.settings import CWL_ARGS sys.path.pop(0) cwl_file = CWLFile('concat_with_comments.cwl') class TestConcatWithCommentsCWL(unittest.TestCase): def test_concat_0(self): """ Test concat when no comments are present in the original file """ with TemporaryDirectory() as tmpdir: # make a dummy file with some lines input_lines = ["HEADER", "foo", "bar", "baz"] input_file = os.path.join(tmpdir, "input.txt") with open(input_file, "w") as fout: for line in input_lines: fout.write(line + '\n') input_json = {
class TestAddHeader(PlutoTestCase):
    """Tests for add_header.cwl."""
    cwl_file = CWLFile('add_header.cwl')

    def _run_add_header(self, file_contents):
        # write an input file with the given contents, run the CWL with
        # header string "HEADER", and return (output_json, output_dir)
        input_file = os.path.join(self.tmpdir, "input.txt")
        with open(input_file, "w") as f:
            f.write(file_contents)
        self.input = {
            "input_file": {"class": "File", "path": input_file},
            "header_str": "HEADER",
        }
        return self.run_cwl()

    def test_add_header(self):
        """
        Test case for adding a header to a file
        """
        self.maxDiff = None
        output_json, output_dir = self._run_add_header("foo")
        output_path = os.path.join(output_dir, 'output.txt')
        expected_output = {
            'output_file': {
                'location': 'file://' + output_path,
                'basename': 'output.txt',
                'class': 'File',
                'checksum': 'sha1$01838a0977d542fb12680e271393e1d4baaefa8f',
                'size': 10,
                'path': output_path
            }
        }
        self.assertDictEqual(output_json, expected_output)
        # output should be the header followed by the original contents
        with open(output_path) as f:
            lines = [l.strip() for l in f]
        self.assertEqual(lines, ['HEADER', 'foo'])

    def test_add_header_empty_file(self):
        """
        Test case for adding a header to an empty file
        should return only the header
        """
        output_json, output_dir = self._run_add_header("")
        output_path = os.path.join(output_dir, 'output.txt')
        expected_output = {
            'output_file': {
                'location': 'file://' + output_path,
                'basename': 'output.txt',
                'class': 'File',
                'checksum': 'sha1$b4cf58442d6321c81db6bab562806e14bf54bf72',
                'size': 7,
                'path': output_path
            }
        }
        self.assertDictEqual(output_json, expected_output)
        # only the header line should be present
        with open(output_path) as f:
            lines = [l.strip() for l in f]
        self.assertEqual(lines, ['HEADER'])
class TestConcatWithCommentsCWL(PlutoTestCase):
    """Tests for concat_with_comments.cwl."""
    cwl_file = CWLFile('concat_with_comments.cwl')

    def _write_input(self, filename, lines):
        # write the given lines to a file under tmpdir; return a CWL File entry
        path = os.path.join(self.tmpdir, filename)
        with open(path, "w") as fout:
            fout.write('\n'.join(lines) + '\n')
        return {"class": "File", "path": path}

    def test_concat_0(self):
        """
        Test concat when no comments are present in the original file
        """
        input_entry = self._write_input("input.txt", ["HEADER", "foo", "bar", "baz"])
        self.input = {
            "input_files": [input_entry],
            "comment_label": "comment_label",
            "comment_value": "comment_value"
        }
        output_json, output_dir = self.run_cwl()
        expected_output = {
            'output_file': OFile(
                name='output.txt',
                size=49,
                hash='7cef8f6de47289a55de99de77563beb3fa371deb',
                dir=output_dir)
        }
        self.assertCWLDictEqual(output_json, expected_output)
        # check the contents of the concatenated file; should be the same as the input
        self.assertFileLinesEqual(
            expected_output['output_file']['path'],
            ['#comment_label: comment_value', "HEADER", 'foo', 'bar', 'baz'])

    def test_concat1(self):
        """
        Test concat when original file has a comment line
        """
        input_entry = self._write_input(
            "input.txt", ["# comment here", "HEADER", "foo", "bar", "baz"])
        self.input = {
            "input_files": [input_entry],
            "comment_label": "comment_label",
            "comment_value": "comment_value"
        }
        output_json, output_dir = self.run_cwl()
        expected_output = {
            'output_file': OFile(
                name='output.txt',
                size=64,
                hash='14ee1247f314dba1e3c28aa8aec9ff7b137a1f41',
                dir=output_dir)
        }
        self.assertCWLDictEqual(output_json, expected_output)
        # original comment is preserved and the label comment is appended after it
        self.assertFileLinesEqual(
            expected_output['output_file']['path'],
            ['# comment here', '#comment_label: comment_value',
             "HEADER", 'foo', 'bar', 'baz'])

    def test_concat2(self):
        """
        Test concat when multiple files have comments
        """
        entry1 = self._write_input(
            "input1.txt", ["# comment 1 here", "HEADER", "foo1", "bar1"])
        entry2 = self._write_input(
            "input2.txt", ["# comment 2 here", "HEADER", "foo2", "bar2"])
        self.input = {
            "input_files": [entry1, entry2],
            "comment_label": "comment_label",
            "comment_value": "comment_value"
        }
        output_json, output_dir = self.run_cwl()
        expected_output = {
            'output_file': OFile(
                name='output.txt',
                size=91,
                hash='5dbce16f9bfef135d6b8288b16350351a33998f3',
                dir=output_dir)
        }
        self.assertCWLDictEqual(output_json, expected_output)
        # comments from all inputs are collected ahead of the label comment
        self.assertFileLinesEqual(
            expected_output['output_file']['path'],
            ['# comment 1 here', '# comment 2 here', '#comment_label: comment_value',
             "HEADER", 'foo1', 'bar1', 'foo2', 'bar2'])
unit tests for the concat.cwl """ import os import sys import json import unittest from tempfile import TemporaryDirectory THIS_DIR = os.path.dirname(os.path.realpath(__file__)) PARENT_DIR = os.path.dirname(THIS_DIR) sys.path.insert(0, PARENT_DIR) from pluto.tools import run_command, CWLFile from pluto.settings import CWL_ARGS sys.path.pop(0) cwl_file = CWLFile('concat.cwl') class TestConcat(unittest.TestCase): def test_concat_simple_file(self): """ Test that a single file with no header comes out looking as expected """ with TemporaryDirectory() as tmpdir: # make a dummy file with some lines input_lines = ["foo", "bar", "baz"] input_file = os.path.join(tmpdir, "input.txt") with open(input_file, "w") as fout: for line in input_lines: fout.write(line + '\n')