Beispiel #1
0
    def test_run_worflow_one_maf(self):
        """
        Run the workflow with a single mutation maf and its matching SV maf
        and Facets hisens files.

        Verifies the listing of the ``analysis`` output directory (names, sizes,
        hashes) and the number of mutations in the output muts.maf.
        """
        data_set = DATA_SETS['Proj_08390_G']
        maf_dir = data_set['MAF_DIR']
        facets_dir = data_set['FACETS_DIR']

        # the single pair of input files used for this run
        muts_maf = os.path.join(maf_dir, "Sample1.Sample2.muts.maf")
        svs_maf = os.path.join(maf_dir, "Sample1.Sample2.svs.pass.vep.maf")
        hisens_cncf = os.path.join(
            facets_dir,
            "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens.cncf.txt")
        hisens_seg = os.path.join(
            facets_dir,
            "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens.seg")

        self.input = {
            "is_impact": True,
            "argos_version_string": "2.x",
            "analysis_gene_cna_filename": "Proj_08390_G.gene.cna.txt",
            "analysis_mutations_filename": "Proj_08390_G.muts.maf",
            "analysis_mutations_share_filename": "Proj_08390_G.muts.share.maf",
            "analysis_segment_cna_filename": "Proj_08390_G.seg.cna.txt",
            "analysis_sv_filename": "Proj_08390_G.svs.maf",
            "helix_filter_version": "20.06.1",
            "IMPACT_gene_list": {"class": "File", "path": IMPACT_FILE},
            "targets_list": {"path": data_set["targets_list"], "class": "File"},
            "mutation_maf_files": [{"path": muts_maf, "class": "File"}],
            "mutation_svs_maf_files": [{"path": svs_maf, "class": "File"}],
            "facets_hisens_cncf_files": [{"path": hisens_cncf, "class": "File"}],
            "facets_hisens_seg_files": [{"path": hisens_seg, "class": "File"}],
        }

        output_json, output_dir = self.run_cwl()

        # expected contents of the 'analysis' output directory
        analysis_items = [
            OFile(name='Proj_08390_G.gene.cna.txt', size=87905,
                  hash='7cc89d24556de93b9a409812317581e67e5df494'),
            OFile(name='Proj_08390_G.muts.maf', size=33243,
                  hash='2c8904927a917d6e935ef207582d995680574d16'),
            OFile(name='Proj_08390_G.muts.share.maf', size=7462,
                  hash='b5af4e0fcd89fecabf8095aa3d7690e5edb8dca1'),
            OFile(name='Proj_08390_G.seg.cna.txt', size=1632,
                  hash='f0ebb82c34b6530447fa1e70b6dedcc039840d61'),
            OFile(name='Proj_08390_G.svs.maf', size=23603,
                  hash='df420706bb5b772a79317843c0a01a3c88a9571d'),
        ]
        expected_output = {
            'analysis_dir': ODir(name='analysis', items=analysis_items, dir=output_dir),
        }
        self.assertCWLDictEqual(output_json, expected_output)

        output_maf = os.path.join(output_dir, 'analysis', 'Proj_08390_G.muts.maf')
        self.assertNumMutations(output_maf, 22)
Beispiel #2
0
    def test_large_maf_file(self):
        """
        Filter a very large maf file and check the three output files.

        Checks size/hash of the analysis, cBioPortal and rejected outputs, the
        mutation counts of the first two, and compares their contents against
        the reference analyst / portal files stored alongside the input.
        """
        # directory holding both the input maf and the reference comparison files
        fixture_dir = os.path.join(DATA_SETS['Proj_08390_G']['MAF_FILTER_DIR'],
                                   "Proj_08390_G")
        input_maf = os.path.join(fixture_dir, "Proj_08390_G.muts.maf")

        self.input = {
            "maf_file": {"class": "File", "path": input_maf},
            "argos_version_string": "2.x",
            "is_impact": True,
            "analysis_mutations_filename": "Proj_08390_G.muts.maf",
            "cbio_mutation_data_filename": 'data_mutations_extended.txt',
        }

        output_json, output_dir = self.run_cwl()

        analysis_file = OFile(name='Proj_08390_G.muts.maf',
                              size=2386906,
                              hash='4ef341ab4280140f9be15e65a0258a4170ff651d',
                              dir=output_dir)
        portal_file = OFile(name='data_mutations_extended.txt',
                            size=278458,
                            hash='af36cf815820fdf41f1401578138b5cbd551a217',
                            dir=output_dir)
        rejected_file = OFile(name='rejected.muts.maf',
                              size=1047796463,
                              hash='345953da2c7cb801fa08368260469cf7c153055f',
                              dir=output_dir)
        expected_output = {
            'analysis_mutations_file': analysis_file,
            'cbio_mutation_data_file': portal_file,
            'rejected_file': rejected_file,
        }
        self.assertCWLDictEqual(output_json, expected_output)

        analysis_path = expected_output['analysis_mutations_file']['path']
        portal_path = expected_output['cbio_mutation_data_file']['path']

        self.assertNumMutations(analysis_path, 1662)
        self.assertNumMutations(portal_path, 1139)

        # validate output mutation file contents against the reference files
        self.assertCompareMutFiles(
            analysis_path,
            os.path.join(fixture_dir, "analyst_file.txt"),
            muts_only=True,
            compare_len=True)

        self.assertCompareMutFiles(
            portal_path,
            os.path.join(fixture_dir, "portal_file.txt"),
            muts_only=True,
            compare_len=True)
Beispiel #3
0
    def test_filter_a_maf_file(self):
        """
        Filter a single-pair maf file using the default ARGOS_VERSION_STRING.

        Confirms the input mutation count first, then checks the three output
        files, the output mutation count, and the output contents against the
        Sample1 reference analyst / portal files.
        """
        data_set = DATA_SETS['Proj_08390_G']
        input_maf = os.path.join(data_set['MAF_DIR'], "Sample1.Sample2.muts.maf")

        # sanity check on the fixture before running anything
        self.assertNumMutations(input_maf, 12514)

        self.input = {
            "maf_file": {"class": "File", "path": input_maf},
            "argos_version_string": ARGOS_VERSION_STRING,
            "is_impact": True,
            "analysis_mutations_filename": "Proj_08390_G.muts.maf",
            "cbio_mutation_data_filename": 'data_mutations_extended.txt',
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'analysis_mutations_file': OFile(
                name='Proj_08390_G.muts.maf',
                size=28079,
                hash='24421ab8d1a39a71f48eecbb0dd167d5d9f5c529',
                dir=output_dir),
            'cbio_mutation_data_file': OFile(
                name='data_mutations_extended.txt',
                size=4534,
                hash='6131494536ce956d741c820378e7e2ce1c714403',
                dir=output_dir),
            'rejected_file': OFile(
                name='rejected.muts.maf',
                size=18627626,
                hash='a06789623715703c5006db6876ecb58b8498f938',
                dir=output_dir),
        }
        self.assertCWLDictEqual(output_json, expected_output)

        analysis_path = os.path.join(output_dir, "Proj_08390_G.muts.maf")
        portal_path = os.path.join(output_dir, 'data_mutations_extended.txt')
        reference_dir = os.path.join(data_set['MAF_FILTER_DIR'], "Sample1")

        self.assertNumMutations(analysis_path, 22)

        # validate output mutation file contents against the reference files
        self.assertCompareMutFiles(
            analysis_path,
            os.path.join(reference_dir, "analyst_file.txt"),
            muts_only=True,
            compare_len=True)

        self.assertCompareMutFiles(
            portal_path,
            os.path.join(reference_dir, "portal_file.txt"),
            muts_only=True,
            compare_len=True)
Beispiel #4
0
    def test_maf_filter_argos_3_2_0(self):
        """
        Filter a single-pair maf file with argos_version_string set to "3.2.0".

        Confirms the input mutation count, then checks the three output files,
        the output mutation count, and the output contents against the Sample1
        reference analyst / portal files.
        """
        data_set = DATA_SETS['Proj_08390_G']
        input_maf = os.path.join(data_set['MAF_DIR'], "Sample1.Sample2.muts.maf")

        # sanity check on the fixture before running anything
        self.assertNumMutations(input_maf, 12514)

        self.input = {
            "maf_file": {"class": "File", "path": input_maf},
            "argos_version_string": "3.2.0",
            "is_impact": True,
            "analysis_mutations_filename": "Proj_08390_G.muts.maf",
            "cbio_mutation_data_filename": 'data_mutations_extended.txt',
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'analysis_mutations_file': OFile(
                name='Proj_08390_G.muts.maf',
                size=28081,
                hash='fd78842c9410e7e622dee270ec9c0e7628811f18',
                dir=output_dir),
            'cbio_mutation_data_file': OFile(
                name='data_mutations_extended.txt',
                size=4536,
                hash='47e716eabbfda3408b2d9a08b9bb432b2cb8fce8',
                dir=output_dir),
            'rejected_file': OFile(
                name='rejected.muts.maf',
                size=18627626,
                hash='a06789623715703c5006db6876ecb58b8498f938',
                dir=output_dir),
        }
        self.assertCWLDictEqual(output_json, expected_output)

        analysis_path = expected_output['analysis_mutations_file']['path']
        portal_path = expected_output['cbio_mutation_data_file']['path']
        reference_dir = os.path.join(data_set['MAF_FILTER_DIR'], "Sample1")

        self.assertNumMutations(analysis_path, 22)

        # validate output mutation file contents against the reference files
        self.assertCompareMutFiles(
            analysis_path,
            os.path.join(reference_dir, "analyst_file.txt"),
            muts_only=True,
            compare_len=True)

        self.assertCompareMutFiles(
            portal_path,
            os.path.join(reference_dir, "portal_file.txt"),
            muts_only=True,
            compare_len=True)
Beispiel #5
0
    def test_run_copy_number_two_files(self):
        """
        Run the copy number step with two hisens cncf input files.

        Checks the size and hash of the three expected CNA output files.
        """
        data_set = DATA_SETS['Proj_08390_G']
        facets_dir = data_set['FACETS_DIR']

        first_cncf = os.path.join(
            facets_dir,
            "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens.cncf.txt")
        second_cncf = os.path.join(
            facets_dir,
            "Sample9.rg.md.abra.printreads__Sample10.rg.md.abra.printreads_hisens.cncf.txt")

        self.input = {
            "portal_CNA_file": "data_CNA.txt",
            "targets_list": {"class": "File", "path": data_set['targets_list']},
            "hisens_cncfs": [
                {"class": "File", "path": first_cncf},
                {"class": "File", "path": second_cncf},
            ],
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'output_cna_file': OFile(
                name='data_CNA.txt',
                size=143118,
                hash='6dfa53b8a0fad1156060476bcf445d959f0e6eb2',
                dir=output_dir),
            'output_cna_ascna_file': OFile(
                name='data_CNA.ascna.txt',
                size=8658,
                hash='3953e55b3db85b69209982211c53b9d8f049dc01',
                dir=output_dir),
            'output_cna_scna_file': OFile(
                name='data_CNA.scna.txt',
                size=6937,
                hash='9ddcee42cce0d49aec5745303be480b6c4ef0fe8',
                dir=output_dir),
        }
        # show the full diff if the comparison fails
        self.maxDiff = None
        self.assertCWLDictEqual(output_json, expected_output)
Beispiel #6
0
    def test_annotate_demo1(self):
        """
        Run the maf annotation on the smaller demo maf file and Facets rds file.

        Checks the output listing (ignoring size and checksum of the captured
        stdout / stderr files) and the number of records in the annotated maf.
        """
        demo = self.DATA_SETS['demo']
        input_maf = os.path.join(demo['MAF_DIR'], "Sample1.Sample2.muts.maf")
        input_rds = os.path.join(demo['FACETS_DIR'], "Sample1_hisens.rds")

        self.input = {
            "maf_file": {"path": input_maf, "class": "File"},
            "facets_rds": {"path": input_rds, "class": "File"},
            "output_filename": "Sample1_hisens.ccf.maf",
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'failed': False,
            'output_file': OFile(
                name='Sample1_hisens.ccf.maf',
                hash='8cd487056bd86177d19d3dd0fe072747d31fd9b7',
                size=60230,
                dir=output_dir),
            'stderr_txt': OFile(name='annotate_maf_stderr.txt', dir=output_dir),
            'stdout_txt': OFile(name='annotate_maf_stdout.txt', dir=output_dir),
        }
        # the log files are declared above without size/hash, so drop those
        # keys from the matching entries before comparing
        strip_related_keys = [
            ('basename', log_name, ['size', 'checksum'])
            for log_name in ('annotate_maf_stderr.txt', 'annotate_maf_stdout.txt')
        ]
        self.assertCWLDictEqual(output_json,
                                expected_output,
                                related_keys=strip_related_keys)

        annotated_maf = os.path.join(output_dir, 'Sample1_hisens.ccf.maf')
        table_reader = TableReader(annotated_maf)
        # NOTE(review): comments and fieldnames are read but never asserted on
        comments = table_reader.comment_lines
        fieldnames = table_reader.get_fieldnames()
        records = list(table_reader.read())
        self.assertEqual(len(records), 41)
Beispiel #7
0
    def test_tmb_workflow3(self):
        """
        Run the TMB workflow on one pair that uses a real maf file.

        Checks the per-pair output record and the contents of the tmb.tsv table.
        """
        pair_maf = os.path.join(self.DATA_SETS['Proj_08390_G']['MAF_DIR'],
                                "Sample1.Sample2.muts.maf")
        pair = {
            "pair_maf": {"path": pair_maf, "class": "File"},
            "pair_id": "Sample1-T.Sample1-N",
            "tumor_id": "Sample1-T",
            "normal_id": "Sample1-N",
        }
        self.input = {
            "assay_coverage": '1000',
            "pairs": [pair],
        }

        output_json, output_dir = self.run_cwl()

        expected_pair = {
            "pair_id": "Sample1-T.Sample1-N",
            "tumor_id": "Sample1-T",
            "normal_id": "Sample1-N",
            "tmb_maf": OFile(
                name="Sample1-T.Sample1-N.tmb.maf",
                size=519440,
                hash="809c3c1ac3bb750aebf22ee2f95a5ebafd41e98f",
                dir=output_dir),
            "tmb_tsv": OFile(
                name="Sample1-T.Sample1-N.tmb.tsv",
                size=42,
                hash="8156b9e7a0602ddd7710f002ef9385237a82c5d0",
                dir=output_dir),
        }
        expected_output = {'pairs': [expected_pair]}
        self.assertCWLDictEqual(output_json, expected_output)

        # check the score table written for the pair
        tmb_tsv_path = expected_output["pairs"][0]["tmb_tsv"]["path"]
        lines = self.read_table(tmb_tsv_path)
        expected_lines = [
            ['CMO_TMB_SCORE', 'SampleID'],
            ['475000.0', 'Sample1-T'],
        ]
        self.assertEqual(lines, expected_lines)
Beispiel #8
0
    def test_concat_simple_file_with_header(self):
        """
        Concatenate a single file whose first line starts with '#'.

        The expected output contains only the three data lines; the leading
        "#header" line is not present in it.
        """
        # make a dummy file with some lines
        input_lines = ["#header", "foo", "bar", "baz"]
        input_file = os.path.join(self.tmpdir, "input.txt")
        with open(input_file, "w") as fout:
            fout.writelines(line + '\n' for line in input_lines)

        self.input = {"input_files": [{"class": "File", "path": input_file}]}

        output_json, output_dir = self.run_cwl()

        # check the contents of the concatenated file; the '#' line is dropped
        output_file = os.path.join(output_dir, 'output.txt')
        with open(output_file) as fin:
            output_lines = [line.strip() for line in fin]

        expected_lines = ["foo", "bar", "baz"]
        self.assertEqual(output_lines, expected_lines)

        expected_output = {
            'output_file': OFile(
                name='output.txt',
                hash='0562f08aef399135936d6fb4eb0cc7bc1890d5b4',
                size=12,
                dir=output_dir),
        }
        self.assertCWLDictEqual(output_json, expected_output)
    def test_reduce_sig_figs(self):
        """
        Check that the values in the 'seg.mean' column come out shortened.

        Writes a one-column input file, runs the CWL, then checks the output
        file listing and the two resulting rows.
        """
        input_lines = ["seg.mean", "3.141592", "2.718281828"]
        input_file = os.path.join(self.tmpdir, "input.txt")
        with open(input_file, "w") as fout:
            fout.writelines(line + '\n' for line in input_lines)

        self.input = {"input_file": {"class": "File", "path": input_file}}

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'output_file': OFile(
                name="output.txt",
                size=26,
                hash="d9f5ec4a9aa27a69ee64edb97eb10d6db65c7ad7",
                dir=output_dir),
        }
        self.assertCWLDictEqual(output_json, expected_output)

        # check the contents of the file
        output_file = expected_output['output_file']['path']
        with open(output_file) as fin:
            rows = list(csv.DictReader(fin))

        self.assertEqual(len(rows), 2)
        first_row, second_row = rows
        self.assertDictEqual(first_row, OrderedDict([('seg.mean', '3.1416')]))
        self.assertDictEqual(second_row, OrderedDict([('seg.mean', '2.7183')]))
Beispiel #10
0
    def test_update_caselist_1(self):
        """
        Test simple case list update
        """
        self.input = {
            "case_list": {"class": "File", "path": self.input_file},
            "sample_ids": ["Sample3", "Sample4"],
            "output_filename": "cases_all.txt"
        }
        output_json, output_dir = self.run_cwl()

        output_file = os.path.join(output_dir, 'cases_all.txt')

        expected_output = {
            "output_file": OFile(name = 'cases_all.txt', size = 208, hash = '59aa4b6b6695b7adfd4493390edd4038808a018f', dir = output_dir)
        }

        self.assertCWLDictEqual(output_json, expected_output)

        with open(output_file) as fin:
            text = fin.read()

        expected_text = """case_list_category: all_cases_in_study
stable_id: pi_123_all
case_list_name: All Tumors
case_list_description: All tumor samples
cancer_study_identifier: pi_123
case_list_ids: Sample1\tSample2\tSample3\tSample4
"""
        self.assertEqual(text, expected_text)
Beispiel #11
0
    def test_put_one_file1_in_dir(self):
        """
        Put a single file into a new output directory named "foo".

        The input file comes from ``self.mkstemp`` and the expected directory
        entry has size 0.
        """
        file1 = self.mkstemp(prefix="1.")
        self.input = {
            "output_directory_name": "foo",
            "files": [{"class": "File", "path": file1}],
        }

        output_json, output_dir = self.run_cwl()

        # the directory should contain exactly the one input file
        expected_item = OFile(name=os.path.basename(file1),
                              size=0,
                              hash='da39a3ee5e6b4b0d3255bfef95601890afd80709')
        expected_output = {
            "directory": ODir(name='foo', dir=output_dir, items=[expected_item]),
        }

        self.assertCWLDictEqual(output_json, expected_output)
Beispiel #12
0
    def test_fusion_filter1(self):
        """
        Run the CWL with a fusions file and the known fusions file.

        Checks the size and hash of the resulting 'data_fusions.txt' output.
        """
        maf_dir = DATA_SETS['Proj_08390_G']['MAF_DIR']
        fusion_file = os.path.join(maf_dir, "Sample1.Sample2.svs.pass.vep.portal.txt")

        self.input = {
            "fusions_file": {"class": "File", "path": fusion_file},
            "output_filename": "data_fusions.txt",
            "known_fusions_file": {"class": "File", "path": KNOWN_FUSIONS_FILE},
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            "output_file": OFile(
                name='data_fusions.txt',
                size=99,
                hash='c16f763b248813fcdde76f7486f1ddc4e9856038',
                dir=output_dir),
        }
        self.assertCWLDictEqual(output_json, expected_output)
Beispiel #13
0
    def test_update_caselist_2(self):
        """
        Test update with no files passed
        """
        self.input = {
            "case_list": {"class": "File", "path": self.input_file},
            "sample_ids": [],
            "output_filename": "cases_all.txt"
        }
        output_json, output_dir = self.run_cwl()

        output_file = os.path.join(output_dir, 'cases_all.txt')

        expected_output = {
            "output_file": OFile(name = 'cases_all.txt', size = 192, hash = 'f1ad64f51beac01759ae690b2f787fe3978e8882', dir = output_dir)
        }

        self.assertCWLDictEqual(output_json, expected_output)

        with open(output_file) as fin:
            text = fin.read()

        expected_text = """case_list_category: all_cases_in_study
stable_id: pi_123_all
case_list_name: All Tumors
case_list_description: All tumor samples
cancer_study_identifier: pi_123
case_list_ids: Sample1\tSample2
"""
        self.assertEqual(text, expected_text)
    def test_snp_pileup1(self):
        """
        Run the CWL with a SNPs vcf and one normal / tumor bam pair.

        Checks the size and hash of the resulting snp_pileup.gz output file.
        """
        bam_dir = DATA_SETS['Proj_08390_G']['BAM_DIR']
        # the smallest pair of bam files in the test dataset
        normal_bam = os.path.join(bam_dir, "Sample23.rg.md.abra.printreads.bam")
        tumor_bam = os.path.join(bam_dir, "Sample24.rg.md.abra.printreads.bam")

        self.input = {
            "snps_vcf": {"path": FACETS_SNPS_VCF, "class": "File"},
            "normal_bam": {"path": normal_bam, "class": "File"},
            "tumor_bam": {"path": tumor_bam, "class": "File"},
            "output_prefix": "Sample24.Sample23",
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'output_file': OFile(
                name="Sample24.Sample23.snp_pileup.gz",
                size=34851004,
                hash="755a8b64f45c819b4e2c481e64bf2fe36d1f5361",
                dir=output_dir),
        }
        # show the full diff if the comparison fails
        self.maxDiff = None
        self.assertCWLDictEqual(output_json, expected_output)
Beispiel #15
0
    def test_fusion_to_sv(self):
        """
        Run the fusion to SV conversion on one fusion file.

        Checks the size and hash of the resulting 'data_SV.txt' output file.
        """
        maf_dir = DATA_SETS['Proj_08390_G']['MAF_DIR']
        fusion_file = os.path.join(maf_dir, "Sample1.Sample2.svs.pass.vep.portal.txt")

        self.input = {
            "fusion_file": {"class": "File", "path": fusion_file},
            "output_filename": "data_SV.txt",
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'output_file': OFile(
                name='data_SV.txt',
                size=1103,
                hash='02fda70b7838931321544f6797de4782adaf1a46',
                dir=output_dir),
        }

        # show the full diff if the comparison fails
        self.maxDiff = None
        # NOTE(review): expected_output has no 'report.html' entry, so this
        # related_keys filter looks like a leftover -- confirm before removing
        strip_related_keys = [('basename', 'report.html', ['size', 'checksum'])]
        self.assertCWLDictEqual(output_json,
                                expected_output,
                                related_keys=strip_related_keys)
    def test_concat1(self):
        """
        Concatenate with a comment label/value when the input file already
        starts with a comment line.

        The expected output keeps the original comment line, followed by the
        new "#comment_label: comment_value" line, then the header and data lines.
        """
        # make a dummy file with some lines
        input_lines = ["# comment here", "HEADER", "foo", "bar", "baz"]
        input_file = os.path.join(self.tmpdir, "input.txt")
        with open(input_file, "w") as fout:
            fout.writelines(line + '\n' for line in input_lines)

        self.input = {
            "input_files": [{"class": "File", "path": input_file}],
            "comment_label": "comment_label",
            "comment_value": "comment_value",
        }
        output_json, output_dir = self.run_cwl()

        expected_output = {
            'output_file': OFile(
                name='output.txt',
                size=64,
                hash='14ee1247f314dba1e3c28aa8aec9ff7b137a1f41',
                dir=output_dir),
        }
        self.assertCWLDictEqual(output_json, expected_output)

        # check the contents of the concatenated file; the input lines plus
        # the added comment line
        expected_lines = [
            '# comment here',
            '#comment_label: comment_value',
            "HEADER",
            'foo',
            'bar',
            'baz',
        ]
        self.assertFileLinesEqual(expected_output['output_file']['path'],
                                  expected_lines)
    def test_concat_0(self):
        """
        Concatenate with a comment label/value when the input file has no
        comment lines of its own.

        The expected output starts with the new "#comment_label: comment_value"
        line, followed by the header and data lines from the input.
        """
        # make a dummy file with some lines
        input_lines = ["HEADER", "foo", "bar", "baz"]
        input_file = os.path.join(self.tmpdir, "input.txt")
        with open(input_file, "w") as fout:
            fout.writelines(line + '\n' for line in input_lines)

        self.input = {
            "input_files": [{"class": "File", "path": input_file}],
            "comment_label": "comment_label",
            "comment_value": "comment_value",
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'output_file': OFile(
                name='output.txt',
                size=49,
                hash='7cef8f6de47289a55de99de77563beb3fa371deb',
                dir=output_dir),
        }
        self.assertCWLDictEqual(output_json, expected_output)

        # check the contents of the concatenated file; the added comment line
        # followed by the input lines
        expected_lines = [
            '#comment_label: comment_value',
            "HEADER",
            'foo',
            'bar',
            'baz',
        ]
        self.assertFileLinesEqual(expected_output['output_file']['path'],
                                  expected_lines)
Beispiel #18
0
    def test_concat_three_files_with_comments(self):
        """
        Concatenate three files that each have a comment line and a header line.

        Three files are used to make the ordering of the output clear: the
        expected output has the first file's header once, followed by the data
        lines of each file in input order.
        """
        # make three dummy files with some lines
        file_contents = [
            ("input1.txt", ["#comment1", "header1", "foo1", "bar1", "baz1"]),
            ("input2.txt", ["#comment2", "header2", "foo2", "bar2", "baz2"]),
            ("input3.txt", ["#comment3", "header3", "foo3", "bar3", "baz3"]),
        ]
        input_paths = []
        for filename, lines in file_contents:
            input_path = os.path.join(self.tmpdir, filename)
            with open(input_path, "w") as fout:
                fout.writelines(line + '\n' for line in lines)
            input_paths.append(input_path)

        self.input = {
            "input_files": [
                {"class": "File", "path": input_path}
                for input_path in input_paths
            ]
        }

        output_json, output_dir = self.run_cwl()

        # check the contents of the concatenated file
        output_file = os.path.join(output_dir, 'output.txt')
        with open(output_file) as fin:
            output_lines = [line.strip() for line in fin]

        expected_output_lines = [
            "header1",
            "foo1", "bar1", "baz1",
            "foo2", "bar2", "baz2",
            "foo3", "bar3", "baz3",
        ]
        self.assertEqual(output_lines, expected_output_lines)

        # TODO: update this once the above ^^^ passes
        expected_output = {
            'output_file': OFile(
                name='output.txt',
                hash='b115b7b40aa8a2e08e30a55abf60d742e05e62b4',
                size=53,
                dir=output_dir),
        }
        self.assertCWLDictEqual(output_json, expected_output)
Beispiel #19
0
    def test_tmb_workflow2(self):
        """
        Run the TMB workflow on one pair that uses the ``self.maf1`` fixture.

        Checks the per-pair output record and the contents of the tmb.tsv table.
        """
        pair = {
            "pair_maf": {"path": self.maf1, "class": "File"},
            "pair_id": "Sample1-T.Sample1-N",
            "tumor_id": "Sample1-T",
            "normal_id": "Sample1-N",
        }
        self.input = {
            "assay_coverage": '1000',
            "pairs": [pair],
        }

        output_json, output_dir = self.run_cwl()

        expected_pair = {
            "pair_id": "Sample1-T.Sample1-N",
            "tumor_id": "Sample1-T",
            "normal_id": "Sample1-N",
            "tmb_maf": OFile(
                name="Sample1-T.Sample1-N.tmb.maf",
                size=352,
                hash="b019b5b3c6aba861371c135fe47520c969fab5ae",
                dir=output_dir),
            "tmb_tsv": OFile(
                name="Sample1-T.Sample1-N.tmb.tsv",
                size=40,
                hash="d6a57cfb5e3001697875e5b5bfae206e0f7f2310",
                dir=output_dir),
        }
        expected_output = {'pairs': [expected_pair]}
        self.assertCWLDictEqual(output_json, expected_output)

        # check the score table written for the pair
        tmb_tsv_path = expected_output["pairs"][0]["tmb_tsv"]["path"]
        lines = self.read_table(tmb_tsv_path)
        expected_lines = [
            ['CMO_TMB_SCORE', 'SampleID'],
            ['7000.0', 'Sample1-T'],
        ]
        self.assertEqual(lines, expected_lines)
Beispiel #20
0
    def test_msi_workflow_demo1(self):
        """
        Run the MSI workflow on a single tumor / normal pair from the demo dataset.

        Checks the per-pair output record and the contents of the msi.tsv table.
        """
        bam_dir = self.DATA_SETS['demo']['BAM_DIR']
        normal_bam = os.path.join(bam_dir, "Sample2.bam")
        tumor_bam = os.path.join(bam_dir, "Sample1.bam")

        pair = {
            "pair_id": "Sample1.Sample2",
            "tumor_id": "Sample1",
            "normal_id": "Sample2",
        }
        self.input = {
            "threads": "16",
            "microsatellites_file": {"class": "File", "path": MICROSATELLITES_LIST},
            "pairs": [pair],
            "normal_bam_files": [{"path": normal_bam, "class": "File"}],
            "tumor_bam_files": [{"path": tumor_bam, "class": "File"}],
        }

        output_json, output_dir = self.run_cwl()

        expected_pair = {
            "pair_id": "Sample1.Sample2",
            "tumor_id": "Sample1",
            "normal_id": "Sample2",
            "msi_tsv": OFile(
                name='Sample1.Sample2.msi.tsv',
                hash="92576a9be4d6a36c67b26d16fdc4134b0d1b9cd9",
                size=54,
                dir=output_dir),
        }
        expected_output = {'pairs': [expected_pair]}
        self.assertCWLDictEqual(output_json, expected_output)

        # check the score table written for the pair
        output_file = os.path.join(output_dir, 'Sample1.Sample2.msi.tsv')
        lines = self.read_table(output_file)

        expected_lines = [
            ['MSI_SCORE', 'MSI_STATUS', 'SAMPLE_ID'],
            ['20.90', 'Instable', 'Sample1'],
        ]
        self.assertEqual(lines, expected_lines)
    def test_add_impact_0(self):
        """
        Run the IMPACT CWL on a tiny hand-written maf and IMPACT gene list.

        Checks the output file listing, then that each mutation gained the
        'is_in_impact' and 'impact_assays' values expected for its gene and that
        the two comment lines are still present.
        """
        maf_lines = [
            ['# comment 1'],
            ['# comment 2'],
            ['Hugo_Symbol'],
            ['SUFU'],
            ['GOT1'],
            ['BRCA'],
        ]
        impact_lines = [
            ['BRCA', 'IMPACT468'],
            ['SUFU', 'IMPACT468'],
            ['SUFU', 'IMPACT505'],
        ]

        input_maf = self.write_table(tmpdir=self.tmpdir,
                                     filename='input.maf',
                                     lines=maf_lines)
        impact_file = self.write_table(tmpdir=self.tmpdir,
                                       filename='impact.txt',
                                       lines=impact_lines)
        self.input = {
            "input_file": {"class": "File", "path": input_maf},
            "output_filename": 'output.maf',
            "IMPACT_file": {"class": "File", "path": impact_file},
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'IMPACT_col_added_file': OFile(
                name='output.maf',
                hash='5c61f3977dad29ebc74966e8fc40a0278f9aab12',
                size=126,
                dir=output_dir),
        }
        self.assertCWLDictEqual(output_json, expected_output)

        # one expected record per input mutation, in input order
        expected_mutations = [
            {
                'Hugo_Symbol': 'SUFU',
                'is_in_impact': 'True',
                'impact_assays': 'IMPACT468,IMPACT505',
            },
            {
                'Hugo_Symbol': 'GOT1',
                'is_in_impact': 'False',
                'impact_assays': '.',
            },
            {
                'Hugo_Symbol': 'BRCA',
                'is_in_impact': 'True',
                'impact_assays': 'IMPACT468',
            },
        ]
        self.assertMutFileContains(
            filepath=expected_output['IMPACT_col_added_file']['path'],
            expected_comments=['# comment 1', '# comment 2'],
            expected_mutations=expected_mutations,
            identical=True)
Beispiel #22
0
    def test_run_facets_annotation_wrapper(self):
        """
        Run the Facets maf annotation CWL and check the reported outputs.

        The stderr/stdout capture files are listed in ``related_keys`` with
        their ``size`` and ``checksum`` fields, presumably so those fields
        are left out of the comparison -- confirm against assertCWLDictEqual.
        """
        dataset = self.DATA_SETS['Proj_08390_G']
        maf_path = os.path.join(dataset['MAF_DIR'], "Sample1.Sample2.muts.maf")
        rds_path = os.path.join(dataset['FACETS_SUITE_DIR'],
                                "Sample1_hisens.rds")

        self.input = {
            "maf_file": {"path": maf_path, "class": "File"},
            "facets_rds": {"path": rds_path, "class": "File"},
            "output_filename": "Sample1_hisens.ccf.maf",
        }

        output_json, output_dir = self.run_cwl()

        expected = {
            'failed': False,
            'output_file': OFile(
                name='Sample1_hisens.ccf.maf',
                hash='7e478a8a44d27735f26e368989c672ed6ef5d52a',
                size=19217199,
                dir=output_dir,
            ),
            'stderr_txt': OFile(name='annotate_maf_stderr.txt',
                                dir=output_dir),
            'stdout_txt': OFile(name='annotate_maf_stdout.txt',
                                dir=output_dir),
        }
        volatile_fields = ['size', 'checksum']
        log_file_keys = [
            ('basename', 'annotate_maf_stderr.txt', list(volatile_fields)),
            ('basename', 'annotate_maf_stdout.txt', list(volatile_fields)),
        ]
        self.assertCWLDictEqual(
            output_json, expected, related_keys=log_file_keys)
Beispiel #23
0
    def test_example_workflow(self):
        """
        Run the example workflow with one value and two samples.

        Checks the reported output files and then the stripped lines of
        ``output.concat.tsv``: a header plus one row per sample id, each
        carrying the input value.
        """
        self.maxDiff = None
        sample_records = [{"sample_id": "1"}, {"sample_id": "2"}]
        self.input = {'value': "ABC", "samples": sample_records}

        output_json, output_dir = self.run_cwl()

        expected = {
            "output_file": OFile(
                name='output.concat.tsv',
                hash='d4297dfdad25ac92ffae2ce61c6cfe12c4089c28',
                size=27,
                dir=output_dir,
            ),
            'env': OFile(name='env.txt', dir=output_dir),
        }
        # env.txt is matched with its size/checksum listed as related keys
        env_keys = [('basename', 'env.txt', ['size', 'checksum'])]
        self.assertCWLDictEqual(output_json, expected, related_keys=env_keys)

        concat_path = os.path.join(output_dir, "output.concat.tsv")
        observed = []
        with open(concat_path) as handle:
            for raw_line in handle:
                observed.append(raw_line.strip())

        self.assertEqual(observed, ['SampleID\tValue', '1\tABC', '2\tABC'])
Beispiel #24
0
    def test_filter_maf_file_impact_false(self):
        """
        Filter the Sample1.Sample2 mutations maf with ``is_impact`` disabled.

        The input is first confirmed to hold 12514 mutations; after the run
        the analysis, cBioPortal and rejected files are compared against
        their recorded size/hash, and the analysis file must hold 18
        mutations.
        """
        self.maxDiff = None
        maf_path = os.path.join(
            DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample1.Sample2.muts.maf")
        self.assertNumMutations(maf_path, 12514)

        self.input = {
            "maf_file": {"class": "File", "path": maf_path},
            "argos_version_string": ARGOS_VERSION_STRING,
            "is_impact": False,
            "analysis_mutations_filename": "Proj_08390_G.muts.maf",
            "cbio_mutation_data_filename": 'data_mutations_extended.txt',
        }

        output_json, output_dir = self.run_cwl()

        expected = {
            'analysis_mutations_file': OFile(
                name='Proj_08390_G.muts.maf',
                size=24524,
                hash='9fb9d43c71e546750ddec6aea2313dda28547b3a',
                dir=output_dir,
            ),
            'cbio_mutation_data_file': OFile(
                name='data_mutations_extended.txt',
                size=3931,
                hash='15ca06249511c32c32e058c246a757ec8df11d83',
                dir=output_dir,
            ),
            'rejected_file': OFile(
                name='rejected.muts.maf',
                size=18790398,
                hash='e7441703699e82cef500d9557bfcbd3464ce8eab',
                dir=output_dir,
            ),
        }
        self.assertCWLDictEqual(output_json, expected)

        analysis_path = expected['analysis_mutations_file']['path']
        self.assertNumMutations(analysis_path, 18)
Beispiel #25
0
    def test_msi_workflow2(self):
        """
        Run the MSI workflow on one tumor/normal pair.

        Uses the Sample23 (normal) and Sample24 (tumor) bam files under the
        pair id ``Sample1-T.Sample1-N`` and checks both the reported
        ``msi_tsv`` file record and the table it contains.
        """
        bam_dir = self.DATA_SETS['Proj_08390_G']['BAM_DIR']
        normal_bam = os.path.join(bam_dir,
                                  "Sample23.rg.md.abra.printreads.bam")
        tumor_bam = os.path.join(bam_dir, "Sample24.rg.md.abra.printreads.bam")

        pair_record = {
            "pair_id": "Sample1-T.Sample1-N",
            "tumor_id": "Sample1-T",
            "normal_id": "Sample1-N",
        }
        self.input = {
            "microsatellites_file": {
                "class": "File",
                "path": MICROSATELLITES_LIST,
            },
            "pairs": [pair_record],
            "normal_bam_files": [{"path": normal_bam, "class": "File"}],
            "tumor_bam_files": [{"path": tumor_bam, "class": "File"}],
        }

        output_json, output_dir = self.run_cwl()

        msi_name = 'Sample1-T.Sample1-N.msi.tsv'
        expected = {
            "pairs": [{
                "pair_id": "Sample1-T.Sample1-N",
                "tumor_id": "Sample1-T",
                "normal_id": "Sample1-N",
                "msi_tsv": OFile(
                    name=msi_name,
                    hash="bc132f6ab9b779d7cba51e7ddfa82af724134f03",
                    size=56,
                    dir=output_dir,
                ),
            }]
        }
        self.assertCWLDictEqual(output_json, expected)

        # the table itself: one header row and one result row for the tumor
        table_rows = self.read_table(os.path.join(output_dir, msi_name))
        self.assertEqual(table_rows, [
            ['MSI_SCORE', 'MSI_STATUS', 'SAMPLE_ID'],
            ['21.97', 'Instable', 'Sample1-T'],
        ])
    def test_1(self):
        """
        Run the CWL on the Fillout01 ``output.maf`` and check both outputs.

        The input is confirmed to hold 475 mutations; the run is expected to
        report a ``called_file`` with 253 mutations and an ``uncalled_file``
        with 222 mutations.
        """
        source_maf = os.path.join(
            DATA_SETS["Fillout01"]["OUTPUT_DIR"], "output.maf")

        _, source_mutations = self.load_mutations(source_maf, strip=True)
        self.assertEqual(len(source_mutations), 475)

        self.input = {"input_file": {"class": "File", "path": source_maf}}

        output_json, output_dir = self.run_cwl()

        called_path = os.path.join(output_dir, 'data_mutations_extended.txt')
        uncalled_path = os.path.join(output_dir, 'data_mutations_uncalled.txt')

        expected = {
            'called_file': OFile(
                name='data_mutations_extended.txt',
                dir=output_dir,
                hash='e7430656d9fcbce36fa57eb92460db57742168ae',
                size=347254,
            ),
            'uncalled_file': OFile(
                name='data_mutations_uncalled.txt',
                dir=output_dir,
                hash='58129786cc299011202eb078734b3ff513d54081',
                size=287883,
            ),
        }

        self.maxDiff = None

        self.assertCWLDictEqual(output_json, expected)

        # mutation counts per output file
        _, called_mutations = self.load_mutations(called_path, strip=True)
        self.assertEqual(len(called_mutations), 253)

        _, uncalled_mutations = self.load_mutations(uncalled_path, strip=True)
        self.assertEqual(len(uncalled_mutations), 222)
    def test_add_impact_1(self):
        """
        Test that a maf file with the added ``is_in_impact`` column comes out
        as expected.

        Runs the CWL on the Proj_08390_G Sample1.Sample2 mutations maf with
        the IMPACT gene file, then checks the reported output file record,
        the output line count, how many mutations are flagged 'True' and
        'False', and that the output has two more columns than the input.
        """
        self.maxDiff = None
        input_maf = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'],
                                 "Sample1.Sample2.muts.maf")

        self.input = {
            "input_file": {
                "class": "File",
                "path": input_maf
            },
            "output_filename": 'output.maf',
            "IMPACT_file": {
                "class": "File",
                "path": IMPACT_FILE
            },
        }

        output_json, output_dir = self.run_cwl()

        expected_output = {
            'IMPACT_col_added_file':
            OFile(name="output.maf",
                  size=15629589,
                  hash="1397fade2f877c2bcfca791407e328c5c48e6ff0",
                  dir=output_dir)
        }
        self.assertCWLDictEqual(output_json, expected_output)

        output_maf = expected_output['IMPACT_col_added_file']['path']

        # validate output mutation file contents
        with open(output_maf) as fin:
            output_maf_lines = len(fin.readlines())
        self.assertEqual(output_maf_lines, 12518)

        # the comment lines are not needed here, only the mutation rows
        _, input_mutations = self.load_mutations(input_maf)
        _, output_mutations = self.load_mutations(output_maf)

        # collect the flag column once and count both values from it
        impact_flags = [row['is_in_impact'] for row in output_mutations]

        # assertEqual reports both values on failure, unlike assertTrue(a == b)
        self.assertEqual(impact_flags.count('True'), 8367)
        self.assertEqual(impact_flags.count('False'), 4147)

        # check that it has two extra columns in the output
        self.assertEqual(
            len(input_mutations[1]) + 2, len(output_mutations[1]))
    def test_filter_maf_file_cols(self):
        """
        Run the column filter CWL on a tiny demo maf.

        The input has two comment lines and the columns ``Hugo_Symbol`` and
        ``foo_value``; the output is expected to keep the comments and only
        the ``Hugo_Symbol`` column.
        """
        demo_rows = [
            # comment lines are expected to survive
            ['# comment 1'],
            ['# comment 2'],
            # foo_value is expected to be dropped from the output
            ['Hugo_Symbol', 'foo_value'],
            ['SUFU', '1'],
            ['GOT1', '2'],
        ]
        # write the demo maf into the test's temporary directory
        demo_maf = self.write_table(
            tmpdir=self.tmpdir, filename='input.maf', lines=demo_rows)

        self.input = {
            "input_file": {"class": "File", "path": demo_maf},
            "output_filename": "output.maf",
        }

        output_json, output_dir = self.run_cwl()

        expected = {
            'output_file': OFile(
                name='output.maf',
                hash="e55f7bdaa146f37b48d6c920ed27184e394ef1e6",
                size=46,
                dir=output_dir,
            )
        }
        self.assertCWLDictEqual(output_json, expected)

        # 2 comment lines + header + 2 mutation rows
        with open(expected['output_file']['path']) as handle:
            line_total = len(handle.readlines())
        self.assertEqual(line_total, 5)

        # remaining content: comments intact, a single column per mutation
        kept_rows = [{'Hugo_Symbol': 'SUFU'}, {'Hugo_Symbol': 'GOT1'}]
        self.assertMutFileContains(
            filepath=expected['output_file']['path'],
            expected_comments=['# comment 1', '# comment 2'],
            expected_mutations=kept_rows,
            identical=True,
        )
Beispiel #29
0
    def test_put_two_files_in_dir(self):
        """
        Put two temporary files into an output directory named ``foo``.

        Both files are expected back inside the directory under their own
        basenames with size 0 and the same hash value.
        """
        first_file = self.mkstemp(prefix="1.")
        second_file = self.mkstemp(prefix="2.")

        self.input = {
            "output_directory_name": "foo",
            "files": [
                {"class": "File", "path": first_file},
                {"class": "File", "path": second_file},
            ],
        }

        output_json, output_dir = self.run_cwl()

        # both entries share the size and hash recorded for a zero-byte file
        dir_items = [
            OFile(name=os.path.basename(member),
                  size=0,
                  hash='da39a3ee5e6b4b0d3255bfef95601890afd80709')
            for member in (first_file, second_file)
        ]
        expected = {
            "directory": ODir(name="foo", dir=output_dir, items=dir_items)
        }

        self.assertCWLDictEqual(output_json, expected)
    def test_concat2(self):
        """
        Concatenate two files that each carry their own comment line.

        The expected output lists both comment lines first, then the
        ``#comment_label: comment_value`` line, a single ``HEADER`` and the
        data rows of both inputs in order.
        """
        # (filename, lines) for each dummy input; written in this order
        fixtures = (
            ("input1.txt", ["# comment 1 here", "HEADER", "foo1", "bar1"]),
            ("input2.txt", ["# comment 2 here", "HEADER", "foo2", "bar2"]),
        )
        input_paths = []
        for filename, rows in fixtures:
            target = os.path.join(self.tmpdir, filename)
            with open(target, "w") as handle:
                handle.writelines(row + '\n' for row in rows)
            input_paths.append(target)

        self.input = {
            "input_files": [
                {"class": "File", "path": target} for target in input_paths
            ],
            "comment_label": "comment_label",
            "comment_value": "comment_value",
        }
        output_json, output_dir = self.run_cwl()

        expected = {
            'output_file': OFile(
                name='output.txt',
                size=91,
                hash='5dbce16f9bfef135d6b8288b16350351a33998f3',
                dir=output_dir,
            )
        }
        self.assertCWLDictEqual(output_json, expected)

        merged_lines = [
            '# comment 1 here',
            '# comment 2 here',
            '#comment_label: comment_value',
            "HEADER",
            'foo1',
            'bar1',
            'foo2',
            'bar2',
        ]
        self.assertFileLinesEqual(
            expected['output_file']['path'], merged_lines)