Esempio n. 1
0
    def test_add_impact_1(self):
        """
        Run the CWL on the Proj_08390_G Sample1.Sample2 muts.maf together with
        the IMPACT gene file and check the result

        Verifies the output file record (checksum, size, paths), the number of
        lines in the output maf, the tally of 'True' / 'False' values in the
        is_in_impact column, and that output rows have two more columns than
        input rows
        """
        self.maxDiff = None
        input_maf = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'],
                                 "Sample1.Sample2.muts.maf")

        with TemporaryDirectory() as tmpdir:
            input_json = {
                "input_file": {
                    "class": "File",
                    "path": input_maf
                },
                "output_filename": 'output.maf',
                "IMPACT_file": {
                    "class": "File",
                    "path": IMPACT_FILE
                },
            }

            output_json, output_dir = run_cwl(testcase=self,
                                              tmpdir=tmpdir,
                                              input_json=input_json,
                                              cwl_file=cwl_file)

            expected_path = os.path.join(output_dir, 'output.maf')
            expected_output = {
                'IMPACT_col_added_file': {
                    'location': 'file://' + expected_path,
                    'basename': 'output.maf',
                    'class': 'File',
                    'checksum':
                    'sha1$1397fade2f877c2bcfca791407e328c5c48e6ff0',
                    'size': 15629589,
                    'path': expected_path
                }
            }
            self.assertDictEqual(output_json, expected_output)

            # validate output mutation file contents
            output_maf = output_json['IMPACT_col_added_file']['path']
            with open(output_maf) as fin:
                output_maf_lines = sum(1 for _ in fin)
            self.assertEqual(output_maf_lines, 12518)

            # only the mutation rows are needed here; comments are discarded
            _, input_mutations = load_mutations(input_maf)
            _, output_mutations = load_mutations(output_maf)

            # collect the column once, then tally both values from it
            impact_flags = [row['is_in_impact'] for row in output_mutations]

            # assertEqual reports both values on failure, unlike
            # assertTrue(a == b) which only reports "False is not true"
            self.assertEqual(impact_flags.count('True'), 8367)
            self.assertEqual(impact_flags.count('False'), 4147)

            # check that its got two extra columns in the output
            self.assertEqual(len(output_mutations[1]),
                             len(input_mutations[1]) + 2)
Esempio n. 2
0
    def test_add_af(self):
        """
        Run the CWL on a tiny three-row maf and check that every output row
        carries a t_af column next to t_depth and t_alt_count

        The input comment lines are expected to be preserved in the output
        """
        comment_rows = [['# comment 1'], ['# comment 2']]
        table_rows = [
            ['Hugo_Symbol', 't_depth', 't_alt_count'],
            ['SUFU', '100', '75'],
            ['GOT1', '100', '1'],
            ['SOX9', '100', '0'],
        ]
        maf_lines = comment_rows + table_rows

        with TemporaryDirectory() as tmpdir:
            input_maf = write_table(tmpdir=tmpdir,
                                    filename='input.maf',
                                    lines=maf_lines)
            input_file_record = {"class": "File", "path": input_maf}
            input_json = {
                "input_file": input_file_record,
                "output_filename": 'output.maf',
            }
            output_json, output_dir = run_cwl(testcase=self,
                                              tmpdir=tmpdir,
                                              input_json=input_json,
                                              cwl_file=cwl_file)

            # the same absolute path appears as both 'path' and 'location'
            expected_path = os.path.join(output_dir, 'output.maf')
            expected_record = {
                'location': 'file://' + expected_path,
                'basename': 'output.maf',
                'class': 'File',
                'checksum': 'sha1$39de59ad5d736db692504012ce86d3395685112e',
                'size': 109,
                'path': expected_path,
            }
            self.assertDictEqual(output_json, {'output_file': expected_record})

            comments, mutations = load_mutations(
                output_json['output_file']['path'])

            self.assertEqual(comments, ['# comment 1', '# comment 2'])

            # expected rows, one tuple per mutation in input order; the t_af
            # values are consistent with t_alt_count / t_depth
            columns = ('Hugo_Symbol', 't_depth', 't_alt_count', 't_af')
            expected_rows = (
                ('SUFU', '100', '75', '0.75'),
                ('GOT1', '100', '1', '0.01'),
                ('SOX9', '100', '0', '0.0'),
            )
            expected_mutations = [
                dict(zip(columns, row)) for row in expected_rows
            ]
            self.assertEqual(mutations, expected_mutations)
Esempio n. 3
0
    def test_add_impact_0(self):
        """
        Run the CWL on a three-gene maf with a small gene / assay table and
        check the is_in_impact and impact_assays values written for each gene

        The input comment lines are expected to be preserved in the output
        """
        # single-column maf: two comment lines, the header, then one gene per row
        maf_lines = [[cell] for cell in ('# comment 1', '# comment 2',
                                         'Hugo_Symbol', 'SUFU', 'GOT1',
                                         'BRCA')]

        # gene / assay pairs; SUFU is listed under two assays
        impact_lines = [[gene, assay] for gene, assay in (
            ('BRCA', 'IMPACT468'),
            ('SUFU', 'IMPACT468'),
            ('SUFU', 'IMPACT505'),
        )]

        with TemporaryDirectory() as tmpdir:
            input_maf = write_table(tmpdir=tmpdir,
                                    filename='input.maf',
                                    lines=maf_lines)
            impact_file = write_table(tmpdir=tmpdir,
                                      filename='impact.txt',
                                      lines=impact_lines)
            maf_record = {"class": "File", "path": input_maf}
            impact_record = {"class": "File", "path": impact_file}
            input_json = {
                "input_file": maf_record,
                "output_filename": 'output.maf',
                "IMPACT_file": impact_record,
            }
            output_json, output_dir = run_cwl(testcase=self,
                                              tmpdir=tmpdir,
                                              input_json=input_json,
                                              cwl_file=cwl_file)

            # the same absolute path appears as both 'path' and 'location'
            expected_path = os.path.join(output_dir, 'output.maf')
            expected_record = {
                'location': 'file://' + expected_path,
                'basename': 'output.maf',
                'class': 'File',
                'checksum': 'sha1$5c61f3977dad29ebc74966e8fc40a0278f9aab12',
                'size': 126,
                'path': expected_path,
            }
            self.assertDictEqual(output_json,
                                 {'IMPACT_col_added_file': expected_record})

            comments, mutations = load_mutations(
                output_json['IMPACT_col_added_file']['path'])

            self.assertEqual(comments, ['# comment 1', '# comment 2'])

            # expected rows in input order; GOT1 is absent from the gene table
            columns = ('Hugo_Symbol', 'is_in_impact', 'impact_assays')
            expected_rows = (
                ('SUFU', 'True', 'IMPACT468,IMPACT505'),
                ('GOT1', 'False', '.'),
                ('BRCA', 'True', 'IMPACT468'),
            )
            expected_mutations = [
                dict(zip(columns, row)) for row in expected_rows
            ]
            self.assertEqual(mutations, expected_mutations)
    def test_run_worflow_mixed_mafs(self):
        """
        Run the workflow with one Argos muts.maf and one Facets Suite annotated
        maf (ccf.portal.maf) as the mutation inputs

        Checks the 'analysis' directory listing, the number of rows in the
        analysis mutations file, that a set of ASCN.* columns coming from the
        Facets Suite maf is present in the output, and the t_af / is_in_impact /
        impact_assays values of the first row
        """
        dataset = DATA_SETS['Proj_08390_G']
        maf_dir = dataset['MAF_DIR']
        facets_dir = dataset['FACETS_DIR']
        facets_suite_dir = dataset['FACETS_SUITE_DIR']

        # shared filename stems of the Facets hisens outputs, one per sample pair
        facets_stems = (
            "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens",
            "Sample3.rg.md.abra.printreads__Sample4.rg.md.abra.printreads_hisens",
        )

        def as_file(path):
            # File record for an input path
            return {"path": path, "class": "File"}

        input_json = {
            "is_impact": True,
            "argos_version_string": "2.x",
            "analysis_gene_cna_filename": "Proj_08390_G.gene.cna.txt",
            "analysis_mutations_filename": "Proj_08390_G.muts.maf",
            "analysis_mutations_share_filename": "Proj_08390_G.muts.share.maf",
            "analysis_segment_cna_filename": "Proj_08390_G.seg.cna.txt",
            "analysis_sv_filename": "Proj_08390_G.svs.maf",
            "helix_filter_version": "20.06.1",
            "IMPACT_gene_list": {
                "class": "File",
                "path": IMPACT_FILE
            },
            "targets_list": as_file(dataset["targets_list"]),
            "mutation_maf_files": [
                as_file(os.path.join(maf_dir, "Sample1.Sample2.muts.maf")),
                as_file(
                    os.path.join(facets_suite_dir,
                                 "Sample4.Sample3_hisens.ccf.portal.maf")),
            ],
            "mutation_svs_maf_files": [
                as_file(os.path.join(maf_dir, name))
                for name in ("Sample1.Sample2.svs.pass.vep.maf",
                             "Sample4.Sample3.svs.pass.vep.maf")
            ],
            "facets_hisens_cncf_files": [
                as_file(os.path.join(facets_dir, stem + ".cncf.txt"))
                for stem in facets_stems
            ],
            "facets_hisens_seg_files": [
                as_file(os.path.join(facets_dir, stem + ".seg"))
                for stem in facets_stems
            ],
        }

        with TemporaryDirectory() as tmpdir:
            output_json, output_dir = run_cwl(testcase=self,
                                              tmpdir=tmpdir,
                                              input_json=input_json,
                                              cwl_file=cwl_file)
            self.maxDiff = None

            def listed_file(basename, checksum, size):
                # expected File record for an entry of the analysis directory
                path = os.path.join(output_dir, 'analysis/' + basename)
                return {
                    'location': 'file://' + path,
                    'basename': basename,
                    'class': 'File',
                    'checksum': checksum,
                    'size': size,
                    'path': path,
                }

            analysis_path = os.path.join(output_dir, 'analysis')
            expected_output = {
                'analysis_dir': {
                    'class': 'Directory',
                    'basename': 'analysis',
                    'location': 'file://' + analysis_path,
                    'path': analysis_path,
                    'listing': [
                        listed_file(
                            'Proj_08390_G.gene.cna.txt',
                            'sha1$ab17d587ad5ae0a87fd6c6d4dd2d5d1701208ce9',
                            173982),
                        listed_file(
                            'Proj_08390_G.muts.maf',
                            'sha1$66a87cb8cc2eea31f490852d468bedd958c4ecc5',
                            59915),
                        listed_file(
                            'Proj_08390_G.muts.share.maf',
                            'sha1$cbaa23bb848978cde135efd3870db8f35b3f2861',
                            10729),
                        listed_file(
                            'Proj_08390_G.seg.cna.txt',
                            'sha1$f6a77b280c047a7e2082e3a09e8138f861790d3a',
                            3191),
                        listed_file(
                            'Proj_08390_G.svs.maf',
                            'sha1$5c2a63fc01980550108e58079a8b689d53c97d8c',
                            35595),
                    ]
                }
            }
            self.assertDictEqual(output_json, expected_output)

            comments, mutations = load_mutations(
                os.path.join(output_dir, 'analysis', 'Proj_08390_G.muts.maf'))
            self.assertEqual(len(mutations), 34)

            first_row = mutations[0]
            colnames = first_row.keys()
            some_required_colnames = [
                "ASCN.TOTAL_COPY_NUMBER",
                "ASCN.MINOR_COPY_NUMBER",
                "ASCN.EXPECTED_ALT_COPIES",
                "ASCN.CCF_EXPECTED_COPIES",
                "ASCN.CCF_EXPECTED_COPIES_LOWER",
                "ASCN.CCF_EXPECTED_COPIES_UPPER",
                "ASCN.ASCN_METHOD",
                "ASCN.ASCN_INTEGER_COPY_NUMBER",
            ]
            # computed once up front; reported in full with the first failure
            missing_colnames = [
                c for c in some_required_colnames if c not in colnames
            ]
            for colname in some_required_colnames:
                self.assertTrue(
                    colname in colnames,
                    "Column label {} not present in the mutation file. Missing columns: {}"
                    .format(colname, missing_colnames))

            expected_first_row_values = (
                ('t_af', '0.42953020134228187'),
                ('is_in_impact', 'True'),
                ('impact_assays', 'IMPACT341,IMPACT410,IMPACT468,IMPACT505'),
            )
            for column, value in expected_first_row_values:
                self.assertEqual(first_row[column], value)
    def test_run_worflow_one_maf(self):
        """
        Run the workflow with a single file in each input list (one sample pair)

        Checks the 'analysis' directory listing and the number of rows in the
        analysis mutations file
        """
        dataset = DATA_SETS['Proj_08390_G']
        maf_dir = dataset['MAF_DIR']
        facets_dir = dataset['FACETS_DIR']

        # shared filename stem of the Facets hisens outputs for the sample pair
        facets_stem = (
            "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens"
        )

        def as_file(path):
            # File record for an input path
            return {"path": path, "class": "File"}

        input_json = {
            "is_impact": True,
            "argos_version_string": "2.x",
            "analysis_gene_cna_filename": "Proj_08390_G.gene.cna.txt",
            "analysis_mutations_filename": "Proj_08390_G.muts.maf",
            "analysis_mutations_share_filename": "Proj_08390_G.muts.share.maf",
            "analysis_segment_cna_filename": "Proj_08390_G.seg.cna.txt",
            "analysis_sv_filename": "Proj_08390_G.svs.maf",
            "helix_filter_version": "20.06.1",
            "IMPACT_gene_list": {
                "class": "File",
                "path": IMPACT_FILE
            },
            "targets_list": as_file(dataset["targets_list"]),
            "mutation_maf_files": [
                as_file(os.path.join(maf_dir, "Sample1.Sample2.muts.maf")),
            ],
            "mutation_svs_maf_files": [
                as_file(
                    os.path.join(maf_dir, "Sample1.Sample2.svs.pass.vep.maf")),
            ],
            "facets_hisens_cncf_files": [
                as_file(os.path.join(facets_dir, facets_stem + ".cncf.txt")),
            ],
            "facets_hisens_seg_files": [
                as_file(os.path.join(facets_dir, facets_stem + ".seg")),
            ],
        }

        with TemporaryDirectory() as tmpdir:
            output_json, output_dir = run_cwl(testcase=self,
                                              tmpdir=tmpdir,
                                              input_json=input_json,
                                              cwl_file=cwl_file)
            self.maxDiff = None

            def listed_file(basename, checksum, size):
                # expected File record for an entry of the analysis directory
                path = os.path.join(output_dir, 'analysis/' + basename)
                return {
                    'location': 'file://' + path,
                    'basename': basename,
                    'class': 'File',
                    'checksum': checksum,
                    'size': size,
                    'path': path,
                }

            analysis_path = os.path.join(output_dir, 'analysis')
            expected_output = {
                'analysis_dir': {
                    'class': 'Directory',
                    'basename': 'analysis',
                    'location': 'file://' + analysis_path,
                    'path': analysis_path,
                    'listing': [
                        listed_file(
                            'Proj_08390_G.gene.cna.txt',
                            'sha1$7cc89d24556de93b9a409812317581e67e5df494',
                            87905),
                        listed_file(
                            'Proj_08390_G.muts.maf',
                            'sha1$2c8904927a917d6e935ef207582d995680574d16',
                            33243),
                        listed_file(
                            'Proj_08390_G.muts.share.maf',
                            'sha1$b5af4e0fcd89fecabf8095aa3d7690e5edb8dca1',
                            7462),
                        listed_file(
                            'Proj_08390_G.seg.cna.txt',
                            'sha1$f0ebb82c34b6530447fa1e70b6dedcc039840d61',
                            1632),
                        listed_file(
                            'Proj_08390_G.svs.maf',
                            'sha1$df420706bb5b772a79317843c0a01a3c88a9571d',
                            23603),
                    ]
                }
            }
            self.assertDictEqual(output_json, expected_output)

            mutations_path = os.path.join(output_dir, 'analysis',
                                          'Proj_08390_G.muts.maf')
            comments, mutations = load_mutations(mutations_path)
            self.assertEqual(len(mutations), 22)
    def test_run_worflow_two_mafs(self):
        """
        Run the workflow with two files in each input list (two sample pairs)

        Checks the 'analysis' directory listing and the number of rows in the
        analysis mutations file
        """
        dataset = DATA_SETS['Proj_08390_G']
        maf_dir = dataset['MAF_DIR']
        facets_dir = dataset['FACETS_DIR']

        # filename stems shared by the per-pair inputs
        maf_stems = ("Sample1.Sample2", "Sample4.Sample3")
        facets_stems = (
            "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens",
            "Sample3.rg.md.abra.printreads__Sample4.rg.md.abra.printreads_hisens",
        )

        def as_file(path):
            # File record for an input path
            return {"path": path, "class": "File"}

        input_json = {
            "is_impact": True,
            "argos_version_string": "2.x",
            "analysis_gene_cna_filename": "Proj_08390_G.gene.cna.txt",
            "analysis_mutations_filename": "Proj_08390_G.muts.maf",
            "analysis_mutations_share_filename": "Proj_08390_G.muts.share.maf",
            "analysis_segment_cna_filename": "Proj_08390_G.seg.cna.txt",
            "analysis_sv_filename": "Proj_08390_G.svs.maf",
            "helix_filter_version": "20.06.1",
            "IMPACT_gene_list": {
                "class": "File",
                "path": IMPACT_FILE
            },
            "targets_list": as_file(dataset["targets_list"]),
            "mutation_maf_files": [
                as_file(os.path.join(maf_dir, stem + ".muts.maf"))
                for stem in maf_stems
            ],
            "mutation_svs_maf_files": [
                as_file(os.path.join(maf_dir, stem + ".svs.pass.vep.maf"))
                for stem in maf_stems
            ],
            "facets_hisens_cncf_files": [
                as_file(os.path.join(facets_dir, stem + ".cncf.txt"))
                for stem in facets_stems
            ],
            "facets_hisens_seg_files": [
                as_file(os.path.join(facets_dir, stem + ".seg"))
                for stem in facets_stems
            ],
        }

        with TemporaryDirectory() as tmpdir:
            output_json, output_dir = run_cwl(testcase=self,
                                              tmpdir=tmpdir,
                                              input_json=input_json,
                                              cwl_file=cwl_file)
            self.maxDiff = None

            def listed_file(basename, checksum, size):
                # expected File record for an entry of the analysis directory
                path = os.path.join(output_dir, 'analysis/' + basename)
                return {
                    'location': 'file://' + path,
                    'basename': basename,
                    'class': 'File',
                    'checksum': checksum,
                    'size': size,
                    'path': path,
                }

            analysis_path = os.path.join(output_dir, 'analysis')
            expected_output = {
                'analysis_dir': {
                    'class': 'Directory',
                    'basename': 'analysis',
                    'location': 'file://' + analysis_path,
                    'path': analysis_path,
                    'listing': [
                        listed_file(
                            'Proj_08390_G.gene.cna.txt',
                            'sha1$ab17d587ad5ae0a87fd6c6d4dd2d5d1701208ce9',
                            173982),
                        listed_file(
                            'Proj_08390_G.muts.maf',
                            'sha1$d4352ee2b702877b84db2b632972ccad2441f3e0',
                            54458),
                        listed_file(
                            'Proj_08390_G.muts.share.maf',
                            'sha1$086ce6517eae68e47160c8740c5f00d7c3454110',
                            10956),
                        listed_file(
                            'Proj_08390_G.seg.cna.txt',
                            'sha1$f6a77b280c047a7e2082e3a09e8138f861790d3a',
                            3191),
                        listed_file(
                            'Proj_08390_G.svs.maf',
                            'sha1$5c2a63fc01980550108e58079a8b689d53c97d8c',
                            35595),
                    ]
                }
            }
            self.assertDictEqual(output_json, expected_output)

            mutations_path = os.path.join(output_dir, 'analysis',
                                          'Proj_08390_G.muts.maf')
            comments, mutations = load_mutations(mutations_path)
            self.assertEqual(len(mutations), 34)