# --- Example 1 ---
    def test_concat_simple_file_with_header(self):
        """
        Test that a single input file whose leading '#' comment line is
        stripped produces the expected concatenated output
        """
        with TemporaryDirectory() as tmpdir:
            # write a small input file: one '#' comment line plus data lines
            input_path = os.path.join(tmpdir, "input.txt")
            with open(input_path, "w") as handle:
                handle.write("\n".join(["#header", "foo", "bar", "baz"]) + "\n")

            # CWL job input JSON referencing the single file
            job = {"input_files": [{"class": "File", "path": input_path}]}
            input_json_file = os.path.join(tmpdir, "input.json")
            with open(input_json_file, "w") as handle:
                json.dump(job, handle)

            output_dir = os.path.join(tmpdir, "output")
            tmp_dir = os.path.join(tmpdir, "tmp")
            cache_dir = os.path.join(tmpdir, "cache")

            command = [
                "cwl-runner", *CWL_ARGS,
                "--outdir", output_dir,
                "--tmpdir-prefix", tmp_dir,
                "--cachedir", cache_dir,
                cwl_file, input_json_file,
            ]

            returncode, proc_stdout, proc_stderr = run_command(command)
            if returncode != 0:
                # surface the runner's stderr to ease debugging of failures
                print(proc_stderr)
            self.assertEqual(returncode, 0)

            output_json = json.loads(proc_stdout)

            # the concatenated file should contain the data lines only;
            # the '#' comment line is expected to be dropped
            with open(output_json['output_file']['path']) as handle:
                result_lines = [line.strip() for line in handle]
            self.assertEqual(result_lines, ["foo", "bar", "baz"])

            out_path = os.path.join(output_dir, 'output.txt')
            expected_output = {
                'output_file': {
                    'location': 'file://' + out_path,
                    'basename': 'output.txt',
                    'class': 'File',
                    'checksum': 'sha1$0562f08aef399135936d6fb4eb0cc7bc1890d5b4',
                    'size': 12,
                    'path': out_path
                }
            }
            self.assertDictEqual(output_json, expected_output)
# --- Example 2 ---
    def test_concat_two_mixed_files(self):
        """
        Test that two files, one with a comment and one without, are concatenated correctly
        """
        with TemporaryDirectory() as tmpdir:
            # first input: header line but no '#' comment
            input_file1 = os.path.join(tmpdir, "input1.txt")
            with open(input_file1, "w") as handle:
                handle.write("\n".join(["header1", "foo1", "bar1", "baz1"]) + "\n")

            # second input: leading '#' comment before its own header
            input_file2 = os.path.join(tmpdir, "input2.txt")
            with open(input_file2, "w") as handle:
                handle.write("\n".join(["#comment2", "header2", "foo2", "bar2", "baz2"]) + "\n")

            # CWL job input listing both files in order
            job = {
                "input_files": [
                    {"class": "File", "path": input_file1},
                    {"class": "File", "path": input_file2},
                ]
            }
            input_json_file = os.path.join(tmpdir, "input.json")
            with open(input_json_file, "w") as handle:
                json.dump(job, handle)

            output_dir = os.path.join(tmpdir, "output")
            tmp_dir = os.path.join(tmpdir, "tmp")
            cache_dir = os.path.join(tmpdir, "cache")

            command = [
                "cwl-runner", *CWL_ARGS,
                "--outdir", output_dir,
                "--tmpdir-prefix", tmp_dir,
                "--cachedir", cache_dir,
                cwl_file, input_json_file,
            ]

            returncode, proc_stdout, proc_stderr = run_command(command)
            if returncode != 0:
                # surface the runner's stderr to ease debugging of failures
                print(proc_stderr)
            self.assertEqual(returncode, 0)

            output_json = json.loads(proc_stdout)

            # expected: file 2's comment and duplicate header are dropped,
            # file 1's header survives as the single header line
            with open(output_json['output_file']['path']) as handle:
                result_lines = [line.strip() for line in handle]
            self.assertEqual(
                result_lines,
                ["header1", "foo1", "bar1", "baz1", "foo2", "bar2", "baz2"])

            out_path = os.path.join(output_dir, 'output.txt')
            expected_output = {
                'output_file': {
                    'location': 'file://' + out_path,
                    'basename': 'output.txt',
                    'class': 'File',
                    'checksum': 'sha1$acaa1f09ca0678b8b7c136ce776c04efb6890f6a',
                    'size': 38,
                    'path': out_path
                }
            }
            self.assertDictEqual(output_json, expected_output)
# --- Example 3 ---
    def test_concat_three_files_with_comments(self):
        """
        Test that a three files with headers are concatenated correctly
        Use three this time to clearly show the ordering of the output iteration
        """
        with TemporaryDirectory() as tmpdir:
            # write three inputs, each with its own '#' comment and header line
            input_file1 = os.path.join(tmpdir, "input1.txt")
            with open(input_file1, "w") as handle:
                handle.write("\n".join(["#comment1", "header1", "foo1", "bar1", "baz1"]) + "\n")

            input_file2 = os.path.join(tmpdir, "input2.txt")
            with open(input_file2, "w") as handle:
                handle.write("\n".join(["#comment2", "header2", "foo2", "bar2", "baz2"]) + "\n")

            input_file3 = os.path.join(tmpdir, "input3.txt")
            with open(input_file3, "w") as handle:
                handle.write("\n".join(["#comment3", "header3", "foo3", "bar3", "baz3"]) + "\n")

            # CWL job input listing all three files in order
            job = {
                "input_files": [
                    {"class": "File", "path": input_file1},
                    {"class": "File", "path": input_file2},
                    {"class": "File", "path": input_file3},
                ]
            }
            input_json_file = os.path.join(tmpdir, "input.json")
            with open(input_json_file, "w") as handle:
                json.dump(job, handle)

            output_dir = os.path.join(tmpdir, "output")
            tmp_dir = os.path.join(tmpdir, "tmp")
            cache_dir = os.path.join(tmpdir, "cache")

            command = [
                "cwl-runner", *CWL_ARGS,
                "--outdir", output_dir,
                "--tmpdir-prefix", tmp_dir,
                "--cachedir", cache_dir,
                cwl_file, input_json_file,
            ]

            returncode, proc_stdout, proc_stderr = run_command(command)
            if returncode != 0:
                # surface the runner's stderr to ease debugging of failures
                print(proc_stderr)
            self.assertEqual(returncode, 0)

            output_json = json.loads(proc_stdout)

            # expected: only the first file's header survives; every '#'
            # comment line is dropped; data lines keep input-file order
            with open(output_json['output_file']['path']) as handle:
                result_lines = [line.strip() for line in handle]
            expected_output_lines = [
                "header1", "foo1", "bar1", "baz1", "foo2", "bar2", "baz2",
                "foo3", "bar3", "baz3"
            ]
            self.assertEqual(result_lines, expected_output_lines)

            # TODO: update this once the above ^^^ passes
            out_path = os.path.join(output_dir, 'output.txt')
            expected_output = {
                'output_file': {
                    'location': 'file://' + out_path,
                    'basename': 'output.txt',
                    'class': 'File',
                    'checksum': 'sha1$b115b7b40aa8a2e08e30a55abf60d742e05e62b4',
                    'size': 53,
                    'path': out_path
                }
            }
            self.assertDictEqual(output_json, expected_output)
    def test_concat1(self):
        """
        Test concat when original file has a comment line
        """
        with TemporaryDirectory() as tmpdir:
            # input file beginning with a '#' comment line
            input_file = os.path.join(tmpdir, "input.txt")
            with open(input_file, "w") as handle:
                handle.write("\n".join(["# comment here", "HEADER", "foo", "bar", "baz"]) + "\n")

            # job JSON also supplies a comment label/value for the tool to append
            job = {
                "input_files": [{"class": "File", "path": input_file}],
                "comment_label": "comment_label",
                "comment_value": "comment_value",
            }
            input_json_file = os.path.join(tmpdir, "input.json")
            with open(input_json_file, "w") as handle:
                json.dump(job, handle)

            output_dir = os.path.join(tmpdir, "output")
            tmp_dir = os.path.join(tmpdir, "tmp")
            cache_dir = os.path.join(tmpdir, "cache")

            command = [
                "cwl-runner", *CWL_ARGS,
                "--outdir", output_dir,
                "--tmpdir-prefix", tmp_dir,
                "--cachedir", cache_dir,
                cwl_file, input_json_file,
            ]

            returncode, proc_stdout, proc_stderr = run_command(command)
            if returncode != 0:
                # surface the runner's stderr to ease debugging of failures
                print(proc_stderr)
            self.assertEqual(returncode, 0)

            output_json = json.loads(proc_stdout)

            # expected: the original comment is kept and the new
            # '#label: value' comment is inserted after it
            with open(output_json['output_file']['path']) as handle:
                result_lines = [line.strip() for line in handle]
            expected_lines = ['# comment here', '#comment_label: comment_value', "HEADER", 'foo', 'bar', 'baz']
            self.assertEqual(result_lines, expected_lines)

            out_path = os.path.join(output_dir, 'output.txt')
            expected_output = {
                'output_file': {
                    'location': 'file://' + out_path,
                    'basename': 'output.txt',
                    'class': 'File',
                    'checksum': 'sha1$14ee1247f314dba1e3c28aa8aec9ff7b137a1f41',
                    'size': 64,
                    'path': out_path
                }
            }
            self.assertDictEqual(output_json, expected_output)
# --- Example 5 ---
    def test_concat_two_files_with_headers(self):
        """
        Test that two files which share the same header line are
        concatenated with the header emitted only once
        """
        with TemporaryDirectory() as tmpdir:
            # two inputs that start with an identical header line
            input_file1 = os.path.join(tmpdir, "input1.txt")
            with open(input_file1, "w") as handle:
                handle.write("\n".join(["header", "foo1", "bar1", "baz1"]) + "\n")

            input_file2 = os.path.join(tmpdir, "input2.txt")
            with open(input_file2, "w") as handle:
                handle.write("\n".join(["header", "foo2", "bar2", "baz2"]) + "\n")

            # CWL job input listing both files in order
            job = {
                "input_files": [
                    {"class": "File", "path": input_file1},
                    {"class": "File", "path": input_file2},
                ]
            }
            input_json_file = os.path.join(tmpdir, "input.json")
            with open(input_json_file, "w") as handle:
                json.dump(job, handle)

            output_dir = os.path.join(tmpdir, "output")
            tmp_dir = os.path.join(tmpdir, "tmp")
            cache_dir = os.path.join(tmpdir, "cache")

            command = [
                "cwl-runner", *CWL_ARGS,
                "--outdir", output_dir,
                "--tmpdir-prefix", tmp_dir,
                "--cachedir", cache_dir,
                cwl_file, input_json_file,
            ]

            returncode, proc_stdout, proc_stderr = run_command(command)
            if returncode != 0:
                # surface the runner's stderr to ease debugging of failures
                print(proc_stderr)
            self.assertEqual(returncode, 0)

            output_json = json.loads(proc_stdout)

            # expected: header appears once, followed by both files' data lines
            with open(output_json['output_file']['path']) as handle:
                result_lines = [line.strip() for line in handle]
            self.assertEqual(
                result_lines,
                ["header", "foo1", "bar1", "baz1", "foo2", "bar2", "baz2"])

            out_path = os.path.join(output_dir, 'output.txt')
            expected_output = {
                'output_file': {
                    'location': 'file://' + out_path,
                    'basename': 'output.txt',
                    'class': 'File',
                    'checksum': 'sha1$34c3af14e6d21e295f22c77ed5e837b60501bae7',
                    'size': 37,
                    'path': out_path
                }
            }
            self.assertDictEqual(output_json, expected_output)
    def test_concat2(self):
        """
        Test concat when multiple files have comments
        """
        with TemporaryDirectory() as tmpdir:
            # two inputs, each with its own '#' comment and a shared HEADER line
            input_file1 = os.path.join(tmpdir, "input1.txt")
            with open(input_file1, "w") as handle:
                handle.write("\n".join(["# comment 1 here", "HEADER", "foo1", "bar1"]) + "\n")

            input_file2 = os.path.join(tmpdir, "input2.txt")
            with open(input_file2, "w") as handle:
                handle.write("\n".join(["# comment 2 here", "HEADER", "foo2", "bar2"]) + "\n")

            # job JSON also supplies a comment label/value for the tool to append
            job = {
                "input_files": [
                    {"class": "File", "path": input_file1},
                    {"class": "File", "path": input_file2},
                ],
                "comment_label": "comment_label",
                "comment_value": "comment_value",
            }
            input_json_file = os.path.join(tmpdir, "input.json")
            with open(input_json_file, "w") as handle:
                json.dump(job, handle)

            output_dir = os.path.join(tmpdir, "output")
            tmp_dir = os.path.join(tmpdir, "tmp")
            cache_dir = os.path.join(tmpdir, "cache")

            command = [
                "cwl-runner", *CWL_ARGS,
                "--outdir", output_dir,
                "--tmpdir-prefix", tmp_dir,
                "--cachedir", cache_dir,
                cwl_file, input_json_file,
            ]

            returncode, proc_stdout, proc_stderr = run_command(command)
            if returncode != 0:
                # surface the runner's stderr to ease debugging of failures
                print(proc_stderr)
            self.assertEqual(returncode, 0)

            output_json = json.loads(proc_stdout)

            # expected: both inputs' comments are preserved, the new label
            # comment is appended, and the duplicated HEADER appears only once
            with open(output_json['output_file']['path']) as handle:
                result_lines = [line.strip() for line in handle]
            expected_lines = [
                '# comment 1 here',
                '# comment 2 here',
                '#comment_label: comment_value',
                "HEADER",
                'foo1',
                'bar1',
                'foo2',
                'bar2'
            ]
            self.maxDiff = None
            self.assertEqual(result_lines, expected_lines)

            out_path = os.path.join(output_dir, 'output.txt')
            expected_output = {
                'output_file': {
                    'location': 'file://' + out_path,
                    'basename': 'output.txt',
                    'class': 'File',
                    'checksum': 'sha1$5dbce16f9bfef135d6b8288b16350351a33998f3',
                    'size': 91,
                    'path': out_path
                }
            }
            self.assertDictEqual(output_json, expected_output)
# --- Example 7 ---
    def test_facets_workflow(self):
        """
        End-to-end test of the facets workflow on one tumor/normal pair,
        checking the runner exit status and the full output JSON
        (file paths, checksums, and sizes).
        """
        # the smallest pair of bam files in the test dataset
        pair_maf = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'], "Sample24.Sample23.muts.maf")
        snp_pileup = os.path.join(DATA_SETS['Proj_08390_G']['SNP_PILEUP_DIR'], "Sample24.Sample23.snp_pileup.gz")
        # CWL job input: a single pair entry with its maf and snp pileup files
        input_json = {
            "pairs": [
                {
                    "pair_maf": {
                        "path": pair_maf,
                        "class": "File"
                    },
                    "snp_pileup": {
                        "path": snp_pileup,
                        "class": "File"
                    },
                    "pair_id": "Sample24.Sample23",
                    "normal_id": "Sample23",
                    "tumor_id": "Sample24"
                }
            ]
        }

        with TemporaryDirectory() as tmpdir:

            input_json_file = os.path.join(tmpdir, "input.json")
            with open(input_json_file, "w") as json_out:
                json.dump(input_json, json_out)

            output_dir = os.path.join(tmpdir, "output")
            tmp_dir = os.path.join(tmpdir, "tmp")
            cache_dir = os.path.join(tmpdir, "cache")

            command = [
                "cwl-runner",
                *CWL_ARGS,
                "--outdir", output_dir,
                "--tmpdir-prefix", tmp_dir,
                "--cachedir", cache_dir,
                cwl_file, input_json_file
                ]

            returncode, proc_stdout, proc_stderr = run_command(command)

            # surface the runner's stderr to ease debugging of failures
            if returncode != 0:
                print(proc_stderr)

            self.assertEqual(returncode, 0)

            output_json = json.loads(proc_stdout)

            self.maxDiff = None
            # full expected output: each top-level key is a workflow output;
            # 'output_dir' is a Directory whose listing mirrors the same
            # per-pair files referenced by the other keys
            expected_output = {
            'annotated_maf': [{
                'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24.Sample23_hisens.ccf.portal.maf'),
                 'basename': 'Sample24.Sample23_hisens.ccf.portal.maf',
                 'class': 'File',
                'checksum': 'sha1$d91a8e15c66429b09f1b7db41bc38bdfa0b84c64',
                 'size': 11996607,
                 'path': os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24.Sample23_hisens.ccf.portal.maf')
            }],
            'arm_level_txt': [{
                'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24.arm_level.txt'),
                'basename': 'Sample24.arm_level.txt',
                'class': 'File',
                'checksum': 'sha1$df37c54ae4969257e436a7a7a595c42ef19ecbb5',
                'size': 1824,
                'path': os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24.arm_level.txt')
            }],
            'facets_txt': [{
                'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24.txt'),
                'basename': 'Sample24.txt',
                'class': 'File',
                'checksum': 'sha1$a0fb3df832efc18a66a8a54e5609666da5f4d7d7',
                'size': 529,
                'path': os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24.txt')
            }],
            'failed_pairs': [],
            'gene_level_txt': [{
                'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24.gene_level.txt'),
                'basename': 'Sample24.gene_level.txt',
                'class': 'File',
                'checksum': 'sha1$4e916a52458151007486bf536acfff539fdc2ecc',
                'size': 148195,
                'path': os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24.gene_level.txt')
            }],
            'hisens_rds': [{
                'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24_hisens.rds'),
                'basename': 'Sample24_hisens.rds',
                'class': 'File',
                'checksum': 'sha1$6bfd6c7f29c49ec8ef538dd468a3b4626b05bda2',
                'size': 213986,
                'path': os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24_hisens.rds')
            }],
            'output_dir': {
                'basename': 'facets',
                'class': 'Directory',
                'listing': [
                    {
                        'basename': 'Sample24.Sample23',
                        'class': 'Directory',
                        'listing': [
                            {
                                'basename': 'Sample24.Sample23_hisens.ccf.portal.maf',
                                'checksum': 'sha1$d91a8e15c66429b09f1b7db41bc38bdfa0b84c64',
                                'class': 'File',
                                'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24.Sample23_hisens.ccf.portal.maf'),
                                'path': os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24.Sample23_hisens.ccf.portal.maf'),
                                'size': 11996607
                            },
                            {
                                'basename': 'Sample24.arm_level.txt',
                                'checksum': 'sha1$df37c54ae4969257e436a7a7a595c42ef19ecbb5',
                                'class': 'File',
                                'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24.arm_level.txt'),
                                'path': os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24.arm_level.txt'),
                                'size': 1824
                            },
                            {
                                'basename': 'Sample24.txt',
                                'checksum': 'sha1$a0fb3df832efc18a66a8a54e5609666da5f4d7d7',
                                'class': 'File',
                                'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24.txt'),
                                'path': os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24.txt'),
                                'size': 529
                            },
                            {
                                'basename': 'Sample24.gene_level.txt',
                                'checksum': 'sha1$4e916a52458151007486bf536acfff539fdc2ecc',
                                'class': 'File',
                                'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24.gene_level.txt'),
                                'path': os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24.gene_level.txt'),
                                'size': 148195
                            },
                            {
                                'basename': 'Sample24_hisens.cncf.txt',
                                'checksum': 'sha1$db9131a33889a1cac82e3bd6b3f0e5e182c65105',
                                'class': 'File',
                                'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24_hisens.cncf.txt'),
                                'path': os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24_hisens.cncf.txt'),
                                'size': 5238
                            },
                            {
                                'basename': 'Sample24_hisens.rds',
                                'checksum': 'sha1$6bfd6c7f29c49ec8ef538dd468a3b4626b05bda2',
                                'class': 'File',
                                'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24_hisens.rds'),
                                'path': os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24_hisens.rds'),
                                'size': 213986
                            },
                            {
                                'basename': 'Sample24_hisens.seg',
                                'checksum': 'sha1$652f9c6d0245af49bac6ca67a089af7d4e46801b',
                                'class': 'File',
                                'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24_hisens.seg'),
                                'path': os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24_hisens.seg'),
                                'size': 1897
                            },
                            {
                                'basename': 'Sample24_purity.rds',
                                'checksum': 'sha1$dd8b967f84b191ff76214c6110db8d0e65f6514c',
                                'class': 'File',
                                'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24_purity.rds'),
                                'path': os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24_purity.rds'),
                                'size': 213356
                            },
                            {
                                'basename': 'Sample24_purity.seg',
                                'checksum': 'sha1$591e6d8b432e1e910fe4fb4b1814508131f960c9',
                                'class': 'File',
                                'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24_purity.seg'),
                                'path': os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24_purity.seg'),
                                'size': 1285
                            },
                            {
                                'basename': 'Sample24.qc.txt',
                                'checksum': 'sha1$d4a36726a5fcb7b268aae02d97ce4e382e42d9f6',
                                'class': 'File',
                                'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24.qc.txt'),
                                'path': os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24.qc.txt'),
                                'size': 1339}
                        ],
                        'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23'),
                        'path': os.path.join(output_dir, 'facets/Sample24.Sample23')
                    }
                ],
                'location': 'file://' + os.path.join(output_dir, 'facets'),
                'path': os.path.join(output_dir, 'facets')
            },
            'hisens_seg': [{
                'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24_hisens.seg'),
                'basename': 'Sample24_hisens.seg',
                'class': 'File',
                'checksum': 'sha1$652f9c6d0245af49bac6ca67a089af7d4e46801b',
                'size': 1897,
                'path': os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24_hisens.seg')
            }],
            'purity_rds': [{
                'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24_purity.rds'),
                'basename': 'Sample24_purity.rds',
                'class': 'File',
                'checksum': 'sha1$dd8b967f84b191ff76214c6110db8d0e65f6514c',
                'size': 213356,
                'path': os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24_purity.rds')
            }],
            'purity_seg': [{
                'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24_purity.seg'),
                'basename': 'Sample24_purity.seg',
                'class': 'File',
                'checksum': 'sha1$591e6d8b432e1e910fe4fb4b1814508131f960c9',
                'size': 1285,
                'path': os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24_purity.seg')
            }],
            'qc_txt': [{
                'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24.qc.txt'),
                'basename': 'Sample24.qc.txt',
                'class': 'File',
                'checksum': 'sha1$d4a36726a5fcb7b268aae02d97ce4e382e42d9f6',
                'size': 1339,
                'path': os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24.qc.txt')
            }],
            'hisens_cncf_txt': [{
                'basename': 'Sample24_hisens.cncf.txt',
                'checksum': 'sha1$db9131a33889a1cac82e3bd6b3f0e5e182c65105',
                'class': 'File',
                'location': 'file://' + os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24_hisens.cncf.txt'),
                'path': os.path.join(output_dir, 'facets/Sample24.Sample23/Sample24_hisens.cncf.txt'),
                'size': 5238
               }],
            }

            self.assertDictEqual(output_json, expected_output)
# --- Example 8 ---
    def test_run_facets_wrapper(self):
        """
        Run the facets maf-annotation wrapper on a known maf + rds input pair
        and verify the annotated maf plus the captured stdout/stderr logs.
        """
        input_maf = os.path.join(DATA_SETS['Proj_08390_G']['MAF_DIR'],
                                 "Sample1.Sample2.muts.maf")
        input_rds = os.path.join(DATA_SETS['Proj_08390_G']['FACETS_SUITE_DIR'],
                                 "Sample1_hisens.rds")
        # CWL job input: the maf to annotate, the facets rds, and the output name
        input_json = {
            "maf_file": {"path": input_maf, "class": "File"},
            "facets_rds": {"path": input_rds, "class": "File"},
            "output_filename": "Sample1_hisens.ccf.maf",
        }
        with TemporaryDirectory() as tmpdir:
            input_json_file = os.path.join(tmpdir, "input.json")
            with open(input_json_file, "w") as handle:
                json.dump(input_json, handle)

            output_dir = os.path.join(tmpdir, "output")
            tmp_dir = os.path.join(tmpdir, "tmp")
            cache_dir = os.path.join(tmpdir, "cache")

            command = [
                "cwl-runner", *CWL_ARGS,
                "--outdir", output_dir,
                "--tmpdir-prefix", tmp_dir,
                "--cachedir", cache_dir,
                cwl_file, input_json_file,
            ]
            returncode, proc_stdout, proc_stderr = run_command(command)
            if returncode != 0:
                # surface the runner's stderr to ease debugging of failures
                print(proc_stderr)
            self.assertEqual(returncode, 0)

            output_json = json.loads(proc_stdout)

            # expected outputs: no failed_txt, one annotated maf, and the
            # wrapper's stderr/stdout capture files (stdout expected empty:
            # size 0 with the sha1 of the empty string)
            expected_output = {
                'failed_txt': None,
                'output_file': {
                    'location': 'file://' + os.path.join(output_dir, 'Sample1_hisens.ccf.maf'),
                    'basename': 'Sample1_hisens.ccf.maf',
                    'class': 'File',
                    'checksum': 'sha1$7e478a8a44d27735f26e368989c672ed6ef5d52a',
                    'size': 19217199,
                    'path': os.path.join(output_dir, 'Sample1_hisens.ccf.maf')
                },
                'stderr_txt': {
                    'basename': 'annotate_maf_stderr.txt',
                    'checksum': 'sha1$2e672f99c23a2d827c1d33e06377870cdd9c8090',
                    'class': 'File',
                    'location': 'file://' + os.path.join(output_dir, 'annotate_maf_stderr.txt'),
                    'path': os.path.join(output_dir, 'annotate_maf_stderr.txt'),
                    'size': 105
                },
                'stdout_txt': {
                    'basename': 'annotate_maf_stdout.txt',
                    'checksum': 'sha1$da39a3ee5e6b4b0d3255bfef95601890afd80709',
                    'class': 'File',
                    'location': 'file://' + os.path.join(output_dir, 'annotate_maf_stdout.txt'),
                    'path': os.path.join(output_dir, 'annotate_maf_stdout.txt'),
                    'size': 0
                }
            }
            self.maxDiff = None
            self.assertDictEqual(output_json, expected_output)
Ejemplo n.º 9
0
    def test_run_copy_number_one_file(self):
        """
        Test that Facets geneLevel copy number analysis step runs as expected with a single input file.

        Runs the CWL workflow via ``cwl-runner`` against one hisens ``.cncf.txt`` file,
        then checks the workflow's JSON output record and the headers of each
        generated copy-number table.
        """
        with TemporaryDirectory() as tmpdir:
            # Build the CWL job input: one hisens cncf file plus the targets list
            input_json = {
                "output_cna_filename": "data_CNA.txt",
                "targets_list": {
                    "class": "File",
                    "path": DATA_SETS['Proj_08390_G']['targets_list'],
                },
                "hisens_cncfs": [
                    {
                        "class": "File",
                        "path": os.path.join(DATA_SETS['Proj_08390_G']['FACETS_DIR'], "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens.cncf.txt")
                    }
                ],
            }
            input_json_file = os.path.join(tmpdir, "input.json")
            with open(input_json_file, "w") as input_json_file_data:
                json.dump(input_json, input_json_file_data)

            # Keep all runner scratch space inside the TemporaryDirectory so it
            # is cleaned up automatically.
            # (Previously output_dir was assigned twice; the duplicate was removed.)
            output_dir = os.path.join(tmpdir, "output")
            tmp_dir = os.path.join(tmpdir, "tmp")
            cache_dir = os.path.join(tmpdir, "cache")

            command = [
                "cwl-runner",
                *CWL_ARGS,
                "--outdir", output_dir,
                "--tmpdir-prefix", tmp_dir,
                "--cachedir", cache_dir,
                cwl_file, input_json_file
            ]

            returncode, proc_stdout, proc_stderr = run_command(command)

            # Surface the runner's stderr before the assertion fails, to aid debugging
            if returncode != 0:
                print(proc_stderr)

            self.assertEqual(returncode, 0)

            # cwl-runner prints the output object as JSON on stdout
            output_json = json.loads(proc_stdout)

            expected_output = {
                'output_cna_file': {
                    'location': 'file://' + os.path.join(output_dir, "data_CNA.txt"),
                    'basename': "data_CNA.txt",
                    'class': 'File',
                    'checksum': 'sha1$7cc89d24556de93b9a409812317581e67e5df494',
                    'size': 87905,
                    'path': os.path.join(output_dir, "data_CNA.txt")
                },
                'output_cna_ascna_file': {
                    'location': 'file://' + os.path.join(output_dir, "data_CNA.ascna.txt"),
                    'basename': "data_CNA.ascna.txt",
                    'class': 'File',
                    'checksum': 'sha1$452d5ddef12a44693d5a98a05f5d300801734cfe',
                    'size': 6164,
                    'path': os.path.join(output_dir, "data_CNA.ascna.txt")
                },
                'output_cna_scna_file': {
                    'location': 'file://' + os.path.join(output_dir, "data_CNA.scna.txt"),
                    'basename': "data_CNA.scna.txt",
                    'class': 'File',
                    'checksum': 'sha1$8bec923ab1d622b4cf38ae042ac2416725650aed',
                    'size': 5463,
                    'path': os.path.join(output_dir, "data_CNA.scna.txt")
                }
            }
            self.maxDiff = None
            self.assertDictEqual(output_json, expected_output)

            # Spot-check the header row of the main data_CNA.txt table
            path = output_json['output_cna_file']['path']
            with open(path) as f:
                header = next(f)
            header_parts = header.split()
            expected_header_parts = ['Tumor_Sample_Barcode', 'Hugo_Symbol', 'tcn', 'lcn', 'cf', 'tcn.em', 'lcn.em', 'cf.em', 'chr', 'seg.start', 'seg.end', 'frac_elev_major_cn', 'Nprobes', 'WGD', 'purity', 'FACETS_CALL', 'ccs_filter', 'review', 'FACETS_CNA']
            self.assertEqual(header_parts, expected_header_parts)

            # The ascna table header: gene symbol column plus one sample column
            path = output_json['output_cna_ascna_file']['path']
            with open(path) as f:
                header = next(f)
            header_parts = header.split()
            expected_header_parts = ['Hugo_Symbol', 's_C_VJ7F47_P001_d']
            self.assertEqual(header_parts, expected_header_parts)

            # The scna table header: same shape as the ascna table
            path = output_json['output_cna_scna_file']['path']
            with open(path) as f:
                header = next(f)
            header_parts = header.split()
            expected_header_parts = ['Hugo_Symbol', 's_C_VJ7F47_P001_d']
            self.assertEqual(header_parts, expected_header_parts)
Ejemplo n.º 10
0
    def test_run_copy_number_two_files(self):
        """
        Test that Facets geneLevel copy number analysis step runs as expected with two input files.

        Runs the CWL workflow via ``cwl-runner`` against two hisens ``.cncf.txt``
        files and checks the workflow's JSON output record against known
        checksums and sizes.
        """
        with TemporaryDirectory() as tmpdir:
            # Build the CWL job input: two hisens cncf files plus the targets list.
            # NOTE(review): this test uses the key "portal_CNA_file" while the
            # single-file test uses "output_cna_filename" — verify against the
            # CWL file's inputs that this is intentional and not a copy-paste slip.
            input_json = {
                "portal_CNA_file": "data_CNA.txt",
                "targets_list": {
                    "class": "File",
                    "path": DATA_SETS['Proj_08390_G']['targets_list'],
                },
                "hisens_cncfs": [
                    {
                        "class": "File",
                        "path": os.path.join(DATA_SETS['Proj_08390_G']['FACETS_DIR'], "Sample2.rg.md.abra.printreads__Sample1.rg.md.abra.printreads_hisens.cncf.txt")
                    },
                    {
                        "class": "File",
                        "path": os.path.join(DATA_SETS['Proj_08390_G']['FACETS_DIR'], "Sample9.rg.md.abra.printreads__Sample10.rg.md.abra.printreads_hisens.cncf.txt")
                    }
                ],
            }
            input_json_file = os.path.join(tmpdir, "input.json")
            with open(input_json_file, "w") as input_json_file_data:
                json.dump(input_json, input_json_file_data)

            # Keep all runner scratch space inside the TemporaryDirectory so it
            # is cleaned up automatically.
            # (Previously output_dir was assigned twice; the duplicate was removed.)
            output_dir = os.path.join(tmpdir, "output")
            tmp_dir = os.path.join(tmpdir, "tmp")
            cache_dir = os.path.join(tmpdir, "cache")

            command = [
                "cwl-runner",
                *CWL_ARGS,
                "--outdir", output_dir,
                "--tmpdir-prefix", tmp_dir,
                "--cachedir", cache_dir,
                cwl_file, input_json_file
            ]

            returncode, proc_stdout, proc_stderr = run_command(command)

            # Surface the runner's stderr before the assertion fails, to aid debugging
            if returncode != 0:
                print(proc_stderr)

            self.assertEqual(returncode, 0)

            # cwl-runner prints the output object as JSON on stdout
            output_json = json.loads(proc_stdout)

            expected_output = {
                'output_cna_file': {
                    'location': 'file://' + os.path.join(output_dir, "data_CNA.txt"),
                    'basename': "data_CNA.txt",
                    'class': 'File',
                    'checksum': 'sha1$6dfa53b8a0fad1156060476bcf445d959f0e6eb2',
                    'size': 143118,
                    'path': os.path.join(output_dir, "data_CNA.txt")
                },
                'output_cna_ascna_file': {
                    'location': 'file://' + os.path.join(output_dir, "data_CNA.ascna.txt"),
                    'basename': "data_CNA.ascna.txt",
                    'class': 'File',
                    'checksum': 'sha1$3953e55b3db85b69209982211c53b9d8f049dc01',
                    'size': 8658,
                    'path': os.path.join(output_dir, "data_CNA.ascna.txt")
                },
                'output_cna_scna_file': {
                    'location': 'file://' + os.path.join(output_dir, "data_CNA.scna.txt"),
                    'basename': "data_CNA.scna.txt",
                    'class': 'File',
                    'checksum': 'sha1$9ddcee42cce0d49aec5745303be480b6c4ef0fe8',
                    'size': 6937,
                    'path': os.path.join(output_dir, "data_CNA.scna.txt")
                }
            }
            self.maxDiff = None
            self.assertDictEqual(output_json, expected_output)