Example #1
0
File: cwlTest.py Project: mr-c/toil
    def test_streamable(self):
        """
        Test that a file with 'streamable'=True is a named pipe.

        Runs stream.cwl with stream.json against an AWS job store, compares
        the reported output object with the expected streaming output and
        checks the content of the file the output points at.
        """
        from toil.cwl import cwltoil

        out_name = "output"
        workflow = os.path.join(self.rootDir, "src/toil/test/cwl/stream.cwl")
        job_order = os.path.join(self.rootDir, "src/toil/test/cwl/stream.json")
        # A fresh UUID is embedded in the job store name on every run.
        job_store_arg = f'--jobStore=aws:us-west-1:toil-stream-{uuid.uuid4()}'

        captured = StringIO()
        cwltoil.main(
            ["--outdir", self.outDir, job_store_arg, workflow, job_order],
            stdout=captured,
        )
        result = json.loads(captured.getvalue())

        # These keys are removed before comparing with the expected output.
        for dropped_key in (
                "http://commonwl.org/cwltool#generation",
                "nameext",
                "nameroot",
        ):
            result[out_name].pop(dropped_key, None)
        self.assertEqual(result, self._expected_streaming_output(self.outDir))

        # The location is a file:// URI; strip the scheme to get a local path.
        local_path = result[out_name]["location"][len("file://"):]
        with open(local_path, "r") as handle:
            self.assertEqual(handle.read().strip(), "When is s4 coming out?")
Example #2
0
    def test_mpi(self):
        """
        Run mpi_simple.cwl with the mock MPI configuration and check the PIDs.

        The mock_mpi directory is appended to PATH for the duration of the
        workflow run; PATH is restored afterwards even if the run raises.
        The file named by the 'pids' output must hold exactly two integers.
        """
        from toil.cwl import cwltoil

        stdout = StringIO()
        main_args = [
            "--outdir",
            self.outDir,
            "--enable-dev",
            "--enable-ext",
            "--mpi-config-file",
            os.path.join(self.rootDir,
                         "src/toil/test/cwl/mock_mpi/fake_mpi.yml"),
            os.path.join(self.rootDir, "src/toil/test/cwl/mpi_simple.cwl"),
        ]
        path = os.environ["PATH"]
        os.environ["PATH"] = f"{path}:{self.rootDir}/src/toil/test/cwl/mock_mpi/"
        try:
            cwltoil.main(main_args, stdout=stdout)
        finally:
            # Always put PATH back so a failing run cannot affect later tests.
            os.environ["PATH"] = path
        out = json.loads(stdout.getvalue())
        # The location is a file:// URI; strip the scheme to get a local path.
        with open(out.get("pids", {}).get("location")[len("file://"):]) as f:
            two_pids = [int(i) for i in f.read().split()]
        self.assertEqual(len(two_pids), 2)
        self.assertIsInstance(two_pids[0], int)
        self.assertIsInstance(two_pids[1], int)
Example #3
0
 def _tester(self, cwlfile, jobfile, outDir, expect):
     """Run a CWL workflow through cwltoil and compare its JSON output.

     :param cwlfile: workflow path, relative to the project root
     :param jobfile: job file path, relative to the project root
     :param outDir: directory passed to cwltoil as ``--outdir``
     :param expect: object the parsed JSON output must equal
     """
     from toil.cwl import cwltoil
     rootDir = self._projectRootPath()
     st = StringIO.StringIO()
     cwltoil.main(['--outdir', outDir,
                   os.path.join(rootDir, cwlfile),
                   os.path.join(rootDir, jobfile)],
                  stdout=st)
     out = json.loads(st.getvalue())
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out, expect)
Example #4
0
 def _debug_worker_tester(self, cwlfile, jobfile, expect):
     """Run a workflow with ``--debugWorker`` and compare its JSON output.

     :param cwlfile: workflow path, joined onto ``self.rootDir``
     :param jobfile: job file path, joined onto ``self.rootDir``
     :param expect: object the cleaned-up JSON output must equal
     """
     from toil.cwl import cwltoil
     captured = StringIO()
     argv = [
         '--debugWorker',
         '--outdir', self.outDir,
         os.path.join(self.rootDir, cwlfile),
         os.path.join(self.rootDir, jobfile),
     ]
     cwltoil.main(argv, stdout=captured)
     result = json.loads(captured.getvalue())
     # These keys are removed before the comparison with ``expect``.
     for dropped_key in ("http://commonwl.org/cwltool#generation",
                         "nameext",
                         "nameroot"):
         result["output"].pop(dropped_key, None)
     self.assertEqual(result, expect)
Example #5
0
 def _tester(self, cwlfile, jobfile, expect, main_args=[], out_name="output"):
     """Run a workflow through cwltoil and compare its JSON output.

     :param cwlfile: workflow path, joined onto ``self.rootDir``
     :param jobfile: job file path, joined onto ``self.rootDir``
     :param expect: object the cleaned-up JSON output must equal
     :param main_args: extra arguments placed before the standard ones
     :param out_name: key of the output object that gets cleaned up
     """
     from toil.cwl import cwltoil
     captured = StringIO()
     # Work on a copy: neither the caller's list nor the default is modified.
     argv = main_args[:]
     argv.extend([
         '--outdir', self.outDir,
         os.path.join(self.rootDir, cwlfile),
         os.path.join(self.rootDir, jobfile),
     ])
     cwltoil.main(argv, stdout=captured)
     result = json.loads(captured.getvalue())
     # These keys are removed before the comparison with ``expect``.
     for dropped_key in ("http://commonwl.org/cwltool#generation",
                         "nameext",
                         "nameroot"):
         result[out_name].pop(dropped_key, None)
     self.assertEqual(result, expect)
Example #6
0
 def _tester(self, cwlfile, jobfile, outDir, expect):
     """Run a CWL workflow through cwltoil and check its JSON output.

     ``cwlfile`` and ``jobfile`` are resolved against the project root and
     the results are written to ``outDir``. The JSON document printed by
     cwltoil must equal ``expect``.
     """
     from toil.cwl import cwltoil
     rootDir = self._projectRootPath()
     st = StringIO.StringIO()
     cwltoil.main([
         '--outdir', outDir,
         os.path.join(rootDir, cwlfile),
         os.path.join(rootDir, jobfile)
     ], stdout=st)
     out = json.loads(st.getvalue())
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out, expect)
Example #7
0
 def test_s3_as_secondary_file(self):
     """Run s3_secondary_file.cwl and verify checksum, size and content of its output."""
     from toil.cwl import cwltoil
     workflow = os.path.join(self.rootDir, 'src/toil/test/cwl/s3_secondary_file.cwl')
     job_order = os.path.join(self.rootDir, 'src/toil/test/cwl/s3_secondary_file.json')
     captured = StringIO()
     cwltoil.main(['--outdir', self.outDir, workflow, job_order], stdout=captured)
     output = json.loads(captured.getvalue())['output']
     self.assertEqual(output['checksum'], 'sha1$d14dd02e354918b4776b941d154c18ebc15b9b38')
     self.assertEqual(output['size'], 24)
     # The location is a file:// URI; strip the scheme to get a local path.
     local_path = output['location'][len('file://'):]
     with open(local_path, 'r') as handle:
         self.assertEqual(handle.read().strip(), 'When is s4 coming out?')
Example #8
0
 def _tester(self, cwlfile, jobfile, outDir, expect):
     """Run a CWL workflow through cwltoil and check its JSON output.

     :param cwlfile: workflow path, relative to the project root
     :param jobfile: job file path, relative to the project root
     :param outDir: directory passed to cwltoil as ``--outdir``
     :param expect: expected output object, without the ``location`` field
     """
     from toil.cwl import cwltoil
     rootDir = self._projectRootPath()
     st = StringIO.StringIO()
     cwltoil.main(['--outdir', outDir,
                   os.path.join(rootDir, cwlfile),
                   os.path.join(rootDir, jobfile)],
                  stdout=st)
     out = json.loads(st.getvalue())
     # locations are internal objects in output for CWL
     out["output"].pop("location", None)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out, expect)
Example #9
0
 def _tester(self, cwlfile, jobfile, outDir, expect):
     """Run a CWL workflow through cwltoil and check its JSON output.

     :param cwlfile: workflow path, relative to the project root
     :param jobfile: job file path, relative to the project root
     :param outDir: directory passed to cwltoil as ``--outdir``
     :param expect: object the cleaned-up JSON output must equal
     """
     from toil.cwl import cwltoil
     rootDir = self._projectRootPath()
     st = StringIO()
     cwltoil.main(['--outdir', outDir,
                   os.path.join(rootDir, cwlfile),
                   os.path.join(rootDir, jobfile)],
                  stdout=st)
     out = json.loads(st.getvalue())
     # These keys are removed before the comparison with ``expect``.
     out["output"].pop("http://commonwl.org/cwltool#generation", None)
     out["output"].pop("nameext", None)
     out["output"].pop("nameroot", None)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out, expect)
Example #10
0
 def _tester(self, cwlfile, jobfile, outDir, expect):
     """Execute a workflow with cwltoil and assert on the JSON it prints.

     ``cwlfile`` and ``jobfile`` are resolved against the project root and
     results go to ``outDir``. Three keys are dropped from the ``output``
     object before it is compared with ``expect``.
     """
     from toil.cwl import cwltoil
     rootDir = self._projectRootPath()
     st = StringIO()
     cwltoil.main(['--outdir', outDir,
                   os.path.join(rootDir, cwlfile),
                   os.path.join(rootDir, jobfile)],
                  stdout=st)
     out = json.loads(st.getvalue())
     out["output"].pop("http://commonwl.org/cwltool#generation", None)
     out["output"].pop("nameext", None)
     out["output"].pop("nameroot", None)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out, expect)
Example #11
0
 def _tester(self, cwlfile, jobfile, outDir, expect):
     """Execute a workflow with cwltoil and assert on the JSON it prints.

     :param cwlfile: workflow path, relative to the project root
     :param jobfile: job file path, relative to the project root
     :param outDir: directory passed to cwltoil as ``--outdir``
     :param expect: expected output object, without the ``location`` field
     """
     from toil.cwl import cwltoil
     rootDir = self._projectRootPath()
     st = StringIO()
     cwltoil.main([
         '--outdir', outDir,
         os.path.join(rootDir, cwlfile),
         os.path.join(rootDir, jobfile)
     ], stdout=st)
     out = json.loads(st.getvalue())
     # locations are internal objects in output for CWL
     out["output"].pop("location", None)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out, expect)
Example #12
0
 def test_mpi(self):
     """Run mpi_simple.cwl with the mock MPI config and check that two integer PIDs are reported."""
     from toil.cwl import cwltoil
     mpi_config = os.path.join(self.rootDir, 'src/toil/test/cwl/mock_mpi/fake_mpi.yml')
     workflow = os.path.join(self.rootDir, 'src/toil/test/cwl/mpi_simple.cwl')
     captured = StringIO()
     argv = [
         '--outdir', self.outDir,
         '--enable-dev',
         '--enable-ext',
         '--mpi-config-file', mpi_config,
         workflow,
     ]
     cwltoil.main(argv, stdout=captured)
     result = json.loads(captured.getvalue())
     # The location is a file:// URI; strip the scheme to get a local path.
     pid_file = result.get('pids', {}).get('location')[len('file://'):]
     with open(pid_file, 'r') as handle:
         pids = [int(token) for token in handle.read().split()]
     self.assertEqual(len(pids), 2)
     for index in (0, 1):
         self.assertTrue(isinstance(pids[index], int))
Example #13
0
    def test_s3_as_secondary_file(self):
        """
        Run s3_secondary_file.cwl and verify the produced output file.

        Checks the reported checksum and size, then reads the file the
        output location points at and compares its content.
        """
        from toil.cwl import cwltoil

        cwl_path = os.path.join(self.rootDir,
                                "src/toil/test/cwl/s3_secondary_file.cwl")
        job_path = os.path.join(self.rootDir,
                                "src/toil/test/cwl/s3_secondary_file.json")
        captured = StringIO()
        cwltoil.main(["--outdir", self.outDir, cwl_path, job_path],
                     stdout=captured)

        output = json.loads(captured.getvalue())["output"]
        self.assertEqual(output["checksum"],
                         "sha1$d14dd02e354918b4776b941d154c18ebc15b9b38")
        self.assertEqual(output["size"], 24)
        # The location is a file:// URI; strip the scheme to get a local path.
        local_path = output["location"][len("file://"):]
        with open(local_path) as handle:
            self.assertEqual(handle.read().strip(), "When is s4 coming out?")
Example #14
0
def run_workflow(wf_path: Union[Path, str],
                 inputs: Dict[str, any],
                 output_dir: Optional[Union[Path, str]] = None,
                 base_dir=None):
    """Run a CWL workflow with cwltoil and return its output directory and log.

    :param wf_path: path of the CWL workflow to run
    :param inputs: workflow inputs; each item is passed as ``--key=value``
    :param output_dir: directory for results, the log file and provenance;
        a temporary directory is created when omitted
    :param base_dir: optional value for the ``--basedir`` argument
    :return: tuple of the output directory (as a ``Path``) and the content
        of the log file
    :raises CWLException: if cwltoil terminates via ``SystemExit``
    """
    # NOTE(review): the ``any`` in the ``inputs`` annotation is the builtin
    # function; ``typing.Any`` was probably intended -- confirm and fix
    # together with the file's imports.
    _logger.debug(f'Running CWL tool at {wf_path}')
    _logger.debug(f'Input values: {inputs}')

    if output_dir is None:
        output_dir = Path(tempfile.mkdtemp())
    else:
        # The signature accepts plain strings, but the "/" joins below need
        # a Path object.
        output_dir = Path(output_dir)
    # Logged after the default is resolved so the real directory is shown.
    _logger.debug(f'Results will be written to {output_dir}')

    log_path = str(output_dir / 'log.txt')
    prov_dir = output_dir / PROVENANCE

    wf_path = str(wf_path)

    wf_input = [f'--{k}={v}' for k, v in inputs.items()]

    cwltool_args = []

    if base_dir:
        cwltool_args += ['--basedir', str(base_dir)]

    cwltool_args += ['--logFile', str(log_path)]
    cwltool_args += ['--outdir', str(output_dir)]
    cwltool_args += ['--enable-dev']

    # RO Crate containing provenance will be stored in a "provenance" subdirectory
    cwltool_args += ['--provenance', str(prov_dir)]

    try:
        cwltoil.main(cwltool_args + [wf_path] + wf_input)
    except SystemExit as e:
        # Chain explicitly so the original exit is kept as the cause.
        raise CWLException(
            f'Workflow {wf_path} with inputs {inputs} has failed.', e) from e

    _logger.debug('CWL tool has run successfully.')

    with open(log_path, 'r') as infile:
        runlogs = infile.read()

    return output_dir, runlogs
Example #15
0
    def test_restart(self):
        """Enable restarts with CWLtoil -- run failing test, re-run correct test.

        The first run is made to fail by removing every PATH entry that
        contains ``rev``. The workflow is then restarted with the original
        PATH, and a second restart is expected to raise
        NoSuchJobStoreException. PATH is restored even when the first run
        raises an unexpected exception.
        """
        from toil.cwl import cwltoil
        from toil.jobStores.abstractJobStore import NoSuchJobStoreException
        from toil.leader import FailedJobsException
        outDir = self._createTempDir()
        cwlDir = os.path.join(self._projectRootPath(), "src", "toil", "test", "cwl")
        cmd = ['--outdir', outDir, '--jobStore', os.path.join(outDir, 'jobStore'), "--no-container",
               os.path.join(cwlDir, "revsort.cwl"), os.path.join(cwlDir, "revsort-job.json")]

        def path_without_rev():
            return ":".join([d for d in os.environ["PATH"].split(":")
                             if not os.path.exists(os.path.join(d, "rev"))])
        orig_path = os.environ["PATH"]
        # Force a failure and half finished job by removing `rev` from the PATH
        os.environ["PATH"] = path_without_rev()
        try:
            cwltoil.main(cmd)
            self.fail("Expected problem job with incorrect PATH did not fail")
        except FailedJobsException:
            pass
        finally:
            # Restore PATH on every exit path so later tests are unaffected
            os.environ["PATH"] = orig_path
        # Finish the job with a correct PATH
        cwltoil.main(["--restart"] + cmd)
        # Should fail because previous job completed successfully
        try:
            cwltoil.main(["--restart"] + cmd)
            self.fail("Restart with missing directory did not fail")
        except NoSuchJobStoreException:
            pass
Example #16
0
 def test_restart(self):
     """Enable restarts with CWLtoil -- run failing test, re-run correct test.

     The first run is made to fail by removing every PATH entry that
     contains ``rev``. The workflow is then restarted with the original
     PATH, and a second restart is expected to raise
     NoSuchJobStoreException. PATH is restored even when the first run
     raises an unexpected exception.
     """
     from toil.cwl import cwltoil
     from toil.jobStores.abstractJobStore import NoSuchJobStoreException
     from toil.leader import FailedJobsException
     outDir = self._createTempDir()
     cwlDir = os.path.join(self._projectRootPath(), "src", "toil", "test", "cwl")
     cmd = ['--outdir', outDir, '--jobStore', os.path.join(outDir, 'jobStore'), "--no-container",
            os.path.join(cwlDir, "revsort.cwl"), os.path.join(cwlDir, "revsort-job.json")]
     def path_without_rev():
         return ":".join([d for d in os.environ["PATH"].split(":")
                          if not os.path.exists(os.path.join(d, "rev"))])
     orig_path = os.environ["PATH"]
     # Force a failure and half finished job by removing `rev` from the PATH
     os.environ["PATH"] = path_without_rev()
     try:
         cwltoil.main(cmd)
         self.fail("Expected problem job with incorrect PATH did not fail")
     except FailedJobsException:
         pass
     finally:
         # Restore PATH on every exit path so later tests are unaffected
         os.environ["PATH"] = orig_path
     # Finish the job with a correct PATH
     cwltoil.main(cmd + ["--restart"])
     # Should fail because previous job completed successfully
     try:
         cwltoil.main(cmd + ["--restart"])
         self.fail("Restart with missing directory did not fail")
     except NoSuchJobStoreException:
         pass
Example #17
0
 def test_run_revsort(self):
     """Run revsort.cwl and compare the cwl.output.json it writes to the expected object.

     The output JSON file is removed afterwards whether or not the run
     succeeded.
     """
     from toil.cwl import cwltoil
     outDir = self._createTempDir()
     rootDir = self._projectRootPath()
     outputJson = os.path.join(outDir, 'cwl.output.json')
     try:
         cwltoil.main(['--outdir', outDir,
                       os.path.join(rootDir, 'src/toil/test/cwl/revsort.cwl'),
                       os.path.join(rootDir, 'src/toil/test/cwl/revsort-job.json')])
         with open(outputJson) as f:
             out = json.load(f)
     finally:
         if os.path.exists(outputJson):
             os.remove(outputJson)
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(out, {
         # Having unicode string literals isn't necessary for the assertion but makes for a
         # less noisy diff in case the assertion fails.
         u'output': {
             u'path': unicode(os.path.join(outDir, 'output.txt')),
             u'size': 1111,
             u'class': u'File',
             u'checksum': u'sha1$b9214658cc453331b62c2282b772a5c063dbd284'}})
Example #18
0
def run_toil(ctx, path, job_path, **kwds):
    """Translate planemo kwds to cwltoil arguments and run cwltoil's main function.

    The JSON document cwltoil writes to its stdout is captured through a
    temporary file and parsed.

    Returns an ``ErrorRunResponse`` when cwltoil reports a non-zero exit
    code, otherwise a ``CwlToolRunResponse`` carrying the parsed outputs.
    Raises ``Exception`` when the captured stdout is not valid JSON.
    """
    _ensure_toil_available()

    args = []
    if not ctx.verbose:
        args.append("--quiet")
    output_directory = kwds.get("output_directory", None)
    if output_directory:
        args.append("--outdir")
        args.append(output_directory)
    if kwds.get("no_container", False):
        args.append("--no-container")
        ensure_dependency_resolvers_conf_configured(ctx, kwds)
        args.append("--beta-dependency-resolvers-configuration")
        args.append(kwds["dependency_resolvers_config_file"])
    if kwds.get("mulled_containers"):
        args.append("--beta-use-biocontainers")

    if kwds.get("non_strict_cwl", False):
        args.append("--non-strict")

    args.extend([path, job_path])
    ctx.vlog("Calling cwltoil with arguments %s" % args)
    with tempfile.NamedTemporaryFile("w") as tmp_stdout:
        # cwltool passes sys.stderr to subprocess.Popen - ensure it has
        # an actual fileno.
        with real_io():
            ret_code = cwltoil.main(
                args,
                stdout=tmp_stdout
            )
        tmp_stdout.flush()
        # Read the captured output once, inside a context manager, so the
        # error path below does not need to re-open the file and leave a
        # handle unclosed.
        with open(tmp_stdout.name, "r") as stdout_f:
            raw_stdout = stdout_f.read()
        try:
            result = json.loads(raw_stdout)
        except ValueError:
            message = JSON_PARSE_ERROR_MESSAGE % (
                raw_stdout,
                tmp_stdout.name,
            )
            error(message)
            raise Exception(message)

        if ret_code != 0:
            return ErrorRunResponse("Error running Toil")
        outputs = result
    return CwlToolRunResponse(
        "",
        outputs=outputs,
    )
def run_toil(ctx, path, job_path, **kwds):
    """Translate planemo kwds to cwltoil arguments and run cwltoil's main function.

    The JSON document cwltoil writes to its stdout is captured through a
    temporary file and parsed.

    Returns an ``ErrorRunResponse`` when cwltoil reports a non-zero exit
    code, otherwise a ``CwlToolRunResponse`` carrying the parsed outputs.
    Raises ``Exception`` when the captured stdout is not valid JSON.
    """
    _ensure_toil_available()

    args = []
    if not ctx.verbose:
        args.append("--quiet")
    output_directory = kwds.get("output_directory", None)
    if output_directory:
        args.append("--outdir")
        args.append(output_directory)
    if kwds.get("no_container", False):
        args.append("--no-container")
        ensure_dependency_resolvers_conf_configured(ctx, kwds)
        args.append("--beta-dependency-resolvers-configuration")
        args.append(kwds["dependency_resolvers_config_file"])
    if kwds.get("mulled_containers"):
        args.append("--beta-use-biocontainers")

    if kwds.get("non_strict_cwl", False):
        args.append("--non-strict")

    args.extend([path, job_path])
    ctx.vlog("Calling cwltoil with arguments %s" % args)
    with tempfile.NamedTemporaryFile("w") as tmp_stdout:
        # cwltool passes sys.stderr to subprocess.Popen - ensure it has
        # an actual fileno.
        with real_io():
            ret_code = cwltoil.main(args, stdout=tmp_stdout)
        tmp_stdout.flush()
        # Read the captured output once, inside a context manager, so the
        # error path below does not need to re-open the file and leave a
        # handle unclosed.
        with open(tmp_stdout.name, "r") as stdout_f:
            raw_stdout = stdout_f.read()
        try:
            result = json.loads(raw_stdout)
        except ValueError:
            message = JSON_PARSE_ERROR_MESSAGE % (
                raw_stdout,
                tmp_stdout.name,
            )
            error(message)
            raise Exception(message)

        if ret_code != 0:
            return ErrorRunResponse("Error running Toil")
        outputs = result
    return CwlToolRunResponse(
        "",
        outputs=outputs,
    )
Example #20
0
    def test_restart(self):
        """
        Enable restarts with toil-cwl-runner -- run failing test, re-run correct test.
        Only implemented for single machine.

        A symlink named ``rev`` that points at the ``date`` binary is put at
        the front of PATH to make the first run fail. The workflow is then
        restarted with the original PATH, and a second restart is expected
        to raise NoSuchJobStoreException. PATH is restored even when the
        first run raises an unexpected exception.
        """
        log.info("Running CWL Test Restart.  Expecting failure, then success.")
        from toil.cwl import cwltoil
        from toil.jobStores.abstractJobStore import NoSuchJobStoreException
        from toil.leader import FailedJobsException

        outDir = self._createTempDir()
        cwlDir = os.path.join(self._projectRootPath(), "src", "toil", "test",
                              "cwl")
        cmd = [
            "--outdir",
            outDir,
            "--jobStore",
            os.path.join(outDir, "jobStore"),
            "--no-container",
            os.path.join(cwlDir, "revsort.cwl"),
            os.path.join(cwlDir, "revsort-job.json"),
        ]

        # create a fake rev bin that actually points to the "date" binary
        cal_path = [
            d for d in os.environ["PATH"].split(":")
            if os.path.exists(os.path.join(d, "date"))
        ][-1]
        os.symlink(os.path.join(cal_path, "date"), os.path.join(outDir, "rev"))

        def path_with_bogus_rev():
            # prepend to the PATH so that the bogus rev is found first
            return f"{outDir}:" + os.environ["PATH"]

        orig_path = os.environ["PATH"]
        # Force a failure by trying to use an incorrect version of `rev` from the PATH
        os.environ["PATH"] = path_with_bogus_rev()
        try:
            cwltoil.main(cmd)
            self.fail("Expected problem job with incorrect PATH did not fail")
        except FailedJobsException:
            pass
        finally:
            # Restore PATH on every exit path so later tests are unaffected
            os.environ["PATH"] = orig_path
        # Finish the job with a correct PATH
        cwltoil.main(["--restart"] + cmd)
        # Should fail because previous job completed successfully
        try:
            cwltoil.main(["--restart"] + cmd)
            self.fail("Restart with missing directory did not fail")
        except NoSuchJobStoreException:
            pass
Example #21
0
    def call_toil(self):
        """
        Calls Toil to run the operation's pipeline

        Builds the argument list for cwltoil.main from the operation's
        attributes, exports the entries of self.toil_env_variables for the
        duration of the run and records the outcome in self.success. The
        standard output of the run is written to self.pipeline_outputs_file.
        The environment is put back to its previous state afterwards, even
        if the run raises.
        """
        # Build the args list
        args = []
        if self.container is not None:
            # If the container is Docker, no extra args are needed. For other
            # containers, set the required args
            if self.container == 'singularity':
                args.extend(['--singularity'])
            elif self.container == 'udocker':
                args.extend(['--user-space-docker-cmd', 'udocker'])
        else:
            args.extend(['--no-container'])
            args.extend(['--preserve-entire-environment'])
        # args.extend(['--bypass-file-store'])  # for debugging weird issues!!
        args.extend(['--batchSystem', self.batch_system])
        if self.batch_system == 'slurm':
            args.extend(['--disableCaching'])
            args.extend(['--defaultCores', str(self.cpus_per_task)])
            args.extend(['--defaultMemory', self.mem_per_node_gb])
            self.toil_env_variables['TOIL_SLURM_ARGS'] = "--export=ALL"
        args.extend(['--maxLocalJobs', str(self.max_nodes)])
        args.extend(['--jobStore', self.jobstore])
        if os.path.exists(self.jobstore):
            # An existing job store means a previous run can be resumed
            args.extend(['--restart'])
        args.extend(['--basedir', self.pipeline_working_dir])
        args.extend(['--outdir', self.pipeline_working_dir])
        args.extend(['--writeLogs', self.log_dir])
        args.extend(['--writeLogsFromAllJobs'
                     ])  # also keep logs of successful jobs
        args.extend(['--maxLogFileSize',
                     '0'])  # disable truncation of log files
        if self.scratch_dir is not None:
            # Note: the trailing '/' is required by Toil v5.3+; in addition,
            # --tmpdir-prefix and --tmp-outdir-prefix require a filename prefix
            # when using --bypass-file-store, but it won't harm to use it anyway.
            args.extend(['--tmpdir-prefix', self.scratch_dir + '/toil.'])
            args.extend(['--tmp-outdir-prefix', self.scratch_dir + '/toil.'])
            args.extend(['--workDir', self.scratch_dir + '/'])
        args.extend(['--clean',
                     'never'])  # preserves the job store for future runs
        args.extend(['--servicePollingInterval', '10'])
        args.extend(['--stats'])
        # The following three options should be enabled for debugging purposes only!!
        # args.extend(['--cleanWorkDir', 'never'])  # enable for debugging purposes only!!
        # args.extend(['--debugWorker'])  # enable for debugging purposes only!!
        # args.extend(['--logLevel', 'DEBUG'])  # enable for debugging purposes only!!
        if self.field.use_mpi and self.toil_major_version >= 5:
            # Create the config file for MPI jobs and add the required args
            if self.batch_system == 'slurm':
                # Use salloc to request the Slurm allocation and run the MPI job
                config_lines = [
                    "runner: 'mpi_runner.sh'", "nproc_flag: '-N'",
                    "extra_flags: ['mpirun', '--map-by node']"
                ]
            else:
                config_lines = [
                    "runner: 'mpirun'", "nproc_flag: '-np'",
                    "extra_flags: ['--map-by node']"
                ]
                self.log.warning(
                    'MPI support for non-Slurm clusters is experimental. '
                    'Please report any issues encountered.')
            with open(self.mpi_config_file, 'w') as f:
                f.write('\n'.join(config_lines))
            args.extend(['--mpi-config-file', self.mpi_config_file])
            args.extend(['--enable-ext'])
        args.append(self.pipeline_parset_file)
        args.append(self.pipeline_inputs_file)

        # Set env variables, if any, remembering the previous values so that
        # the environment can be restored after the run
        saved_env = {k: os.environ.get(k) for k in self.toil_env_variables}
        for k, v in self.toil_env_variables.items():
            os.environ[k] = v

        # Run the pipeline
        # print(f"**** Toil command-line arguments: {args} ****")
        try:
            with open(self.pipeline_outputs_file, 'w') as stdout:
                status = cwltoil.main(args=args, stdout=stdout)
            self.success = (status == 0)
        except FailedJobsException:
            self.success = False
        finally:
            # Restore the env variables: remove those that did not exist
            # before the run and put back the previous value of the others.
            # Done in "finally" so that an unexpected exception cannot leave
            # the process environment modified.
            for k, previous in saved_env.items():
                if previous is None:
                    os.environ.pop(k, None)
                else:
                    os.environ[k] = previous

        # Reset the logging level, as the cwltoil call above can change it
        _logging.set_level(self.parset['logging_level'])
def registerDrmaaBatchSystem():
    """Register the 'drmaa' batch system factory and its command-line options with Toil."""
    from toil.batchSystems.registry import addBatchSystemFactory
    from toil.batchSystems.options import addOptionsDefinition

    def drmaaBatchSystemFactory():
        # The batch system class is imported only when the factory is called.
        from wehi_pipeline.batchSystems.drmaaBatchSystem import DrmaaBatchSystem
        return DrmaaBatchSystem

    def addOptions(addOptionFn):
        # Declares the two options used by the DRMAA batch system.
        job_name_prefix_help = "A job name prefix (will be concatenated with the first part of the Toil workflowID, used by the DRMAA batch system)"
        addOptionFn("--jobQueue", dest="jobQueue", default=None,
                    help="A job queue (used by the DRMAA batch system)")
        addOptionFn("--jobNamePrefix", dest="jobNamePrefix", default='toil',
                    help=job_name_prefix_help)

    addBatchSystemFactory('drmaa', drmaaBatchSystemFactory)
    addOptionsDefinition(addOptions)


if __name__ == '__main__':
    # Register the DRMAA batch system before handing control to main().
    registerDrmaaBatchSystem()
    # Drop a trailing "-script.py", "-script.pyw" or ".exe" from the program name.
    program_name = sys.argv[0]
    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', program_name)
    exit_status = main()
    sys.exit(exit_status)