def test_streamable(self):
    """Test that a file with 'streamable'=True is a named pipe."""
    cwlfile = "src/toil/test/cwl/stream.cwl"
    jobfile = "src/toil/test/cwl/stream.json"
    out_name = "output"
    jobstore = f'--jobStore=aws:us-west-1:toil-stream-{uuid.uuid4()}'
    from toil.cwl import cwltoil
    st = StringIO()
    args = [
        "--outdir",
        self.outDir,
        jobstore,
        os.path.join(self.rootDir, cwlfile),
        os.path.join(self.rootDir, jobfile),
    ]
    cwltoil.main(args, stdout=st)
    out = json.loads(st.getvalue())
    out[out_name].pop("http://commonwl.org/cwltool#generation", None)
    out[out_name].pop("nameext", None)
    out[out_name].pop("nameroot", None)
    self.assertEqual(out, self._expected_streaming_output(self.outDir))
    with open(out[out_name]["location"][len("file://"):], "r") as f:
        self.assertEqual(f.read().strip(), "When is s4 coming out?")
def test_mpi(self):
    from toil.cwl import cwltoil
    stdout = StringIO()
    main_args = [
        "--outdir",
        self.outDir,
        "--enable-dev",
        "--enable-ext",
        "--mpi-config-file",
        os.path.join(self.rootDir, "src/toil/test/cwl/mock_mpi/fake_mpi.yml"),
        os.path.join(self.rootDir, "src/toil/test/cwl/mpi_simple.cwl"),
    ]
    path = os.environ["PATH"]
    os.environ["PATH"] = f"{path}:{self.rootDir}/src/toil/test/cwl/mock_mpi/"
    cwltoil.main(main_args, stdout=stdout)
    os.environ["PATH"] = path
    out = json.loads(stdout.getvalue())
    with open(out.get("pids", {}).get("location")[len("file://"):]) as f:
        two_pids = [int(i) for i in f.read().split()]
    self.assertEqual(len(two_pids), 2)
    self.assertTrue(isinstance(two_pids[0], int))
    self.assertTrue(isinstance(two_pids[1], int))
def _tester(self, cwlfile, jobfile, outDir, expect):
    from toil.cwl import cwltoil
    rootDir = self._projectRootPath()
    st = StringIO()
    cwltoil.main(['--outdir', outDir,
                  os.path.join(rootDir, cwlfile),
                  os.path.join(rootDir, jobfile)],
                 stdout=st)
    out = json.loads(st.getvalue())
    self.assertEqual(out, expect)
def _debug_worker_tester(self, cwlfile, jobfile, expect):
    from toil.cwl import cwltoil
    st = StringIO()
    cwltoil.main(['--debugWorker', '--outdir', self.outDir,
                  os.path.join(self.rootDir, cwlfile),
                  os.path.join(self.rootDir, jobfile)],
                 stdout=st)
    out = json.loads(st.getvalue())
    out["output"].pop("http://commonwl.org/cwltool#generation", None)
    out["output"].pop("nameext", None)
    out["output"].pop("nameroot", None)
    self.assertEqual(out, expect)
def _tester(self, cwlfile, jobfile, expect, main_args=[], out_name="output"):
    from toil.cwl import cwltoil
    st = StringIO()
    main_args = main_args[:]
    main_args.extend(['--outdir', self.outDir,
                      os.path.join(self.rootDir, cwlfile),
                      os.path.join(self.rootDir, jobfile)])
    cwltoil.main(main_args, stdout=st)
    out = json.loads(st.getvalue())
    out[out_name].pop("http://commonwl.org/cwltool#generation", None)
    out[out_name].pop("nameext", None)
    out[out_name].pop("nameroot", None)
    self.assertEqual(out, expect)
def _tester(self, cwlfile, jobfile, outDir, expect):
    from toil.cwl import cwltoil
    rootDir = self._projectRootPath()
    st = StringIO()
    cwltoil.main([
        '--outdir', outDir,
        os.path.join(rootDir, cwlfile),
        os.path.join(rootDir, jobfile)
    ], stdout=st)
    out = json.loads(st.getvalue())
    self.assertEqual(out, expect)
def test_s3_as_secondary_file(self):
    from toil.cwl import cwltoil
    stdout = StringIO()
    main_args = ['--outdir', self.outDir,
                 os.path.join(self.rootDir, 'src/toil/test/cwl/s3_secondary_file.cwl'),
                 os.path.join(self.rootDir, 'src/toil/test/cwl/s3_secondary_file.json')]
    cwltoil.main(main_args, stdout=stdout)
    out = json.loads(stdout.getvalue())
    self.assertEqual(out['output']['checksum'],
                     'sha1$d14dd02e354918b4776b941d154c18ebc15b9b38')
    self.assertEqual(out['output']['size'], 24)
    with open(out['output']['location'][len('file://'):], 'r') as f:
        self.assertEqual(f.read().strip(), 'When is s4 coming out?')
def _tester(self, cwlfile, jobfile, outDir, expect):
    from toil.cwl import cwltoil
    rootDir = self._projectRootPath()
    st = StringIO()
    cwltoil.main(['--outdir', outDir,
                  os.path.join(rootDir, cwlfile),
                  os.path.join(rootDir, jobfile)],
                 stdout=st)
    out = json.loads(st.getvalue())
    # locations are internal objects in output for CWL
    out["output"].pop("location", None)
    self.assertEqual(out, expect)
def _tester(self, cwlfile, jobfile, outDir, expect):
    from toil.cwl import cwltoil
    rootDir = self._projectRootPath()
    st = StringIO()
    cwltoil.main(['--outdir', outDir,
                  os.path.join(rootDir, cwlfile),
                  os.path.join(rootDir, jobfile)],
                 stdout=st)
    out = json.loads(st.getvalue())
    out["output"].pop("http://commonwl.org/cwltool#generation", None)
    out["output"].pop("nameext", None)
    out["output"].pop("nameroot", None)
    self.assertEqual(out, expect)
def _tester(self, cwlfile, jobfile, outDir, expect):
    from toil.cwl import cwltoil
    rootDir = self._projectRootPath()
    st = StringIO()
    cwltoil.main([
        '--outdir', outDir,
        os.path.join(rootDir, cwlfile),
        os.path.join(rootDir, jobfile)
    ], stdout=st)
    out = json.loads(st.getvalue())
    # locations are internal objects in output for CWL
    out["output"].pop("location", None)
    self.assertEqual(out, expect)
def test_mpi(self):
    from toil.cwl import cwltoil
    stdout = StringIO()
    main_args = ['--outdir', self.outDir,
                 '--enable-dev', '--enable-ext',
                 '--mpi-config-file',
                 os.path.join(self.rootDir, 'src/toil/test/cwl/mock_mpi/fake_mpi.yml'),
                 os.path.join(self.rootDir, 'src/toil/test/cwl/mpi_simple.cwl')]
    cwltoil.main(main_args, stdout=stdout)
    out = json.loads(stdout.getvalue())
    with open(out.get('pids', {}).get('location')[len('file://'):], 'r') as f:
        two_pids = [int(i) for i in f.read().split()]
    self.assertEqual(len(two_pids), 2)
    self.assertTrue(isinstance(two_pids[0], int))
    self.assertTrue(isinstance(two_pids[1], int))
def test_s3_as_secondary_file(self): from toil.cwl import cwltoil stdout = StringIO() main_args = [ "--outdir", self.outDir, os.path.join(self.rootDir, "src/toil/test/cwl/s3_secondary_file.cwl"), os.path.join(self.rootDir, "src/toil/test/cwl/s3_secondary_file.json"), ] cwltoil.main(main_args, stdout=stdout) out = json.loads(stdout.getvalue()) self.assertEqual(out["output"]["checksum"], "sha1$d14dd02e354918b4776b941d154c18ebc15b9b38") self.assertEqual(out["output"]["size"], 24) with open(out["output"]["location"][len("file://"):]) as f: self.assertEqual(f.read().strip(), "When is s4 coming out?")
def run_workflow(wf_path: Union[Path, str],
                 inputs: Dict[str, Any],
                 output_dir: Optional[Union[Path, str]] = None,
                 base_dir=None):
    _logger.debug(f'Running CWL tool at {wf_path}')
    _logger.debug(f'Input values: {inputs}')
    _logger.debug(f'Results will be written to {output_dir}')
    # Coerce to Path so the '/' operator below also works when a str is passed
    output_dir = Path(output_dir) if output_dir is not None else Path(tempfile.mkdtemp())
    log_path = str(output_dir / 'log.txt')
    prov_dir = output_dir / PROVENANCE
    wf_path = str(wf_path)
    wf_input = [f'--{k}={v}' for k, v in inputs.items()]
    cwltool_args = []
    if base_dir:
        cwltool_args += ['--basedir', str(base_dir)]
    cwltool_args += ['--logFile', str(log_path)]
    cwltool_args += ['--outdir', str(output_dir)]
    cwltool_args += ['--enable-dev']
    # RO Crate containing provenance will be stored in a "provenance" subdirectory
    cwltool_args += ['--provenance', str(prov_dir)]
    try:
        cwltoil.main(cwltool_args + [wf_path] + wf_input)
    except SystemExit as e:
        raise CWLException(
            f'Workflow {wf_path} with inputs {inputs} has failed.', e)
    _logger.debug('CWL tool has run successfully.')
    with open(log_path, 'r') as infile:
        runlogs = infile.read()
    return output_dir, runlogs
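# Illustrative sketch, not taken from the original sources: one way run_workflow()
# above could be driven. The workflow path and the "reads" input are hypothetical
# placeholders; only run_workflow() itself comes from the snippet above.
def example_run_workflow():
    from pathlib import Path
    out_dir, runlogs = run_workflow(
        Path("workflows/example.cwl"),        # hypothetical CWL workflow
        {"reads": "sample.fastq"},            # rendered as --reads=sample.fastq
        output_dir=Path("/tmp/example_run"),  # log.txt and provenance land here
    )
    return out_dir, runlogs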
def test_restart(self): """Enable restarts with CWLtoil -- run failing test, re-run correct test. """ from toil.cwl import cwltoil from toil.jobStores.abstractJobStore import NoSuchJobStoreException from toil.leader import FailedJobsException outDir = self._createTempDir() cwlDir = os.path.join(self._projectRootPath(), "src", "toil", "test", "cwl") cmd = ['--outdir', outDir, '--jobStore', os.path.join(outDir, 'jobStore'), "--no-container", os.path.join(cwlDir, "revsort.cwl"), os.path.join(cwlDir, "revsort-job.json")] def path_without_rev(): return ":".join([d for d in os.environ["PATH"].split(":") if not os.path.exists(os.path.join(d, "rev"))]) orig_path = os.environ["PATH"] # Force a failure and half finished job by removing `rev` from the PATH os.environ["PATH"] = path_without_rev() try: cwltoil.main(cmd) self.fail("Expected problem job with incorrect PATH did not fail") except FailedJobsException: pass # Finish the job with a correct PATH os.environ["PATH"] = orig_path cwltoil.main(["--restart"] + cmd) # Should fail because previous job completed successfully try: cwltoil.main(["--restart"] + cmd) self.fail("Restart with missing directory did not fail") except NoSuchJobStoreException: pass
def test_restart(self): """Enable restarts with CWLtoil -- run failing test, re-run correct test. """ from toil.cwl import cwltoil from toil.jobStores.abstractJobStore import NoSuchJobStoreException from toil.leader import FailedJobsException outDir = self._createTempDir() cwlDir = os.path.join(self._projectRootPath(), "src", "toil", "test", "cwl") cmd = ['--outdir', outDir, '--jobStore', os.path.join(outDir, 'jobStore'), "--no-container", os.path.join(cwlDir, "revsort.cwl"), os.path.join(cwlDir, "revsort-job.json")] def path_without_rev(): return ":".join([d for d in os.environ["PATH"].split(":") if not os.path.exists(os.path.join(d, "rev"))]) orig_path = os.environ["PATH"] # Force a failure and half finished job by removing `rev` from the PATH os.environ["PATH"] = path_without_rev() try: cwltoil.main(cmd) self.fail("Expected problem job with incorrect PATH did not fail") except FailedJobsException: pass # Finish the job with a correct PATH os.environ["PATH"] = orig_path cwltoil.main(cmd + ["--restart"]) # Should fail because previous job completed successfully try: cwltoil.main(cmd + ["--restart"]) self.fail("Restart with missing directory did not fail") except NoSuchJobStoreException: pass
def test_run_revsort(self):
    from toil.cwl import cwltoil
    outDir = self._createTempDir()
    rootDir = self._projectRootPath()
    outputJson = os.path.join(outDir, 'cwl.output.json')
    try:
        cwltoil.main(['--outdir', outDir,
                      os.path.join(rootDir, 'src/toil/test/cwl/revsort.cwl'),
                      os.path.join(rootDir, 'src/toil/test/cwl/revsort-job.json')])
        with open(outputJson) as f:
            out = json.load(f)
    finally:
        if os.path.exists(outputJson):
            os.remove(outputJson)
    self.assertEqual(out, {
        'output': {
            'path': os.path.join(outDir, 'output.txt'),
            'size': 1111,
            'class': 'File',
            'checksum': 'sha1$b9214658cc453331b62c2282b772a5c063dbd284'}})
def run_toil(ctx, path, job_path, **kwds):
    """Translate planemo kwds to cwltool kwds and run cwltool main function."""
    _ensure_toil_available()
    args = []
    if not ctx.verbose:
        args.append("--quiet")
    output_directory = kwds.get("output_directory", None)
    if output_directory:
        args.append("--outdir")
        args.append(output_directory)
    if kwds.get("no_container", False):
        args.append("--no-container")
    ensure_dependency_resolvers_conf_configured(ctx, kwds)
    args.append("--beta-dependency-resolvers-configuration")
    args.append(kwds["dependency_resolvers_config_file"])
    if kwds.get("mulled_containers"):
        args.append("--beta-use-biocontainers")
    if kwds.get("non_strict_cwl", False):
        args.append("--non-strict")
    args.extend([path, job_path])
    ctx.vlog("Calling cwltoil with arguments %s" % args)
    with tempfile.NamedTemporaryFile("w") as tmp_stdout:
        # cwltool passes sys.stderr to subprocess.Popen - ensure it has
        # an actual fileno.
        with real_io():
            ret_code = cwltoil.main(
                args,
                stdout=tmp_stdout
            )
        tmp_stdout.flush()
        with open(tmp_stdout.name, "r") as stdout_f:
            try:
                result = json.load(stdout_f)
            except ValueError:
                message = JSON_PARSE_ERROR_MESSAGE % (
                    open(tmp_stdout.name, "r").read(),
                    tmp_stdout.name,
                )
                error(message)
                raise Exception(message)
        if ret_code != 0:
            return ErrorRunResponse("Error running Toil")
        outputs = result
    return CwlToolRunResponse(
        "",
        outputs=outputs,
    )
def test_restart(self): """ Enable restarts with toil-cwl-runner -- run failing test, re-run correct test. Only implemented for single machine. """ log.info("Running CWL Test Restart. Expecting failure, then success.") from toil.cwl import cwltoil from toil.jobStores.abstractJobStore import NoSuchJobStoreException from toil.leader import FailedJobsException outDir = self._createTempDir() cwlDir = os.path.join(self._projectRootPath(), "src", "toil", "test", "cwl") cmd = [ "--outdir", outDir, "--jobStore", os.path.join(outDir, "jobStore"), "--no-container", os.path.join(cwlDir, "revsort.cwl"), os.path.join(cwlDir, "revsort-job.json"), ] # create a fake rev bin that actually points to the "date" binary cal_path = [ d for d in os.environ["PATH"].split(":") if os.path.exists(os.path.join(d, "date")) ][-1] os.symlink(os.path.join(cal_path, "date"), f'{os.path.join(outDir, "rev")}') def path_with_bogus_rev(): # append to the front of the PATH so that we check there first return f"{outDir}:" + os.environ["PATH"] orig_path = os.environ["PATH"] # Force a failure by trying to use an incorrect version of `rev` from the PATH os.environ["PATH"] = path_with_bogus_rev() try: cwltoil.main(cmd) self.fail("Expected problem job with incorrect PATH did not fail") except FailedJobsException: pass # Finish the job with a correct PATH os.environ["PATH"] = orig_path cwltoil.main(["--restart"] + cmd) # Should fail because previous job completed successfully try: cwltoil.main(["--restart"] + cmd) self.fail("Restart with missing directory did not fail") except NoSuchJobStoreException: pass
def call_toil(self):
    """
    Calls Toil to run the operation's pipeline
    """
    # Build the args list
    args = []
    if self.container is not None:
        # If the container is Docker, no extra args are needed. For other
        # containers, set the required args
        if self.container == 'singularity':
            args.extend(['--singularity'])
        elif self.container == 'udocker':
            args.extend(['--user-space-docker-cmd', 'udocker'])
    else:
        args.extend(['--no-container'])
        args.extend(['--preserve-entire-environment'])
    # args.extend(['--bypass-file-store'])  # for debugging weird issues!!
    args.extend(['--batchSystem', self.batch_system])
    if self.batch_system == 'slurm':
        args.extend(['--disableCaching'])
        args.extend(['--defaultCores', str(self.cpus_per_task)])
        args.extend(['--defaultMemory', self.mem_per_node_gb])
        self.toil_env_variables['TOIL_SLURM_ARGS'] = "--export=ALL"
    args.extend(['--maxLocalJobs', str(self.max_nodes)])
    args.extend(['--jobStore', self.jobstore])
    if os.path.exists(self.jobstore):
        args.extend(['--restart'])
    args.extend(['--basedir', self.pipeline_working_dir])
    args.extend(['--outdir', self.pipeline_working_dir])
    args.extend(['--writeLogs', self.log_dir])
    args.extend(['--writeLogsFromAllJobs'])  # also keep logs of successful jobs
    args.extend(['--maxLogFileSize', '0'])  # disable truncation of log files
    if self.scratch_dir is not None:
        # Note: the trailing '/' is required by Toil v5.3+; in addition,
        # --tmpdir-prefix and --tmp-outdir-prefix require a filename prefix
        # when using --bypass-file-store, but it won't harm to use it anyway.
        args.extend(['--tmpdir-prefix', self.scratch_dir + '/toil.'])
        args.extend(['--tmp-outdir-prefix', self.scratch_dir + '/toil.'])
        args.extend(['--workDir', self.scratch_dir + '/'])
    args.extend(['--clean', 'never'])  # preserves the job store for future runs
    args.extend(['--servicePollingInterval', '10'])
    args.extend(['--stats'])
    # The following three options should be enabled for debugging purposes only!!
    # args.extend(['--cleanWorkDir', 'never'])  # enable for debugging purposes only!!
    # args.extend(['--debugWorker'])  # enable for debugging purposes only!!
    # args.extend(['--logLevel', 'DEBUG'])  # enable for debugging purposes only!!
    if self.field.use_mpi and self.toil_major_version >= 5:
        # Create the config file for MPI jobs and add the required args
        if self.batch_system == 'slurm':
            # Use salloc to request the SLURM allocation and run the MPI job
            config_lines = [
                "runner: 'mpi_runner.sh'",
                "nproc_flag: '-N'",
                "extra_flags: ['mpirun', '--map-by node']"
            ]
        else:
            config_lines = [
                "runner: 'mpirun'",
                "nproc_flag: '-np'",
                "extra_flags: ['--map-by node']"
            ]
            self.log.warning(
                'MPI support for non-Slurm clusters is experimental. '
                'Please report any issues encountered.')
        with open(self.mpi_config_file, 'w') as f:
            f.write('\n'.join(config_lines))
        args.extend(['--mpi-config-file', self.mpi_config_file])
        args.extend(['--enable-ext'])
    args.append(self.pipeline_parset_file)
    args.append(self.pipeline_inputs_file)

    # Set env variables, if any
    for k, v in self.toil_env_variables.items():
        os.environ[k] = v

    # Run the pipeline
    # print(f"**** Toil command-line arguments: {args} ****")
    try:
        with open(self.pipeline_outputs_file, 'w') as stdout:
            status = cwltoil.main(args=args, stdout=stdout)
        self.success = (status == 0)
    except FailedJobsException:
        self.success = False

    # Unset env variables, if any
    for k, v in self.toil_env_variables.items():
        os.environ[k] = ''

    # Reset the logging level, as the cwltoil call above can change it
    _logging.set_level(self.parset['logging_level'])
def registerDrmaaBatchSystem():
    from toil.batchSystems.registry import addBatchSystemFactory
    from toil.batchSystems.options import addOptionsDefinition

    def drmaaBatchSystemFactory():
        from wehi_pipeline.batchSystems.drmaaBatchSystem import DrmaaBatchSystem
        return DrmaaBatchSystem

    addBatchSystemFactory('drmaa', drmaaBatchSystemFactory)

    def addOptions(addOptionFn):
        addOptionFn("--jobQueue", dest="jobQueue", default=None,
                    help="A job queue (used by the DRMAA batch system)")
        addOptionFn("--jobNamePrefix", dest="jobNamePrefix", default='toil',
                    help="A job name prefix (will be concatenated with the first part "
                         "of the Toil workflowID, used by the DRMAA batch system)")

    addOptionsDefinition(addOptions)


if __name__ == '__main__':
    registerDrmaaBatchSystem()
    sys.argv[0] = re.sub(r'(-script\.pyw?|\.exe)?$', '', sys.argv[0])
    sys.exit(main())
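# Minimal sketch of the invocation pattern shared by the snippets above, not taken
# from any single one of them: build an argument list, call toil.cwl.cwltoil.main()
# with a captured stdout, and parse the JSON output object it prints. The workflow
# and job file paths are hypothetical placeholders.
def run_cwl_with_toil(cwl_path, job_path, out_dir):
    import json
    from io import StringIO
    from toil.cwl import cwltoil

    stdout = StringIO()
    args = ["--outdir", out_dir, cwl_path, job_path]
    exit_code = cwltoil.main(args, stdout=stdout)  # returns a process-style exit code
    if exit_code != 0:
        raise RuntimeError(f"toil-cwl-runner failed with exit code {exit_code}")
    # The output object (file locations, checksums, sizes) is printed as JSON
    return json.loads(stdout.getvalue())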