def write_output_files(self) -> None:
    """
    Fetch all the files that this workflow generated and output information
    about them to `outputs.json`.
    """
    output_obj = {}
    job_store_type, _ = Toil.parseLocator(self.job_store)

    # For CWL workflows, the stdout should be a JSON object containing the outputs
    if self.wf_type == "cwl":
        try:
            with open(os.path.join(self.work_dir, "stdout")) as f:
                output_obj = json.load(f)
        except Exception as e:
            logger.warning("Failed to read outputs object from stdout:", exc_info=e)
    elif job_store_type == "file":
        for file in os.listdir(self.out_dir):
            location = os.path.join(self.out_dir, file)
            output_obj[file] = {
                "location": location,
                "size": os.stat(location).st_size,
                "class": get_file_class(location),
            }

    # TODO: fetch files from other job stores
    self.write("outputs.json", json.dumps(output_obj))
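# Both branches above key off Toil.parseLocator, which splits a job store
# locator into a (type, path) pair. A minimal sketch of its behavior, with
# illustrative (hypothetical) locator values:
from toil.common import Toil

assert Toil.parseLocator("file:/tmp/my-jobstore") == ("file", "/tmp/my-jobstore")
assert Toil.parseLocator("aws:us-west-2:my-jobstore") == ("aws", "us-west-2:my-jobstore")
# A bare path with no scheme defaults to a file job store.
assert Toil.parseLocator("/tmp/my-jobstore") == ("file", "/tmp/my-jobstore")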
def checkOptions(parser, options):
    # This is not a full Toil port. Files will still be accessed
    # directly from disk
    options.halfile = os.path.abspath(options.halfile)
    options.outputDirectory = os.path.abspath(options.outputDirectory)
    jobStoreType, locator = Toil.parseLocator(options.jobStore)
    if jobStoreType != "file":
        raise RuntimeError("only local jobStores are supported")
    if not os.path.exists(options.halfile):
        raise RuntimeError("Input hal file %s does not exist.\n" % options.halfile)
    if not os.path.exists(options.outputDirectory):
        system("mkdir -p %s" % options.outputDirectory)
    elif not os.path.isdir(options.outputDirectory):
        raise RuntimeError("Output directory specified (%s) is not a directory\n"
                           % options.outputDirectory)
    options.snpwidth = None
    checkHubOptions(parser, options)
    checkBedOptions(parser, options)
    checkWigOptions(parser, options)
    checkRmskOptions(parser, options)
    checkConservationOptions(parser, options)
def _getResultsFileName(self, toilPath):
    """
    Get a path for the batch systems to store results. GridEngine, slurm,
    and LSF currently use this and only work if the locator is file.
    """
    # Use the parser to extract the path and type
    locator, filePath = Toil.parseLocator(toilPath)
    assert locator == "file"
    return os.path.join(filePath, "results.txt")
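# Hypothetical usage sketch of the helper above: for a file job store at
# /tmp/store (an assumed path), the results file lands inside that directory.
#
#     self._getResultsFileName("file:/tmp/store")  # -> "/tmp/store/results.txt"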
def setupBinaries(options):
    """Ensure that Cactus's C/C++ components are ready to run, and set up the environment."""
    if options.latest:
        os.environ["CACTUS_USE_LATEST"] = "1"
    if options.binariesMode is not None:
        # Mode is specified on command line
        mode = options.binariesMode
    else:
        # Might be specified through the environment, or not, in which
        # case the default is to use Docker.
        mode = os.environ.get("CACTUS_BINARIES_MODE", "docker")
    os.environ["CACTUS_BINARIES_MODE"] = mode

    if mode == "docker":
        # Verify Docker exists on the target system
        from distutils.spawn import find_executable
        if find_executable('docker') is None:
            raise RuntimeError("The `docker` executable wasn't found on the "
                               "system. Please install Docker if possible, or "
                               "use --binariesMode local and add cactus's bin "
                               "directory to your PATH.")
    # If running without Docker, verify that we can find the Cactus executables
    elif mode == "local":
        from distutils.spawn import find_executable
        if find_executable('cactus_caf') is None:
            raise RuntimeError("Cactus isn't using Docker, but it can't find "
                               "the Cactus binaries. Please add Cactus's bin "
                               "directory to your PATH (and run `make` in the "
                               "Cactus directory if you haven't already).")
        if find_executable('ktserver') is None:
            raise RuntimeError("Cactus isn't using Docker, but it can't find "
                               "`ktserver`, the KyotoTycoon database server. "
                               "Please install KyotoTycoon "
                               "(https://github.com/alticelabs/kyoto) "
                               "and add the binary to your PATH, or use the "
                               "Docker mode.")
    else:
        assert mode == "singularity"
        jobStoreType, locator = Toil.parseLocator(options.jobStore)
        if jobStoreType != "file":
            raise RuntimeError("Singularity mode is only supported when using the FileJobStore.")
        if options.containerImage:
            imgPath = os.path.abspath(options.containerImage)
            os.environ["CACTUS_USE_LOCAL_SINGULARITY_IMG"] = "1"
        else:
            # When SINGULARITY_CACHEDIR is set, singularity will refuse to store images in the current directory
            if 'SINGULARITY_CACHEDIR' in os.environ:
                imgPath = os.path.join(os.environ['SINGULARITY_CACHEDIR'], "cactus.img")
            else:
                imgPath = os.path.join(os.path.abspath(locator), "cactus.img")
        os.environ["CACTUS_SINGULARITY_IMG"] = imgPath
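# A minimal sketch, assuming hypothetical argparse-style option values, of
# driving setupBinaries() into singularity mode with a user-provided image;
# afterwards the Cactus wrappers read CACTUS_SINGULARITY_IMG to locate it.
from argparse import Namespace

opts = Namespace(latest=False, binariesMode="singularity",
                 jobStore="file:/tmp/js", containerImage="cactus.img")
setupBinaries(opts)
assert os.environ["CACTUS_BINARIES_MODE"] == "singularity"
assert os.environ["CACTUS_USE_LOCAL_SINGULARITY_IMG"] == "1"
print(os.environ["CACTUS_SINGULARITY_IMG"])  # absolute path to cactus.img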
def __init__(self, config, maxCores, maxMemory, maxDisk):
    super(ParasolBatchSystem, self).__init__(config, maxCores, maxMemory, maxDisk)
    if maxMemory != sys.maxsize:
        logger.warning('The Parasol batch system does not support maxMemory.')
    # Keep the name of the results file for the pstat2 command.
    command = config.parasolCommand
    if os.path.sep not in command:
        try:
            command = which(command)
        except StopIteration:
            raise RuntimeError("Can't find %s on PATH." % command)
    logger.debug('Using Parasol at %s', command)
    self.parasolCommand = command
    jobStoreType, path = Toil.parseLocator(config.jobStore)
    if jobStoreType != 'file':
        raise RuntimeError("The parasol batch system doesn't currently work with any "
                           "jobStore type except file jobStores.")
    self.parasolResultsDir = tempfile.mkdtemp(dir=os.path.abspath(path))
    logger.debug("Using parasol results dir: %s", self.parasolResultsDir)
    # In Parasol, each results file corresponds to a separate batch, and all jobs in a batch
    # have the same cpu and memory requirements. The keys to this dictionary are the (cpu,
    # memory) tuples for each batch. A new batch is created whenever a job has a new unique
    # combination of cpu and memory requirements.
    self.resultsFiles = dict()
    self.maxBatches = config.parasolMaxBatches
    # Allows the worker process to send back the IDs of jobs that have finished, so the batch
    # system can decrease its used cpus counter
    self.cpuUsageQueue = Queue()
    # Also stores finished job IDs, but is read by getUpdatedJobIDs().
    self.updatedJobsQueue = Queue()
    # Use this to stop the worker when shutting down
    self.running = True
    self.worker = Thread(target=self.updatedJobWorker, args=())
    self.worker.start()
    self.usedCpus = 0
    self.jobIDsToCpu = {}
    # Set of jobs that have been issued but aren't known to have finished or been killed yet.
    # Jobs that end by themselves are removed in getUpdatedJob, and jobs that are killed are
    # removed in killBatchJobs.
    self.runningJobs = set()
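# Illustrative shape (hypothetical values and file names) of the per-batch
# bookkeeping set up above: one Parasol results file per unique
# (cores, memory) requirement pair.
#
#     self.resultsFiles = {
#         (1, 2147483648): "/abs/jobstore/tmpXXXX/results0.txt",
#         (4, 8589934592): "/abs/jobstore/tmpXXXX/results1.txt",
#     }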
def __init__(self, config: Config, maxCores: float, maxMemory: int, maxDisk: int) -> None:
    super().__init__(config, maxCores, maxMemory, maxDisk)
    # Connect to TES, using Funnel-compatible environment variables to fill
    # in credentials if not specified.
    self.tes = tes.HTTPClient(config.tes_endpoint,
                              user=config.tes_user,
                              password=config.tes_password,
                              token=config.tes_bearer_token)

    # Get service info from the TES server and pull out supported storages.
    # We need this so we can tell if the server is likely to be able to
    # mount any of our local files. These are URL bases that the server
    # supports.
    server_info = self.tes.get_service_info()
    logger.debug("Detected TES server info: %s", server_info)
    self.server_storages = server_info.storage or []

    # Define directories to mount for each task, as py-tes Input objects
    self.mounts: List[tes.Input] = []

    if config.jobStore:
        job_store_type, job_store_path = Toil.parseLocator(config.jobStore)
        if job_store_type == 'file':
            # If we have a file job store, we want to mount it at the same path, if we can
            self._mount_local_path_if_possible(job_store_path, job_store_path)

    # If we have AWS credentials, we want to mount them in our home directory if we can.
    aws_credentials_path = os.path.join(os.path.expanduser("~"), '.aws')
    if os.path.isdir(aws_credentials_path):
        self._mount_local_path_if_possible(aws_credentials_path, '/root/.aws')

    # We assign job names based on a numerical job ID. This functionality
    # is managed by the BatchSystemLocalSupport.

    # Here is where we will store the user script resource object if we get one.
    self.user_script: Optional[Resource] = None

    # Get the image to deploy from Toil's configuration
    self.docker_image = applianceSelf()

    # We need a way to map between our batch system ID numbers, and TES task IDs from the server.
    self.bs_id_to_tes_id: Dict[int, str] = {}
    self.tes_id_to_bs_id: Dict[str, int] = {}
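# A hedged sketch of the kind of py-tes Input object that
# _mount_local_path_if_possible presumably appends to self.mounts; the URL
# and container path here are illustrative assumptions.
import tes

mount = tes.Input(url="file:///data/jobstore",
                  path="/data/jobstore",
                  type="DIRECTORY")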
def sort_options(self) -> List[str]:
    """
    Sort the command line arguments into the order recognized by the
    workflow execution engine.
    """
    options = []

    # First, we pass the default engine parameters
    options.extend(self.engine_options)

    # Then, we pass the user options specific for this workflow run.
    # These should override the defaults.
    for key, value in self.request.get("workflow_engine_parameters", {}).items():
        if value is None:  # flags
            options.append(key)
        else:
            options.append(f"{key}={value}")

    # Determine the job store and set a new default if the user did not set one.
    cloud = False
    # Iterate over a copy so that removing options doesn't skip entries.
    for option in list(options):
        if option.startswith("--jobStore="):
            self.job_store = option[len("--jobStore="):]
            options.remove(option)
        if option.startswith(("--outdir=", "-o=")):
            options.remove(option)

    job_store_type, _ = Toil.parseLocator(self.job_store)
    if job_store_type in ("aws", "google", "azure"):
        cloud = True
    if self.wf_type in ("cwl", "wdl"):
        if not cloud:
            options.append("--outdir=" + self.out_dir)
        options.append("--jobStore=" + self.job_store)
    else:
        # TODO: find a way to communicate the out_dir to the Toil workflow.
        # Append the positional jobStore argument at the end for Toil workflows.
        options.append(self.job_store)

    return options
def importSingularityImage(options):
    """Import the Singularity image from Docker if using Singularity."""
    mode = os.environ.get("CACTUS_BINARIES_MODE", "docker")
    localImage = os.environ.get("CACTUS_USE_LOCAL_SINGULARITY_IMG", "0")
    if mode == "singularity" and Toil.parseLocator(options.jobStore)[0] == "file":
        imgPath = os.environ["CACTUS_SINGULARITY_IMG"]
        # If not using a local image, pull the docker image
        if localImage == "0":
            # Singularity will complain if the image file already exists. Remove it.
            try:
                os.remove(imgPath)
            except OSError:
                # File doesn't exist
                pass
            # Singularity 2.4 broke the functionality that let --name
            # point to a path instead of a name in the CWD. So we change
            # to the proper directory manually, then change back after the
            # image is pulled.
            # NOTE: singularity writes images in the current directory only
            # when SINGULARITY_CACHEDIR is not set
            oldCWD = os.getcwd()
            os.chdir(os.path.dirname(imgPath))
            # --size is deprecated starting in 2.4, but is needed for 2.3 support. Keeping it in for now.
            try:
                check_call(["singularity", "pull", "--size", "2000", "--name",
                            os.path.basename(imgPath),
                            "docker://" + getDockerImage()])
            except CalledProcessError:
                # Call failed; try without --size, required for singularity 3+
                check_call(["singularity", "pull", "--name",
                            os.path.basename(imgPath),
                            "docker://" + getDockerImage()])
            os.chdir(oldCWD)
        else:
            logger.info("Using pre-built singularity image: '{}'".format(imgPath))
def main() -> None:
    parser = parser_with_common_options()
    options = parser.parse_args()
    set_logging_from_options(options)
    config = Config()
    config.setOptions(options)
    job_store_type, _ = Toil.parseLocator(config.jobStore)

    if job_store_type != 'file':
        # Remote (aws/google) jobstore; use the old (broken?) method
        job_store = Toil.resumeJobStore(config.jobStore)
        logger.info("Starting routine to kill running jobs in the toil workflow: %s",
                    config.jobStore)
        # TODO: This behaviour is now broken: https://github.com/DataBiosphere/toil/commit/a3d65fc8925712221e4cda116d1825d4a1e963a1
        # There's no guarantee that the batch system in use can enumerate
        # running jobs belonging to the job store we've attached to. And
        # moreover we don't even bother trying to kill the leader at its
        # recorded PID, even if it is a local process.
        # Should automatically kill existing jobs, so we're good.
        batch_system = Toil.createBatchSystem(job_store.config)
        # Just in case we do it again.
        for job_id in batch_system.getIssuedBatchJobIDs():
            batch_system.killBatchJobs([job_id])
        logger.info("All jobs SHOULD have been killed")
    else:
        # Otherwise, kill the PID recorded in the job store.
        # TODO: We assume this is a local PID.
        job_store = Toil.resumeJobStore(config.jobStore)
        assert isinstance(job_store, FileJobStore), "Need a FileJobStore which has a sharedFilesDir"
        pid_log = os.path.join(job_store.sharedFilesDir, 'pid.log')
        with open(pid_log) as f:
            pid_to_kill = f.read().strip()
        try:
            os.kill(int(pid_to_kill), signal.SIGTERM)
            logger.info("Toil process %s successfully terminated." % str(pid_to_kill))
        except OSError:
            logger.error("Toil process %s could not be terminated." % str(pid_to_kill))
            raise
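# In the file-job-store branch above, pid.log is a shared file the leader
# writes at startup; it holds the leader's PID as plain text. A hedged,
# hypothetical example of its content:
#
#     41523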
def sort_options(
    self, workflow_engine_parameters: Optional[Dict[str, Optional[str]]] = None
) -> List[str]:
    """
    Sort the command line arguments into the order recognized by the
    workflow execution engine.

    :param workflow_engine_parameters: User-specified parameters for this
        particular workflow. Keys are command-line options, and values are
        option arguments, or None for options that are flags.
    """
    options = []

    # First, we pass the default engine parameters
    options.extend(self.engine_options)

    if workflow_engine_parameters:
        # Then, we pass the user options specific for this workflow run.
        # These should override the defaults.
        for key, value in workflow_engine_parameters.items():
            if value is None:  # flags
                options.append(key)
            else:
                options.append(f"{key}={value}")

    # We want to clean always by default, unless a particular job store or
    # a clean option was passed.
    clean = None

    # Parse options and drop options we may need to override. Iterate over
    # a copy so that removing entries doesn't skip the element after each
    # removal.
    for option in list(options):
        if option.startswith("--jobStore="):
            self.job_store = option[len("--jobStore="):]
            options.remove(option)
        if option.startswith(("--outdir=", "-o=")):
            # We need to generate this one ourselves.
            options.remove(option)
        if option.startswith("--clean="):
            clean = option[len("--clean="):]

    cloud = False
    job_store_type, _ = Toil.parseLocator(self.job_store)
    if job_store_type in ("aws", "google", "azure"):
        cloud = True

    if self.job_store == self.default_job_store and clean is None:
        # User didn't specify a clean option, and we're on a default,
        # randomly generated job store, so we should clean it up even if we
        # crash.
        options.append("--clean=always")

    if self.wf_type in ("cwl", "wdl"):
        if not cloud:
            options.append("--outdir=" + self.out_dir)
        options.append("--jobStore=" + self.job_store)
    else:
        # TODO: find a way to communicate the out_dir to the Toil workflow.
        # Append the positional jobStore argument at the end for Toil workflows.
        options.append(self.job_store)

    return options
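# A minimal sketch (hypothetical runner state and values) of what the newer
# sort_options() above might return for a CWL run on a local, default,
# randomly generated file job store:
#
#     runner.sort_options({"--logLevel": "DEBUG", "--restart": None})
#     # -> ["--logLevel=DEBUG", "--restart", "--clean=always",
#     #     "--outdir=/work/out", "--jobStore=file:/tmp/jobstore"]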