Example #1
    def write_output_files(self) -> None:
        """
        Fetch all the files that this workflow generated and output information
        about them to `outputs.json`.
        """
        output_obj = {}
        job_store_type, _ = Toil.parseLocator(self.job_store)

        # For CWL workflows, the stdout should be a JSON object containing the outputs
        if self.wf_type == "cwl":
            try:
                with open(os.path.join(self.work_dir, "stdout")) as f:
                    output_obj = json.load(f)
            except Exception as e:
                logger.warning("Failed to read outputs object from stdout:",
                               exc_info=e)

        elif job_store_type == "file":
            for file in os.listdir(self.out_dir):
                location = os.path.join(self.out_dir, file)
                output_obj[file] = {
                    "location": location,
                    "size": os.stat(location).st_size,
                    "class": get_file_class(location),
                }

        # TODO: fetch files from other job stores

        self.write("outputs.json", json.dumps(output_obj))
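For context, `Toil.parseLocator` splits a job store locator of the form `<type>:<rest>` into those two pieces, which is what every example here keys off. A minimal sketch of the expected behavior (the defaulting rule for bare paths is an assumption):

from toil.common import Toil

job_store_type, rest = Toil.parseLocator("file:/tmp/my-jobstore")
# job_store_type == "file", rest == "/tmp/my-jobstore"

job_store_type, rest = Toil.parseLocator("aws:us-west-2:my-jobstore")
# job_store_type == "aws", rest == "us-west-2:my-jobstore"

# A bare path with no "<type>:" prefix is assumed here to be treated
# as a file job store.
job_store_type, rest = Toil.parseLocator("/tmp/my-jobstore")
# job_store_type == "file", rest == "/tmp/my-jobstore"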
Example #2
def checkOptions(parser, options):

    # This is not a full Toil port. Files will still be accessed
    # directly from disk.
    options.halfile = os.path.abspath(options.halfile)
    options.outputDirectory = os.path.abspath(options.outputDirectory)

    jobStoreType, locator = Toil.parseLocator(options.jobStore)
    if jobStoreType != "file":
        raise RuntimeError("only local jobStores are supported")

    if not os.path.exists(options.halfile):
        raise RuntimeError("Input hal file %s does not exist.\n" %
                           options.halfile)
    if not os.path.exists(options.outputDirectory):
        system("mkdir -p %s" % options.outputDirectory)
    elif not os.path.isdir(options.outputDirectory):
        raise RuntimeError(
            "Output directory specified (%s) is not a directory\n" %
            options.outputDirectory)

    options.snpwidth = None
    checkHubOptions(parser, options)
    checkBedOptions(parser, options)
    checkWigOptions(parser, options)
    checkRmskOptions(parser, options)
    checkConservationOptions(parser, options)
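Incidentally, the `system("mkdir -p %s" ...)` call above breaks on paths containing spaces or shell metacharacters; the standard library covers the same ground safely, with no shell involved:

import os

# Equivalent to `mkdir -p`: creates intermediate directories and
# does nothing if the directory already exists.
os.makedirs(options.outputDirectory, exist_ok=True)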
Example #3
    def _getResultsFileName(self, toilPath):
        """
        Get a path for the batch systems to store results. GridEngine, Slurm,
        and LSF currently use this and only work if the locator is a file.
        """
        # Use the locator parser to extract the job store type and path
        jobStoreType, filePath = Toil.parseLocator(toilPath)
        assert jobStoreType == "file"
        return os.path.join(filePath, "results.txt")
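Because `assert` statements are stripped when Python runs with `-O`, a defensive variant of this method might raise an explicit error instead; a minimal sketch (the message wording is ours):

    def _getResultsFileName(self, toilPath):
        """Like the above, but fails loudly even under python -O."""
        jobStoreType, filePath = Toil.parseLocator(toilPath)
        if jobStoreType != "file":
            raise RuntimeError(
                "This batch system requires a file job store, but the "
                "locator type is %r." % jobStoreType)
        return os.path.join(filePath, "results.txt")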
Example #5
def setupBinaries(options):
    """Ensure that Cactus's C/C++ components are ready to run, and set up the environment."""
    if options.latest:
        os.environ["CACTUS_USE_LATEST"] = "1"
    if options.binariesMode is not None:
        # Mode is specified on command line
        mode = options.binariesMode
    else:
        # Might be specified through the environment, or not, in which
        # case the default is to use Docker.
        mode = os.environ.get("CACTUS_BINARIES_MODE", "docker")
    os.environ["CACTUS_BINARIES_MODE"] = mode
    if mode == "docker":
        # Verify Docker exists on the target system
        from distutils.spawn import find_executable
        if find_executable('docker') is None:
            raise RuntimeError("The `docker` executable wasn't found on the "
                               "system. Please install Docker if possible, or "
                               "use --binariesMode local and add cactus's bin "
                               "directory to your PATH.")
    # If running without Docker, verify that we can find the Cactus executables
    elif mode == "local":
        from distutils.spawn import find_executable
        if find_executable('cactus_caf') is None:
            raise RuntimeError("Cactus isn't using Docker, but it can't find "
                               "the Cactus binaries. Please add Cactus's bin "
                               "directory to your PATH (and run `make` in the "
                               "Cactus directory if you haven't already).")
        if find_executable('ktserver') is None:
            raise RuntimeError("Cactus isn't using Docker, but it can't find "
                               "`ktserver`, the KyotoTycoon database server. "
                               "Please install KyotoTycoon "
                               "(https://github.com/alticelabs/kyoto) "
                               "and add the binary to your PATH, or use the "
                               "Docker mode.")
    else:
        assert mode == "singularity"
        jobStoreType, locator = Toil.parseLocator(options.jobStore)
        if jobStoreType != "file":
            raise RuntimeError(
                "Singularity mode is only supported when using the FileJobStore."
            )
        if options.containerImage:
            imgPath = os.path.abspath(options.containerImage)
            os.environ["CACTUS_USE_LOCAL_SINGULARITY_IMG"] = "1"
        else:
            # When SINGULARITY_CACHEDIR is set, singularity will refuse to store images in the current directory
            if 'SINGULARITY_CACHEDIR' in os.environ:
                imgPath = os.path.join(os.environ['SINGULARITY_CACHEDIR'],
                                       "cactus.img")
            else:
                imgPath = os.path.join(os.path.abspath(locator), "cactus.img")
        os.environ["CACTUS_SINGULARITY_IMG"] = imgPath
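Note that `distutils` (and with it `distutils.spawn.find_executable`) was removed in Python 3.12; on modern interpreters the same checks can be written with `shutil.which`, a drop-in sketch:

import shutil

# shutil.which returns None when the executable is not on PATH,
# just like find_executable did.
if shutil.which("docker") is None:
    raise RuntimeError("The `docker` executable wasn't found on the system.")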
Example #7
    def __init__(self, config, maxCores, maxMemory, maxDisk):
        super(ParasolBatchSystem, self).__init__(config, maxCores, maxMemory,
                                                 maxDisk)
        if maxMemory != sys.maxsize:
            logger.warning(
                'The Parasol batch system does not support maxMemory.')
        # Keep the name of the results file for the pstat2 command.
        command = config.parasolCommand
        if os.path.sep not in command:
            try:
                command = which(command)
            except StopIteration:
                raise RuntimeError("Can't find %s on PATH." % command)
        logger.debug('Using Parasol at %s', command)
        self.parasolCommand = command
        jobStoreType, path = Toil.parseLocator(config.jobStore)
        if jobStoreType != 'file':
            raise RuntimeError(
                "The parasol batch system doesn't currently work with any "
                "jobStore type except file jobStores.")
        self.parasolResultsDir = tempfile.mkdtemp(dir=os.path.abspath(path))
        logger.debug("Using parasol results dir: %s", self.parasolResultsDir)

        # In Parasol, each results file corresponds to a separate batch, and all jobs in a batch
        # have the same cpu and memory requirements. The keys to this dictionary are the (cpu,
        # memory) tuples for each batch. A new batch is created whenever a job has a new unique
        # combination of cpu and memory requirements.
        self.resultsFiles = dict()
        self.maxBatches = config.parasolMaxBatches

        # Allows the worker process to send back the IDs of jobs that have finished, so the batch
        #  system can decrease its used cpus counter
        self.cpuUsageQueue = Queue()

        # Also stores finished job IDs, but is read by getUpdatedJobIDs().
        self.updatedJobsQueue = Queue()

        # Use this to stop the worker when shutting down
        self.running = True

        self.worker = Thread(target=self.updatedJobWorker, args=())
        self.worker.start()
        self.usedCpus = 0
        self.jobIDsToCpu = {}

        # Set of jobs that have been issued but aren't known to have finished or been killed yet.
        #  Jobs that end by themselves are removed in getUpdatedJob, and jobs that are killed are
        #  removed in killBatchJobs.
        self.runningJobs = set()
Example #8
File: tes.py Project: tmooney/toil
    def __init__(self, config: Config, maxCores: float, maxMemory: int,
                 maxDisk: int) -> None:
        super().__init__(config, maxCores, maxMemory, maxDisk)
        # Connect to TES, using Funnel-compatible environment variables to fill in credentials if not specified.
        self.tes = tes.HTTPClient(config.tes_endpoint,
                                  user=config.tes_user,
                                  password=config.tes_password,
                                  token=config.tes_bearer_token)

        # Get service info from the TES server and pull out supported storages.
        # We need this so we can tell if the server is likely to be able to
        # mount any of our local files. These are URL bases that the server
        # supports.
        server_info = self.tes.get_service_info()
        logger.debug("Detected TES server info: %s", server_info)
        self.server_storages = server_info.storage or []

        # Define directories to mount for each task, as py-tes Input objects
        self.mounts: List[tes.Input] = []

        if config.jobStore:
            job_store_type, job_store_path = Toil.parseLocator(config.jobStore)
            if job_store_type == 'file':
                # If we have a file job store, we want to mount it at the same path, if we can
                self._mount_local_path_if_possible(job_store_path,
                                                   job_store_path)

        # If we have AWS credentials, we want to mount them in our home directory if we can.
        aws_credentials_path = os.path.join(os.path.expanduser("~"), '.aws')
        if os.path.isdir(aws_credentials_path):
            self._mount_local_path_if_possible(aws_credentials_path,
                                               '/root/.aws')

        # We assign job names based on a numerical job ID. This functionality
        # is managed by the BatchSystemLocalSupport.

        # Here is where we will store the user script resource object if we get one.
        self.user_script: Optional[Resource] = None

        # Get the image to deploy from Toil's configuration.
        self.docker_image = applianceSelf()

        # We need a way to map between our batch system ID numbers, and TES task IDs from the server.
        self.bs_id_to_tes_id: Dict[int, str] = {}
        self.tes_id_to_bs_id: Dict[str, int] = {}
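`_mount_local_path_if_possible` is not shown in this snippet; a plausible sketch, assuming it consults the server's advertised storage bases before registering a py-tes `Input` mount (the matching rule here is our guess, not the project's actual logic):

    def _mount_local_path_if_possible(self, local_path: str, container_path: str) -> None:
        # Hypothetical: only mount if the TES server advertises that it
        # can reach local files (e.g. a file:// storage base).
        if any(base.startswith("file:") for base in self.server_storages):
            self.mounts.append(tes.Input(url="file://" + os.path.abspath(local_path),
                                         path=container_path,
                                         type="DIRECTORY"))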
Example #9
    def sort_options(self) -> List[str]:
        """
        Sort the command line arguments in the order that can be recognized by
        the workflow execution engine.
        """
        options = []

        # First, we pass the default engine parameters
        options.extend(self.engine_options)

        # Then, we pass the user options specific to this workflow run.
        # These should override the defaults.
        for key, value in self.request.get("workflow_engine_parameters",
                                           {}).items():
            if value is None:  # flags
                options.append(key)
            else:
                options.append(f"{key}={value}")

        # Determine the job store and set a new default if the user did not set one.
        cloud = False
        # Iterate over a copy, since we remove options from the list as we go.
        for option in list(options):
            if option.startswith("--jobStore="):
                self.job_store = option[len("--jobStore="):]
                options.remove(option)
            if option.startswith(("--outdir=", "-o=")):
                options.remove(option)

        job_store_type, _ = Toil.parseLocator(self.job_store)
        if job_store_type in ("aws", "google", "azure"):
            cloud = True

        if self.wf_type in ("cwl", "wdl"):
            if not cloud:
                options.append("--outdir=" + self.out_dir)
            options.append("--jobStore=" + self.job_store)
        else:
            # TODO: find a way to communicate the out_dir to the Toil workflow.

            # append the positional jobStore argument at the end for Toil workflows
            options.append(self.job_store)

        return options
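The loop above iterates over a copy of `options` because removing items from a list while iterating over it silently skips the element after each removal; a standalone demonstration of the pitfall:

items = ["--jobStore=file:store", "--outdir=/tmp/out", "--retryCount=2"]
for item in items:          # iterating the same list we mutate
    if item.startswith("--"):
        items.remove(item)  # shifts the list; the next element is skipped
print(items)                # ['--outdir=/tmp/out'] unexpectedly survives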
Example #10
def importSingularityImage(options):
    """Import the Singularity image from Docker if using Singularity."""
    mode = os.environ.get("CACTUS_BINARIES_MODE", "docker")
    localImage = os.environ.get("CACTUS_USE_LOCAL_SINGULARITY_IMG", "0")
    if mode == "singularity" and Toil.parseLocator(
            options.jobStore)[0] == "file":
        imgPath = os.environ["CACTUS_SINGULARITY_IMG"]
        # If not using local image, pull the docker image
        if localImage == "0":
            # Singularity will complain if the image file already exists. Remove it.
            try:
                os.remove(imgPath)
            except OSError:
                # File doesn't exist
                pass
            # Singularity 2.4 broke the functionality that let --name
            # point to a path instead of a name in the CWD. So we change
            # to the proper directory manually, then change back after the
            # image is pulled.
            # NOTE: singularity writes images in the current directory only
            #       when SINGULARITY_CACHEDIR is not set
            oldCWD = os.getcwd()
            os.chdir(os.path.dirname(imgPath))
            # --size is deprecated starting in 2.4, but is needed for 2.3 support. Keeping it in for now.
            try:
                check_call([
                    "singularity", "pull", "--size", "2000", "--name",
                    os.path.basename(imgPath), "docker://" + getDockerImage()
                ])
            except CalledProcessError:
                # Call failed, try without --size, required for singularity 3+
                check_call([
                    "singularity", "pull", "--name",
                    os.path.basename(imgPath), "docker://" + getDockerImage()
                ])
            os.chdir(oldCWD)
        else:
            logger.info(
                "Using pre-built singularity image: '{}'".format(imgPath))
Example #11
def main() -> None:
    parser = parser_with_common_options()
    options = parser.parse_args()
    set_logging_from_options(options)
    config = Config()
    config.setOptions(options)

    job_store_type, _ = Toil.parseLocator(config.jobStore)

    if job_store_type != 'file':
        # Remote (aws/google) jobstore; use the old (broken?) method
        job_store = Toil.resumeJobStore(config.jobStore)
        logger.info("Starting routine to kill running jobs in the toil workflow: %s", config.jobStore)
        # TODO: This behaviour is now broken: https://github.com/DataBiosphere/toil/commit/a3d65fc8925712221e4cda116d1825d4a1e963a1
        # There's no guarantee that the batch system in use can enumerate
        # running jobs belonging to the job store we've attached to. And
        # moreover we don't even bother trying to kill the leader at its
        # recorded PID, even if it is a local process.
        batch_system = Toil.createBatchSystem(job_store.config)  # Should automatically kill existing jobs, so we're good.
        for job_id in batch_system.getIssuedBatchJobIDs():  # Just in case we do it again.
            batch_system.killBatchJobs([job_id])
        logger.info("All jobs SHOULD have been killed")
    else:
        # otherwise, kill the pid recorded in the jobstore.
        # TODO: We assume this is a local PID.
        job_store = Toil.resumeJobStore(config.jobStore)
        assert isinstance(job_store, FileJobStore), "Need a FileJobStore which has a sharedFilesDir"
        pid_log = os.path.join(job_store.sharedFilesDir, 'pid.log')
        with open(pid_log) as f:
            pid_to_kill = f.read().strip()
        try:
            os.kill(int(pid_to_kill), signal.SIGTERM)
            logger.info("Toil process %s successfully terminated.", pid_to_kill)
        except OSError:
            logger.error("Toil process %s could not be terminated.", pid_to_kill)
            raise
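A related trick when working with recorded PIDs: signal 0 probes for process existence without sending anything, which can distinguish "already gone" from "could not be killed"; a minimal sketch (the helper name is ours):

import os

def pid_is_alive(pid: int) -> bool:
    """Hypothetical helper: True if a process with this PID exists."""
    try:
        os.kill(pid, 0)  # signal 0 performs an existence/permission check only
    except ProcessLookupError:
        return False
    except PermissionError:
        return True  # the process exists but belongs to another user
    return True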
Example #12
    def sort_options(
        self,
        workflow_engine_parameters: Optional[Dict[str, Optional[str]]] = None
    ) -> List[str]:
        """
        Sort the command line arguments in the order that can be recognized by
        the workflow execution engine.

        :param workflow_engine_parameters: User-specified parameters for this
        particular workflow. Keys are command-line options, and values are
        option arguments, or None for options that are flags.
        """
        options = []

        # First, we pass the default engine parameters
        options.extend(self.engine_options)

        if workflow_engine_parameters:
            # Then, we pass the user options specific to this workflow run.
            # These should override the defaults.
            for key, value in workflow_engine_parameters.items():
                if value is None:  # flags
                    options.append(key)
                else:
                    options.append(f"{key}={value}")

        # We want to clean always by default, unless a particular job store or
        # a clean option was passed.
        clean = None

        # Parse the options and drop any we need to override ourselves.
        # Iterate over a copy, since we remove options from the list as we go.
        for option in list(options):
            if option.startswith("--jobStore="):
                self.job_store = option[len("--jobStore="):]
                options.remove(option)
            if option.startswith(("--outdir=", "-o=")):
                # We need to generate this one ourselves.
                options.remove(option)
            if option.startswith("--clean="):
                clean = option[len("--clean="):]

        cloud = False
        job_store_type, _ = Toil.parseLocator(self.job_store)
        if job_store_type in ("aws", "google", "azure"):
            cloud = True

        if self.job_store == self.default_job_store and clean is None:
            # User didn't specify a clean option, and we're on a default,
            # randomly generated job store, so we should clean it up even if we
            # crash.
            options.append("--clean=always")

        if self.wf_type in ("cwl", "wdl"):
            if not cloud:
                options.append("--outdir=" + self.out_dir)
            options.append("--jobStore=" + self.job_store)
        else:
            # TODO: find a way to communicate the out_dir to the Toil workflow.

            # append the positional jobStore argument at the end for Toil workflows
            options.append(self.job_store)

        return options
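For illustration, a hypothetical trace of this method (all attribute values below are invented) shows where the forced cleanup lands for a default, locally generated job store:

# Assume a runner with:
#   engine_options    = ["--logLevel=INFO"]
#   default_job_store = job_store = "file:/tmp/wes-run-1"
#   wf_type = "cwl", out_dir = "/tmp/wes-run-1/outputs"
#
# sort_options(None) would then return:
#   ["--logLevel=INFO",
#    "--clean=always",
#    "--outdir=/tmp/wes-run-1/outputs",
#    "--jobStore=file:/tmp/wes-run-1"]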