def _getFileDirectory(self, jobStoreID=None, cleanup=False):
    """
    Get a new empty directory path for a file to be stored at.

    :param str jobStoreID: If the jobStoreID is not None, the file will
           be associated with the job with that ID.
    :param bool cleanup: If cleanup is also True, this directory will be
           cleaned up when the job is deleted.
    :rtype: str
    :return: the absolute path to a directory to put the file in.
    """
    # Use an identity test rather than `!= None` (PEP 8); None is a singleton.
    if jobStoreID is not None:
        # Make a temporary file within the job's files directory

        # Make sure the job is legit
        self._checkJobStoreId(jobStoreID)

        # Find where all its created files should live, depending on if
        # they need to go away when the job is deleted or not.
        jobFilesDir = (self._getJobFilesDir(jobStoreID) if not cleanup
                       else self._getJobFilesCleanupDir(jobStoreID))

        # Lazily create the parent directory.
        # We don't want our tree filled with confusingly empty directories.
        mkdir_p(jobFilesDir)

        # Then make a temp directory inside it
        return tempfile.mkdtemp(prefix='file-', dir=jobFilesDir)
    else:
        # Make a temporary file within the non-job-associated files hierarchy
        return tempfile.mkdtemp(prefix='file-', dir=self._getArbitraryFilesDir())
def setUpClass(cls):
    """
    Set up class-level temporary-directory bookkeeping.

    Reads TOIL_TEST_TEMP from the environment. A relative value is resolved
    against the project root and created on disk. The result (which may be
    None when the variable is unset) is remembered as ``cls._tempBaseDir``.
    """
    super(ToilTest, cls).setUpClass()
    cls._tempDirs = []
    baseDir = os.environ.get('TOIL_TEST_TEMP')
    if baseDir is not None and not os.path.isabs(baseDir):
        baseDir = os.path.abspath(os.path.join(cls._projectRootPath(), baseDir))
        # NOTE(review): only a *relative* TOIL_TEST_TEMP is created here; an
        # absolute path is assumed to already exist — confirm that is intended.
        mkdir_p(baseDir)
    cls._tempBaseDir = baseDir
def initialize(self, config):
    """
    Create the on-disk layout for a brand-new job store and record config.

    :raises JobStoreExistsException: if the job store directory already exists.
    """
    try:
        os.mkdir(self.jobStoreDir)
    except OSError as e:
        # Anything other than "already exists" is unexpected; propagate it.
        if e.errno != errno.EEXIST:
            raise
        raise JobStoreExistsException(self.jobStoreDir)
    # Create each sub-hierarchy the store needs; they are independent of
    # one another, so a simple loop suffices.
    for subdir in (self.jobsDir,
                   self.statsDir,
                   self.filesDir,
                   self.jobFilesDir,
                   self.sharedFilesDir):
        mkdir_p(subdir)
    self.linkImports = config.linkImports
    super(FileJobStore, self).initialize(config)
def __init__(self, stateDirBase): """ Create a new DeferredFunctionManager, sharing state with other instances in other processes using the given shared state directory. Uses a fixed path under that directory for state files. Creates it if not present. Note that if the current umask lets other people create files in that new directory, we are going to execute their code! The created directory will be left behind, because we never know if another worker will come along later on this node. """ # Work out where state files live self.stateDir = os.path.join(stateDirBase, "deferred") mkdir_p(self.stateDir) # We need to get a state file, locked by us and not somebody scanning for abandoned state files. # So we suffix not-yet-ready ones with .tmp self.stateFD, self.stateFileName = tempfile.mkstemp(dir=self.stateDir, suffix='.tmp') # Lock the state file. The lock will automatically go away if our process does. try: fcntl.lockf(self.stateFD, fcntl.LOCK_EX | fcntl.LOCK_NB) except IOError as e: # Someone else might have locked it even though they should not have. raise RuntimeError( "Could not lock deferred function state file %s: %s" % (self.stateFileName, str(e))) # Rename it to remove the ".tmp" os.rename(self.stateFileName, self.stateFileName[:-4]) self.stateFileName = self.stateFileName[:-4] # Wrap the FD in a Python file object, which we will use to actually use it. # Problem: we can't be readable and writable at the same time. So we need two file objects. self.stateFileOut = os.fdopen(self.stateFD, 'wb') self.stateFileIn = open(self.stateFileName, 'rb') logger.debug("Running for file %s" % self.stateFileName)
def _getDynamicSprayDir(self, root):
    """
    Get a temporary directory somewhere in a dynamically-growing hierarchy
    rooted at the given directory.

    Whenever a level of the hierarchy starts to fill up (reaches
    ``self.fanOut`` visible entries), files and directories are "sprayed"
    across randomly-chosen subdirectories beneath it instead. Because any
    number of callers may run concurrently, the fan-out limit is a soft one:
    once a directory looks full on disk, only subdirectories are added.

    The returned directory exists and may already contain other data. The
    caller must not create entries in it with single-character names drawn
    from ``self.validDirs``.

    :param str root: directory to put the hierarchy under, which fills first.
    :rtype: str
    :return: path to a directory in which to place files/directories.
    """
    current = root
    # The root itself has to exist before we can count its entries.
    mkdir_p(current)

    # Descend one randomly-chosen level at a time until we find a
    # sufficiently empty directory.
    while len(os.listdir(current)) >= self.fanOut:
        current = os.path.join(current, random.choice(self.validDirs))
        mkdir_p(current)

    return current
def _singularity(job,
                 tool,
                 parameters=None,
                 workDir=None,
                 singularityParameters=None,
                 outfile=None,
                 checkOutput=False):
    """
    Run a command in a Singularity container built from the given image spec,
    using a locally managed cache of sandbox images.

    :param toil.Job.job job: The Job instance for the calling function.
    :param str tool: Name of the Docker image to be used (e.g.
           quay.io/ucsc_cgl/samtools).
    :param list[str] parameters: Command line arguments to be passed to the
           tool. If list of lists: list[list[str]], then treat as successive
           commands chained with pipe.
    :param str workDir: Directory to mount into the container via `--bind`.
           Destination convention is /data
    :param list[str] singularityParameters: Parameters to pass to Singularity.
           Default parameters are the mountpoint `--bind work_dir:/data` where
           /data is the destination convention. These defaults are removed if
           singularityParameters is passed, so be sure to pass them if they
           are desired.
    :param file outfile: Pipe output of Singularity call to file handle
    :param bool checkOutput: When True, this function returns singularity's
           output.
    """
    if parameters is None:
        parameters = []
    if workDir is None:
        workDir = os.getcwd()

    # Setup the outgoing subprocess call for singularity
    baseSingularityCall = ['singularity', '-q', 'exec']
    if singularityParameters:
        baseSingularityCall += singularityParameters
    else:
        # Mount workdir as /mnt and work in there.
        # Hope the image actually has a /mnt available.
        # Otherwise this silently doesn't mount.
        # But with -u (user namespaces) we have no luck pointing in-container
        # home at anything other than our real home (like something under /var
        # where Toil puts things).
        # Note that we target Singularity 3+.
        baseSingularityCall += [
            '-u', '-B',
            '{}:{}'.format(os.path.abspath(workDir), '/mnt'),
            '--pwd', '/mnt'
        ]

    # Problem: Multiple Singularity downloads sharing the same cache directory
    # will not work correctly. See https://github.com/sylabs/singularity/issues/3634
    # and https://github.com/sylabs/singularity/issues/4555.
    # As a workaround, we have our own cache which we manage ourselves.
    # NOTE(review): os.environ.get('HOME') may be None in daemon contexts,
    # which would make os.path.join raise — confirm HOME is always set here.
    cache_dir = os.path.join(
        os.environ.get('SINGULARITY_CACHEDIR',
                       os.path.join(os.environ.get('HOME'), '.singularity')),
        'toil')
    mkdir_p(cache_dir)

    # What Singularity url/spec do we want?
    source_image = _convertImageSpec(tool)

    # What name in the cache dir do we want?
    # We cache everything as sandbox directories and not .sif files because, as
    # laid out in https://github.com/sylabs/singularity/issues/4617, there
    # isn't a way to run from a .sif file and have write permissions on system
    # directories in the container, because the .sif build process makes
    # everything owned by root inside the image. Since some toil-vg containers
    # (like the R one) want to touch system files (to install R packages at
    # runtime), we do it this way to act more like Docker.
    #
    # Also, only sandbox directories work with user namespaces, and only user
    # namespaces work inside unprivileged Docker containers like the Toil
    # appliance.
    # Bug fix: sha256 operates on bytes, so the (str) image spec must be
    # encoded before hashing.
    sandbox_dirname = os.path.join(
        cache_dir,
        '{}.sandbox'.format(hashlib.sha256(source_image.encode('utf-8')).hexdigest()))

    if not os.path.exists(sandbox_dirname):
        # We atomically drop the sandbox at that name when we get it

        # Make a temp directory to be the sandbox
        temp_sandbox_dirname = tempfile.mkdtemp(dir=cache_dir)

        # Download with a fresh cache to a sandbox
        download_env = os.environ.copy()
        download_env['SINGULARITY_CACHEDIR'] = job.fileStore.getLocalTempDir()
        subprocess.check_call([
            'singularity', 'build', '-s', '-F', temp_sandbox_dirname,
            source_image
        ], env=download_env)

        # Clean up the Singularity cache since it is single use
        shutil.rmtree(download_env['SINGULARITY_CACHEDIR'])

        try:
            # This may happen repeatedly but it is atomic
            os.rename(temp_sandbox_dirname, sandbox_dirname)
        except FileExistsError:
            # Can't rename a directory over another.
            # NOTE(review): on Linux, renaming over a *non-empty* directory
            # raises OSError(ENOTEMPTY), which is not FileExistsError — confirm
            # whether that case also needs handling here.
            # Make sure someone else has made the directory
            assert os.path.exists(sandbox_dirname)
            # Remove our redundant copy.
            # Bug fix: this previously referenced the undefined name
            # `temp_sandbox_name`, raising NameError instead of cleaning up.
            shutil.rmtree(temp_sandbox_dirname)

        # TODO: we could save some downloading by having one process download
        # and the others wait, but then we would need a real fcntl locking
        # system here.

    # Make subprocess call for singularity run
    # If parameters is list of lists, treat each list as separate command and chain with pipes
    if len(parameters) > 0 and isinstance(parameters[0], list):
        # When piping, all arguments now get merged into a single string to bash -c.
        # We try to support spaces in paths by wrapping them all in quotes first.
        chain_params = [
            ' '.join(p) for p in [map(pipes.quote, q) for q in parameters]
        ]
        # Use bash's set -eo pipefail to detect and abort on a failure in any command in the chain
        call = baseSingularityCall + [
            sandbox_dirname, '/bin/bash', '-c',
            'set -eo pipefail && {}'.format(' | '.join(chain_params))
        ]
    else:
        call = baseSingularityCall + [sandbox_dirname] + parameters
    _logger.info("Calling singularity with " + repr(call))

    params = {}
    if outfile:
        params['stdout'] = outfile
    if checkOutput:
        callMethod = subprocess.check_output
    else:
        callMethod = subprocess.check_call

    out = callMethod(call, **params)

    # After Singularity exits, it is possible that cleanup of the container's
    # temporary files is still in progress (since it also waits for the
    # container to exit). If we return immediately and the Toil job then
    # immediately finishes, we can have a race between Toil's temp
    # cleanup/space tracking code and Singularity's temp cleanup code to delete
    # the same directory tree. Toil doesn't handle this well, and crashes when
    # files it expected to be able to see are missing (at least on some
    # versions). So we introduce a delay here to try and make sure that
    # Singularity wins the race with high probability.
    #
    # See https://github.com/sylabs/singularity/issues/1255
    time.sleep(0.5)

    return out
def _obtain_credentials_from_cache_or_boto3(self):
    """
    Get the cached credentials, or retrieve them from Boto 3 and cache
    them (or wait for another cooperating process to do so) if they are
    missing or not fresh enough.
    """
    cache_path = '~/.cache/aws/cached_temporary_credentials'
    path = os.path.expanduser(cache_path)
    # The .tmp file doubles as an inter-process lock: whoever creates it
    # (O_EXCL) fetches credentials; everyone else waits for it to vanish.
    tmp_path = path + '.tmp'
    while True:
        log.debug('Attempting to read cached credentials from %s.', path)
        try:
            with open(path, 'r') as f:
                content = f.read()
                if content:
                    # Cache format: four newline-separated fields.
                    record = content.split('\n')
                    assert len(record) == 4
                    self._access_key = record[0]
                    self._secret_key = record[1]
                    self._security_token = record[2]
                    self._credential_expiry_time = str_to_datetime(record[3])
                else:
                    # An empty cache file marks permanent (non-expiring)
                    # credentials; fetch them directly each time.
                    log.debug('%s is empty. Credentials are not temporary.', path)
                    self._obtain_credentials_from_boto3()
                    return
        except IOError as e:
            if e.errno == errno.ENOENT:
                log.debug('Cached credentials are missing.')
                dir_path = os.path.dirname(path)
                if not os.path.exists(dir_path):
                    log.debug('Creating parent directory %s', dir_path)
                    # A race would be ok at this point
                    mkdir_p(dir_path)
            else:
                raise
        else:
            if self._credentials_need_refresh():
                log.debug('Cached credentials are expired.')
            else:
                log.debug('Cached credentials exist and are still fresh.')
                return
        # We get here if credentials are missing or expired
        log.debug('Racing to create %s.', tmp_path)
        # Only one process, the winner, will succeed
        try:
            fd = os.open(tmp_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o600)
        except OSError as e:
            if e.errno == errno.EEXIST:
                log.debug('Lost the race to create %s. Waiting on winner to remove it.', tmp_path)
                # Poll until the winner renames or unlinks the temp file,
                # then retry the whole read-or-fetch loop from the top.
                while os.path.exists(tmp_path):
                    time.sleep(0.1)
                log.debug('Winner removed %s. Trying from the top.', tmp_path)
            else:
                raise
        else:
            try:
                log.debug('Won the race to create %s. Requesting credentials from backend.', tmp_path)
                self._obtain_credentials_from_boto3()
            except:
                os.close(fd)
                fd = None
                log.debug('Failed to obtain credentials, removing %s.', tmp_path)
                # This unblocks the losers.
                os.unlink(tmp_path)
                # Bail out. It's too likely to happen repeatedly
                raise
            else:
                if self._credential_expiry_time is None:
                    os.close(fd)
                    fd = None
                    log.debug(
                        'Credentials are not temporary. Leaving %s empty and renaming it to %s.',
                        tmp_path, path)
                    # No need to actually cache permanent credentials,
                    # because we know we aren't getting them from the
                    # metadata server or by assuming a role. Those both
                    # give temporary credentials.
                else:
                    log.debug('Writing credentials to %s.', tmp_path)
                    with os.fdopen(fd, 'w') as fh:
                        # fh now owns the descriptor; clear fd so the
                        # finally block doesn't double-close it.
                        fd = None
                        fh.write('\n'.join([
                            self._access_key, self._secret_key,
                            self._security_token,
                            datetime_to_str(self._credential_expiry_time)
                        ]))
                    log.debug('Wrote credentials to %s. Renaming to %s.', tmp_path, path)
                # Atomic rename publishes the cache and unblocks waiters.
                os.rename(tmp_path, path)
                return
            finally:
                if fd is not None:
                    os.close(fd)
def _populate_keys_from_metadata_server(self):
    """
    Monkey-patched replacement for the original metadata-server credential
    fetch: reads credentials from a shared on-disk cache, or races other
    cooperating processes to populate it from the metadata service.
    """
    global _populate_keys_from_metadata_server_orig
    # NOTE(review): `cache_path` is not defined in this function; it is
    # presumably a module-level constant pointing at the credential cache
    # file — confirm it is in scope at module level.
    path = os.path.expanduser(cache_path)
    # The .tmp file doubles as an inter-process lock: whoever creates it
    # (O_EXCL) fetches credentials; everyone else waits for it to vanish.
    tmp_path = path + '.tmp'
    while True:
        log.debug('Attempting to read cached credentials from %s.', path)
        try:
            with open(path, 'r') as f:
                content = f.read()
                if content:
                    # Cache format: four newline-separated fields.
                    record = content.split('\n')
                    assert len(record) == 4
                    self._access_key = record[0]
                    self._secret_key = record[1]
                    self._security_token = record[2]
                    self._credential_expiry_time = str_to_datetime(record[3])
                else:
                    # An empty cache file marks permanent credentials;
                    # nothing to populate from the metadata server.
                    log.debug('%s is empty. Credentials are not temporary.', path)
                    return
        except IOError as e:
            if e.errno == errno.ENOENT:
                log.debug('Cached credentials are missing.')
                dir_path = os.path.dirname(path)
                if not os.path.exists(dir_path):
                    log.debug('Creating parent directory %s', dir_path)
                    # A race would be ok at this point
                    mkdir_p(dir_path)
            else:
                raise
        else:
            if self._credentials_need_refresh():
                log.debug('Cached credentials are expired.')
            else:
                log.debug('Cached credentials exist and are still fresh.')
                return
        # We get here if credentials are missing or expired
        log.debug('Racing to create %s.', tmp_path)
        # Only one process, the winner, will succeed
        try:
            fd = os.open(tmp_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o600)
        except OSError as e:
            if e.errno == errno.EEXIST:
                log.debug('Lost the race to create %s. Waiting on winner to remove it.', tmp_path)
                # Poll until the winner renames or unlinks the temp file,
                # then retry the whole read-or-fetch loop from the top.
                while os.path.exists(tmp_path):
                    time.sleep(0.1)
                log.debug('Winner removed %s. Trying from the top.', tmp_path)
            else:
                raise
        else:
            try:
                log.debug('Won the race to create %s. Requesting credentials from metadata service.', tmp_path)
                # Delegate to the original, un-patched implementation.
                _populate_keys_from_metadata_server_orig(self)
            except:
                os.close(fd)
                fd = None
                log.debug('Failed to obtain credentials, removing %s.', tmp_path)
                # This unblocks the losers.
                os.unlink(tmp_path)
                # Bail out. It's too likely to happen repeatedly
                raise
            else:
                if self._credential_expiry_time is None:
                    os.close(fd)
                    fd = None
                    log.debug(
                        'Credentials are not temporary. Leaving %s empty and renaming it to %s.',
                        tmp_path, path)
                else:
                    log.debug('Writing credentials to %s.', tmp_path)
                    with os.fdopen(fd, 'w') as fh:
                        # fh now owns the descriptor; clear fd so the
                        # finally block doesn't double-close it.
                        fd = None
                        fh.write('\n'.join([
                            self._access_key, self._secret_key,
                            self._security_token,
                            datetime_to_str(self._credential_expiry_time)
                        ]))
                    log.debug('Wrote credentials to %s. Renaming to %s.', tmp_path, path)
                # Atomic rename publishes the cache and unblocks waiters.
                os.rename(tmp_path, path)
                return
            finally:
                if fd is not None:
                    os.close(fd)