Example 1
def shutdown(self, driver):
    log.critical("Shutting down executor...")
    for taskId, pid in self.runningTasks.items():
        self.killTask(driver, taskId)
    Resource.cleanSystem()
    AbstractBatchSystem.workerCleanup(self.workerCleanupInfo)
    log.critical("Executor shut down")
Example 2
def shutdown(self, driver):
    log.critical('Shutting down executor ...')
    for taskId in self.runningTasks.keys():
        self.killTask(driver, taskId)
    Resource.cleanSystem()
    BatchSystemSupport.workerCleanup(self.workerCleanupInfo)
    log.critical('... executor shut down.')
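
A note on these shutdown variants: if killTask (or anything it triggers) removes entries from runningTasks while the loop walks the live dict, Python 3 raises RuntimeError: dictionary changed size during iteration. Snapshotting the keys first avoids that; a minimal sketch, assuming removal can happen synchronously, where the loop could instead read:

    # Snapshot the task IDs so killTask may freely mutate runningTasks.
    for taskId in list(self.runningTasks):
        self.killTask(driver, taskId)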
Example 3
def __init__(self):
    super(MesosExecutor, self).__init__()
    self.popenLock = threading.Lock()
    self.runningTasks = {}
    self.workerCleanupInfo = None
    Resource.prepareSystem()
    self.address = None
Example 4
def __init__(self):
    super(MesosExecutor, self).__init__()
    self.popenLock = threading.Lock()
    self.runningTasks = {}
    self.workerCleanupInfo = None
    Resource.prepareSystem()
    self.address = None
    # Setting this value at this point will ensure that the toil workflow directory will go to
    # the mesos sandbox if the user hasn't specified --workDir on the command line.
    if not os.getenv('TOIL_WORKDIR'):
        os.environ['TOIL_WORKDIR'] = os.getcwd()
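
The environment-variable default at the end of this __init__ could be written more compactly with setdefault; a stylistic alternative, not what the original code does, with the subtle difference that the original also overrides a TOIL_WORKDIR that is set but empty:

    os.environ.setdefault('TOIL_WORKDIR', os.getcwd())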
Example 5
def executor():
    """
    Main function of the _toil_kubernetes_executor entrypoint.

    Runs inside the Toil container.

    Responsible for setting up the user script and running the command for the
    job (which may in turn invoke the Toil worker entrypoint).

    """

    logging.basicConfig(level=logging.DEBUG)
    logger.debug("Starting executor")

    if len(sys.argv) != 2:
        logger.error('Executor requires exactly one base64-encoded argument')
        sys.exit(1)

    # Take in a base64-encoded pickled dict as our first argument and decode it
    try:
        # Make sure to encode the text arguments to bytes before base 64 decoding
        job = pickle.loads(base64.b64decode(sys.argv[1].encode('utf-8')))
    except:
        exc_info = sys.exc_info()
        logger.error('Exception while unpickling task: ', exc_info=exc_info)
        sys.exit(1)

    # Set JTRES_ROOT and other global state needed for resource
    # downloading/deployment to work.
    logger.debug('Preparing system for resource download')
    Resource.prepareSystem()

    if 'userScript' in job:
        job['userScript'].register()
    logger.debug("Invoking command: '%s'", job['command'])
    # Construct the job's environment
    jobEnv = dict(os.environ, **job['environment'])
    logger.debug('Using environment variables: %s', jobEnv.keys())

    # Start the child process
    child = subprocess.Popen(job['command'],
                             preexec_fn=lambda: os.setpgrp(),
                             shell=True,
                             env=jobEnv)

    # Reproduce child's exit code
    sys.exit(child.wait())
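
For context, the base64-encoded argument decoded above would be produced on the submitting side by pickling the job dict; a minimal sketch of that encoding step, assuming a plain dict with the keys this executor reads:

    import base64
    import pickle

    job = {'command': 'echo hello',
           'environment': {'EXAMPLE_VAR': '1'}}  # 'userScript' is optional
    encoded = base64.b64encode(pickle.dumps(job)).decode('utf-8')
    # The executor would then be invoked with `encoded` as its single argument.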
Example 6
def executor() -> None:
    """
    Main function of the _toil_contained_executor entrypoint.

    Runs inside the Toil container.

    Responsible for setting up the user script and running the command for the
    job (which may in turn invoke the Toil worker entrypoint).

    """

    configure_root_logger()
    set_log_level("DEBUG")
    logger.debug("Starting executor")

    # If we don't manage to run the child, what should our exit code be?
    exit_code = EXIT_STATUS_UNAVAILABLE_VALUE

    if len(sys.argv) != 2:
        logger.error('Executor requires exactly one base64-encoded argument')
        sys.exit(exit_code)

    # Take in a base64-encoded pickled dict as our first argument and decode it
    try:
        # Make sure to encode the text arguments to bytes before base 64 decoding
        job = pickle.loads(base64.b64decode(sys.argv[1].encode('utf-8')))
    except:
        exc_info = sys.exc_info()
        logger.error('Exception while unpickling task: ', exc_info=exc_info)
        sys.exit(exit_code)

    if 'environment' in job:
        # Adopt the job environment into the executor.
        # This lets us use things like TOIL_WORKDIR when figuring out how to talk to other executors.
        logger.debug('Adopting environment: %s', str(job['environment'].keys()))
        for var, value in job['environment'].items():
            os.environ[var] = value

    # Set JTRES_ROOT and other global state needed for resource
    # downloading/deployment to work.
    # TODO: Every worker downloads resources independently.
    # We should have a way to share a resource directory.
    logger.debug('Preparing system for resource download')
    Resource.prepareSystem()
    try:
        if 'userScript' in job:
            job['userScript'].register()

        # Start the child process
        logger.debug("Invoking command: '%s'", job['command'])
        child = subprocess.Popen(job['command'],
                                 preexec_fn=lambda: os.setpgrp(),
                                 shell=True)

        # Reproduce child's exit code
        exit_code = child.wait()
    except:
        # This will print a traceback for us, since exit() in the finally
        # will bypass the normal way of getting one.
        logger.exception('Encountered exception running child')
    finally:
        logger.debug('Cleaning up resources')
        # TODO: Change resource system to use a shared resource directory for everyone.
        # Then move this into worker cleanup somehow
        Resource.cleanSystem()
        logger.debug('Shutting down')
        sys.exit(exit_code)
Example 7
    def _test(self,
              module_name,
              shouldBelongToToil=False,
              expectedContents=None):
        module = ModuleDescriptor.forModule(module_name)
        # Assert basic attributes and properties
        self.assertEqual(module.belongsToToil, shouldBelongToToil)
        self.assertEquals(module.name, module_name)
        if shouldBelongToToil:
            self.assertTrue(module.dirPath.endswith('/src'))

        # Before the module is saved as a resource, localize() and globalize() are identity
        # methods. This should log warnings.
        self.assertIs(module.localize(), module)
        self.assertIs(module.globalize(), module)
        # Create a mock job store ...
        jobStore = MagicMock()
        # ... to generate a fake URL for the resource ...
        url = 'file://foo.zip'
        jobStore.getSharedPublicUrl.return_value = url
        # ... and save the resource to it.
        resource = module.saveAsResourceTo(jobStore)
        # Ensure that the URL generation method is actually called, ...
        jobStore.getSharedPublicUrl.assert_called_once_with(resource.pathHash)
        # ... and ensure that writeSharedFileStream is called.
        jobStore.writeSharedFileStream.assert_called_once_with(
            resource.pathHash, isProtected=False)
        # Now it gets a bit complicated: Ensure that the context manager returned by the
        # jobStore's writeSharedFileStream() method is entered and that the file handle yielded
        # by the context manager is written to once with the zipped source tree from which
        # 'toil.resource' was originally imported. Keep the zipped tree around such that we can
        # mock the download later.
        file_handle = jobStore.writeSharedFileStream.return_value.__enter__.return_value
        # The first 0 index selects the first call of write(), the second 0 selects positional
        # instead of keyword arguments, and the third 0 selects the first positional, i.e. the
        # contents. This is a bit brittle since it assumes that all the data is written in a
        # single call to write(). If more calls are made we can easily concatenate them.
        zipFile = file_handle.write.call_args_list[0][0][0]
        self.assertTrue(
            zipFile.startswith('PK'))  # the magic header for ZIP files

        # Check contents if requested
        if expectedContents is not None:
            with ZipFile(BytesIO(zipFile)) as _zipFile:
                self.assertEqual(set(_zipFile.namelist()), expectedContents)

        self.assertEquals(resource.url, url)
        # Now we're on the worker. Prepare the storage for localized resources
        Resource.prepareSystem()
        # Register the resource for subsequent lookup.
        resource.register()
        # Lookup the resource and ensure that the result is equal to but not the same as the
        # original resource. Lookup will also be used when we localize the module that was
        # originally used to create the resource.
        localResource = Resource.lookup(module._resourcePath)
        self.assertEquals(resource, localResource)
        self.assertIsNot(resource, localResource)
        # Now show that we can localize the module using the registered resource. Set up a mock
        # urlopen() that yields the zipped tree ...
        mock_urlopen = MagicMock()
        mock_urlopen.return_value.read.return_value = zipFile
        with patch('toil.resource.urlopen', mock_urlopen):
            # ... and use it to download and unpack the resource
            localModule = module.localize()
        # The name should be equal between original and localized resource ...
        self.assertEquals(module.name, localModule.name)
        # ... but the directory should be different.
        self.assertNotEquals(module.dirPath, localModule.dirPath)
        # Show that we can 'undo' localization. This is necessary when the user script's jobs are
        # invoked on the worker where they generate more child jobs.
        self.assertEquals(localModule.globalize(), module)
Example 8
def executor():
    """
    Main function of the _toil_kubernetes_executor entrypoint.

    Runs inside the Toil container.

    Responsible for setting up the user script and running the command for the
    job (which may in turn invoke the Toil worker entrypoint).

    """

    logging.basicConfig(level=logging.DEBUG)
    logger.debug("Starting executor")
    
    # If we don't manage to run the child, what should our exit code be?
    exit_code = EXIT_STATUS_UNAVAILABLE_VALUE

    if len(sys.argv) != 2:
        logger.error('Executor requires exactly one base64-encoded argument')
        sys.exit(exit_code)

    # Take in a base64-encoded pickled dict as our first argument and decode it
    try:
        # Make sure to encode the text arguments to bytes before base 64 decoding
        job = pickle.loads(base64.b64decode(sys.argv[1].encode('utf-8')))
    except:
        exc_info = sys.exc_info()
        logger.error('Exception while unpickling task: ', exc_info=exc_info)
        sys.exit(exit_code)

    if 'environment' in job:
        # Adopt the job environment into the executor.
        # This lets us use things like TOIL_WORKDIR when figuring out how to talk to other executors.
        logger.debug('Adopting environment: %s', str(job['environment'].keys()))
        for var, value in job['environment'].items():
            os.environ[var] = value
    
    # Set JTRES_ROOT and other global state needed for resource
    # downloading/deployment to work.
    # TODO: Every worker downloads resources independently.
    # We should have a way to share a resource directory.
    logger.debug('Preparing system for resource download')
    Resource.prepareSystem()
    try:
        if 'userScript' in job:
            job['userScript'].register()
            
        # We need to tell other workers in this workflow not to do cleanup now that
        # we are here, or else wait for them to finish. So get the cleanup info
        # that knows where the work dir is.
        cleanupInfo = job['workerCleanupInfo']
        
        # Join a Last Process Standing arena, so we know which process should be
        # responsible for cleanup.
        # We need to use the real workDir, not just the override from cleanupInfo.
        # This needs to happen after the environment is applied.
        arena = LastProcessStandingArena(Toil.getToilWorkDir(cleanupInfo.workDir), 
            cleanupInfo.workflowID + '-kube-executor')
        arena.enter()
        try:
            
            # Start the child process
            logger.debug("Invoking command: '%s'", job['command'])
            child = subprocess.Popen(job['command'],
                                     preexec_fn=lambda: os.setpgrp(),
                                     shell=True)

            # Reproduce child's exit code
            exit_code = child.wait()
            
        finally:
            for _ in arena.leave():
                # We are the last concurrent executor to finish.
                # Do batch system cleanup.
                logger.debug('Cleaning up worker')
                BatchSystemSupport.workerCleanup(cleanupInfo)
    finally:
        logger.debug('Cleaning up resources')
        # TODO: Change resource system to use a shared resource directory for everyone.
        # Then move this into the last-process-standing cleanup
        Resource.cleanSystem()
        logger.debug('Shutting down')
        sys.exit(exit_code)
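
The arena.enter() / for _ in arena.leave() pattern above implies that leave() yields exactly once, and only in the last participant to leave, so the cleanup body runs in exactly one process. A toy illustration of that contract (not Toil's implementation) using a flock-protected counter file, Unix-only:

    import fcntl

    class ToyArena:
        """Counts participants in a file; leave() yields only in the
        process that decrements the count to zero."""
        def __init__(self, path):
            self.path = path

        def _locked_update(self, delta):
            with open(self.path, 'a+') as f:
                fcntl.flock(f, fcntl.LOCK_EX)
                f.seek(0)
                count = int(f.read() or '0') + delta
                f.seek(0)
                f.truncate()
                f.write(str(count))
                return count

        def enter(self):
            self._locked_update(+1)

        def leave(self):
            if self._locked_update(-1) == 0:
                yield  # we are the last process standing

With this contract, `for _ in arena.leave(): cleanup()` runs cleanup in exactly one of the concurrent executors, which is what the example relies on for workerCleanup.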
Example 9
def shutdown(self, driver):
    log.critical("Shutting down executor...")
    for taskId, pid in self.runningTasks.items():
        self.killTask(driver, taskId)
    Resource.cleanSystem()
    log.critical("Executor shut down")
Example 10
def __init__(self):
    super(MesosExecutor, self).__init__()
    self.popenLock = threading.Lock()
    self.runningTasks = {}
    Resource.prepareSystem()
Example 11
    def _test(self, module_name,
              shouldBelongToToil=False, expectedContents=None, allowExtraContents=True):
        module = ModuleDescriptor.forModule(module_name)
        # Assert basic attributes and properties
        self.assertEqual(module.belongsToToil, shouldBelongToToil)
        self.assertEquals(module.name, module_name)
        if shouldBelongToToil:
            self.assertTrue(module.dirPath.endswith('/src'))

        # Before the module is saved as a resource, localize() and globalize() are identity
        # methods. This should log warnings.
        self.assertIs(module.localize(), module)
        self.assertIs(module.globalize(), module)
        # Create a mock job store ...
        jobStore = MagicMock()
        # ... to generate a fake URL for the resource ...
        url = 'file://foo.zip'
        jobStore.getSharedPublicUrl.return_value = url
        # ... and save the resource to it.
        resource = module.saveAsResourceTo(jobStore)
        # Ensure that the URL generation method is actually called, ...
        jobStore.getSharedPublicUrl.assert_called_once_with(resource.pathHash)
        # ... and ensure that writeSharedFileStream is called.
        jobStore.writeSharedFileStream.assert_called_once_with(resource.pathHash,
                                                               isProtected=False)
        # Now it gets a bit complicated: Ensure that the context manager returned by the
        # jobStore's writeSharedFileStream() method is entered and that the file handle yielded
        # by the context manager is written to once with the zipped source tree from which
        # 'toil.resource' was originally imported. Keep the zipped tree around such that we can
        # mock the download later.
        file_handle = jobStore.writeSharedFileStream.return_value.__enter__.return_value
        # The first 0 index selects the first call of write(), the second 0 selects positional
        # instead of keyword arguments, and the third 0 selects the first positional, i.e. the
        # contents. This is a bit brittle since it assumes that all the data is written in a
        # single call to write(). If more calls are made we can easily concatenate them.
        zipFile = file_handle.write.call_args_list[0][0][0]
        self.assertTrue(zipFile.startswith('PK'))  # the magic header for ZIP files

        # Check contents if requested
        if expectedContents is not None:
            with ZipFile(BytesIO(zipFile)) as _zipFile:
                actualContents = set(_zipFile.namelist())
                if allowExtraContents:
                    self.assertTrue(actualContents.issuperset(expectedContents))
                else:
                    self.assertEqual(actualContents, expectedContents)

        self.assertEquals(resource.url, url)
        # Now we're on the worker. Prepare the storage for localized resources
        Resource.prepareSystem()
        try:
            # Register the resource for subsequent lookup.
            resource.register()
            # Lookup the resource and ensure that the result is equal to but not the same as the
            # original resource. Lookup will also be used when we localize the module that was
            # originally used to create the resource.
            localResource = Resource.lookup(module._resourcePath)
            self.assertEquals(resource, localResource)
            self.assertIsNot(resource, localResource)
            # Now show that we can localize the module using the registered resource. Set up a mock
            # urlopen() that yields the zipped tree ...
            mock_urlopen = MagicMock()
            mock_urlopen.return_value.read.return_value = zipFile
            with patch('toil.resource.urlopen', mock_urlopen):
                # ... and use it to download and unpack the resource
                localModule = module.localize()
            # The name should be equal between original and localized resource ...
            self.assertEquals(module.name, localModule.name)
            # ... but the directory should be different.
            self.assertNotEquals(module.dirPath, localModule.dirPath)
            # Show that we can 'undo' localization. This is necessary when the user script's jobs
            #  are invoked on the worker where they generate more child jobs.
            self.assertEquals(localModule.globalize(), module)
        finally:
            Resource.cleanSystem()
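
The try/finally pairing of Resource.prepareSystem() with Resource.cleanSystem() added in this version of _test (the earlier _test above never cleaned up) also fits naturally into a context manager; a hypothetical helper, not part of Toil's API:

    from contextlib import contextmanager

    @contextmanager
    def preparedResourceSystem():
        # Hypothetical wrapper pairing Toil's prepare/clean calls.
        Resource.prepareSystem()
        try:
            yield
        finally:
            Resource.cleanSystem()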
Example 12
    def createBatchSystem(config, jobStore=None, userScript=None):
        """
        Creates an instance of the batch system specified in the given config. If a job store and
        a user script are given then the user script can be hot deployed into the workflow.

        :param toil.common.Config config: the current configuration

        :param toil.jobStores.abstractJobStore.AbstractJobStore jobStore: an instance of a jobStore

        :param ModuleDescriptor userScript: a handle to the Python module defining the root job

        :return: an instance of a concrete subclass of AbstractBatchSystem

        :rtype: batchSystems.abstractBatchSystem.AbstractBatchSystem
        """
        kwargs = dict(config=config,
                      maxCores=config.maxCores,
                      maxMemory=config.maxMemory,
                      maxDisk=config.maxDisk)

        if config.batchSystem == 'parasol':
            from toil.batchSystems.parasol import ParasolBatchSystem
            batchSystemClass = ParasolBatchSystem

        elif config.batchSystem == 'single_machine' or config.batchSystem == 'singleMachine':
            from toil.batchSystems.singleMachine import SingleMachineBatchSystem
            batchSystemClass = SingleMachineBatchSystem

        elif config.batchSystem == 'gridengine' or config.batchSystem == 'gridEngine':
            from toil.batchSystems.gridengine import GridengineBatchSystem
            batchSystemClass = GridengineBatchSystem

        elif config.batchSystem == 'lsf' or config.batchSystem == 'LSF':
            from toil.batchSystems.lsf import LSFBatchSystem
            batchSystemClass = LSFBatchSystem

        elif config.batchSystem == 'mesos' or config.batchSystem == 'Mesos':
            from toil.batchSystems.mesos.batchSystem import MesosBatchSystem
            batchSystemClass = MesosBatchSystem

            kwargs['masterAddress'] = config.mesosMasterAddress

        elif config.batchSystem == 'slurm' or config.batchSystem == 'Slurm':
            from toil.batchSystems.slurm import SlurmBatchSystem
            batchSystemClass = SlurmBatchSystem

        else:
            raise RuntimeError('Unrecognised batch system: %s' % config.batchSystem)

        if not config.disableCaching and not batchSystemClass.supportsWorkerCleanup():
            raise RuntimeError('%s currently does not support shared caching.  Set the '
                               '--disableCaching flag if you want to '
                               'use this batch system.' % config.batchSystem)
        logger.info('Using the %s' %
                    re.sub("([a-z])([A-Z])", "\g<1> \g<2>", batchSystemClass.__name__).lower())

        if jobStore is not None:
            if userScript is not None:
                if not userScript.belongsToToil and batchSystemClass.supportsHotDeployment():
                    userScriptResource = userScript.saveAsResourceTo(jobStore)
                    with jobStore.writeSharedFileStream('userScript') as f:
                        f.write(userScriptResource.pickle())
                    kwargs['userScript'] = userScriptResource
            else:
                from toil.jobStores.abstractJobStore import NoSuchFileException
                try:
                    with jobStore.readSharedFileStream('userScript') as f:
                        userScriptResource = Resource.unpickle(f.read())
                    kwargs['userScript'] = userScriptResource
                except NoSuchFileException:
                    pass

        return batchSystemClass(**kwargs)
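
Callers would typically hand this factory a populated Config; a hedged usage sketch, assuming toil.common.Config can be instantiated with defaults and exposes the attributes the factory reads (batchSystem, maxCores, maxMemory, maxDisk, disableCaching):

    from toil.common import Config

    config = Config()
    config.batchSystem = 'mesos'
    config.mesosMasterAddress = '127.0.0.1:5050'  # read only in the Mesos branch
    batchSystem = createBatchSystem(config)  # no jobStore/userScript: no hot deployment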