Example #1
0
class WorkerCleanupContext:
    """
    Context manager used by :class:`BatchSystemCleanupSupport` to implement
    cleanup on a node after the last worker is done working.

    Gets wrapped around the worker's work.
    """

    def __init__(self, workerCleanupInfo):
        """
        Wrap the given workerCleanupInfo in a context manager.

        :param WorkerCleanupInfo workerCleanupInfo: Info to use to clean up the worker if we are
                                                    the last to exit the context manager.
        """
        self.workerCleanupInfo = workerCleanupInfo
        # The arena is created lazily when the context is entered.
        self.arena = None

    def __enter__(self):
        # Join a Last Process Standing arena so that, on exit, we can tell
        # whether we are the final concurrent worker to leave.
        work_dir = Toil.getToilWorkDir(self.workerCleanupInfo.workDir)
        arena_name = self.workerCleanupInfo.workflowID + '-cleanup'
        self.arena = LastProcessStandingArena(work_dir, arena_name)
        self.arena.enter()

    def __exit__(self, type, value, traceback):
        # leave() yields exactly once, and only if we are the last worker out;
        # in that case run the batch system cleanup.
        for _ in self.arena.leave():
            logger.debug('Cleaning up worker')
            BatchSystemSupport.workerCleanup(self.workerCleanupInfo)
        # We have nothing to say about exceptions: let them propagate.
        return False
Example #2
0
def _testLastProcessStandingTask(scope, arena_name, number):
    try:
        arena = LastProcessStandingArena(scope, arena_name)

        arena.enter()
        log.info('PID %d = num %d entered arena', os.getpid(), number)
        try:
            # We all make files
            my_precious = os.path.join(scope, 'precious' + str(number))

            # Put our name there
            with open(my_precious, 'w') as out_stream:
                out_stream.write(str(number))

            # Wait
            time.sleep(random.random() * 0.01)

            # Make sure our file is still there unmodified
            assert os.path.exists(
                my_precious), "Precious file {} has been stolen!".format(
                    my_precious)
            with open(my_precious, 'r') as in_stream:
                seen = in_stream.read().rstrip()
                assert seen == str(
                    number
                ), "We are {} but saw {} in our precious file!".format(
                    number, seen)
        finally:
            was_last = False
            for _ in arena.leave():
                was_last = True
                log.info('PID %d = num %d is last standing', os.getpid(),
                         number)

                # Clean up all the files
                for filename in os.listdir(scope):
                    if filename.startswith('precious'):
                        log.info('PID %d = num %d cleaning up %s', os.getpid(),
                                 number, filename)
                        os.unlink(os.path.join(scope, filename))

            log.info('PID %d = num %d left arena', os.getpid(), number)

        return True
    except:
        traceback.print_exc()
        return False
Example #3
0
def executor():
    """
    Main function of the _toil_kubernetes_executor entrypoint.

    Runs inside the Toil container.

    Responsible for setting up the user script and running the command for the
    job (which may in turn invoke the Toil worker entrypoint).

    Expects exactly one command-line argument: a base64-encoded pickled dict
    describing the job. Never returns normally; always terminates via
    :func:`sys.exit` with the child's exit code (or a failure code).
    """

    logging.basicConfig(level=logging.DEBUG)
    logger.debug("Starting executor")

    # If we don't manage to run the child, what should our exit code be?
    exit_code = EXIT_STATUS_UNAVAILABLE_VALUE

    if len(sys.argv) != 2:
        logger.error('Executor requires exactly one base64-encoded argument')
        sys.exit(exit_code)

    # Take in a base64-encoded pickled dict as our first argument and decode it
    try:
        # Make sure to encode the text arguments to bytes before base 64 decoding.
        # SECURITY NOTE: pickle.loads executes arbitrary code from its input;
        # this is only safe because the argument is produced by the trusted
        # Toil leader that launched this pod.
        job = pickle.loads(base64.b64decode(sys.argv[1].encode('utf-8')))
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt are not
        # swallowed; any decode/unpickle failure is logged and is fatal.
        exc_info = sys.exc_info()
        logger.error('Exception while unpickling task: ', exc_info=exc_info)
        sys.exit(exit_code)

    if 'environment' in job:
        # Adopt the job environment into the executor.
        # This lets us use things like TOIL_WORKDIR when figuring out how to talk to other executors.
        logger.debug('Adopting environment: %s', str(job['environment'].keys()))
        for var, value in job['environment'].items():
            os.environ[var] = value

    # Set JTRES_ROOT and other global state needed for resource
    # downloading/deployment to work.
    # TODO: Every worker downloads resources independently.
    # We should have a way to share a resource directory.
    logger.debug('Preparing system for resource download')
    Resource.prepareSystem()
    try:
        if 'userScript' in job:
            # Make the user's script importable before running the command.
            job['userScript'].register()

        # We need to tell other workers in this workflow not to do cleanup now that
        # we are here, or else wait for them to finish. So get the cleanup info
        # that knows where the work dir is.
        cleanupInfo = job['workerCleanupInfo']

        # Join a Last Process Standing arena, so we know which process should be
        # responsible for cleanup.
        # We need to use the real workDir, not just the override from cleanupInfo.
        # This needs to happen after the environment is applied.
        arena = LastProcessStandingArena(Toil.getToilWorkDir(cleanupInfo.workDir),
            cleanupInfo.workflowID + '-kube-executor')
        arena.enter()
        try:

            # Start the child process in its own process group so signals
            # aimed at the executor don't hit it directly.
            logger.debug("Invoking command: '%s'", job['command'])
            child = subprocess.Popen(job['command'],
                                     preexec_fn=lambda: os.setpgrp(),
                                     shell=True)

            # Reproduce child's exit code
            exit_code = child.wait()

        finally:
            # leave() yields exactly once, and only if we are the last
            # concurrent executor to finish; then do batch system cleanup.
            for _ in arena.leave():
                logger.debug('Cleaning up worker')
                BatchSystemSupport.workerCleanup(cleanupInfo)
    finally:
        logger.debug('Cleaning up resources')
        # TODO: Change resource system to use a shared resource directory for everyone.
        # Then move this into the last-process-standing cleanup
        Resource.cleanSystem()
        logger.debug('Shutting down')
        sys.exit(exit_code)
Example #4
0
 def __enter__(self):
     # Join a Last Process Standing arena under the Toil work dir, so that on
     # exit we can detect whether we are the final worker to leave.
     work_dir = Toil.getToilWorkDir(self.workerCleanupInfo.workDir)
     arena_name = self.workerCleanupInfo.workflowID + '-cleanup'
     self.arena = LastProcessStandingArena(work_dir, arena_name)
     self.arena.enter()