Esempio n. 1
0
def _run_container(image, container_args, **kwargs):
    """
    Start a docker container for ``image`` and return whatever
    ``client.containers.run`` returns.

    :param image: The docker image to run.
    :param container_args: Arguments for the container; each entry is
        converted with ``str`` before being handed to docker.
    :param kwargs: Extra keyword arguments forwarded to
        ``client.containers.run``.  A ``runtime`` entry is removed from them
        and handled here.
    :raises DockerException: logged and re-raised when the container could
        not be started.
    """
    # TODO we could allow configuration of non default socket
    client_options = {'version': 'auto'}
    timeout_setting = os.environ.get('DOCKER_CLIENT_TIMEOUT')
    if timeout_setting is not None:
        client_options['timeout'] = int(timeout_setting)
    client = docker.from_env(**client_options)

    requested_runtime = kwargs.pop('runtime', None)
    runtime = requested_runtime
    if requested_runtime is None and nvidia.is_nvidia_image(client.api, image):
        runtime = 'nvidia'
    # True only when the caller gave no runtime and one was picked above.
    runtime_was_guessed = requested_runtime is None and runtime is not None

    container_args = list(map(str, container_args))

    message = (
        'Running container: image: %s args: %s runtime: %s kwargs: %s' %
        (image, container_args, runtime, kwargs))
    logger.info(message)
    try:
        try:
            return client.containers.run(
                image, container_args, runtime=runtime, **kwargs)
        except APIError:
            if not runtime_was_guessed:
                raise
            # The runtime was our own guess; retry once without it.
            return client.containers.run(image, container_args, **kwargs)
    except DockerException:
        logger.exception('Exception when running docker container')
        raise
Esempio n. 2
0
def _run_container(image, container_args, **kwargs):
    """
    Start a docker container for ``image`` and return whatever
    ``client.containers.run`` returns.

    :param image: The docker image to run.
    :param container_args: Arguments passed to the container.
    :param kwargs: Extra keyword arguments forwarded to
        ``client.containers.run``.  A ``runtime`` entry is removed from them
        and handled here.
    :raises DockerException: logged and re-raised when the container could
        not be started.
    """
    # TODO we could allow configuration of non default socket
    client = docker.from_env(version='auto')

    requested_runtime = kwargs.pop('runtime', None)
    runtime = requested_runtime
    if requested_runtime is None and nvidia.is_nvidia_image(client.api, image):
        runtime = 'nvidia'
    # True only when the caller gave no runtime and one was picked above.
    runtime_was_guessed = requested_runtime is None and runtime is not None

    message = (
        'Running container: image: %s args: %s runtime: %s kwargs: %s' %
        (image, container_args, runtime, kwargs))
    logger.info(message)
    try:
        try:
            return client.containers.run(
                image, container_args, runtime=runtime, **kwargs)
        except APIError:
            if not runtime_was_guessed:
                raise
            # The runtime was our own guess; retry once without it.
            return client.containers.run(image, container_args, **kwargs)
    except DockerException:
        logger.exception('Exception when running docker container')
        raise
Esempio n. 3
0
def _run_container(image, container_args, **kwargs):
    """
    Start a docker container for ``image`` and return whatever
    ``client.containers.run`` returns.

    When ``nvidia.is_nvidia_image`` reports true for the image, an
    ``nvidia.NvidiaDockerClient`` is used; if that raises
    ``nvidia.NvidiaConnectionError`` the run is retried with a freshly
    created plain docker client.

    :param image: The docker image to run.
    :param container_args: Arguments passed to the container.
    :param kwargs: Extra keyword arguments forwarded to
        ``client.containers.run``.
    :raises DockerException: logged and re-raised when the container could
        not be started.
    """
    # TODO we could allow configuration of non default socket
    plain_client = docker.from_env(version='auto')
    if nvidia.is_nvidia_image(plain_client.api, image):
        client = nvidia.NvidiaDockerClient.from_env(version='auto')
    else:
        client = plain_client

    start_message = ('Running container: image: %s args: %s kwargs: %s' %
                     (image, container_args, kwargs))
    logger.info(start_message)
    try:
        return client.containers.run(image, container_args, **kwargs)
    except nvidia.NvidiaConnectionError:
        # Fall back to running the image without nvidia support.
        try:
            fallback_message = (
                'Running nvidia container without nvidia support: image: %s' %
                image)
            logger.info(fallback_message)
            fallback_client = docker.from_env(version='auto')
            return fallback_client.containers.run(
                image, container_args, **kwargs)
        except DockerException:
            logger.exception(
                'Exception when running docker container without nvidia support.'
            )
            raise
    except DockerException:
        logger.exception('Exception when running docker container')
        raise
Esempio n. 4
0
def _run_container(image, args, **kwargs):
    """
    Start a docker container for ``image`` and return whatever
    ``client.containers.run`` returns.

    :param image: The docker image to run.
    :param args: Arguments passed to the container.
    :param kwargs: Extra keyword arguments forwarded to
        ``client.containers.run``.
    :raises DockerException: logged as an error and re-raised.
    """
    # TODO we could allow configuration of non default socket
    docker_client = docker.from_env(version='auto')

    summary = 'Running container: image: %s args: %s kwargs: %s' % (
        image, args, kwargs)
    logger.info(summary)
    try:
        container = docker_client.containers.run(image, args, **kwargs)
    except DockerException as dex:
        logger.error(dex)
        raise
    return container
Esempio n. 5
0
def _run_container(image, container_args, **kwargs):
    """
    Start a docker container for ``image`` and return whatever
    ``client.containers.run`` returns.

    :param image: The docker image to run.
    :param container_args: Arguments for the container; each entry is
        converted with ``str`` before being handed to docker.
    :param kwargs: Extra keyword arguments forwarded to
        ``client.containers.run``.  A ``runtime`` entry is removed from them
        and handled here; a ``network`` entry is added when
        ``_get_docker_network()`` returns a truthy value and none was given.
    :raises DockerException: logged and re-raised when the container could
        not be started.
    """
    # TODO we could allow configuration of non default socket
    timeout_setting = os.environ.get('DOCKER_CLIENT_TIMEOUT')
    if timeout_setting is None:
        client = docker.from_env(version='auto')
    else:
        timeout = int(timeout_setting)
        client = docker.from_env(version='auto', timeout=timeout)

    requested_runtime = kwargs.pop('runtime', None)
    runtime = requested_runtime
    if requested_runtime is None and nvidia.is_nvidia_image(client.api, image):
        runtime = 'nvidia'
    # True only when the caller gave no runtime and one was picked above.
    runtime_was_guessed = requested_runtime is None and runtime is not None

    container_args = list(map(str, container_args))

    network_name = _get_docker_network()
    if network_name and 'network' not in kwargs:
        kwargs = dict(kwargs, network=network_name)

    message = (
        'Running container: image: %s args: %s runtime: %s kwargs: %s' %
        (image, container_args, runtime, kwargs))
    logger.info(message)

    needs_gpu_request = (
        runtime == 'nvidia' and kwargs.get('device_requests') is None)
    try:
        try:
            if needs_gpu_request:
                # Docker < 19.03 required the runtime='nvidia' argument.
                # Newer versions require a device request for some number of
                # GPUs.  Try the device request first and fall through to the
                # runtime argument if the daemon rejects it, so either
                # version of the docker daemon is handled.
                try:
                    gpu_kwargs = dict(kwargs)
                    gpu_kwargs['device_requests'] = [
                        docker.types.DeviceRequest(
                            count=-1, capabilities=[['gpu']]),
                    ]
                    return client.containers.run(
                        image, container_args, **gpu_kwargs)
                except (APIError, InvalidVersion):
                    # Deliberate: fall back to the runtime-based attempt.
                    pass
            return client.containers.run(
                image, container_args, runtime=runtime, **kwargs)
        except APIError:
            if not runtime_was_guessed:
                raise
            # The runtime was our own guess; retry once without it.
            return client.containers.run(image, container_args, **kwargs)
    except DockerException:
        logger.exception('Exception when running docker container')
        raise
Esempio n. 6
0
def docker_gc(e):
    """
    Garbage collect docker containers and images.

    Runs the https://github.com/spotify/docker-gc project's script, which is
    expected to sit in the same directory as this file.  The script is given
    a grace period taken from the ``cache_timeout`` setting (3600 seconds by
    default) and, when the ``exclude_images`` setting is non-empty, a file
    listing the images to leave alone.

    Nothing is done when the ``gc`` setting is false, or when the stamp file
    in the worker's ``tmp_root`` was modified less than ``MIN_GC_INTERVAL``
    seconds ago.

    :param e: Not used by this function (presumably the triggering event;
        confirm against the caller).
    :raises Exception: if the script is missing, is not executable, or exits
        with a non-zero return code.
    """
    if not _read_bool_from_config('gc', False):
        return
    stampfile = os.path.join(config.get('girder_worker', 'tmp_root'),
                             '.dockergcstamp')
    if os.path.exists(stampfile) and time.time() - os.path.getmtime(
            stampfile) < MIN_GC_INTERVAL:
        return
    else:  # touch the file
        with open(stampfile, 'w') as f:
            f.write('')

    logger.info('Garbage collecting docker containers and images.')
    # Scratch directory for the script's state and pid files; always removed.
    gc_dir = tempfile.mkdtemp()

    try:
        script = os.path.join(os.path.dirname(__file__), 'docker-gc')
        if not os.path.isfile(script):
            raise Exception('Docker GC script %s not found.' % script)
        if not os.access(script, os.X_OK):
            raise Exception('Docker GC script %s is not executable.' % script)

        env = os.environ.copy()
        env['FORCE_CONTAINER_REMOVAL'] = '1'
        env['STATE_DIR'] = gc_dir
        env['PID_DIR'] = gc_dir
        env['GRACE_PERIOD_SECONDS'] = str(
            _read_from_config('cache_timeout', 3600))

        # Handle excluded images.  Strip the whitespace around each name so
        # that a setting such as "a, b" yields "a" and "b"; an entry written
        # with a leading space would not match the image it names.
        raw_excluded = _read_from_config('exclude_images', '').split(',')
        excluded = [img.strip() for img in raw_excluded if img.strip()]
        if excluded:
            exclude_file = os.path.join(gc_dir, '.docker-gc-exclude')
            with open(exclude_file, 'w') as fd:
                fd.write('\n'.join(excluded) + '\n')
            env['EXCLUDE_FROM_GC'] = exclude_file

        p = subprocess.Popen(args=(script, ), env=env)
        p.wait()  # Wait for garbage collection subprocess to finish

        if p.returncode != 0:
            raise Exception('Docker GC returned code %d.' % p.returncode)
    finally:
        shutil.rmtree(gc_dir)
Esempio n. 7
0
def docker_gc(e):
    """
    Garbage collect docker containers and images.

    Runs the https://github.com/spotify/docker-gc project's script, which is
    expected to sit in the same directory as this file.  The script is given
    a grace period taken from the ``cache_timeout`` setting (3600 seconds by
    default) and, when the ``exclude_images`` setting is non-empty, a file
    listing the images to leave alone.

    Nothing is done when the ``gc`` setting is false, or when the stamp file
    in the worker's ``tmp_root`` was modified less than ``MIN_GC_INTERVAL``
    seconds ago.

    :param e: Not used by this function (presumably the triggering event;
        confirm against the caller).
    :raises Exception: if the script is missing, is not executable, or exits
        with a non-zero return code.
    """
    if not _read_bool_from_config('gc', False):
        return
    stampfile = os.path.join(config.get('girder_worker', 'tmp_root'), '.dockergcstamp')
    if os.path.exists(stampfile) and time.time() - os.path.getmtime(stampfile) < MIN_GC_INTERVAL:
        return
    else:  # touch the file
        with open(stampfile, 'w') as f:
            f.write('')

    logger.info('Garbage collecting docker containers and images.')
    # Scratch directory for the script's state and pid files; always removed.
    gc_dir = tempfile.mkdtemp()

    try:
        script = os.path.join(os.path.dirname(__file__), 'docker-gc')
        if not os.path.isfile(script):
            raise Exception('Docker GC script %s not found.' % script)
        if not os.access(script, os.X_OK):
            raise Exception('Docker GC script %s is not executable.' % script)

        env = os.environ.copy()
        env['FORCE_CONTAINER_REMOVAL'] = '1'
        env['STATE_DIR'] = gc_dir
        env['PID_DIR'] = gc_dir
        env['GRACE_PERIOD_SECONDS'] = str(_read_from_config('cache_timeout', 3600))

        # Handle excluded images.  Each name is stripped of surrounding
        # whitespace before it is written: previously "a, b" produced the
        # entry " b", which would not match the image it names.
        names = _read_from_config('exclude_images', '').split(',')
        excluded = [name.strip() for name in names if name.strip()]
        if excluded:
            exclude_file = os.path.join(gc_dir, '.docker-gc-exclude')
            with open(exclude_file, 'w') as fd:
                fd.write('\n'.join(excluded) + '\n')
            env['EXCLUDE_FROM_GC'] = exclude_file

        p = subprocess.Popen(args=(script,), env=env)
        p.wait()  # Wait for garbage collection subprocess to finish

        if p.returncode != 0:
            raise Exception('Docker GC returned code %d.' % p.returncode)
    finally:
        shutil.rmtree(gc_dir)
Esempio n. 8
0
# pre-3.0 plugins worked, it still falls back to the built-in version of worker
# and functions correctly.  The logic below eliminates that error message.

# First ensure girder is installed, otherwise it doesn't make sense to import
# this module at all.  This import is deliberately unguarded: if girder is
# missing, the ImportError propagates to whoever imported this module.
import girder  # noqa

from girder_worker import logger

# Detect if girder>=3 is installed by checking an import that was added in 3.0.
# _isGirder3 stays False when that import fails, and the failure is only
# reported as an informational log message rather than an error.
_isGirder3 = False
try:
    from girder.plugin import getPlugin, GirderPlugin
    _isGirder3 = True
except ImportError:
    logger.info(
        'Girder 2.x is detected skipping incompatible entrypoint definition.')

# If girder>=3 is installed, it is safe to continue defining the plugin class, otherwise
# just define a dummy class to prevent error messages from propagating.
# NOTE(review): no dummy class is visible in this part of the file -- confirm
# that it is defined further down.
if _isGirder3:
    # These imports are kept inside the guard so they are only attempted when
    # the girder>=3 import above succeeded.
    from girder import events
    from girder.constants import AccessType
    from girder_jobs.models.job import Job

    from .api.worker import Worker
    from . import event_handlers

    # Plugin class built on the girder>=3 GirderPlugin base class.
    class WorkerPlugin(GirderPlugin):
        # NOTE(review): presumably the name and web client location that
        # girder reads from the plugin class -- confirm against GirderPlugin.
        DISPLAY_NAME = 'Worker'
        CLIENT_SOURCE_PATH = 'web_client'
Esempio n. 9
0
def _docker_run(task,
                image,
                pull_image=True,
                entrypoint=None,
                container_args=None,
                volumes=None,
                remove_container=True,
                stream_connectors=None,
                **kwargs):
    """
    Run a docker container for ``task`` and service its streams.

    :param task: The task being executed; passed to ``_run_select_loop`` and
        inspected for ``task.request.girder_result_hooks``.
    :param image: The docker image to run.
    :param pull_image: When true, the image is pulled before running.
    :param entrypoint: Optional entrypoint; a value that is not a list or
        tuple is wrapped in a one-element list.
    :param container_args: Arguments for the container (defaults to an empty
        list); they are first processed by ``_handle_streaming_args``.
    :param volumes: Volumes passed to docker (defaults to an empty dict).
    :param remove_container: When true, the container is removed after the
        select loop, unless it is still running.
    :param stream_connectors: Extra ``FDReadStreamConnector`` /
        ``FDWriteStreamConnector`` instances to service.
    :param kwargs: Extra docker run arguments; any key listed in
        ``BLACKLISTED_DOCKER_RUN_ARGS`` is dropped.
    :returns: A tuple of ``None`` values, one per entry in
        ``task.request.girder_result_hooks``, or an empty list when the
        request has no such attribute.
    :raises TypeError: if a stream connector is of neither connector type.
    """
    volumes = volumes or {}
    stream_connectors = stream_connectors or []
    container_args = container_args or []

    if pull_image:
        logger.info('Pulling Docker image: %s', image)
        _pull_image(image)

    if entrypoint is not None and not isinstance(entrypoint, (list, tuple)):
        entrypoint = [entrypoint]

    run_kwargs = {'tty': False, 'volumes': volumes, 'detach': True}

    # Allow run args to be overridden, filter out any we don't want to override
    extra_run_kwargs = {
        k: v
        for k, v in kwargs.items() if k not in BLACKLISTED_DOCKER_RUN_ARGS
    }
    run_kwargs.update(extra_run_kwargs)

    if entrypoint is not None:
        run_kwargs['entrypoint'] = entrypoint

    container_args, read_streams, write_streams = _handle_streaming_args(
        container_args)

    for connector in stream_connectors:
        if isinstance(connector, FDReadStreamConnector):
            read_streams.append(connector)
        elif isinstance(connector, FDWriteStreamConnector):
            write_streams.append(connector)
        else:
            # The message names the classes that are actually checked above.
            raise TypeError(
                "Expected 'FDReadStreamConnector' or 'FDWriteStreamConnector', received '%s'"
                % type(connector))

    # We need to open any read streams before starting the container, so the
    # underlying named pipes are opened for read.
    for stream in read_streams:
        stream.open()

    container = _run_container(image, container_args, **run_kwargs)
    try:
        _run_select_loop(task, container, read_streams, write_streams)
    finally:
        if container and remove_container:
            # Refresh the status before deciding whether removal is possible.
            container.reload()
            # If the container is still running issue a warning
            if container.status == 'running':
                logger.warning('Container is still running, unable to remove.')
            else:
                container.remove()

    # return an array of None's equal to number of entries in the girder_result_hooks
    # header, in order to trigger processing of the container outputs.
    results = []
    if hasattr(task.request, 'girder_result_hooks'):
        results = (None, ) * len(task.request.girder_result_hooks)

    return results
Esempio n. 10
0
def run(task, inputs, outputs, task_inputs, task_outputs, **kwargs):
    """
    Run the docker image named by ``task['docker_image']`` and record the
    host paths of its file outputs.

    :param task: Task specification dict.  Keys read here: ``docker_image``,
        ``pull_image`` (default true), ``progress_pipe`` (default false),
        ``container_args``, ``entrypoint`` and ``docker_run_args``.
    :param inputs: Input bindings, passed to ``_expand_args`` and
        ``_setup_pipes``.
    :param outputs: Output bindings; ``outputs[name]['script_data']`` is set
        to the host path of every non-streaming ``filepath`` output.
    :param task_inputs: Input specs, passed to ``_expand_args`` and
        ``_setup_pipes``.
    :param task_outputs: Mapping of output name to output spec.
    :param kwargs: Reads ``_tempdir`` (mounted at ``DATA_VOLUME`` in the
        container), ``_job_manager`` and ``_rm_container`` (remove the
        container when done).
    :raises Exception: if an expected output file does not exist on the host.
    """
    image = task['docker_image']

    if task.get('pull_image', True):
        logger.info('Pulling Docker image: %s', image)
        _pull_image(image)

    progress_pipe = task.get('progress_pipe', False)

    tempdir = kwargs.get('_tempdir')
    job_mgr = kwargs.get('_job_manager')
    args = _expand_args(task.get('container_args', []), inputs, task_inputs, tempdir)

    ipipes, opipes = _setup_pipes(
        task_inputs, inputs, task_outputs, outputs, tempdir, job_mgr, progress_pipe)

    if 'entrypoint' in task:
        if isinstance(task['entrypoint'], (list, tuple)):
            ep_args = task['entrypoint']
        else:
            ep_args = [task['entrypoint']]
    else:
        ep_args = []

    run_kwargs = {
        'tty': True,
        'volumes': {
            tempdir: {
                'bind': DATA_VOLUME,
                'mode': 'rw'
            }
        },
        'detach': True
    }

    if ep_args:
        run_kwargs['entrypoint'] = ep_args

    # Allow run args to be overridden
    extra_run_kwargs = task.get('docker_run_args', {})
    # Filter out any we don't want to override
    extra_run_kwargs = {k: v for k, v in extra_run_kwargs.items() if k not
                        in BLACKLISTED_DOCKER_RUN_ARGS}
    run_kwargs.update(extra_run_kwargs)

    container = _run_container(image, args, **run_kwargs)

    try:
        _run_select_loop(container, opipes, ipipes)
    finally:
        if container and kwargs.get('_rm_container'):
            container.remove()

    # ``items()`` works on both Python 2 and 3; ``iteritems()`` does not
    # exist on Python 3 dicts.
    for name, spec in task_outputs.items():
        if spec.get('target') == 'filepath' and not spec.get('stream'):
            path = spec.get('path', name)
            if not path.startswith('/'):
                # Assume relative paths are relative to the data volume
                path = os.path.join(DATA_VOLUME, path)

            # Convert data volume refs to the temp dir on the host
            path = path.replace(DATA_VOLUME, tempdir, 1)
            if not os.path.exists(path):
                raise Exception('Output filepath %s does not exist.' % path)
            outputs[name]['script_data'] = path