def _run_container(image, container_args, **kwargs):
    """
    Start a docker container for ``image`` and return the result of
    ``client.containers.run``.

    The docker client is built from the environment. When the
    ``DOCKER_CLIENT_TIMEOUT`` environment variable is set, its integer value
    is passed to the client as ``timeout``.

    A ``runtime`` keyword is removed from ``kwargs``. If none was supplied and
    ``nvidia.is_nvidia_image`` reports true for the image, the ``nvidia``
    runtime is selected. Should that automatically selected runtime lead to an
    ``APIError``, the container is started a second time without any runtime
    argument; an explicitly requested runtime is never dropped.

    :param image: The docker image to run.
    :param container_args: Arguments for the container; each one is converted
        with ``str`` before use.
    :param kwargs: Additional keyword arguments forwarded to
        ``client.containers.run``.
    :raises DockerException: Logged with a traceback and re-raised.
    """
    # TODO we could allow configuration of non default socket
    client_options = {'version': 'auto'}
    if 'DOCKER_CLIENT_TIMEOUT' in os.environ:
        client_options['timeout'] = int(os.environ['DOCKER_CLIENT_TIMEOUT'])
    client = docker.from_env(**client_options)

    requested_runtime = kwargs.pop('runtime', None)
    runtime = requested_runtime
    if requested_runtime is None and nvidia.is_nvidia_image(client.api, image):
        runtime = 'nvidia'

    # Only a runtime chosen here (not by the caller) may be dropped on failure.
    runtime_was_inferred = requested_runtime is None and runtime is not None

    container_args = list(map(str, container_args))

    logger.info(
        'Running container: image: %s args: %s runtime: %s kwargs: %s'
        % (image, container_args, runtime, kwargs))

    try:
        try:
            return client.containers.run(
                image, container_args, runtime=runtime, **kwargs)
        except APIError:
            if not runtime_was_inferred:
                raise
            return client.containers.run(image, container_args, **kwargs)
    except DockerException:
        logger.exception('Exception when running docker container')
        raise
def _run_container(image, container_args, **kwargs):
    """
    Start a docker container for ``image`` and return the result of
    ``client.containers.run``.

    A ``runtime`` keyword is removed from ``kwargs``. If the caller gave none
    and ``nvidia.is_nvidia_image`` reports true for the image, the ``nvidia``
    runtime is used. When that automatically chosen runtime produces an
    ``APIError``, the run is repeated once without a runtime argument.

    :param image: The docker image to run.
    :param container_args: Arguments passed to the container.
    :param kwargs: Additional keyword arguments forwarded to
        ``client.containers.run``.
    :raises DockerException: Logged with a traceback and re-raised.
    """
    # TODO we could allow configuration of non default socket
    client = docker.from_env(version='auto')

    explicit_runtime = kwargs.pop('runtime', None)
    if explicit_runtime is None and nvidia.is_nvidia_image(client.api, image):
        effective_runtime = 'nvidia'
    else:
        effective_runtime = explicit_runtime

    logger.info(
        'Running container: image: %s args: %s runtime: %s kwargs: %s'
        % (image, container_args, effective_runtime, kwargs))

    try:
        try:
            return client.containers.run(
                image, container_args, runtime=effective_runtime, **kwargs)
        except APIError:
            # Retry only when the runtime was picked here, not by the caller.
            if explicit_runtime is not None or effective_runtime is None:
                raise
            return client.containers.run(image, container_args, **kwargs)
    except DockerException:
        logger.exception('Exception when running docker container')
        raise
def _run_container(image, container_args, **kwargs):
    """
    Start a docker container for ``image`` and return the result of
    ``client.containers.run``.

    When ``nvidia.is_nvidia_image`` reports true for the image, an
    ``nvidia.NvidiaDockerClient`` is used in place of the plain docker client.
    If running through that client raises ``nvidia.NvidiaConnectionError``,
    the container is started again through a plain docker client.

    :param image: The docker image to run.
    :param container_args: Arguments passed to the container.
    :param kwargs: Additional keyword arguments forwarded to
        ``client.containers.run``.
    :raises DockerException: Logged with a traceback and re-raised, both for
        the first attempt and for the fallback attempt.
    """
    # TODO we could allow configuration of non default socket

    def launch(docker_client):
        # Single place that issues the actual run call for either client.
        return docker_client.containers.run(image, container_args, **kwargs)

    client = docker.from_env(version='auto')
    if nvidia.is_nvidia_image(client.api, image):
        client = nvidia.NvidiaDockerClient.from_env(version='auto')

    logger.info('Running container: image: %s args: %s kwargs: %s'
                % (image, container_args, kwargs))

    try:
        return launch(client)
    except nvidia.NvidiaConnectionError:
        try:
            logger.info(
                'Running nvidia container without nvidia support: image: %s'
                % image)
            return launch(docker.from_env(version='auto'))
        except DockerException:
            logger.exception(
                'Exception when running docker container without nvidia support.')
            raise
    except DockerException:
        logger.exception('Exception when running docker container')
        raise
def _run_container(image, args, **kwargs):
    """
    Start a docker container for ``image`` and return the result of
    ``client.containers.run``.

    :param image: The docker image to run.
    :param args: Arguments passed to the container.
    :param kwargs: Additional keyword arguments forwarded to
        ``client.containers.run``.
    :raises DockerException: Logged as an error and re-raised.
    """
    # TODO we could allow configuration of non default socket
    docker_client = docker.from_env(version='auto')

    summary = 'Running container: image: %s args: %s kwargs: %s' % (
        image, args, kwargs)
    logger.info(summary)

    try:
        container = docker_client.containers.run(image, args, **kwargs)
    except DockerException as err:
        logger.error(err)
        raise
    return container
def _run_container(image, container_args, **kwargs):
    """
    Start a docker container for ``image`` and return the result of
    ``client.containers.run``.

    The docker client is built from the environment. When the
    ``DOCKER_CLIENT_TIMEOUT`` environment variable is set, its integer value
    is passed to the client as ``timeout``.

    A ``runtime`` keyword is removed from ``kwargs``. If none was supplied and
    ``nvidia.is_nvidia_image`` reports true for the image, the ``nvidia``
    runtime is selected. If ``_get_docker_network()`` returns a truthy value
    and the caller did not pass ``network``, that value is used as the
    container network.

    For the ``nvidia`` runtime without caller-supplied ``device_requests``,
    the container is first started with a GPU device request. If that raises
    ``APIError`` or ``InvalidVersion``, it is started with
    ``runtime='nvidia'`` instead. If the runtime was selected automatically
    and the run still raises ``APIError``, a final attempt is made without a
    runtime argument.

    :param image: The docker image to run.
    :param container_args: Arguments for the container; each one is converted
        with ``str`` before use.
    :param kwargs: Additional keyword arguments forwarded to
        ``client.containers.run``.
    :raises DockerException: Logged with a traceback and re-raised.
    """
    # TODO we could allow configuration of non default socket
    client_options = {'version': 'auto'}
    if 'DOCKER_CLIENT_TIMEOUT' in os.environ:
        client_options['timeout'] = int(os.environ['DOCKER_CLIENT_TIMEOUT'])
    client = docker.from_env(**client_options)

    requested_runtime = kwargs.pop('runtime', None)
    runtime = requested_runtime
    if requested_runtime is None and nvidia.is_nvidia_image(client.api, image):
        runtime = 'nvidia'

    container_args = list(map(str, container_args))

    network = _get_docker_network()
    if network and 'network' not in kwargs:
        # Work on a copy so the configured network is added to a fresh dict.
        kwargs = dict(kwargs, network=network)

    logger.info(
        'Running container: image: %s args: %s runtime: %s kwargs: %s'
        % (image, container_args, runtime, kwargs))

    try:
        try:
            if runtime == 'nvidia' and kwargs.get('device_requests') is None:
                # Docker < 19.03 required the runtime='nvidia' argument.
                # Newer versions require a device request for some number of
                # GPUs. Trying the device request first and falling through
                # to the runtime argument should handle either version of
                # the docker daemon.
                try:
                    gpu_kwargs = dict(kwargs, device_requests=[
                        docker.types.DeviceRequest(
                            count=-1, capabilities=[['gpu']])
                    ])
                    return client.containers.run(
                        image, container_args, **gpu_kwargs)
                except (APIError, InvalidVersion):
                    pass
            return client.containers.run(
                image, container_args, runtime=runtime, **kwargs)
        except APIError:
            # Retry only when the runtime was picked here, not by the caller.
            if requested_runtime is not None or runtime is None:
                raise
            return client.containers.run(image, container_args, **kwargs)
    except DockerException:
        logger.exception('Exception when running docker container')
        raise
def docker_gc(e):
    """
    Garbage collect docker containers and images using the
    https://github.com/spotify/docker-gc project's script, which is copied in
    the same directory as this file.

    Nothing is done unless the ``gc`` config option is true. Runs are
    throttled through a stamp file in the worker's ``tmp_root``: if the stamp
    was modified less than ``MIN_GC_INTERVAL`` seconds ago the call returns
    immediately, otherwise the stamp is refreshed and the script is run.

    The script's grace period is taken from the ``cache_timeout`` config
    option (default 3600 seconds). Image names listed in the comma-separated
    ``exclude_images`` option are written, one per line, to an exclude file
    handed to the script.

    :param e: Not used by this function.
    :raises Exception: If the script is missing, is not executable, or exits
        with a non-zero return code.
    """
    if not _read_bool_from_config('gc', False):
        return

    stampfile = os.path.join(
        config.get('girder_worker', 'tmp_root'), '.dockergcstamp')
    if (os.path.exists(stampfile)
            and time.time() - os.path.getmtime(stampfile) < MIN_GC_INTERVAL):
        return

    # Touch the stamp file so the next call sees a fresh timestamp.
    with open(stampfile, 'w') as f:
        f.write('')

    logger.info('Garbage collecting docker containers and images.')
    gc_dir = tempfile.mkdtemp()

    try:
        script = os.path.join(os.path.dirname(__file__), 'docker-gc')
        if not os.path.isfile(script):
            raise Exception('Docker GC script %s not found.' % script)
        if not os.access(script, os.X_OK):
            raise Exception('Docker GC script %s is not executable.' % script)

        env = os.environ.copy()
        env['FORCE_CONTAINER_REMOVAL'] = '1'
        env['STATE_DIR'] = gc_dir
        env['PID_DIR'] = gc_dir
        env['GRACE_PERIOD_SECONDS'] = str(
            _read_from_config('cache_timeout', 3600))

        # Handle excluded images. Names are stripped so that a value such as
        # "a, b" does not write " b" (with a leading space) to the exclude
        # file; blank entries are dropped.
        raw_excluded = _read_from_config('exclude_images', '').split(',')
        excluded = [img.strip() for img in raw_excluded if img.strip()]
        if excluded:
            exclude_file = os.path.join(gc_dir, '.docker-gc-exclude')
            with open(exclude_file, 'w') as fd:
                fd.write('\n'.join(excluded) + '\n')
            env['EXCLUDE_FROM_GC'] = exclude_file

        p = subprocess.Popen(args=(script, ), env=env)
        p.wait()  # Wait for garbage collection subprocess to finish

        if p.returncode != 0:
            raise Exception('Docker GC returned code %d.' % p.returncode)
    finally:
        # The temporary state directory is removed on every exit path.
        shutil.rmtree(gc_dir)
def docker_gc(e):
    """
    Garbage collect docker containers and images using the
    https://github.com/spotify/docker-gc project's script, which is copied in
    the same directory as this file.

    The function returns without doing anything when the ``gc`` config option
    is false, or when the stamp file in the worker's ``tmp_root`` was modified
    less than ``MIN_GC_INTERVAL`` seconds ago. Otherwise the stamp file is
    rewritten and the script is executed with a temporary state directory.

    The ``cache_timeout`` config option (default 3600) is passed to the script
    as its grace period in seconds. The comma-separated ``exclude_images``
    option is written to an exclude file, one image name per line.

    :param e: Not used by this function.
    :raises Exception: If the script is missing, is not executable, or exits
        with a non-zero return code.
    """
    if not _read_bool_from_config('gc', False):
        return

    stampfile = os.path.join(config.get('girder_worker', 'tmp_root'), '.dockergcstamp')
    recently_run = (
        os.path.exists(stampfile)
        and time.time() - os.path.getmtime(stampfile) < MIN_GC_INTERVAL
    )
    if recently_run:
        return

    # touch the file
    with open(stampfile, 'w') as f:
        f.write('')

    logger.info('Garbage collecting docker containers and images.')
    gc_dir = tempfile.mkdtemp()

    try:
        script = os.path.join(os.path.dirname(__file__), 'docker-gc')
        if not os.path.isfile(script):
            raise Exception('Docker GC script %s not found.' % script)
        if not os.access(script, os.X_OK):
            raise Exception('Docker GC script %s is not executable.' % script)

        env = os.environ.copy()
        env['FORCE_CONTAINER_REMOVAL'] = '1'
        env['STATE_DIR'] = gc_dir
        env['PID_DIR'] = gc_dir
        env['GRACE_PERIOD_SECONDS'] = str(_read_from_config('cache_timeout', 3600))

        # Handle excluded images. Each name is stripped of surrounding
        # whitespace before being written, so "a, b" yields the lines "a" and
        # "b" rather than "a" and " b"; empty entries are skipped.
        configured = _read_from_config('exclude_images', '').split(',')
        excluded = [name.strip() for name in configured if name.strip()]
        if excluded:
            exclude_file = os.path.join(gc_dir, '.docker-gc-exclude')
            with open(exclude_file, 'w') as fd:
                fd.write('\n'.join(excluded) + '\n')
            env['EXCLUDE_FROM_GC'] = exclude_file

        p = subprocess.Popen(args=(script,), env=env)
        p.wait()  # Wait for garbage collection subprocess to finish

        if p.returncode != 0:
            raise Exception('Docker GC returned code %d.' % p.returncode)
    finally:
        # Always remove the temporary state directory.
        shutil.rmtree(gc_dir)
# pre-3.0 plugins worked, it still falls back to the built-in version of worker # and functions correctly. The logic below eliminates that error message. # First ensure girder is installed, otherwise it doesn't make sense to import # this module at all. import girder # noqa from girder_worker import logger # Detect if girder>=3 is installed by checking an import that was added in 3.0. _isGirder3 = False try: from girder.plugin import getPlugin, GirderPlugin _isGirder3 = True except ImportError: logger.info( 'Girder 2.x is detected skipping incompatible entrypoint definition.') # If girder>=3 is installed, it is safe to continue defining the plugin class, otherwise # just define a dummy class to prevent error messages from propagating. if _isGirder3: from girder import events from girder.constants import AccessType from girder_jobs.models.job import Job from .api.worker import Worker from . import event_handlers class WorkerPlugin(GirderPlugin): DISPLAY_NAME = 'Worker' CLIENT_SOURCE_PATH = 'web_client'
def _docker_run(task, image, pull_image=True, entrypoint=None, container_args=None,
                volumes=None, remove_container=True, stream_connectors=None, **kwargs):
    """
    Run a docker container for ``task`` and service its streams until
    ``_run_select_loop`` returns.

    :param task: The task being executed; passed to ``_run_select_loop`` and
        inspected for ``task.request.girder_result_hooks``.
    :param image: The docker image to run.
    :param pull_image: Whether to pull the image before running it.
    :param entrypoint: Optional entrypoint override; a single value is wrapped
        in a list.
    :param container_args: Arguments for the container (default: empty list).
    :param volumes: Volume mapping passed to the container (default: empty
        dict).
    :param remove_container: Whether to remove the container afterwards,
        provided it is no longer running.
    :param stream_connectors: Extra ``FDReadStreamConnector`` /
        ``FDWriteStreamConnector`` instances to service (default: none).
    :param kwargs: Extra docker run arguments; keys listed in
        ``BLACKLISTED_DOCKER_RUN_ARGS`` are dropped.
    :returns: A tuple of ``None`` with one entry per item in
        ``task.request.girder_result_hooks`` when that attribute exists,
        otherwise an empty list.
    :raises TypeError: If a stream connector is of neither supported type.
    """
    volumes = volumes or {}
    stream_connectors = stream_connectors or []
    container_args = container_args or []

    if pull_image:
        logger.info('Pulling Docker image: %s', image)
        _pull_image(image)

    if entrypoint is not None and not isinstance(entrypoint, (list, tuple)):
        entrypoint = [entrypoint]

    run_kwargs = {'tty': False, 'volumes': volumes, 'detach': True}

    # Allow run args to be overridden, filter out any we don't want to override
    extra_run_kwargs = {
        k: v for k, v in kwargs.items()
        if k not in BLACKLISTED_DOCKER_RUN_ARGS
    }
    run_kwargs.update(extra_run_kwargs)

    if entrypoint is not None:
        run_kwargs['entrypoint'] = entrypoint

    container_args, read_streams, write_streams = _handle_streaming_args(
        container_args)

    for connector in stream_connectors:
        if isinstance(connector, FDReadStreamConnector):
            read_streams.append(connector)
        elif isinstance(connector, FDWriteStreamConnector):
            write_streams.append(connector)
        else:
            # The message names the classes actually accepted above.
            raise TypeError(
                "Expected 'FDReadStreamConnector' or 'FDWriteStreamConnector', received '%s'"
                % type(connector))

    # We need to open any read streams before starting the container, so the
    # underlying named pipes are opened for read.
    for stream in read_streams:
        stream.open()

    container = _run_container(image, container_args, **run_kwargs)
    try:
        _run_select_loop(task, container, read_streams, write_streams)
    finally:
        if container and remove_container:
            # Refresh the container object before checking its status.
            container.reload()
            # If the container is still running issue a warning
            if container.status == 'running':
                logger.warning('Container is still running, unable to remove.')
            else:
                container.remove()

    # return an array of None's equal to number of entries in the girder_result_hooks
    # header, in order to trigger processing of the container outputs.
    results = []
    if hasattr(task.request, 'girder_result_hooks'):
        results = (None, ) * len(task.request.girder_result_hooks)

    return results
def run(task, inputs, outputs, task_inputs, task_outputs, **kwargs):
    """
    Run the docker container described by ``task`` and record the host paths
    of its file outputs.

    :param task: Task specification. Reads ``docker_image`` and the optional
        keys ``pull_image``, ``progress_pipe``, ``container_args``,
        ``entrypoint`` and ``docker_run_args``.
    :param inputs: Input bindings, passed to ``_expand_args`` and
        ``_setup_pipes``.
    :param outputs: Output bindings; ``outputs[name]['script_data']`` is set
        for every non-streaming ``filepath`` output.
    :param task_inputs: Input specs, passed to ``_expand_args`` and
        ``_setup_pipes``.
    :param task_outputs: Mapping of output name to output spec.
    :param kwargs: Reads ``_tempdir`` (host directory mounted at
        ``DATA_VOLUME``), ``_job_manager`` and ``_rm_container``.
    :raises Exception: If an expected output file does not exist on the host.
    """
    image = task['docker_image']

    if task.get('pull_image', True):
        logger.info('Pulling Docker image: %s', image)
        _pull_image(image)

    progress_pipe = task.get('progress_pipe', False)

    tempdir = kwargs.get('_tempdir')
    job_mgr = kwargs.get('_job_manager')
    args = _expand_args(task.get('container_args', []), inputs, task_inputs, tempdir)

    ipipes, opipes = _setup_pipes(
        task_inputs, inputs, task_outputs, outputs, tempdir, job_mgr, progress_pipe)

    if 'entrypoint' in task:
        if isinstance(task['entrypoint'], (list, tuple)):
            ep_args = task['entrypoint']
        else:
            ep_args = [task['entrypoint']]
    else:
        ep_args = []

    run_kwargs = {
        'tty': True,
        'volumes': {
            tempdir: {
                'bind': DATA_VOLUME,
                'mode': 'rw'
            }
        },
        'detach': True
    }

    if ep_args:
        run_kwargs['entrypoint'] = ep_args

    # Allow run args to be overridden
    extra_run_kwargs = task.get('docker_run_args', {})

    # Filter out any we don't want to override
    extra_run_kwargs = {k: v for k, v in extra_run_kwargs.items()
                        if k not in BLACKLISTED_DOCKER_RUN_ARGS}

    run_kwargs.update(extra_run_kwargs)

    container = _run_container(image, args, **run_kwargs)

    try:
        _run_select_loop(container, opipes, ipipes)
    finally:
        if container and kwargs.get('_rm_container'):
            container.remove()

    # ``items()`` is used rather than ``iteritems()`` so this loop works on
    # both Python 2 and Python 3 dictionaries.
    for name, spec in task_outputs.items():
        if spec.get('target') == 'filepath' and not spec.get('stream'):
            path = spec.get('path', name)
            if not path.startswith('/'):
                # Assume relative paths are relative to the data volume
                path = os.path.join(DATA_VOLUME, path)

            # Convert data volume refs to the temp dir on the host
            path = path.replace(DATA_VOLUME, tempdir, 1)
            if not os.path.exists(path):
                raise Exception('Output filepath %s does not exist.' % path)
            outputs[name]['script_data'] = path