def _run_container(image, container_args, **kwargs):
    # TODO we could allow configuration of non default socket
    client = docker.from_env(version='auto')

    runtime = kwargs.pop('runtime', None)
    origRuntime = runtime
    if runtime is None and nvidia.is_nvidia_image(client.api, image):
        runtime = 'nvidia'

    logger.info('Running container: image: %s args: %s runtime: %s kwargs: %s' % (
        image, container_args, runtime, kwargs))
    try:
        try:
            return client.containers.run(
                image, container_args, runtime=runtime, **kwargs)
        except APIError:
            if origRuntime is None and runtime is not None:
                return client.containers.run(image, container_args, **kwargs)
            else:
                raise
    except DockerException:
        logger.exception('Exception when running docker container')
        raise

def task_cleanup(e):
    """
    Since files written by docker containers are owned by root, we can't
    clean them up in the worker process since that typically doesn't run
    as root. So, we run a lightweight container to make the temp dir
    cleanable.
    """
    from .executor import DATA_VOLUME

    if e.info['task']['mode'] == 'docker' and '_tempdir' in e.info['kwargs']:
        tmpdir = e.info['kwargs']['_tempdir']
        client = docker.from_env(version='auto')
        config = {
            'tty': True,
            'volumes': {
                tmpdir: {
                    'bind': DATA_VOLUME,
                    'mode': 'rw'
                }
            },
            'detach': False,
            'remove': True
        }
        args = ['chmod', '-R', 'a+rw', DATA_VOLUME]

        try:
            client.containers.run('busybox:latest', args, **config)
        except DockerException as dex:
            logger.error('Error setting perms on docker tempdir %s.' % tmpdir)
            logger.exception(dex)
            raise

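# A minimal sketch of what task_cleanup expects, assuming an event object
# whose `info` dict is shaped like girder_worker's task events. FakeEvent
# and the temp dir path below are hypothetical, for illustration only.
class FakeEvent(object):
    def __init__(self, info):
        self.info = info


task_cleanup(FakeEvent({
    'task': {'mode': 'docker'},
    'kwargs': {'_tempdir': '/tmp/girder_worker_tmp'},  # hypothetical path
}))
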
def chmod_writable(host_paths):
    """
    Since files written by docker containers are owned by root, we can't
    clean them up in the worker process since that typically doesn't run
    as root. So, we run a lightweight container to make the temp dir
    cleanable.
    """
    if not isinstance(host_paths, (list, tuple)):
        host_paths = (host_paths,)

    client = docker.from_env(version='auto')
    config = {
        'tty': True,
        'volumes': {},
        'detach': False,
        'remove': True
    }
    container_paths = []
    for host_path in host_paths:
        container_path = os.path.join(CONTAINER_PATH, uuid.uuid4().hex)
        container_paths.append(container_path)
        config['volumes'][host_path] = {
            'bind': container_path,
            'mode': 'rw'
        }

    args = ['chmod', '-R', 'a+rw'] + container_paths

    try:
        client.containers.run('busybox:latest', args, **config)
    except DockerException:
        logger.exception('Error setting perms on docker volumes %s.' % host_paths)
        raise

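# Usage sketch for chmod_writable. The paths are hypothetical; each host
# path is bind-mounted into a busybox container at a unique location and
# chmod'ed recursively.
chmod_writable('/tmp/girder_worker/job-output')        # single path
chmod_writable(['/tmp/outputs/a', '/tmp/outputs/b'])   # several paths at once
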
def _run_container(image, container_args, **kwargs):
    # TODO we could allow configuration of non default socket
    if 'DOCKER_CLIENT_TIMEOUT' in os.environ:
        timeout = int(os.environ['DOCKER_CLIENT_TIMEOUT'])
        client = docker.from_env(version='auto', timeout=timeout)
    else:
        client = docker.from_env(version='auto')

    runtime = kwargs.pop('runtime', None)
    origRuntime = runtime
    if runtime is None and nvidia.is_nvidia_image(client.api, image):
        runtime = 'nvidia'

    container_args = [str(arg) for arg in container_args]

    logger.info('Running container: image: %s args: %s runtime: %s kwargs: %s' % (
        image, container_args, runtime, kwargs))
    try:
        try:
            return client.containers.run(
                image, container_args, runtime=runtime, **kwargs)
        except APIError:
            if origRuntime is None and runtime is not None:
                return client.containers.run(image, container_args, **kwargs)
            else:
                raise
    except DockerException:
        logger.exception('Exception when running docker container')
        raise

def _run_container(image, container_args, **kwargs):
    # TODO we could allow configuration of non default socket
    client = docker.from_env(version='auto')
    if nvidia.is_nvidia_image(client.api, image):
        client = nvidia.NvidiaDockerClient.from_env(version='auto')

    logger.info('Running container: image: %s args: %s kwargs: %s' % (
        image, container_args, kwargs))
    try:
        return client.containers.run(image, container_args, **kwargs)
    except nvidia.NvidiaConnectionError:
        try:
            logger.info('Running nvidia container without nvidia support: image: %s' % image)
            client = docker.from_env(version='auto')
            return client.containers.run(image, container_args, **kwargs)
        except DockerException:
            logger.exception('Exception when running docker container without nvidia support.')
            raise
    except DockerException:
        logger.exception('Exception when running docker container')
        raise

def girder_before_task_publish(sender=None, body=None, exchange=None,
                               routing_key=None, headers=None, properties=None,
                               declare=None, retry_policy=None, **kwargs):
    if is_builtin_celery_task(sender):
        return

    job = None
    try:
        context = get_context()
        if 'jobInfoSpec' not in headers:
            job = context.create_task_job(
                Task.girder_job_defaults(), sender=sender, body=body,
                exchange=exchange, routing_key=routing_key, headers=headers,
                properties=properties, declare=declare,
                retry_policy=retry_policy, **kwargs)

        if 'girder_api_url' not in headers:
            context.attach_girder_api_url(
                sender=sender, body=body, exchange=exchange,
                routing_key=routing_key, headers=headers,
                properties=properties, declare=declare,
                retry_policy=retry_policy, **kwargs)

        if 'girder_client_token' not in headers:
            context.attach_girder_client_token(
                sender=sender, body=body, exchange=exchange,
                routing_key=routing_key, headers=headers,
                properties=properties, declare=declare,
                retry_policy=retry_policy, **kwargs)

        if 'girder_result_hooks' in headers:
            if job is not None:
                for result_hook in headers['girder_result_hooks']:
                    if isinstance(result_hook, ResultTransform):
                        result_hook.job = job

            # Celery task headers are not automatically serialized by celery
            # before being passed off to AMQP for byte packing. We will have
            # to do that here.
            p = jsonpickle.pickler.Pickler()
            headers['girder_result_hooks'] = \
                [p.flatten(grh) for grh in headers['girder_result_hooks']]

        # Finally, remove all reserved_options from headers
        for key in Task.reserved_options:
            headers.pop(key, None)
    except Exception:
        logger.exception('An error occurred in girder_before_task_publish.')
        raise

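# Why the Pickler above: Celery serializes the task body, but headers go to
# AMQP essentially as-is, so rich hook objects must be flattened to plain
# JSON-compatible structures first. A minimal round-trip sketch, where
# `result_hook` stands in for any ResultTransform instance (hypothetical):
import jsonpickle.pickler
import jsonpickle.unpickler

flat = jsonpickle.pickler.Pickler().flatten(result_hook)   # JSON-safe dict
restored = jsonpickle.unpickler.Unpickler().restore(flat)  # rebuilt object
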
def _pull_image(image):
    """
    Pulls the specified Docker image onto this worker.
    """
    client = docker.from_env(version='auto')
    try:
        client.images.pull(image)
    except DockerException:
        logger.exception('Error pulling Docker image %s:' % image)
        raise

def _pull_image(image):
    """
    Pulls the specified Docker image onto this worker.
    """
    client = docker.from_env(version='auto')
    try:
        client.images.pull(image)
    except DockerException as dex:
        logger.error('Error pulling Docker image %s:' % image)
        logger.exception(dex)
        raise

def _run_container(image, container_args, **kwargs):
    # TODO we could allow configuration of non default socket
    if 'DOCKER_CLIENT_TIMEOUT' in os.environ:
        timeout = int(os.environ['DOCKER_CLIENT_TIMEOUT'])
        client = docker.from_env(version='auto', timeout=timeout)
    else:
        client = docker.from_env(version='auto')

    runtime = kwargs.pop('runtime', None)
    origRuntime = runtime
    if runtime is None and nvidia.is_nvidia_image(client.api, image):
        runtime = 'nvidia'

    container_args = [str(arg) for arg in container_args]

    docker_network = _get_docker_network()
    if docker_network and 'network' not in kwargs:
        kwargs = kwargs.copy()
        kwargs['network'] = docker_network

    logger.info('Running container: image: %s args: %s runtime: %s kwargs: %s' % (
        image, container_args, runtime, kwargs))
    try:
        try:
            if runtime == 'nvidia' and kwargs.get('device_requests') is None:
                # Docker < 19.03 required the runtime='nvidia' argument.
                # Newer versions require a device request for some number of
                # GPUs. This should handle either version of the docker
                # daemon.
                try:
                    device_requests_kwargs = kwargs.copy()
                    device_requests_kwargs['device_requests'] = [
                        docker.types.DeviceRequest(count=-1, capabilities=[['gpu']])]
                    return client.containers.run(
                        image, container_args, **device_requests_kwargs)
                except (APIError, InvalidVersion):
                    pass
            return client.containers.run(
                image, container_args, runtime=runtime, **kwargs)
        except APIError:
            if origRuntime is None and runtime is not None:
                return client.containers.run(image, container_args, **kwargs)
            else:
                raise
    except DockerException:
        logger.exception('Exception when running docker container')
        raise

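# A standalone sketch of the newer GPU path used above, assuming docker SDK
# >= 4.3 and a daemon >= 19.03 with the NVIDIA container toolkit installed.
# The image and command are illustrative only.
import docker

client = docker.from_env(version='auto')
client.containers.run(
    'nvidia/cuda:11.0.3-base-ubuntu20.04',
    'nvidia-smi',
    device_requests=[docker.types.DeviceRequest(count=-1, capabilities=[['gpu']])],
    remove=True,
)
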
def _get_docker_network():
    try:
        ip = socket.gethostbyname(socket.gethostname())
        if 'DOCKER_CLIENT_TIMEOUT' in os.environ:
            timeout = int(os.environ['DOCKER_CLIENT_TIMEOUT'])
            client = docker.from_env(version='auto', timeout=timeout)
        else:
            client = docker.from_env(version='auto')
        for container in client.containers.list(all=True, filters={'status': 'running'}):
            for nw in container.attrs['NetworkSettings']['Networks'].values():
                if nw['IPAddress'] == ip:
                    return 'container:%s' % container.id
    except Exception:
        logger.exception('Failed to get docker network')

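# Sketch: when the worker itself runs inside a container, the
# 'container:<id>' network mode shares that container's network namespace,
# so child containers can reach the same endpoints the worker can.
# Illustrative use with a hypothetical busybox command:
network = _get_docker_network()
run_kwargs = {'remove': True}
if network:
    run_kwargs['network'] = network
docker.from_env(version='auto').containers.run(
    'busybox:latest', ['ip', 'addr'], **run_kwargs)
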
def girder_before_task_publish(sender=None, body=None, exchange=None,
                               routing_key=None, headers=None, properties=None,
                               declare=None, retry_policy=None, **kwargs):
    try:
        if 'jobInfoSpec' not in headers:
            try:
                # Note: If we can import these objects from the girder packages
                # we assume our producer is in a girder REST request. This
                # allows us to create the job models directly. Otherwise there
                # will be an ImportError and we can create the job via a REST
                # request using the jobInfoSpec in headers.
                from girder.utility.model_importer import ModelImporter
                from girder.plugins.worker import utils
                from girder.api.rest import getCurrentUser

                job_model = ModelImporter.model('job', 'jobs')

                user = headers.pop('girder_user', getCurrentUser())

                # Sanitize any Transform objects
                task_args = tuple(_walk_obj(body[0], _maybe_model_repr))
                task_kwargs = _walk_obj(body[1], _maybe_model_repr)

                job = job_model.createJob(**{
                    'title': headers.pop('girder_job_title', Task._girder_job_title),
                    'type': headers.pop('girder_job_type', Task._girder_job_type),
                    'handler': headers.pop('girder_job_handler', Task._girder_job_handler),
                    'public': headers.pop('girder_job_public', Task._girder_job_public),
                    'user': user,
                    'args': task_args,
                    'kwargs': task_kwargs,
                    'otherFields': dict(
                        celeryTaskId=headers['id'],
                        **headers.pop('girder_job_other_fields',
                                      Task._girder_job_other_fields))})

                headers['jobInfoSpec'] = utils.jobInfoSpec(job)
            except ImportError:
                # TODO: Check for self.job_manager to see if we have tokens
                # etc. to contact girder and create a job model; we may be in
                # a chain or a chord or some such.
                pass

        if 'girder_api_url' not in headers:
            try:
                from girder.plugins.worker import utils
                headers['girder_api_url'] = utils.getWorkerApiUrl()
            except ImportError:
                # TODO: handle the situation where girder_worker is producing
                # the message. Note: this may not come up at all depending on
                # how we pass girder_api_url through to the next task (e.g.
                # in the context of chaining events).
                pass

        if 'girder_client_token' not in headers:
            try:
                from girder.utility.model_importer import ModelImporter
                headers['girder_client_token'] = \
                    ModelImporter.model('token').createToken()
            except ImportError:
                # TODO: handle the situation where girder_worker is producing
                # the message. Note: this may not come up at all depending on
                # how we pass girder_token through to the next task (e.g. in
                # the context of chaining events).
                pass

        if 'girder_result_hooks' in headers:
            # Celery task headers are not automatically serialized by celery
            # before being passed off to AMQP for byte packing. We will have
            # to do that here.
            p = jsonpickle.pickler.Pickler()
            headers['girder_result_hooks'] = \
                [p.flatten(grh) for grh in headers['girder_result_hooks']]

        # Finally, remove all reserved_options from headers
        for key in Task.reserved_options:
            headers.pop(key, None)
    except Exception:
        logger.exception('An error occurred in girder_before_task_publish.')
        raise

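# How a handler like this is typically wired up, via Celery's standard
# signal API; girder_worker may connect it elsewhere, so this is only a
# sketch of the registration step.
from celery.signals import before_task_publish

before_task_publish.connect(girder_before_task_publish)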