def test_ulimit_invalid_type(self):
    with pytest.raises(ValueError):
        Ulimit(name=None)
    with pytest.raises(ValueError):
        Ulimit(name='hello', soft='123')
    with pytest.raises(ValueError):
        Ulimit(name='hello', hard='456')
def create_ulimits(limits):
    ulimits = []
    # Map a CPU-time limit onto the 'cpu' ulimit (use .get() so a missing key
    # does not raise, mirroring the membership check below).
    if limits.get('cputime'):
        cpu = limits['cputime']
        ulimits.append(Ulimit(name='cpu', soft=cpu, hard=cpu))
    # Map a file-size limit onto the 'fsize' ulimit.
    if 'file_size' in limits:
        fsize = limits['file_size']
        ulimits.append(Ulimit(name='fsize', soft=fsize, hard=fsize))
    return ulimits or None
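# A minimal usage sketch for create_ulimits above (not from the original project).
# The 'limits' dict shape and values are illustrative assumptions; passing the
# resulting list to docker-py's APIClient.create_host_config(ulimits=...) is
# standard docker-py usage.
import docker

client = docker.APIClient()
limits = {'cputime': 60, 'file_size': 1048576}  # hypothetical limits (seconds / bytes)
host_config = client.create_host_config(ulimits=create_ulimits(limits))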
def test_compare_ulimits_neg(self):
    self.fake_data['params']['dimensions'] = {
        'ulimits': {'nofile': {'soft': 131072, 'hard': 131072}}}
    ulimits_nofile = Ulimit(name='nofile', soft=131072, hard=131072)
    container_info = dict()
    container_info['HostConfig'] = {
        'CpuPeriod': 0, 'KernelMemory': 0, 'Memory': 0, 'CpuQuota': 0,
        'CpusetCpus': '', 'CpuShares': 0, 'BlkioWeight': 0,
        'CpusetMems': '', 'MemorySwap': 0, 'MemoryReservation': 0,
        'Ulimits': [ulimits_nofile]}
    self.dw = get_DockerWorker(self.fake_data['params'])
    self.assertFalse(self.dw.compare_dimensions(container_info))
def test_create_host_config_obj_ulimit(self):
    ulimit_dct = Ulimit(name='nofile', soft=8096)
    config = create_host_config(ulimits=[ulimit_dct],
                                version=DEFAULT_DOCKER_API_VERSION)
    assert 'Ulimits' in config
    assert len(config['Ulimits']) == 1
    ulimit_obj = config['Ulimits'][0]
    assert isinstance(ulimit_obj, Ulimit)
    assert ulimit_obj == ulimit_dct
def test_create_host_config_obj_ulimit(self):
    ulimit_dct = Ulimit(name='nofile', soft=8096)
    config = create_host_config(ulimits=[ulimit_dct],
                                version=DEFAULT_DOCKER_API_VERSION)
    self.assertIn('Ulimits', config)
    self.assertEqual(len(config['Ulimits']), 1)
    ulimit_obj = config['Ulimits'][0]
    self.assertIsInstance(ulimit_obj, Ulimit)
    self.assertEqual(ulimit_obj, ulimit_dct)
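# Side note (a sketch, not from the test suites above): docker.types.Ulimit is a
# dict subclass, so the object created in the tests keeps its data under
# capitalized keys and compares equal to a plain dict with the same contents.
from docker.types import Ulimit

u = Ulimit(name='nofile', soft=8096)
assert u['Name'] == 'nofile' and u['Soft'] == 8096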
def start(self) -> None:
    """Start Triton Server Container"""
    devices = [
        DeviceRequest(capabilities=[["gpu"]], device_ids=self._devices),
    ]

    LOGGER.info(f"Triton environment: {json.dumps(self._environment, indent=4)}")
    LOGGER.info(f"Starting Triton container {self.name}.")
    self._container = self._docker_client.containers.run(
        image=self._image,
        name=self.name,
        device_requests=devices,
        detach=True,
        tty=True,
        shm_size=self._shm_size,
        ulimits=[
            Ulimit(name="memlock", soft=-1, hard=-1),
            Ulimit(name="stack", soft=67108864, hard=67108864),
        ],
        volumes=self._volumes,
        environment=self._environment,
        network_mode=self._network,
        auto_remove=True,
        ipc_mode="host",
    )
    LOGGER.info("Triton command:")
    LOGGER.info(f"  {self._command}")
    LOGGER.info(f"Starting Triton Server {self.name}.")
    self._triton_exec = self._docker_api_client.exec_create(
        container=self._container.id,
        cmd=self._command,
    )
    stream_generator = self._docker_api_client.exec_start(
        exec_id=self._triton_exec["Id"], stream=True)
    self._logging_thread = Thread(
        target=TritonServerContainer._logging,
        args=(self, stream_generator),
        daemon=True)
    self._logging_thread.start()
def test_ulimit_invalid_type(self):
    self.assertRaises(ValueError, lambda: Ulimit(name=None))
    self.assertRaises(ValueError, lambda: Ulimit(name='hello', soft='123'))
    self.assertRaises(ValueError, lambda: Ulimit(name='hello', hard='456'))
def serve(
    save_path: Union[Path, str],
    device: str = 'cpu',
    name: str = None,
    batch_size: int = 16,
) -> Container:
    """Serve the given model save path in a Docker container.

    Args:
        save_path (Union[Path, str]): Saved path to the model.
        device (str): Device name. E.g.: cpu, cuda, cuda:1.
        name (str): Container name. Default to None.
        batch_size (int): Batch size for passing to serving containers.

    Returns:
        Container: Docker container object created.
    """
    info = parse_path(Path(save_path))
    architecture: str = info['architecture']
    engine: Engine = info['engine']
    cuda, device_num = get_device(device)

    docker_client = docker.from_env()

    # set mount
    mounts = [
        Mount(target=f'/models/{architecture}',
              source=str(info['base_dir']),
              type='bind',
              read_only=True)
    ]

    common_kwargs = remove_dict_null({
        'detach': True,
        'auto_remove': True,
        'mounts': mounts,
        'name': name
    })
    environment = dict()
    if cuda:
        common_kwargs['runtime'] = 'nvidia'
        environment['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
        environment['CUDA_VISIBLE_DEVICES'] = device_num

    if engine == Engine.TFS:
        # TensorFlow Serving 2.2.0 has the issue: https://github.com/tensorflow/serving/issues/1663
        docker_tag = '2.1.0-gpu' if cuda else '2.1.0'
        ports = {'8501': config.TFS_HTTP_PORT, '8500': config.TFS_GRPC_PORT}
        environment['MODEL_NAME'] = architecture
        container = docker_client.containers.run(
            f'tensorflow/serving:{docker_tag}',
            environment=environment,
            ports=ports,
            **common_kwargs)
    elif engine == Engine.TORCHSCRIPT:
        docker_tag = 'latest-gpu' if cuda else 'latest'
        ports = {
            '8000': config.TORCHSCRIPT_HTTP_PORT,
            '8001': config.TORCHSCRIPT_GRPC_PORT
        }
        environment['MODEL_NAME'] = architecture
        container = docker_client.containers.run(
            f'mlmodelci/pytorch-serving:{docker_tag}',
            environment=environment,
            ports=ports,
            **common_kwargs)
    elif engine == Engine.ONNX:
        docker_tag = 'latest-gpu' if cuda else 'latest'
        ports = {'8000': config.ONNX_HTTP_PORT, '8001': config.ONNX_GRPC_PORT}
        environment['MODEL_NAME'] = architecture
        container = docker_client.containers.run(
            f'mlmodelci/onnx-serving:{docker_tag}',
            environment=environment,
            ports=ports,
            **common_kwargs)
    elif engine == Engine.TRT:
        if not cuda:
            raise RuntimeError(
                'TensorRT cannot be run without CUDA. Please specify a CUDA device.')
        ports = {
            '8000': config.TRT_HTTP_PORT,
            '8001': config.TRT_GRPC_PORT,
            '8002': config.TRT_PROMETHEUS_PORT
        }
        ulimits = [
            Ulimit(name='memlock', soft=-1, hard=-1),
            Ulimit(name='stack', soft=67100864, hard=67100864)
        ]
        trt_kwargs = {'ulimits': ulimits, 'shm_size': '1G'}
        container = docker_client.containers.run(
            'nvcr.io/nvidia/tensorrtserver:19.10-py3',
            'trtserver --model-repository=/models',
            environment=environment,
            ports=ports,
            **common_kwargs,
            **trt_kwargs,
        )
    else:
        raise RuntimeError(
            f'Not able to serve model with path `{str(save_path)}`.')

    return container
DOCKER_PARAMETERS = {
    "image": "pl:latest",
    "auto_remove": True,
    "cpu_period": 1000,
    "cpu_shares": 1024,
    "cpu_quota": 0,
    "cpuset_cpus": "0",
    "detach": True,
    "environment": {},
    "mem_limit": "100m",
    "memswap_limit": "200m",
    "network_mode": "none",
    "network_disabled": True,
    # "storage_opt": {},
    "tty": True,
    "ulimits": [Ulimit(name="core", soft=0, hard=0)],
}

# Check if any of the above settings are overridden by a config.py file.
logger = logging.getLogger(__name__)
try:
    from config import *  # noqa
    logger.info("Using config.py...")
except ModuleNotFoundError:
    logger.info("No config file found")
del logger

# Override some settings for testing purposes
if TESTING:
    DOCKER_COUNT = 5
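# Hedged sketch (not part of the original settings module): a dict like
# DOCKER_PARAMETERS above can be splatted directly into docker-py's
# containers.run(), which accepts image, detach, mem_limit, ulimits, etc. as
# keyword arguments. The command string is an illustrative assumption, and the
# "pl:latest" image is assumed to exist locally.
import docker

client = docker.from_env()
container = client.containers.run(command="sleep infinity", **DOCKER_PARAMETERS)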
def _run_start_start(self):
    """Start the container and wait for it to finish starting"""
    # Get the value of the URL prefix that the app will have to the outside.
    url_prefix = reverse(
        "dockerapps:docker-proxy",
        kwargs={
            "project": self.process.project.sodar_uuid,
            "image": self.image.sodar_uuid,
            "process": self.image.process.sodar_uuid,
            "path": "",
        },
    )
    with transaction.atomic():
        self.process.refresh_from_db()
        if self.process.state in (STATE_IDLE, STATE_FAILED):
            self.job.add_log_entry(
                "Starting container for %s:%s..."
                % (self.image.repository, self.image.tag))
            # Build environment, interpreting placeholders.
            environment = {}
            for entry in self.process.environment:
                if "__KIOSC_URL_PREFIX__" in entry["value"]:
                    environment[entry["name"]] = entry["value"].replace(
                        "__KIOSC_URL_PREFIX__", url_prefix)
                else:
                    environment[entry["name"]] = entry["value"]
            # Create and start the Docker container, update database record.
            container = self.cli.create_container(
                detach=True,
                image=self.image.image_id,
                environment=environment,
                command=shlex.split(self.process.command)
                if self.process.command else None,
                ports=[self.process.internal_port],
                host_config=self.cli.create_host_config(
                    port_bindings={
                        self.process.internal_port: self.process.host_port
                    },
                    ulimits=[
                        Ulimit(
                            name="nofile",
                            soft=settings.KIOSC_DOCKER_MAX_ULIMIT_NOFILE_SOFT,
                            hard=settings.KIOSC_DOCKER_MAX_ULIMIT_NOFILE_HARD,
                        )
                    ],
                ),
            )
            self.cli.start(container=container.get("Id"))
            self.process.container_id = container.get("Id")
            self.process.state = STATE_STARTING
            self.process.save()
        else:
            self.job.add_log_entry(
                "Process state is %s, not attempting to start"
                % self.process.state)

    self.job.add_log_entry("Waiting for container to start...")
    timeout_start = time.time()
    while time.time() < timeout_start + self.timeout:
        if (self.cli.inspect_container(self.process.container_id)
                .get("State", {}).get("Running")):
            self.job.add_log_entry("Container is running...")
            with transaction.atomic():
                self.process.refresh_from_db()
                self.process.state = STATE_RUNNING
                self.process.save()
            break
        time.sleep(self.sleep_time)
    else:
        raise RuntimeError("Container did not start on time")
def create_container(self, name, image, ram, working_directory, gpus=None,
                     environment=None, enable_fuse=False):
    """
    Creates a docker container with the given arguments. This docker container is running endlessly until
    container.stop() is called.

    If nvidia gpus are specified, the nvidia runtime is used, if available. Otherwise a device request for
    nvidia gpus is added.

    :param name: The name of the container
    :type name: str
    :param image: The image to use for this container
    :type image: str
    :param ram: The ram limit for this container in megabytes
    :type ram: int
    :param working_directory: The working directory inside the docker container
    :type working_directory: str
    :param gpus: A specification of gpus to enable in this docker container
    :type gpus: List[GPUDevice]
    :param environment: A dictionary containing environment variables, which should be set inside the container
    :type environment: Dict[str, Any]
    :param enable_fuse: If True, SYS_ADMIN capabilities are granted for this container and /dev/fuse is mounted
    :type enable_fuse: bool

    :return: The created container
    :rtype: Container

    :raise RuntimeNotSupportedError: If the specified runtime is not installed on the docker host
    """
    if environment is None:
        environment = {}

    mem_limit = None
    if ram is not None:
        mem_limit = '{}m'.format(ram)

    gpu_ids = None
    if gpus:
        set_nvidia_environment_variables(
            environment, map(lambda gpu: gpu.device_id, gpus))
        gpu_ids = [gpu.device_id for gpu in gpus]

    # enable fuse
    devices = []
    capabilities = []
    if enable_fuse:
        devices.append('/dev/fuse')
        capabilities.append('SYS_ADMIN')

    container = create_container_with_gpus(
        self._client,
        image,
        command='/bin/sh',
        gpus=gpu_ids,
        available_runtimes=self._runtimes,
        name=name,
        user='******',
        working_dir=working_directory,
        mem_limit=mem_limit,
        memswap_limit=mem_limit,
        environment=environment,
        cap_add=capabilities,
        devices=devices,
        ulimits=[
            Ulimit(name='nofile', soft=NOFILE_LIMIT, hard=NOFILE_LIMIT)
        ],
        # needed to run the container endlessly
        tty=True,
        stdin_open=True,
        auto_remove=False,
    )

    container.start()

    return container
def get_ulimits_config(config):
    return [
        Ulimit(name=ulimit['name'], soft=ulimit['soft'], hard=ulimit['hard'])
        for ulimit in config
    ]
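# Hedged usage sketch for get_ulimits_config above: the expected config structure
# (a list of dicts with 'name', 'soft' and 'hard' keys) is inferred from the
# function body, and the concrete values are illustrative only.
config = [
    {'name': 'nofile', 'soft': 1024, 'hard': 4096},
    {'name': 'memlock', 'soft': -1, 'hard': -1},
]
ulimits = get_ulimits_config(config)  # -> [Ulimit(nofile), Ulimit(memlock)]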