def get_labels(self, task_type, task_idx, job_uuid):
    """Return the labels for one task, combined with the spec's master labels.

    The resource manager supplies the default labels for the given
    ``task_type`` / ``task_idx`` / ``job_uuid``; they are then passed,
    together with ``self.spec.master_labels``, to the ``get_labels`` helper.

    NOTE(review): the ``get_labels`` called below is presumably the
    module-level helper of the same name, not this method -- confirm.
    """
    default_labels = self.resource_manager.get_labels(
        task_type=task_type,
        task_idx=task_idx,
        job_uuid=job_uuid,
    )
    master_labels = self.spec.master_labels
    return get_labels(default_labels=default_labels, labels=master_labels)
def start_dockerizer(self,
                     secret_refs=None,
                     config_map_refs=None,
                     resources=None,
                     labels=None,
                     annotations=None,
                     node_selector=None,
                     affinity=None,
                     tolerations=None,
                     reconcile_url=None,
                     max_restarts=None):
    """Create or update the dockerizer pod and return its response as a dict.

    Volumes are gathered from three sources, in this order: the docker
    volumes, the build-context volumes and the docker-credentials volumes.
    The pod body is built by ``self.resource_manager.get_pod`` and submitted
    through ``self.create_or_update_pod`` with ``reraise=True``.

    ``labels`` is combined with ``self.resource_manager.labels`` via the
    ``get_labels`` helper; the remaining keyword arguments are forwarded
    unchanged to ``get_pod``.
    """
    # Accumulate every volume / mount the pod needs.
    volumes, volume_mounts = get_docker_volumes()

    build_ctx_volumes, build_ctx_mounts = get_build_context_volumes()
    volumes += build_ctx_volumes
    volume_mounts += build_ctx_mounts

    creds_volumes, creds_mounts = self.get_docker_credentials_volumes()
    volumes += creds_volumes
    volume_mounts += creds_mounts

    resource_name = self.resource_manager.get_resource_name()
    command, args = self.get_pod_command_args()
    init_command, init_args = self.get_init_command_args()
    pod_labels = get_labels(default_labels=self.resource_manager.labels,
                            labels=labels)

    # Evaluated in the same order the original call evaluated them.
    env_vars = self.get_env_vars()
    init_env_vars = self.get_init_env_vars()
    restart_policy = get_pod_restart_policy(max_restarts)

    pod = self.resource_manager.get_pod(
        resource_name=resource_name,
        volume_mounts=volume_mounts,
        volumes=volumes,
        labels=pod_labels,
        env_vars=env_vars,
        command=command,
        args=args,
        init_command=init_command,
        init_args=init_args,
        init_env_vars=init_env_vars,
        persistence_outputs=None,
        persistence_data=None,
        outputs_refs_jobs=None,
        outputs_refs_experiments=None,
        secret_refs=secret_refs,
        config_map_refs=config_map_refs,
        resources=resources,
        annotations=annotations,
        ephemeral_token=None,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        # Only the build-context mounts are handed to the init container.
        init_context_mounts=build_ctx_mounts,
        reconcile_url=reconcile_url,
        max_restarts=max_restarts,
        restart_policy=restart_policy,
    )
    pod_resp, _ = self.create_or_update_pod(name=resource_name,
                                            body=pod,
                                            reraise=True)
    return pod_resp.to_dict()
def get_labels(self, task_type, task_idx, job_uuid):
    """Return the labels for one task, combined with its configured labels.

    The resource manager supplies the default labels for the given
    ``task_type`` / ``task_idx`` / ``job_uuid``. The task-specific labels are
    looked up in ``self.labels`` by ``task_type`` and then ``task_idx``; a
    missing ``task_type`` falls back to an empty dict, and a missing
    ``task_idx`` yields ``None``.

    NOTE(review): the ``get_labels`` called below is presumably the
    module-level helper of the same name, not this method -- confirm.
    """
    default_labels = self.resource_manager.get_labels(
        task_type=task_type,
        task_idx=task_idx,
        job_uuid=job_uuid,
    )
    labels_for_type = self.labels.get(task_type, {})
    task_labels = labels_for_type.get(task_idx)
    return get_labels(default_labels=default_labels, labels=task_labels)
def start_tensorboard(self,
                      outputs_path,
                      persistence_outputs,
                      outputs_specs=None,
                      outputs_refs_jobs=None,
                      outputs_refs_experiments=None,
                      resources=None,
                      labels=None,
                      annotations=None,
                      node_selector=None,
                      affinity=None,
                      tolerations=None,
                      max_restarts=None,
                      reconcile_url=None):
    """Create or update the tensorboard deployment, service and ingress.

    The ingress is only created when ``self._use_ingress()`` is truthy.

    Args:
        outputs_path: value passed to tensorboard as ``--logdir``.
        persistence_outputs: used to build the outputs volume and every
            refs-outputs volume.
        outputs_specs: used both as an outputs-refs source for volumes and
            to derive the stores secrets.
        outputs_refs_jobs / outputs_refs_experiments: additional
            outputs-refs sources for volumes; also forwarded to the
            deployment builder.
        labels: combined with ``self.resource_manager.labels`` via the
            ``get_labels`` helper for the deployment.
        annotations: forwarded to the deployment builder (the ingress uses
            annotations loaded from configuration instead).
        Remaining keyword arguments are forwarded unchanged to
        ``self.resource_manager.get_deployment``.

    Returns:
        A dict with keys ``'deployment'`` and ``'service'`` holding the dict
        form of the respective responses.
    """
    # The port is requested before ``self.port`` is read; keep this order.
    ports = [self.request_tensorboard_port()]
    target_ports = [self.port]

    volumes, volume_mounts = get_pod_outputs_volume(persistence_outputs)

    # One refs-outputs volume set per source, in the original order.
    for outputs_refs in (outputs_refs_jobs, outputs_specs, outputs_refs_experiments):
        refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
            outputs_refs=outputs_refs,
            persistence_outputs=persistence_outputs)
        volumes += refs_volumes
        volume_mounts += refs_volume_mounts

    # Add volumes for persistence outputs secrets
    stores_secrets = get_stores_secrets(specs=outputs_specs)
    self.validate_stores_secrets_keys(stores_secrets=stores_secrets)
    secrets_volumes, secrets_volume_mounts = self.get_stores_secrets_volumes(
        stores_secrets=stores_secrets)
    volumes += secrets_volumes
    volume_mounts += secrets_volume_mounts

    resource_name = self.resource_manager.get_resource_name()
    tensorboard_url = self._get_proxy_url(namespace=self.namespace,
                                          job_name=TENSORBOARD_JOB_NAME,
                                          deployment_name=resource_name)

    # Get persistence outputs secrets auth commands, then append the
    # tensorboard invocation; everything is chained into one shell string.
    command_args = self.get_stores_secrets_command_args(
        stores_secrets=stores_secrets)
    tensorboard_cmd = (
        "tensorboard --logdir={log_dir} --port={port} --path_prefix={path_prefix}"
    ).format(log_dir=outputs_path, port=self.port, path_prefix=tensorboard_url)
    command_args.append(tensorboard_cmd)
    args = [' && '.join(command_args)]
    command = ["/bin/sh", "-c"]

    deployment_labels = get_labels(default_labels=self.resource_manager.labels,
                                   labels=labels)
    restart_policy = get_deployment_restart_policy(max_restarts)
    deployment = self.resource_manager.get_deployment(
        resource_name=resource_name,
        volume_mounts=volume_mounts,
        volumes=volumes,
        labels=deployment_labels,
        env_vars=None,
        command=command,
        args=args,
        persistence_outputs=persistence_outputs,
        outputs_refs_jobs=outputs_refs_jobs,
        outputs_refs_experiments=outputs_refs_experiments,
        resources=resources,
        annotations=annotations,
        ephemeral_token=None,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        ports=target_ports,
        reconcile_url=reconcile_url,
        max_restarts=max_restarts,
        restart_policy=restart_policy,
    )
    dep_resp, _ = self.create_or_update_deployment(name=resource_name,
                                                   body=deployment,
                                                   reraise=True)

    # The service uses the resource manager's own labels, not the merged ones.
    service = services.get_service(
        namespace=self.namespace,
        name=resource_name,
        labels=self.resource_manager.get_labels(),
        ports=ports,
        target_ports=target_ports,
        service_type=self._get_service_type(),
    )
    service_resp, _ = self.create_or_update_service(name=resource_name,
                                                    body=service,
                                                    reraise=True)
    results = {
        'deployment': dep_resp.to_dict(),
        'service': service_resp.to_dict(),
    }

    if self._use_ingress():
        # Ingress annotations come from configuration, not from the caller.
        ingress_annotations = json.loads(conf.get(K8S_INGRESS_ANNOTATIONS))
        ingress_path = '/tensorboards/{}'.format(self.project_name.replace('.', '/'))
        paths = [{
            'path': ingress_path,
            'backend': {
                'serviceName': resource_name,
                'servicePort': ports[0],
            },
        }]
        ingress = ingresses.get_ingress(
            namespace=self.namespace,
            name=resource_name,
            labels=self.resource_manager.get_labels(),
            annotations=ingress_annotations,
            paths=paths,
        )
        self.create_or_update_ingress(name=resource_name, body=ingress, reraise=True)

    return results
def start_notebook(self,
                   persistence_outputs=None,
                   persistence_data=None,
                   outputs_refs_jobs=None,
                   outputs_refs_experiments=None,
                   resources=None,
                   labels=None,
                   annotations=None,
                   secret_refs=None,
                   config_map_refs=None,
                   node_selector=None,
                   affinity=None,
                   tolerations=None,
                   backend=None,
                   max_restarts=None,
                   reconcile_url=None,
                   mount_code_in_notebooks=False):
    """Create or update the notebook deployment, service and ingress.

    The ingress is only created when ``self._use_ingress()`` is truthy.

    Args:
        persistence_outputs / persistence_data: used to build the pod
            volumes; also forwarded to the deployment builder.
        outputs_refs_jobs / outputs_refs_experiments: outputs-refs sources
            for additional volumes; also forwarded to the deployment builder.
        labels: combined with ``self.resource_manager.labels`` via the
            ``get_labels`` helper for the deployment.
        annotations: forwarded to the deployment builder (the ingress uses
            annotations loaded from configuration instead).
        backend: forwarded to ``self.get_notebook_args``.
        mount_code_in_notebooks: when truthy, the notebook code volume and
            mount are appended; also forwarded to ``self.get_notebook_args``.
        Remaining keyword arguments are forwarded unchanged to
        ``self.resource_manager.get_deployment``.

    Returns:
        A dict with keys ``'deployment'`` and ``'service'`` holding the dict
        form of the respective responses.
    """
    # The port is requested before ``self.port`` is read; keep this order.
    ports = [self.request_notebook_port()]
    target_ports = [self.port]

    volumes, volume_mounts = get_pod_volumes(persistence_outputs=persistence_outputs,
                                             persistence_data=persistence_data)

    # One refs-outputs volume set per source, in the original order.
    for outputs_refs in (outputs_refs_jobs, outputs_refs_experiments):
        refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
            outputs_refs=outputs_refs,
            persistence_outputs=persistence_outputs)
        volumes += refs_volumes
        volume_mounts += refs_volume_mounts

    shm_volumes, shm_volume_mounts = get_shm_volumes()
    volumes += shm_volumes
    volume_mounts += shm_volume_mounts

    auth_ctx_volumes, auth_ctx_mounts = get_auth_context_volumes()
    volumes += auth_ctx_volumes
    volume_mounts += auth_ctx_mounts

    if mount_code_in_notebooks:
        code_volume, code_volume_mount = self.get_notebook_code_volume()
        volumes.append(code_volume)
        volume_mounts.append(code_volume_mount)

    resource_name = self.resource_manager.get_resource_name()
    args = self.get_notebook_args(deployment_name=resource_name,
                                  mount_code_in_notebooks=mount_code_in_notebooks,
                                  backend=backend)
    command = ["/bin/sh", "-c"]

    deployment_labels = get_labels(default_labels=self.resource_manager.labels,
                                   labels=labels)
    # Evaluated in the same order the original call evaluated them.
    init_env_vars = self.get_init_env_vars()
    restart_policy = get_deployment_restart_policy(max_restarts)
    deployment = self.resource_manager.get_deployment(
        resource_name=resource_name,
        volume_mounts=volume_mounts,
        volumes=volumes,
        labels=deployment_labels,
        env_vars=None,
        command=command,
        args=args,
        init_env_vars=init_env_vars,
        persistence_outputs=persistence_outputs,
        persistence_data=persistence_data,
        outputs_refs_jobs=outputs_refs_jobs,
        outputs_refs_experiments=outputs_refs_experiments,
        secret_refs=secret_refs,
        config_map_refs=config_map_refs,
        resources=resources,
        annotations=annotations,
        ephemeral_token=None,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        ports=target_ports,
        # Only the auth-context mounts are handed to the init container.
        init_context_mounts=auth_ctx_mounts,
        reconcile_url=reconcile_url,
        max_restarts=max_restarts,
        restart_policy=restart_policy,
    )
    dep_resp, _ = self.create_or_update_deployment(name=resource_name,
                                                   body=deployment,
                                                   reraise=True)

    # The service uses the resource manager's own labels, not the merged ones.
    service = services.get_service(
        namespace=self.namespace,
        name=resource_name,
        labels=self.resource_manager.get_labels(),
        ports=ports,
        target_ports=target_ports,
        service_type=self._get_service_type(),
    )
    service_resp, _ = self.create_or_update_service(name=resource_name,
                                                    body=service,
                                                    reraise=True)
    results = {
        'deployment': dep_resp.to_dict(),
        'service': service_resp.to_dict(),
    }

    if self._use_ingress():
        # Ingress annotations come from configuration, not from the caller.
        ingress_annotations = json.loads(conf.get(K8S_INGRESS_ANNOTATIONS))
        ingress_path = '/notebooks/{}'.format(self.project_name.replace('.', '/'))
        paths = [{
            'path': ingress_path,
            'backend': {
                'serviceName': resource_name,
                'servicePort': ports[0],
            },
        }]
        ingress = ingresses.get_ingress(
            namespace=self.namespace,
            name=resource_name,
            labels=self.resource_manager.get_labels(),
            annotations=ingress_annotations,
            paths=paths,
        )
        self.create_or_update_ingress(name=resource_name, body=ingress, reraise=True)

    return results
def start_job(self,
              container_cmd_callback,
              persistence_outputs=None,
              persistence_data=None,
              outputs_refs_jobs=None,
              outputs_refs_experiments=None,
              resources=None,
              labels=None,
              annotations=None,
              secret_refs=None,
              config_map_refs=None,
              node_selector=None,
              affinity=None,
              tolerations=None,
              reconcile_url=None,
              max_restarts=None):
    """Create or update the job pod and return its response as a dict.

    Args:
        container_cmd_callback: zero-argument callable returning the
            ``(command, args)`` pair for the pod's container.
        persistence_outputs / persistence_data: used to build the pod
            volumes; also forwarded to the pod builder.
        outputs_refs_jobs / outputs_refs_experiments: outputs-refs sources
            for additional volumes; also forwarded to the pod builder.
        labels: combined with ``self.resource_manager.labels`` via the
            ``get_labels`` helper.
        Remaining keyword arguments are forwarded unchanged to
        ``self.resource_manager.get_pod``.

    Returns:
        The dict form of the response from ``self.create_or_update_pod``.
    """
    # Set and validate volumes
    volumes, volume_mounts = get_pod_volumes(
        persistence_outputs=persistence_outputs,
        persistence_data=persistence_data)

    # One refs-outputs volume set per source, in the original order.
    for outputs_refs in (outputs_refs_jobs, outputs_refs_experiments):
        refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
            outputs_refs=outputs_refs,
            persistence_outputs=persistence_outputs)
        volumes += refs_volumes
        volume_mounts += refs_volume_mounts

    shm_volumes, shm_volume_mounts = get_shm_volumes()
    volumes += shm_volumes
    volume_mounts += shm_volume_mounts

    auth_ctx_volumes, auth_ctx_mounts = get_auth_context_volumes()
    volumes += auth_ctx_volumes
    volume_mounts += auth_ctx_mounts

    command, args = container_cmd_callback()
    resource_name = self.resource_manager.get_resource_name()
    pod_labels = get_labels(default_labels=self.resource_manager.labels,
                            labels=labels)

    # Evaluated in the same order the original call evaluated them.
    init_env_vars = self.get_init_env_vars()
    restart_policy = get_pod_restart_policy(max_restarts)

    pod = self.resource_manager.get_pod(
        resource_name=resource_name,
        volume_mounts=volume_mounts,
        volumes=volumes,
        labels=pod_labels,
        env_vars=None,
        command=command,
        args=args,
        init_env_vars=init_env_vars,
        persistence_outputs=persistence_outputs,
        persistence_data=persistence_data,
        outputs_refs_jobs=outputs_refs_jobs,
        outputs_refs_experiments=outputs_refs_experiments,
        secret_refs=secret_refs,
        config_map_refs=config_map_refs,
        resources=resources,
        annotations=annotations,
        ephemeral_token=None,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        # Only the auth-context mounts are handed to the init container.
        init_context_mounts=auth_ctx_mounts,
        reconcile_url=reconcile_url,
        max_restarts=max_restarts,
        restart_policy=restart_policy,
    )
    pod_resp, _ = self.create_or_update_pod(name=resource_name,
                                            body=pod,
                                            reraise=True)
    return pod_resp.to_dict()