def stop_notebook(self):
    """Delete the Kubernetes resources created for this job's notebook.

    The deployment and the service are removed, followed by the ingress
    when `_use_ingress()` is truthy. All of them share one resource name
    built from `NOTEBOOK_JOB_NAME` and `self.job_uuid`.

    Returns:
        True when every delete call completed, False when a
        `PolyaxonK8SError` was raised by one of them.
    """
    resource_name = JOB_NAME_FORMAT.format(name=NOTEBOOK_JOB_NAME,
                                           job_uuid=self.job_uuid)
    try:
        self.delete_deployment(name=resource_name, reraise=True)
        self.delete_service(name=resource_name)
        if self._use_ingress():
            self.delete_ingress(name=resource_name)
    except PolyaxonK8SError:
        return False
    return True
def stop_tensorboard(self):
    """Delete the Kubernetes resources created for this job's tensorboard.

    The deployment and the service are removed, followed by the ingress
    when `_use_ingress()` is truthy. All of them share one resource name
    built from `TENSORBOARD_JOB_NAME` and `self.job_uuid`.

    Returns:
        True when every delete call completed, False when a
        `PolyaxonK8SError` or `ConfigException` was raised by one of them.
    """
    resource_name = JOB_NAME_FORMAT.format(name=TENSORBOARD_JOB_NAME,
                                           job_uuid=self.job_uuid)
    try:
        # NOTE(review): unlike stop_notebook, no `reraise=True` is passed
        # here -- confirm that deployment deletion failures are meant to be
        # handled inside delete_deployment for tensorboards.
        self.delete_deployment(name=resource_name)
        self.delete_service(name=resource_name)
        if self._use_ingress():
            self.delete_ingress(name=resource_name)
    except (PolyaxonK8SError, ConfigException):
        return False
    return True
def get_deployment_spec(namespace,
                        app,
                        name,
                        project_name,
                        project_uuid,
                        job_name,
                        job_uuid,
                        volume_mounts,
                        volumes,
                        image,
                        command,
                        args,
                        ports,
                        env_vars=None,
                        env_from=None,
                        container_name=None,
                        resources=None,
                        node_selector=None,
                        affinity=None,
                        tolerations=None,
                        role=None,
                        type=None,  # pylint:disable=redefined-builtin
                        replicas=1,
                        service_account_name=None):
    """Build the deployment spec wrapping a single pod template.

    The pod template's metadata carries the labels produced by `get_labels`
    and a name formatted from `name` and `job_uuid`; its spec comes from
    `get_project_pod_spec`.

    Returns:
        A `client.AppsV1beta1DeploymentSpec` with `replicas` replicas of
        that template.
    """
    template_labels = get_labels(app=app,
                                 project_name=project_name,
                                 project_uuid=project_uuid,
                                 job_name=job_name,
                                 job_uuid=job_uuid,
                                 role=role,
                                 type=type)
    resource_name = JOB_NAME_FORMAT.format(name=name, job_uuid=job_uuid)
    template_metadata = client.V1ObjectMeta(name=resource_name,
                                            labels=template_labels,
                                            namespace=namespace)
    template_pod_spec = get_project_pod_spec(
        volume_mounts=volume_mounts,
        volumes=volumes,
        image=image,
        container_name=container_name,
        command=command,
        args=args,
        resources=resources,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        ports=ports,
        env_vars=env_vars,
        env_from=env_from,
        service_account_name=service_account_name)
    pod_template = client.V1PodTemplateSpec(metadata=template_metadata,
                                            spec=template_pod_spec)
    return client.AppsV1beta1DeploymentSpec(replicas=replicas, template=pod_template)
def process_logs(build, temp=True):
    """Collect the dockerizer pod logs of `build` and hand them to `safe_log_job`.

    Args:
        build: object exposing `uuid.hex` (used for the pod name) and
            `unique_name` (used as the log job name).
        temp: forwarded unchanged to `safe_log_job`.
    """
    dockerizer_pod = JOB_NAME_FORMAT.format(name=DOCKERIZER_JOB_NAME,
                                            job_uuid=build.uuid.hex)
    manager = K8SManager(namespace=settings.K8S_NAMESPACE, in_cluster=True)
    collected_lines = base.process_logs(
        k8s_manager=manager,
        pod_id=dockerizer_pod,
        container_job_name=settings.CONTAINER_NAME_DOCKERIZER_JOB)
    # append=False is passed explicitly together with the caller's `temp`.
    safe_log_job(job_name=build.unique_name,
                 log_lines=collected_lines,
                 temp=temp,
                 append=False)
def start_dockerizer(self,
                     resources=None,
                     node_selector=None,
                     affinity=None,
                     tolerations=None):
    """Create or update the dockerizer pod for this build job.

    Scheduling hints that are not supplied fall back, through the
    `get_node_selector` / `get_affinity` / `get_tolerations` helpers, to the
    `*_BUILDS` values read from `conf`.

    Returns:
        The pod response of `create_or_update_pod`, converted with `to_dict()`.
    """
    docker_volumes, docker_volume_mounts = get_docker_volumes()

    node_selector = get_node_selector(
        node_selector=node_selector,
        default_node_selector=conf.get('NODE_SELECTOR_BUILDS'))
    affinity = get_affinity(
        affinity=affinity,
        default_affinity=conf.get('AFFINITY_BUILDS'))
    tolerations = get_tolerations(
        tolerations=tolerations,
        default_tolerations=conf.get('TOLERATIONS_BUILDS'))

    pod_definition = pods.get_pod(
        namespace=self.namespace,
        app=conf.get('APP_LABELS_DOCKERIZER'),
        name=DOCKERIZER_JOB_NAME,
        project_name=self.project_name,
        project_uuid=self.project_uuid,
        job_name=self.job_name,
        job_uuid=self.job_uuid,
        volume_mounts=docker_volume_mounts,
        volumes=docker_volumes,
        image=conf.get('JOB_DOCKERIZER_IMAGE'),
        image_pull_policy=conf.get('JOB_DOCKERIZER_IMAGE_PULL_POLICY'),
        command=None,
        args=[self.job_uuid],
        ports=[],
        env_vars=self.get_env_vars(),
        container_name=conf.get('CONTAINER_NAME_DOCKERIZER_JOB'),
        resources=resources,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        role=conf.get('ROLE_LABELS_WORKER'),
        type=conf.get('TYPE_LABELS_RUNNER'),
        service_account_name=conf.get('K8S_SERVICE_ACCOUNT_BUILDS'),
        restart_policy='Never')

    dockerizer_pod_name = JOB_NAME_FORMAT.format(job_uuid=self.job_uuid,
                                                 name=DOCKERIZER_JOB_NAME)
    # Only the first element of the returned pair is used.
    response, _ = self.create_or_update_pod(name=dockerizer_pod_name,
                                            data=pod_definition)
    return response.to_dict()
async def job_logs_v2(request, ws, username, project_name, job_id):
    """Stream a job's logs over the websocket `ws`.

    The job is first resolved with `validate_job`; when that yields no job,
    the accompanying message is sent back as an error and the handler stops.
    Otherwise a `JOB_LOGS_VIEWED` event is recorded for the requesting user
    and the logs are streamed through `log_job`.
    """
    job, message = validate_job(request=request,
                                username=username,
                                project_name=project_name,
                                job_id=job_id)
    if job is None:
        await ws.send(get_error_message(message))
        return

    job_uuid = job.uuid.hex
    viewer = request.app.user
    auditor.record(event_type=JOB_LOGS_VIEWED,
                   instance=job,
                   actor_id=viewer.id,
                   actor_name=viewer.username)

    job_pod = JOB_NAME_FORMAT.format(name=JOB_NAME, job_uuid=job_uuid)
    # Stream logs
    await log_job(request=request,
                  ws=ws,
                  job=job,
                  pod_id=job_pod,
                  container=settings.CONTAINER_NAME_JOB,
                  namespace=settings.K8S_NAMESPACE)
def pod_id(self) -> str:
    """Return the dockerizer resource name derived from this object's uuid hex."""
    uuid_hex = self.uuid.hex
    return JOB_NAME_FORMAT.format(name=DOCKERIZER_JOB_NAME, job_uuid=uuid_hex)
def start_notebook(self,
                   image,
                   persistence_outputs=None,
                   persistence_data=None,
                   outputs_refs_jobs=None,
                   outputs_refs_experiments=None,
                   resources=None,
                   secret_refs=None,
                   configmap_refs=None,
                   node_selector=None,
                   affinity=None,
                   tolerations=None,
                   allow_commits=False):
    """Create or update the deployment, service and (optionally) ingress of a notebook.

    Volumes are gathered from the persistence settings, the referenced job and
    experiment outputs, the shm helper and the notebook code volume. Scheduling
    hints that are not supplied fall back to the `*_EXPERIMENTS` settings
    through the `get_node_selector` / `get_affinity` / `get_tolerations` helpers.

    Returns:
        A dict with the `'deployment'` and `'service'` API responses, each
        converted with `to_dict()`. The ingress response is not included.
    """
    ports = [self.request_notebook_port()]
    target_ports = [self.PORT]

    # Base volumes first, then the volumes of referenced jobs and experiments.
    volumes, volume_mounts = get_pod_volumes(persistence_outputs=persistence_outputs,
                                             persistence_data=persistence_data)
    for outputs_refs in (outputs_refs_jobs, outputs_refs_experiments):
        extra_volumes, extra_volume_mounts = get_pod_refs_outputs_volumes(
            outputs_refs=outputs_refs,
            persistence_outputs=persistence_outputs)
        volumes += extra_volumes
        volume_mounts += extra_volume_mounts

    shm_volumes, shm_volume_mounts = get_shm_volumes()
    volumes += shm_volumes
    volume_mounts += shm_volume_mounts

    env_vars = get_job_env_vars(
        persistence_outputs=persistence_outputs,
        outputs_path=get_notebook_job_outputs_path(
            persistence_outputs=persistence_outputs,
            notebook_job=self.job_name),
        persistence_data=persistence_data,
        outputs_refs_jobs=outputs_refs_jobs,
        outputs_refs_experiments=outputs_refs_experiments)

    secret_refs = validate_secret_refs(secret_refs)
    configmap_refs = validate_configmap_refs(configmap_refs)
    env_from = get_pod_env_from(secret_refs=secret_refs,
                                configmap_refs=configmap_refs)

    code_volume, code_volume_mount = self.get_notebook_code_volume()
    volumes.append(code_volume)
    volume_mounts.append(code_volume_mount)

    resource_name = JOB_NAME_FORMAT.format(name=NOTEBOOK_JOB_NAME,
                                           job_uuid=self.job_uuid)

    node_selector = get_node_selector(
        node_selector=node_selector,
        default_node_selector=settings.NODE_SELECTOR_EXPERIMENTS)
    affinity = get_affinity(
        affinity=affinity,
        default_affinity=settings.AFFINITY_EXPERIMENTS)
    tolerations = get_tolerations(
        tolerations=tolerations,
        default_tolerations=settings.TOLERATIONS_EXPERIMENTS)

    deployment = deployments.get_deployment(
        namespace=self.namespace,
        app=settings.APP_LABELS_NOTEBOOK,
        name=NOTEBOOK_JOB_NAME,
        project_name=self.project_name,
        project_uuid=self.project_uuid,
        job_name=self.job_name,
        job_uuid=self.job_uuid,
        volume_mounts=volume_mounts,
        volumes=volumes,
        image=image,
        command=["/bin/sh", "-c"],
        args=self.get_notebook_args(deployment_name=resource_name,
                                    ports=ports,
                                    allow_commits=allow_commits),
        ports=target_ports,
        container_name=settings.CONTAINER_NAME_PLUGIN_JOB,
        env_vars=env_vars,
        env_from=env_from,
        resources=resources,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        role=settings.ROLE_LABELS_DASHBOARD,
        type=settings.TYPE_LABELS_RUNNER,
        service_account_name=settings.K8S_SERVICE_ACCOUNT_EXPERIMENTS)
    # The service and the ingress are labelled like the deployment.
    shared_labels = deployments.get_labels(
        app=settings.APP_LABELS_NOTEBOOK,
        project_name=self.project_name,
        project_uuid=self.project_uuid,
        job_name=self.job_name,
        job_uuid=self.job_uuid,
        role=settings.ROLE_LABELS_DASHBOARD,
        type=settings.TYPE_LABELS_RUNNER)

    deployment_response, _ = self.create_or_update_deployment(name=resource_name,
                                                              data=deployment)

    service = services.get_service(namespace=self.namespace,
                                   name=resource_name,
                                   labels=shared_labels,
                                   ports=ports,
                                   target_ports=target_ports,
                                   service_type=self._get_service_type())
    service_response, _ = self.create_or_update_service(name=resource_name,
                                                        data=service)

    results = {
        'deployment': deployment_response.to_dict(),
        'service': service_response.to_dict(),
    }

    if not self._use_ingress():
        return results

    ingress_annotations = json.loads(settings.K8S_INGRESS_ANNOTATIONS)
    ingress_paths = [{
        'path': '/notebook/{}'.format(self.project_name.replace('.', '/')),
        'backend': {
            'serviceName': resource_name,
            'servicePort': ports[0]
        }
    }]
    ingress = ingresses.get_ingress(namespace=self.namespace,
                                    name=resource_name,
                                    labels=shared_labels,
                                    annotations=ingress_annotations,
                                    paths=ingress_paths)
    self.create_or_update_ingress(name=resource_name, data=ingress)
    return results
def pod_id(self) -> str:
    """Return the tensorboard resource name derived from this object's uuid hex."""
    uuid_hex = self.uuid.hex
    return JOB_NAME_FORMAT.format(name=TENSORBOARD_JOB_NAME, job_uuid=uuid_hex)
def get_resource_name(self):
    """Return the resource name formatted from `self.name` and `self.job_uuid`."""
    name_parts = {'name': self.name, 'job_uuid': self.job_uuid}
    return JOB_NAME_FORMAT.format(**name_parts)
def pod_id(self):
    """Return the job resource name derived from this object's uuid hex."""
    uuid_hex = self.uuid.hex
    return JOB_NAME_FORMAT.format(name=JOB_NAME, job_uuid=uuid_hex)
def get_deployment(namespace,
                   app,
                   name,
                   project_name,
                   project_uuid,
                   job_name,
                   job_uuid,
                   volume_mounts,
                   volumes,
                   image,
                   command,
                   args,
                   ports,
                   container_name,
                   env_vars=None,
                   env_from=None,
                   resources=None,
                   node_selector=None,
                   affinity=None,
                   tolerations=None,
                   role=None,
                   type=None,  # pylint:disable=redefined-builtin
                   replicas=1,
                   service_account_name=None):
    """Build a complete deployment object for a project job.

    The deployment metadata carries the labels produced by `get_labels` and a
    name formatted from `name` and `job_uuid`; the spec is delegated to
    `get_deployment_spec` with the same arguments.

    Returns:
        A `client.AppsV1beta1Deployment` using the api version and kind
        constants from `k8s_constants`.
    """
    deployment_labels = get_labels(app=app,
                                   project_name=project_name,
                                   project_uuid=project_uuid,
                                   job_name=job_name,
                                   job_uuid=job_uuid,
                                   role=role,
                                   type=type)
    resource_name = JOB_NAME_FORMAT.format(name=name, job_uuid=job_uuid)
    deployment_metadata = client.V1ObjectMeta(name=resource_name,
                                              labels=deployment_labels,
                                              namespace=namespace)
    deployment_spec = get_deployment_spec(
        namespace=namespace,
        app=app,
        name=name,
        project_name=project_name,
        project_uuid=project_uuid,
        job_name=job_name,
        job_uuid=job_uuid,
        volume_mounts=volume_mounts,
        volumes=volumes,
        image=image,
        command=command,
        args=args,
        ports=ports,
        env_vars=env_vars,
        env_from=env_from,
        container_name=container_name,
        resources=resources,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        role=role,
        type=type,
        replicas=replicas,
        service_account_name=service_account_name)
    return client.AppsV1beta1Deployment(
        api_version=k8s_constants.K8S_API_VERSION_EXTENSIONS_V1_BETA1,
        kind=k8s_constants.K8S_DEPLOYMENT_KIND,
        metadata=deployment_metadata,
        spec=deployment_spec)
def pod_id(self) -> str:
    """Return the notebook resource name derived from this object's uuid hex."""
    uuid_hex = self.uuid.hex
    return JOB_NAME_FORMAT.format(name=NOTEBOOK_JOB_NAME, job_uuid=uuid_hex)
def start_tensorboard(self,
                      image,
                      outputs_path,
                      persistence_outputs,
                      outputs_specs=None,
                      outputs_refs_jobs=None,
                      outputs_refs_experiments=None,
                      resources=None,
                      node_selector=None,
                      affinity=None,
                      tolerations=None):
    """Create or update the deployment, service and (optionally) ingress of a tensorboard.

    Volumes are gathered from the outputs persistence, the referenced jobs,
    output specs and experiments, and the stores secrets. The container runs
    the stores-secrets commands followed by the tensorboard command, joined
    with `' && '`. Scheduling hints that are not supplied fall back to the
    `*_TENSORBOARDS` settings through the `get_node_selector` /
    `get_affinity` / `get_tolerations` helpers.

    Returns:
        A dict with the `'deployment'` and `'service'` API responses, each
        converted with `to_dict()`. The ingress response is not included.
    """
    ports = [self.request_tensorboard_port()]
    target_ports = [self.PORT]

    volumes, volume_mounts = get_pod_outputs_volume(persistence_outputs)
    # Same helper for each kind of reference, in the order jobs, specs, experiments.
    for outputs_refs in (outputs_refs_jobs, outputs_specs, outputs_refs_experiments):
        extra_volumes, extra_volume_mounts = get_pod_refs_outputs_volumes(
            outputs_refs=outputs_refs,
            persistence_outputs=persistence_outputs)
        volumes += extra_volumes
        volume_mounts += extra_volume_mounts

    # Add volumes for persistence outputs secrets
    stores_secrets = get_stores_secrets(specs=outputs_specs)
    self.validate_stores_secrets_keys(stores_secrets=stores_secrets)
    secrets_volumes, secrets_volume_mounts = self.get_stores_secrets_volumes(
        stores_secrets=stores_secrets)
    volumes += secrets_volumes
    volume_mounts += secrets_volume_mounts

    # Get persistence outputs secrets auth commands
    shell_commands = self.get_stores_secrets_command_args(stores_secrets=stores_secrets)
    shell_commands.append(
        "tensorboard --logdir={} --port={}".format(outputs_path, self.PORT))

    node_selector = get_node_selector(
        node_selector=node_selector,
        default_node_selector=settings.NODE_SELECTOR_TENSORBOARDS)
    affinity = get_affinity(
        affinity=affinity,
        default_affinity=settings.AFFINITY_TENSORBOARDS)
    tolerations = get_tolerations(
        tolerations=tolerations,
        default_tolerations=settings.TOLERATIONS_TENSORBOARDS)

    deployment = deployments.get_deployment(
        namespace=self.namespace,
        app=settings.APP_LABELS_TENSORBOARD,
        name=TENSORBOARD_JOB_NAME,
        project_name=self.project_name,
        project_uuid=self.project_uuid,
        job_name=self.job_name,
        job_uuid=self.job_uuid,
        volume_mounts=volume_mounts,
        volumes=volumes,
        image=image,
        command=["/bin/sh", "-c"],
        args=[' && '.join(shell_commands)],
        ports=target_ports,
        container_name=settings.CONTAINER_NAME_PLUGIN_JOB,
        resources=resources,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        role=settings.ROLE_LABELS_DASHBOARD,
        type=settings.TYPE_LABELS_RUNNER)

    resource_name = JOB_NAME_FORMAT.format(name=TENSORBOARD_JOB_NAME,
                                           job_uuid=self.job_uuid)
    # The service and the ingress are labelled like the deployment.
    shared_labels = deployments.get_labels(
        app=settings.APP_LABELS_TENSORBOARD,
        project_name=self.project_name,
        project_uuid=self.project_uuid,
        job_name=self.job_name,
        job_uuid=self.job_uuid,
        role=settings.ROLE_LABELS_DASHBOARD,
        type=settings.TYPE_LABELS_RUNNER)

    deployment_response, _ = self.create_or_update_deployment(name=resource_name,
                                                              data=deployment)

    service = services.get_service(namespace=self.namespace,
                                   name=resource_name,
                                   labels=shared_labels,
                                   ports=ports,
                                   target_ports=target_ports,
                                   service_type=self._get_service_type())
    service_response, _ = self.create_or_update_service(name=resource_name,
                                                        data=service)

    results = {
        'deployment': deployment_response.to_dict(),
        'service': service_response.to_dict(),
    }

    if not self._use_ingress():
        return results

    ingress_annotations = json.loads(settings.K8S_INGRESS_ANNOTATIONS)
    ingress_paths = [{
        'path': '/tensorboard/{}'.format(self.project_name.replace('.', '/')),
        'backend': {
            'serviceName': resource_name,
            'servicePort': ports[0]
        }
    }]
    ingress = ingresses.get_ingress(namespace=self.namespace,
                                    name=resource_name,
                                    labels=shared_labels,
                                    annotations=ingress_annotations,
                                    paths=ingress_paths)
    self.create_or_update_ingress(name=resource_name, data=ingress)
    return results
def get_k8s_job_name(self):
    """Return the Kubernetes job name formatted from `self.name` and `self.job_uuid`."""
    name_parts = {'name': self.name, 'job_uuid': self.job_uuid}
    return JOB_NAME_FORMAT.format(**name_parts)