def test_get_pod_outputs_volume_update_settings(self):
    """Outputs volume matches the configured persistence backend.

    Covers three configured stores:
    * 'outputs1' — PVC-backed store: one volume bound to its claim name,
    * 'outputs2' — host-path store: one volume with the host path,
    * 'outputs3' — store with no mountable volume: empty volume list.
    """
    # PVC-backed outputs store -> single volume bound to the claim.
    volumes, _ = get_pod_outputs_volume(persistence_outputs='outputs1')
    assert len(volumes) == 1
    assert volumes[0].persistent_volume_claim.claim_name == 'test-claim-outputs-1'

    # Host-path outputs store -> single host-path volume.
    volumes, _ = get_pod_outputs_volume(persistence_outputs='outputs2')
    assert len(volumes) == 1
    assert volumes[0].host_path.path == '/root/outputs'

    # Store with nothing to mount -> no volumes.
    # Consistency fix: the original mixed `self.assertEqual` with the plain
    # `assert` style used everywhere else in this test; use plain asserts.
    volumes, _ = get_pod_outputs_volume(persistence_outputs='outputs3')
    assert len(volumes) == 0
def get_init_container(self,
                       init_command,
                       init_args,
                       env_vars,
                       context_mounts,
                       persistence_outputs,
                       persistence_data):
    """Pod init container for setting outputs path.

    Builds a `V1Container` that creates the job's outputs directory and
    writes the auth context before the main container starts.

    Args:
        init_command: override for the container command; defaults to
            `["/bin/sh", "-c"]`.
        init_args: override for the generated outputs-creation args.
        env_vars: env vars forwarded to the init container (may be None).
        context_mounts: extra volume mounts appended after the outputs mount.
        persistence_outputs: name of the outputs persistence backend.
        persistence_data: unused here; kept for signature compatibility.

    Returns:
        client.V1Container: the configured init container.
    """
    env_vars = to_list(env_vars, check_none=True)
    outputs_path = stores.get_job_outputs_path(
        persistence=persistence_outputs,
        job_name=self.job_name)
    _, outputs_volume_mount = get_pod_outputs_volume(
        persistence_outputs=persistence_outputs)
    volume_mounts = outputs_volume_mount + to_list(context_mounts, check_none=True)
    init_command = init_command or ["/bin/sh", "-c"]
    init_args = init_args or to_list(
        get_output_args(command=InitCommands.CREATE,
                        outputs_path=outputs_path))
    init_args += to_list(
        get_auth_context_args(entity='job', entity_name=self.job_name))
    return client.V1Container(
        name=self.init_container_name,
        image=self.init_docker_image,
        image_pull_policy=self.init_docker_image_pull_policy,
        command=init_command,
        # BUG FIX: the original joined with '' which fused the outputs
        # command and the auth-context command into a single unparsable
        # string under `sh -c`; chain them with '&&' so both run in order.
        args=[' && '.join(init_args)],
        env=env_vars,
        volume_mounts=volume_mounts)
def get_init_container(self, persistence_outputs):
    """Pod init container for setting outputs path."""
    is_clone = self.original_name is not None
    # A resumed experiment reuses the original outputs: nothing to set up.
    if is_clone and self.cloning_strategy == CloningStrategy.RESUME:
        return []

    if is_clone and self.cloning_strategy == CloningStrategy.COPY:
        # Copy-clones seed their outputs from the original experiment.
        command = InitCommands.COPY
        original_outputs_path = get_experiment_outputs_path(
            persistence_outputs=persistence_outputs,
            experiment_name=self.original_name)
    else:
        command = InitCommands.CREATE
        original_outputs_path = None

    outputs_path = get_experiment_outputs_path(
        persistence_outputs=persistence_outputs,
        experiment_name=self.experiment_name)
    _, outputs_volume_mount = get_pod_outputs_volume(
        persistence_outputs=persistence_outputs)
    output_args = get_output_args(
        command=command,
        outputs_path=outputs_path,
        original_outputs_path=original_outputs_path)
    container = client.V1Container(
        name=self.init_container_name,
        image=self.init_docker_image,
        command=["/bin/sh", "-c"],
        args=to_list(output_args),
        volume_mounts=outputs_volume_mount)
    return [container]
def get_init_container(self, persistence_outputs):
    """Pod init container for setting outputs path."""
    job_outputs_path = get_job_outputs_path(
        persistence_outputs=persistence_outputs,
        job_name=self.job_name)
    _, outputs_volume_mount = get_pod_outputs_volume(
        persistence_outputs=persistence_outputs)
    # The init container only has to create the outputs directory.
    create_args = get_output_args(
        command=InitCommands.CREATE,
        outputs_path=job_outputs_path)
    return client.V1Container(
        name=self.init_container_name,
        image=self.init_docker_image,
        command=["/bin/sh", "-c"],
        args=to_list(create_args),
        volume_mounts=outputs_volume_mount)
def get_init_container(self,
                       init_command,
                       init_args,
                       env_vars,
                       context_mounts,
                       persistence_outputs,
                       persistence_data):
    """Pod init container for setting outputs path.

    NOTE(review): `env_vars`, `context_mounts` and `persistence_data` are
    accepted but not used by this variant — presumably for signature
    compatibility with sibling implementations; confirm this is intentional.
    """
    outputs_path = stores.get_job_outputs_path(
        persistence=persistence_outputs,
        job_name=self.job_name)
    _, outputs_volume_mount = get_pod_outputs_volume(
        persistence_outputs=persistence_outputs)
    # Fall back to the default shell command / outputs-creation args when
    # the caller did not provide overrides.
    command = init_command if init_command else ["/bin/sh", "-c"]
    args = init_args
    if not args:
        args = to_list(get_output_args(command=InitCommands.CREATE,
                                       outputs_path=outputs_path))
    return client.V1Container(
        name=self.init_container_name,
        image=self.init_docker_image,
        image_pull_policy=self.init_docker_image_pull_policy,
        command=command,
        args=args,
        volume_mounts=outputs_volume_mount)
def get_init_container(self,
                       init_command,
                       init_args,
                       env_vars,
                       context_mounts,
                       persistence_outputs,
                       persistence_data):
    """Pod init container for setting outputs path.

    For a resumed clone there is nothing to initialize; for a copy clone the
    outputs are seeded from the original experiment, otherwise the outputs
    directory is created fresh. The auth-context command is always appended.

    Args:
        init_command: override for the container command; defaults to
            `["/bin/sh", "-c"]`.
        init_args: override for the generated outputs args.
        env_vars: env vars forwarded to the init container (may be None).
        context_mounts: extra volume mounts appended after the outputs mount.
        persistence_outputs: name of the outputs persistence backend.
        persistence_data: unused here; kept for signature compatibility.

    Returns:
        list: empty for RESUME clones, otherwise a single `V1Container`.
    """
    env_vars = to_list(env_vars, check_none=True)
    # Resumed experiments reuse the original outputs: no init needed.
    if self.original_name is not None and self.cloning_strategy == CloningStrategy.RESUME:
        return []
    if self.original_name is not None and self.cloning_strategy == CloningStrategy.COPY:
        command = InitCommands.COPY
        original_outputs_path = stores.get_experiment_outputs_path(
            persistence=persistence_outputs,
            experiment_name=self.original_name)
    else:
        command = InitCommands.CREATE
        original_outputs_path = None
    outputs_path = stores.get_experiment_outputs_path(
        persistence=persistence_outputs,
        experiment_name=self.experiment_name)
    _, outputs_volume_mount = get_pod_outputs_volume(
        persistence_outputs=persistence_outputs)
    volume_mounts = outputs_volume_mount + to_list(context_mounts, check_none=True)
    init_command = init_command or ["/bin/sh", "-c"]
    init_args = init_args or to_list(
        get_output_args(command=command,
                        outputs_path=outputs_path,
                        original_outputs_path=original_outputs_path))
    init_args += to_list(
        get_auth_context_args(entity='experiment',
                              entity_name=self.experiment_name))
    return [
        client.V1Container(
            name=self.init_container_name,
            image=self.init_docker_image,
            image_pull_policy=self.init_docker_image_pull_policy,
            command=init_command,
            # BUG FIX: the original joined with '' which fused the outputs
            # command and the auth-context command into a single unparsable
            # string under `sh -c`; chain them with '&&' so both run in order.
            args=[' && '.join(init_args)],
            env=env_vars,
            resources=get_init_resources(),
            volume_mounts=volume_mounts)
    ]
def test_get_pod_outputs_volume_wrong_values(self):
    """An unknown persistence name must raise VolumeNotFoundError."""
    self.assertRaises(VolumeNotFoundError,
                      get_pod_outputs_volume,
                      persistence_outputs='foo')
def test_get_pod_outputs_volume(self):
    """Passing None falls back to the default outputs claim."""
    volumes, _ = get_pod_outputs_volume(None)
    assert len(volumes) == 1
    claim = volumes[0].persistent_volume_claim
    assert claim.claim_name == 'test-claim-outputs'
def start_tensorboard(self,
                      image,
                      outputs_path,
                      persistence_outputs,
                      outputs_refs_jobs=None,
                      outputs_refs_experiments=None,
                      resources=None,
                      node_selectors=None):
    """Create/update the tensorboard deployment, service, and (optionally) ingress.

    Mounts the outputs volume plus the outputs of any referenced jobs and
    experiments, then runs `tensorboard --logdir=<outputs_path>` in a
    deployment exposed through a service (and an ingress when enabled).

    Returns:
        dict: the created/updated 'deployment' and 'service' as dicts.
    """
    ports = [self.request_tensorboard_port()]
    target_ports = [self.PORT]
    # Outputs volume for this job, then append volumes/mounts for any
    # referenced jobs' and experiments' outputs.
    volumes, volume_mounts = get_pod_outputs_volume(persistence_outputs)
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_refs_jobs,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_refs_experiments,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    deployment = deployments.get_deployment(
        namespace=self.namespace,
        app=settings.APP_LABELS_TENSORBOARD,
        name=self.TENSORBOARD_JOB_NAME,
        project_name=self.project_name,
        project_uuid=self.project_uuid,
        job_name=self.job_name,
        job_uuid=self.job_uuid,
        volume_mounts=volume_mounts,
        volumes=volumes,
        image=image,
        command=["/bin/sh", "-c"],
        args=[
            "tensorboard --logdir={} --port={}".format(
                outputs_path, self.PORT)
        ],
        ports=target_ports,
        container_name=settings.CONTAINER_NAME_PLUGIN_JOB,
        resources=resources,
        node_selector=node_selectors,
        role=settings.ROLE_LABELS_DASHBOARD,
        type=settings.TYPE_LABELS_EXPERIMENT)
    deployment_name = constants.JOB_NAME.format(
        name=self.TENSORBOARD_JOB_NAME, job_uuid=self.job_uuid)
    deployment_labels = deployments.get_labels(
        app=settings.APP_LABELS_TENSORBOARD,
        project_name=self.project_name,
        project_uuid=self.project_uuid,
        job_name=self.job_name,
        job_uuid=self.job_uuid,
        role=settings.ROLE_LABELS_DASHBOARD,
        type=settings.TYPE_LABELS_EXPERIMENT)
    dep_resp, _ = self.create_or_update_deployment(name=deployment_name,
                                                   data=deployment)
    service = services.get_service(namespace=self.namespace,
                                   name=deployment_name,
                                   labels=deployment_labels,
                                   ports=ports,
                                   target_ports=target_ports,
                                   service_type=self._get_service_type())
    service_resp, _ = self.create_or_update_service(name=deployment_name,
                                                    data=service)
    results = {
        'deployment': dep_resp.to_dict(),
        'service': service_resp.to_dict()
    }
    if self._use_ingress():
        # Route /tensorboard/<project path> to this service via ingress.
        annotations = json.loads(settings.K8S_INGRESS_ANNOTATIONS)
        paths = [{
            'path':
                '/tensorboard/{}'.format(self.project_name.replace('.', '/')),
            'backend': {
                'serviceName': deployment_name,
                'servicePort': ports[0]
            }
        }]
        ingress = ingresses.get_ingress(namespace=self.namespace,
                                        name=deployment_name,
                                        labels=deployment_labels,
                                        annotations=annotations,
                                        paths=paths)
        self.create_or_update_ingress(name=deployment_name, data=ingress)
    return results
def start_tensorboard(self,
                      outputs_path,
                      persistence_outputs,
                      outputs_specs=None,
                      outputs_refs_jobs=None,
                      outputs_refs_experiments=None,
                      resources=None,
                      node_selector=None,
                      affinity=None,
                      tolerations=None):
    """Create/update the tensorboard deployment, service, and (optionally) ingress.

    Mounts the outputs volume, any referenced jobs'/experiments'/spec
    outputs, and the secrets required by the configured stores, then runs
    the store auth commands chained with `tensorboard --logdir=...` under
    `sh -c`.

    Returns:
        dict: the created/updated 'deployment' and 'service' as dicts.
    """
    ports = [self.request_tensorboard_port()]
    target_ports = [self.PORT]
    # Outputs volume for this job, then append volumes/mounts for each
    # group of referenced outputs (jobs, specs, experiments).
    volumes, volume_mounts = get_pod_outputs_volume(persistence_outputs)
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_refs_jobs,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_specs,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_refs_experiments,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    # Add volumes for persistence outputs secrets
    stores_secrets = get_stores_secrets(specs=outputs_specs)
    self.validate_stores_secrets_keys(stores_secrets=stores_secrets)
    secrets_volumes, secrets_volume_mounts = self.get_stores_secrets_volumes(
        stores_secrets=stores_secrets)
    volumes += secrets_volumes
    volume_mounts += secrets_volume_mounts
    resource_name = self.resource_manager.get_resource_name()
    # Get persistence outputs secrets auth commands; tensorboard runs last,
    # after the auth commands, all chained with '&&' under `sh -c`.
    command_args = self.get_stores_secrets_command_args(
        stores_secrets=stores_secrets)
    command_args.append("tensorboard --logdir={} --port={}".format(
        outputs_path, self.PORT))
    args = [' && '.join(command_args)]
    command = ["/bin/sh", "-c"]
    deployment = self.resource_manager.get_deployment(
        resource_name=resource_name,
        volume_mounts=volume_mounts,
        volumes=volumes,
        labels=self.resource_manager.labels,
        env_vars=None,
        command=command,
        args=args,
        persistence_outputs=persistence_outputs,
        outputs_refs_jobs=outputs_refs_jobs,
        outputs_refs_experiments=outputs_refs_experiments,
        resources=resources,
        ephemeral_token=None,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        ports=target_ports,
        restart_policy='Never')
    dep_resp, _ = self.create_or_update_deployment(name=resource_name,
                                                   data=deployment)
    service = services.get_service(
        namespace=self.namespace,
        name=resource_name,
        labels=self.resource_manager.get_labels(),
        ports=ports,
        target_ports=target_ports,
        service_type=self._get_service_type())
    service_resp, _ = self.create_or_update_service(name=resource_name,
                                                    data=service)
    results = {
        'deployment': dep_resp.to_dict(),
        'service': service_resp.to_dict()
    }
    if self._use_ingress():
        # Route /tensorboards/<project path> to this service via ingress.
        annotations = json.loads(conf.get('K8S_INGRESS_ANNOTATIONS'))
        paths = [{
            'path':
                '/tensorboards/{}'.format(self.project_name.replace('.', '/')),
            'backend': {
                'serviceName': resource_name,
                'servicePort': ports[0]
            }
        }]
        ingress = ingresses.get_ingress(
            namespace=self.namespace,
            name=resource_name,
            labels=self.resource_manager.get_labels(),
            annotations=annotations,
            paths=paths)
        self.create_or_update_ingress(name=resource_name, data=ingress)
    return results
def start_tensorboard(self,
                      image,
                      outputs_path,
                      persistence_outputs,
                      outputs_specs=None,
                      outputs_refs_jobs=None,
                      outputs_refs_experiments=None,
                      resources=None,
                      node_selector=None,
                      affinity=None,
                      tolerations=None):
    """Create/update the tensorboard deployment, service, and (optionally) ingress.

    Mounts the outputs volume, any referenced jobs'/experiments'/spec
    outputs, and store secrets; merges scheduling settings (node selector,
    affinity, tolerations) with the tensorboard defaults from settings;
    then runs the store auth commands chained with `tensorboard` under
    `sh -c`.

    Returns:
        dict: the created/updated 'deployment' and 'service' as dicts.
    """
    ports = [self.request_tensorboard_port()]
    target_ports = [self.PORT]
    # Outputs volume for this job, then append volumes/mounts for each
    # group of referenced outputs (jobs, specs, experiments).
    volumes, volume_mounts = get_pod_outputs_volume(persistence_outputs)
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_refs_jobs,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_specs,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_refs_experiments,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    # Add volumes for persistence outputs secrets
    stores_secrets = get_stores_secrets(specs=outputs_specs)
    self.validate_stores_secrets_keys(stores_secrets=stores_secrets)
    secrets_volumes, secrets_volume_mounts = self.get_stores_secrets_volumes(
        stores_secrets=stores_secrets)
    volumes += secrets_volumes
    volume_mounts += secrets_volume_mounts
    # Get persistence outputs secrets auth commands; tensorboard runs after
    # the auth commands, all chained with '&&' under `sh -c`.
    command_args = self.get_stores_secrets_command_args(
        stores_secrets=stores_secrets)
    command_args.append("tensorboard --logdir={} --port={}".format(
        outputs_path, self.PORT))
    # Merge caller-provided scheduling settings with tensorboard defaults.
    node_selector = get_node_selector(
        node_selector=node_selector,
        default_node_selector=settings.NODE_SELECTOR_TENSORBOARDS)
    affinity = get_affinity(
        affinity=affinity,
        default_affinity=settings.AFFINITY_TENSORBOARDS)
    tolerations = get_tolerations(
        tolerations=tolerations,
        default_tolerations=settings.TOLERATIONS_TENSORBOARDS)
    deployment = deployments.get_deployment(
        namespace=self.namespace,
        app=settings.APP_LABELS_TENSORBOARD,
        name=TENSORBOARD_JOB_NAME,
        project_name=self.project_name,
        project_uuid=self.project_uuid,
        job_name=self.job_name,
        job_uuid=self.job_uuid,
        volume_mounts=volume_mounts,
        volumes=volumes,
        image=image,
        command=["/bin/sh", "-c"],
        args=[' && '.join(command_args)],
        ports=target_ports,
        container_name=settings.CONTAINER_NAME_PLUGIN_JOB,
        resources=resources,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        role=settings.ROLE_LABELS_DASHBOARD,
        type=settings.TYPE_LABELS_RUNNER)
    deployment_name = JOB_NAME_FORMAT.format(name=TENSORBOARD_JOB_NAME,
                                             job_uuid=self.job_uuid)
    deployment_labels = deployments.get_labels(
        app=settings.APP_LABELS_TENSORBOARD,
        project_name=self.project_name,
        project_uuid=self.project_uuid,
        job_name=self.job_name,
        job_uuid=self.job_uuid,
        role=settings.ROLE_LABELS_DASHBOARD,
        type=settings.TYPE_LABELS_RUNNER)
    dep_resp, _ = self.create_or_update_deployment(name=deployment_name,
                                                   data=deployment)
    service = services.get_service(namespace=self.namespace,
                                   name=deployment_name,
                                   labels=deployment_labels,
                                   ports=ports,
                                   target_ports=target_ports,
                                   service_type=self._get_service_type())
    service_resp, _ = self.create_or_update_service(name=deployment_name,
                                                    data=service)
    results = {
        'deployment': dep_resp.to_dict(),
        'service': service_resp.to_dict()
    }
    if self._use_ingress():
        # Route /tensorboard/<project path> to this service via ingress.
        annotations = json.loads(settings.K8S_INGRESS_ANNOTATIONS)
        paths = [{
            'path':
                '/tensorboard/{}'.format(self.project_name.replace('.', '/')),
            'backend': {
                'serviceName': deployment_name,
                'servicePort': ports[0]
            }
        }]
        ingress = ingresses.get_ingress(namespace=self.namespace,
                                        name=deployment_name,
                                        labels=deployment_labels,
                                        annotations=annotations,
                                        paths=paths)
        self.create_or_update_ingress(name=deployment_name, data=ingress)
    return results
def start_tensorboard(self,
                      outputs_path,
                      persistence_outputs,
                      outputs_specs=None,
                      outputs_refs_jobs=None,
                      outputs_refs_experiments=None,
                      resources=None,
                      labels=None,
                      annotations=None,
                      node_selector=None,
                      affinity=None,
                      tolerations=None,
                      max_restarts=None,
                      reconcile_url=None):
    """Create/update the tensorboard deployment, service, and (optionally) ingress.

    Mounts the outputs volume, any referenced jobs'/experiments'/spec
    outputs, and store secrets; runs tensorboard behind a proxy path
    prefix derived from the deployment name, chaining the store auth
    commands before it under `sh -c`.

    Returns:
        dict: the created/updated 'deployment' and 'service' as dicts.
    """
    ports = [self.request_tensorboard_port()]
    target_ports = [self.port]
    # Outputs volume for this job, then append volumes/mounts for each
    # group of referenced outputs (jobs, specs, experiments).
    volumes, volume_mounts = get_pod_outputs_volume(persistence_outputs)
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_refs_jobs,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_specs,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    refs_volumes, refs_volume_mounts = get_pod_refs_outputs_volumes(
        outputs_refs=outputs_refs_experiments,
        persistence_outputs=persistence_outputs)
    volumes += refs_volumes
    volume_mounts += refs_volume_mounts
    # Add volumes for persistence outputs secrets
    stores_secrets = get_stores_secrets(specs=outputs_specs)
    self.validate_stores_secrets_keys(stores_secrets=stores_secrets)
    secrets_volumes, secrets_volume_mounts = self.get_stores_secrets_volumes(
        stores_secrets=stores_secrets)
    volumes += secrets_volumes
    volume_mounts += secrets_volume_mounts
    resource_name = self.resource_manager.get_resource_name()
    # Tensorboard is served behind a proxy, so it needs its path prefix.
    tensorboard_url = self._get_proxy_url(namespace=self.namespace,
                                          job_name=TENSORBOARD_JOB_NAME,
                                          deployment_name=resource_name)
    # Get persistence outputs secrets auth commands; tensorboard runs after
    # the auth commands, all chained with '&&' under `sh -c`.
    command_args = self.get_stores_secrets_command_args(
        stores_secrets=stores_secrets)
    command_args.append("tensorboard "
                        "--logdir={log_dir} "
                        "--port={port} "
                        "--path_prefix={path_prefix}".format(
                            log_dir=outputs_path,
                            port=self.port,
                            path_prefix=tensorboard_url))
    args = [' && '.join(command_args)]
    command = ["/bin/sh", "-c"]
    labels = get_labels(default_labels=self.resource_manager.labels,
                        labels=labels)
    deployment = self.resource_manager.get_deployment(
        resource_name=resource_name,
        volume_mounts=volume_mounts,
        volumes=volumes,
        labels=labels,
        env_vars=None,
        command=command,
        args=args,
        persistence_outputs=persistence_outputs,
        outputs_refs_jobs=outputs_refs_jobs,
        outputs_refs_experiments=outputs_refs_experiments,
        resources=resources,
        annotations=annotations,
        ephemeral_token=None,
        node_selector=node_selector,
        affinity=affinity,
        tolerations=tolerations,
        ports=target_ports,
        reconcile_url=reconcile_url,
        max_restarts=max_restarts,
        restart_policy=get_deployment_restart_policy(max_restarts))
    dep_resp, _ = self.create_or_update_deployment(name=resource_name,
                                                   body=deployment,
                                                   reraise=True)
    service = services.get_service(
        namespace=self.namespace,
        name=resource_name,
        labels=self.resource_manager.get_labels(),
        ports=ports,
        target_ports=target_ports,
        service_type=self._get_service_type())
    service_resp, _ = self.create_or_update_service(name=resource_name,
                                                    body=service,
                                                    reraise=True)
    results = {
        'deployment': dep_resp.to_dict(),
        'service': service_resp.to_dict()
    }
    if self._use_ingress():
        # Route /tensorboards/<project path> to this service via ingress.
        annotations = json.loads(conf.get(K8S_INGRESS_ANNOTATIONS))
        paths = [{
            'path':
                '/tensorboards/{}'.format(self.project_name.replace('.', '/')),
            'backend': {
                'serviceName': resource_name,
                'servicePort': ports[0]
            }
        }]
        ingress = ingresses.get_ingress(
            namespace=self.namespace,
            name=resource_name,
            labels=self.resource_manager.get_labels(),
            annotations=annotations,
            paths=paths)
        self.create_or_update_ingress(name=resource_name,
                                      body=ingress,
                                      reraise=True)
    return results