def get_pod_volumes(
    self,
    docker_volumes: Sequence[DockerVolume],
    aws_ebs_volumes: Sequence[AwsEbsVolume],
) -> Sequence[V1Volume]:
    pod_volumes = []

    unique_docker_volumes = {
        self.get_docker_volume_name(docker_volume): docker_volume
        for docker_volume in docker_volumes
    }
    for name, docker_volume in unique_docker_volumes.items():
        pod_volumes.append(
            V1Volume(
                host_path=V1HostPathVolumeSource(
                    path=docker_volume['hostPath'],
                ),
                name=name,
            ),
        )

    unique_aws_ebs_volumes = {
        self.get_aws_ebs_volume_name(aws_ebs_volume): aws_ebs_volume
        for aws_ebs_volume in aws_ebs_volumes
    }
    for name, aws_ebs_volume in unique_aws_ebs_volumes.items():
        pod_volumes.append(
            V1Volume(
                aws_elastic_block_store=V1AWSElasticBlockStoreVolumeSource(
                    volume_id=aws_ebs_volume['volume_id'],
                    fs_type=aws_ebs_volume.get('fs_type'),
                    partition=aws_ebs_volume.get('partition'),
                    # k8s wants RW volume even if it's later mounted RO
                    read_only=False,
                ),
                name=name,
            ),
        )

    return pod_volumes
def test_get_pod_volumes(self):
    with mock.patch(
        'paasta_tools.kubernetes_tools.KubernetesDeploymentConfig.get_sanitised_volume_name',
        autospec=True,
        return_value='some-volume',
    ):
        mock_volumes = [
            {'hostPath': '/nail/blah', 'containerPath': '/nail/foo'},
            {'hostPath': '/nail/thing', 'containerPath': '/nail/bar'},
        ]
        expected_volumes = [
            V1Volume(
                host_path=V1HostPathVolumeSource(path='/nail/blah'),
                name='some-volume',
            ),
            V1Volume(
                host_path=V1HostPathVolumeSource(path='/nail/thing'),
                name='some-volume',
            ),
        ]
        assert self.deployment.get_pod_volumes(mock_volumes) == expected_volumes
def _get_pod_spec(self, container_command: str, pvc: str) -> V1PodSpec:
    config_map_volume_names = []
    config_map_volumes = []
    if self.ca_config_maps:
        for map_number, config_map in enumerate(self.ca_config_maps):
            config_map_name = f"ca-config-map-{map_number}"
            config_map_volumes.append(
                V1Volume(
                    name=config_map_name,
                    config_map=V1ConfigMapVolumeSource(
                        name=config_map,
                        items=[V1KeyToPath(key="ca_cert", path=config_map)]
                    )
                )
            )
            config_map_volume_names.append(config_map_name)

    pod_spec = V1PodSpec(
        containers=[
            self._get_container(
                command=container_command,
                config_map_volume_names=config_map_volume_names,
            )
        ],
        volumes=[
            V1Volume(
                name=self.data_volume_name,
                persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
                    claim_name=pvc
                )
            )
        ] + config_map_volumes,
        restart_policy="Never"
    )
    return pod_spec
def define_configmap(self, data):
    """This returns a k8s configmap using the data from the
    new-workflow POST.
    """
    with start_action(action_type="define_configmap"):
        ni_cmd = data["command"]
        idkey = list_digest(ni_cmd)
        cm_name = "command.{}.json".format(idkey)
        k8s_vol = V1Volume(
            name="noninteractive-command",
            config_map=V1ConfigMapVolumeSource(name=cm_name),
        )
        k8s_mt = V1VolumeMount(
            name="noninteractive-command",
            mount_path=("/opt/lsst/software/jupyterlab/" +
                        "noninteractive/command/"),
            read_only=True,
        )
        self.cmd_vol = k8s_vol
        self.cmd_mt = k8s_mt
        # Now the configmap
        cm_data = {}
        cm_data.update(data)
        del cm_data["image"]
        del cm_data["size"]
        jd = json.dumps(data, sort_keys=True, indent=4)
        k8s_configmap = V1ConfigMap(
            metadata=V1ObjectMeta(name=cm_name),
            data={"command.json": json.dumps(data)},
        )
        self.log.debug("Created configmap '{}': {}".format(cm_name, jd))
        self.cfg_map = k8s_configmap
def __init__(self, name, mount, fs_type, image, monitors, pool, secret_name,
             sub_path, user="******", read_only=False):
    self.mount = V1VolumeMount(
        name=name, mount_path=mount, read_only=read_only, sub_path=sub_path)
    self.volume = V1Volume(
        name=name,
        rbd=V1RBDVolumeSource(
            fs_type=fs_type,
            image=image,
            monitors=monitors.split(","),
            pool=pool,
            secret_ref=V1LocalObjectReference(secret_name),
            read_only=read_only,
            user=user))
def create_volumes(task_volumes) -> Tuple[List[V1Volume], List[V1VolumeMount]]:
    index = 0
    mounts = []
    volumes = []
    for target, volume in task_volumes.items():
        index += 1
        name = volume.get('name', f'volume{index}')

        for source_type, VolumeSource in VOLUME_SOURCES.items():
            if source_type not in volume:
                continue
            volume_config = volume[source_type]
            volumes.append(
                V1Volume(**{
                    'name': name,
                    source_type: VolumeSource(**volume_config),
                }))
            mounts.append(
                V1VolumeMount(
                    name=name,
                    read_only=volume.get('read_only', False),
                    mount_path=target,
                ))
    return volumes, mounts
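The example above relies on a `VOLUME_SOURCES` mapping that is not shown in the snippet. A minimal sketch of what such a mapping could look like is below; the key names and the set of supported source types are assumptions for illustration, not part of the original module.

# Hypothetical mapping from task-config keys to kubernetes volume source models.
# The real module may support different keys or additional source types.
from kubernetes.client import (
    V1ConfigMapVolumeSource,
    V1EmptyDirVolumeSource,
    V1HostPathVolumeSource,
    V1PersistentVolumeClaimVolumeSource,
    V1SecretVolumeSource,
)

VOLUME_SOURCES = {
    'host_path': V1HostPathVolumeSource,
    'empty_dir': V1EmptyDirVolumeSource,
    'secret': V1SecretVolumeSource,
    'config_map': V1ConfigMapVolumeSource,
    'persistent_volume_claim': V1PersistentVolumeClaimVolumeSource,
}

With a mapping like this, a task volume declared as `{'/data': {'persistent_volume_claim': {'claim_name': 'my-pvc'}}}` would yield one matching `V1Volume` and one `V1VolumeMount` pair.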
def __init__(self, name, mount, claim_name, read_only=False):
    self.mount = V1VolumeMount(name=name, mount_path=mount, read_only=read_only)
    self.volume = V1Volume(
        name=name,
        persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
            claim_name=claim_name))
def get_pod_volumes(self, volumes: Sequence[DockerVolume]) -> Sequence[V1Volume]:
    pod_volumes = []
    for volume in volumes:
        pod_volumes.append(
            V1Volume(
                host_path=V1HostPathVolumeSource(
                    path=volume['hostPath'],
                ),
                name=self.get_sanitised_volume_name(volume['containerPath']),
            ),
        )
    return pod_volumes
def _create_flush_job(
    batch_api: BatchV1Api,
    command: List[str],
    env: List[V1EnvVar],
    image: str,
    name: str,
    namespace: str,
    service_account_name: str,
) -> V1Job:
    logger.info(f"creating job: {name}")
    try:
        return batch_api.create_namespaced_job(
            namespace=namespace,
            body=V1Job(
                api_version="batch/v1",
                kind="Job",
                metadata=V1ObjectMeta(name=name, namespace=namespace),
                spec=V1JobSpec(
                    template=V1PodTemplateSpec(
                        spec=V1PodSpec(
                            containers=[
                                V1Container(
                                    image=image,
                                    command=command,
                                    name="flush",
                                    volume_mounts=[
                                        V1VolumeMount(mount_path="/data", name="queue")
                                    ],
                                    env=env,
                                )
                            ],
                            restart_policy="OnFailure",
                            volumes=[
                                V1Volume(
                                    name="queue",
                                    persistent_volume_claim=(
                                        V1PersistentVolumeClaimVolumeSource(
                                            claim_name=name
                                        )
                                    ),
                                )
                            ],
                            service_account_name=service_account_name,
                        )
                    )
                ),
            ),
        )
    except ApiException as e:
        if e.reason == CONFLICT and json.loads(e.body)["reason"] == ALREADY_EXISTS:
            logger.info(f"using existing job: {name}")
            return batch_api.read_namespaced_job(name, namespace)
        raise
def test_get_pod_volumes(self):
    mock_docker_volumes = [
        {'hostPath': '/nail/blah', 'containerPath': '/nail/foo'},
        {'hostPath': '/nail/thing', 'containerPath': '/nail/bar'},
    ]
    mock_aws_ebs_volumes = [
        {
            'volume_id': 'vol-ZZZZZZZZZZZZZZZZZ',
            'fs_type': 'ext4',
            'container_path': '/nail/qux',
        },
    ]
    expected_volumes = [
        V1Volume(
            host_path=V1HostPathVolumeSource(path='/nail/blah'),
            name='host--slash-nailslash-blah',
        ),
        V1Volume(
            host_path=V1HostPathVolumeSource(path='/nail/thing'),
            name='host--slash-nailslash-thing',
        ),
        V1Volume(
            aws_elastic_block_store=V1AWSElasticBlockStoreVolumeSource(
                volume_id='vol-ZZZZZZZZZZZZZZZZZ',
                fs_type='ext4',
                read_only=False,
            ),
            name='aws-ebs--vol-ZZZZZZZZZZZZZZZZZ',
        ),
    ]
    assert self.deployment.get_pod_volumes(
        docker_volumes=mock_docker_volumes,
        aws_ebs_volumes=mock_aws_ebs_volumes,
    ) == expected_volumes
def __init__(self) -> None:
    metadata = V1ObjectMeta(name="postgres", labels={"app": "postgres"})
    label_selector = V1LabelSelector(match_labels={"app": "postgres"})
    env = [V1EnvVar(name="POSTGRES_HOST_AUTH_METHOD", value="trust")]
    ports = [V1ContainerPort(container_port=5432, name="sql")]
    volume_mounts = [
        V1VolumeMount(name="data", mount_path="/data"),
        V1VolumeMount(
            name="postgres-init", mount_path="/docker-entrypoint-initdb.d"
        ),
    ]
    volume_config = V1ConfigMapVolumeSource(
        name="postgres-init",
    )
    volumes = [V1Volume(name="postgres-init", config_map=volume_config)]
    container = V1Container(
        name="postgres",
        image="postgres:14.3",
        env=env,
        ports=ports,
        volume_mounts=volume_mounts,
    )
    pod_spec = V1PodSpec(containers=[container], volumes=volumes)
    template_spec = V1PodTemplateSpec(metadata=metadata, spec=pod_spec)
    claim_templates = [
        V1PersistentVolumeClaim(
            metadata=V1ObjectMeta(name="data"),
            spec=V1PersistentVolumeClaimSpec(
                access_modes=["ReadWriteOnce"],
                resources=V1ResourceRequirements(requests={"storage": "1Gi"}),
            ),
        )
    ]
    self.stateful_set = V1StatefulSet(
        api_version="apps/v1",
        kind="StatefulSet",
        metadata=metadata,
        spec=V1StatefulSetSpec(
            service_name="postgres",
            replicas=1,
            selector=label_selector,
            template=template_spec,
            volume_claim_templates=claim_templates,
        ),
    )
def volume_pipeline():
    op1 = op1_op()
    op1.add_volume(
        V1Volume(name='gcp-credentials',
                 secret=V1SecretVolumeSource(secret_name='user-gcp-sa')))
    op1.container.add_volume_mount(
        V1VolumeMount(mount_path='/secret/gcp-credentials',
                      name='gcp-credentials'))
    op1.container.add_env_variable(
        V1EnvVar(name='GOOGLE_APPLICATION_CREDENTIALS',
                 value='/secret/gcp-credentials/user-gcp-sa.json'))
    op1.container.add_env_variable(V1EnvVar(name='Foo', value='bar'))
    op2 = op2_op(op1.output)
def core_config_mount(self, name, config_map, key, target_path, read_only=True):
    if name not in self.core_config_volumes:
        self.core_config_volumes[name] = V1Volume(
            name=name,
            config_map=V1ConfigMapVolumeSource(
                name=config_map,
                optional=False
            )
        )

    self.core_config_mounts[target_path] = V1VolumeMount(
        name=name,
        mount_path=target_path,
        sub_path=key,
        read_only=read_only
    )
def get_pod_volumes(volumes: PVector['DockerVolume']) -> List[V1Volume]:
    """
    Given a list of volume mounts, return a list corresponding to the
    Kubernetes objects needed to tie the mounts to a Pod.
    """
    unique_volumes: Dict[str, 'DockerVolume'] = {
        get_sanitised_volume_name(
            f"host--{volume['host_path']}", length_limit=63
        ): volume
        for volume in volumes
    }

    return [
        V1Volume(
            host_path=V1HostPathVolumeSource(path=volume["host_path"]),
            name=name,
        )
        for name, volume in unique_volumes.items()
    ]
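`get_sanitised_volume_name` itself is not included in this collection. Judging only by the expected names in the tests on this page ('/nail/blah' becomes 'host--slash-nailslash-blah', and a very long path gets truncated with a short suffix like '--f2c8'), a rough sketch of the idea might look as follows; the exact replacement rules and the choice of hash are assumptions, not the actual helper.

import hashlib

def sanitise_volume_name_sketch(name: str, length_limit: int = 63) -> str:
    """Illustrative only: mirrors the naming pattern visible in the tests above."""
    # '/' is not allowed in a Kubernetes volume name, so spell it out
    sanitised = name.replace('/', 'slash-')
    if len(sanitised) <= length_limit:
        return sanitised
    # Long names are truncated and given a short hash suffix to keep them unique
    digest = hashlib.md5(sanitised.encode()).hexdigest()[:4]
    return f"{sanitised[:length_limit - 6]}--{digest}"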
def volume_pipeline():
    op1 = dsl.ContainerOp(name='download',
                          image='google/cloud-sdk',
                          command=['sh', '-c'],
                          arguments=['ls | tee /tmp/results.txt'],
                          file_outputs={'downloaded': '/tmp/results.txt'}) \
        .add_volume(V1Volume(name='gcp-credentials',
                             secret=V1SecretVolumeSource(secret_name='user-gcp-sa'))) \
        .add_volume_mount(V1VolumeMount(mount_path='/secret/gcp-credentials',
                                        name='gcp-credentials')) \
        .add_env_variable(V1EnvVar(name='GOOGLE_APPLICATION_CREDENTIALS',
                                   value='/secret/gcp-credentials/user-gcp-sa.json')) \
        .add_env_variable(V1EnvVar(name='Foo', value='bar'))
    op2 = dsl.ContainerOp(name='echo',
                          image='library/bash',
                          command=['sh', '-c'],
                          arguments=['echo %s' % op1.output])
def start_stateful_container(self, service_name, container_name, spec, labels):
    # Setup PVC
    deployment_name = service_name + '-' + container_name
    mounts, volumes = [], []
    for volume_name, volume_spec in spec.volumes.items():
        mount_name = deployment_name + volume_name

        # Check if the PVC exists, create if not
        self._ensure_pvc(mount_name, volume_spec.storage_class, volume_spec.capacity)

        # Create the volume info
        volumes.append(V1Volume(
            name=mount_name,
            persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(mount_name)
        ))
        mounts.append(V1VolumeMount(mount_path=volume_spec.mount_path, name=mount_name))

    self._create_deployment(service_name, deployment_name, spec.container,
                            30, 1, labels, volumes=volumes, mounts=mounts)
def create_cluster(self, spec, cluster_management, namespace_name, volume_claim_name):
    count = int(spec[FRAMEWORK_RESOURCES][FRAMEWORK_DPU_COUNT])
    version = str(spec[FRAMEWORK_VERSION])
    image = "tensorflow/tensorflow:" + version + "-gpu"

    v1_api = cluster_management.kube_api
    api_response_list = []
    for i in range(count):
        body = kubernetes.client.V1Pod()
        body.api_version = "v1"
        body.kind = "Pod"
        meta = V1ObjectMeta()
        meta.generate_name = "tensorflow-"
        body.metadata = meta

        uuid = str(uuid4())
        container = V1Container(name=uuid, image=image)
        pod_spec = V1PodSpec(containers=[container])

        container_mounts = V1VolumeMount(
            mount_path=GLUSTER_DEFAULT_MOUNT_PATH,
            name=CONTAINER_VOLUME_PREFIX)
        container.volume_mounts = [container_mounts]

        compute_resource = V1ResourceRequirements()
        compute_resource.limits = {"nvidia.com/gpu": 1}
        compute_resource.requests = {"nvidia.com/gpu": 1}
        container.resources = compute_resource

        claim = V1PersistentVolumeClaimVolumeSource(claim_name=volume_claim_name)
        volume_claim = V1Volume(name=CONTAINER_VOLUME_PREFIX,
                                persistent_volume_claim=claim)
        volume_claim.persistent_volume_claim = claim
        pod_spec.volumes = [volume_claim]
        body.spec = pod_spec

        try:
            api_response = v1_api.create_namespaced_pod(namespace_name, body)
        except ApiException as e:
            raise Exception(
                "Exception when calling CoreV1Api->create_namespaced_pod: %s\n" % e)
        api_response_list.append(api_response)

    return api_response_list
def __init__(self, name, mount, monitors, secret_name, fs_path, sub_path,
             user="******", read_only=False):
    self.mount = V1VolumeMount(
        name=name, mount_path=mount, read_only=read_only, sub_path=sub_path)
    self.volume = V1Volume(
        name=name,
        cephfs=V1CephFSVolumeSource(
            monitors=monitors.split(","),
            path=fs_path,
            secret_ref=V1LocalObjectReference(secret_name),
            read_only=read_only,
            user=user))
def _create_volumes(self, service_name):
    volumes, mounts = [], []

    # Attach the mount that provides the config file
    volumes.extend(self.config_volumes.values())
    mounts.extend(self.config_mounts.values())

    # Attach the mount that provides the update
    volumes.append(V1Volume(
        name='update-directory',
        persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
            claim_name=FILE_UPDATE_VOLUME,
            read_only=True
        ),
    ))
    mounts.append(V1VolumeMount(
        name='update-directory',
        mount_path=CONTAINER_UPDATE_DIRECTORY,
        sub_path=service_name,
        read_only=True,
    ))

    return volumes, mounts
def test_delete_detached_pvcs(api: MagicMock):
    api.list_namespaced_pod.return_value = V1PodList(items=[
        # pvc is attached
        V1Pod(
            spec=V1PodSpec(
                containers=[],
                volumes=[
                    V1Volume(
                        name="queue",
                        persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
                            claim_name="queue-web-3",
                        ),
                    )
                ],
            ),
        ),
        # pvc not attached because spec is missing
        V1Pod(),
        # pvc not attached because volumes are missing
        V1Pod(spec=V1PodSpec(containers=[])),
        # pvc not attached because volume is not persistent
        V1Pod(spec=V1PodSpec(containers=[], volumes=[V1Volume(name="queue")])),
        # pvc not attached because pod is unschedulable due to pvc
        V1Pod(
            metadata=V1ObjectMeta(
                name="web-0",
                namespace="default",
                uid="uid-web-0",
                resource_version="1",
                owner_references=[V1ObjectReference(kind="StatefulSet")],
            ),
            status=V1PodStatus(
                phase="Pending",
                conditions=[
                    V1PodCondition(
                        status="Not Ready",
                        type="False",
                        reason="Unschedulable",
                        message='persistentvolumeclaim "queue-web-0" not found',
                    )
                ],
            ),
        ),
    ])
    api.list_namespaced_persistent_volume_claim.return_value = V1PersistentVolumeClaimList(
        items=[
            # should delete 0-2, 3 is in attached pvcs
            *(V1PersistentVolumeClaim(
                metadata=V1ObjectMeta(
                    name=f"queue-web-{i}",
                    uid=f"uid-queue-web-{i}",
                    resource_version=f"{i}",
                ),
                spec=V1PersistentVolumeClaimSpec(volume_name=f"pv-{i}"),
            ) for i in range(4)),
            # name does not start with claim prefix
            V1PersistentVolumeClaim(metadata=V1ObjectMeta(name="other-web-0")),
        ])

    def delete_pvc(name, namespace, body):
        if name == "queue-web-1":
            raise ApiException(reason="Conflict")
        if name == "queue-web-2":
            raise ApiException(reason="Not Found")

    api.delete_namespaced_persistent_volume_claim.side_effect = delete_pvc
    pvc_cleanup_delay = timedelta(microseconds=1)
    delay_complete = datetime.utcnow() - pvc_cleanup_delay
    cache = {
        # wrong pv name, should be overwritten
        "queue-web-0": PvcCacheEntry(pv="wrong", time=delay_complete),
        # no longer detached, should be removed
        "queue-web-3": PvcCacheEntry(pv="pv-3", time=delay_complete),
    }

    delete_detached_pvcs(api, "namespace", "queue-", pvc_cleanup_delay, cache)

    api.list_namespaced_pod.assert_called_once_with("namespace")
    api.list_namespaced_persistent_volume_claim.assert_called_once_with("namespace")
    api.delete_namespaced_persistent_volume_claim.assert_not_called()
    assert {f"queue-web-{i}": f"pv-{i}" for i in range(3)} == {
        k: v.pv for k, v in cache.items()
    }
    api.list_namespaced_pod.reset_mock()
    api.list_namespaced_persistent_volume_claim.reset_mock()
    previous_cache = {**cache}

    delete_detached_pvcs(api, "namespace", "queue-", pvc_cleanup_delay, cache)

    api.list_namespaced_pod.assert_called_once_with("namespace")
    api.list_namespaced_persistent_volume_claim.assert_called_once_with("namespace")
    assert previous_cache == cache
    assert [
        (f"queue-web-{i}", "namespace", f"uid-queue-web-{i}", f"{i}")
        for i in range(3)
    ] == [(
        call.kwargs["name"],
        call.kwargs["namespace"],
        call.kwargs["body"].preconditions.uid,
        call.kwargs["body"].preconditions.resource_version,
    ) for call in api.delete_namespaced_persistent_volume_claim.call_args_list]
def apply_pod_profile(self, username, pod, profile, gpu_types, default_mount_path,
                      gpu_mode=None, selected_gpu_type="ALL"):
    api_client = kubernetes.client.ApiClient()

    pod.metadata.labels['jupyterhub.opendatahub.io/user'] = escape(username)

    profile_volumes = profile.get('volumes')
    if profile_volumes:
        for volume in profile_volumes:
            volume_name = re.sub(r'[^a-zA-Z0-9\.]', '-', volume['name']).lower()
            read_only = volume['persistentVolumeClaim'].get('readOnly')
            pvc = V1PersistentVolumeClaimVolumeSource(
                volume['persistentVolumeClaim']['claimName'], read_only=read_only)
            mount_path = self.generate_volume_path(
                volume.get('mountPath'), default_mount_path, volume_name)
            pod.spec.volumes.append(
                V1Volume(name=volume_name, persistent_volume_claim=pvc))
            pod.spec.containers[0].volume_mounts.append(
                V1VolumeMount(name=volume_name, mount_path=mount_path))

    profile_environment = profile.get('env')
    if profile_environment:
        # Kept for backwards compatibility with simplified env var definitions
        if isinstance(profile_environment, dict):
            for k, v in profile['env'].items():
                update = False
                for e in pod.spec.containers[0].env:
                    if e.name == k:
                        e.value = v
                        update = True
                        break
                if not update:
                    pod.spec.containers[0].env.append(V1EnvVar(k, v))
        elif isinstance(profile_environment, list):
            for i in profile_environment:
                r = type("Response", (), {})
                r.data = json.dumps(i)
                env_var = api_client.deserialize(r, V1EnvVar)
                pod.spec.containers[0].env.append(env_var)

    resource_var = None
    resource_json = type("Response", (), {})
    resource_json.data = json.dumps(profile.get('resources'))
    resource_var = api_client.deserialize(resource_json, V1ResourceRequirements)
    if resource_var:
        pod.spec.containers[0].resources = resource_var
        mem_limit = resource_var.limits.get('memory', '')
        if mem_limit:
            pod.spec.containers[0].env.append(
                V1EnvVar(name='MEM_LIMIT', value=self.get_mem_limit(mem_limit)))

    for c in pod.spec.containers:
        update = False
        if type(c) is dict:
            env = c['env']
        else:
            env = c.env
        for e in env:
            if type(e) is dict:
                if e['name'] == _JUPYTERHUB_USER_NAME_ENV:
                    e['value'] = username
                    update = True
                    break
            else:
                if e.name == _JUPYTERHUB_USER_NAME_ENV:
                    e.value = username
                    update = True
                    break
        if not update:
            env.append(V1EnvVar(_JUPYTERHUB_USER_NAME_ENV, username))

    self.apply_gpu_config(gpu_mode, profile, gpu_types, pod, selected_gpu_type)

    node_tolerations = profile.get('node_tolerations', [])
    node_affinity = profile.get('node_affinity', {})

    self.apply_pod_schedulers(node_tolerations, node_affinity, pod)

    return pod
def start_stateful_container(self, service_name: str, container_name: str,
                             spec, labels: dict[str, str], change_key: str):
    # Setup PVC
    deployment_name = self._dependency_name(service_name, container_name)
    mounts, volumes = [], []
    for volume_name, volume_spec in spec.volumes.items():
        mount_name = f'{deployment_name}-{volume_name}'

        # Check if the PVC exists, create if not
        self._ensure_pvc(mount_name, volume_spec.storage_class,
                         volume_spec.capacity, deployment_name)

        # Create the volume info
        volumes.append(V1Volume(
            name=mount_name,
            persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(mount_name)
        ))
        mounts.append(V1VolumeMount(mount_path=volume_spec.mount_path, name=mount_name))

    # Read the key being used for the deployment instance or generate a new one
    instance_key = uuid.uuid4().hex
    try:
        old_deployment = self.apps_api.read_namespaced_deployment(deployment_name, self.namespace)
        for container in old_deployment.spec.template.spec.containers:
            for env in container.env:
                if env.name == 'AL_INSTANCE_KEY':
                    instance_key = env.value
                    break
    except ApiException as error:
        if error.status != 404:
            raise

    # Setup the deployment itself
    labels['container'] = container_name
    spec.container.environment.append({'name': 'AL_INSTANCE_KEY', 'value': instance_key})
    self._create_deployment(service_name, deployment_name, spec.container,
                            30, 1, labels, volumes=volumes, mounts=mounts,
                            core_mounts=spec.run_as_core, change_key=change_key)

    # Setup a service to direct to the deployment
    try:
        service = self.api.read_namespaced_service(deployment_name, self.namespace)
        service.metadata.labels = labels
        service.spec.selector = labels
        service.spec.ports = [V1ServicePort(port=int(_p)) for _p in spec.container.ports]
        self.api.patch_namespaced_service(deployment_name, self.namespace, service)
    except ApiException as error:
        if error.status != 404:
            raise
        service = V1Service(
            metadata=V1ObjectMeta(name=deployment_name, labels=labels),
            spec=V1ServiceSpec(
                cluster_ip='None',
                selector=labels,
                ports=[V1ServicePort(port=int(_p)) for _p in spec.container.ports]
            )
        )
        self.api.create_namespaced_service(self.namespace, service)

    # Add entries to the environment variable list to point to this container
    self._service_limited_env[service_name][f'{container_name}_host'] = deployment_name
    self._service_limited_env[service_name][f'{container_name}_key'] = instance_key
    if spec.container.ports:
        self._service_limited_env[service_name][f'{container_name}_port'] = spec.container.ports[0]
def test_run(mock_get_node_affinity, k8s_executor):
    task_config = KubernetesTaskConfig(
        name="fake_task_name",
        uuid="fake_id",
        image="fake_docker_image",
        command="fake_command",
        cpus=1,
        memory=1024,
        disk=1024,
        volumes=[{"host_path": "/a", "container_path": "/b", "mode": "RO"}],
        node_selectors={"hello": "world"},
        node_affinities=[dict(key="a_label", operator="In", value=[])],
        labels={
            "some_label": "some_label_value",
        },
        annotations={
            "paasta.yelp.com/some_annotation": "some_value",
        },
        service_account_name="testsa",
    )
    expected_container = V1Container(
        image=task_config.image,
        name="main",
        command=["/bin/sh", "-c"],
        args=[task_config.command],
        security_context=V1SecurityContext(
            capabilities=V1Capabilities(drop=list(task_config.cap_drop)),
        ),
        resources=V1ResourceRequirements(limits={
            "cpu": 1.0,
            "memory": "1024.0Mi",
            "ephemeral-storage": "1024.0Mi",
        }),
        env=[],
        volume_mounts=[
            V1VolumeMount(
                mount_path="/b",
                name="host--slash-a",
                read_only=True,
            )
        ],
    )
    expected_pod = V1Pod(
        metadata=V1ObjectMeta(
            name=task_config.pod_name,
            namespace="task_processing_tests",
            labels={
                "some_label": "some_label_value",
            },
            annotations={
                "paasta.yelp.com/some_annotation": "some_value",
            },
        ),
        spec=V1PodSpec(
            restart_policy=task_config.restart_policy,
            containers=[expected_container],
            volumes=[
                V1Volume(
                    host_path=V1HostPathVolumeSource(path="/a"),
                    name="host--slash-a",
                )
            ],
            share_process_namespace=True,
            security_context=V1PodSecurityContext(
                fs_group=task_config.fs_group,
            ),
            node_selector={"hello": "world"},
            affinity=V1Affinity(node_affinity=mock_get_node_affinity.return_value),
            dns_policy="Default",
            service_account_name=task_config.service_account_name,
        ),
    )

    assert k8s_executor.run(task_config) == task_config.pod_name
    assert k8s_executor.kube_client.core.create_namespaced_pod.call_args_list == [
        mock.call(body=expected_pod, namespace='task_processing_tests')
    ]
    assert mock_get_node_affinity.call_args_list == [
        mock.call(pvector([dict(key="a_label", operator="In", value=[])])),
    ]
def __init__(self, name, mount, path, read_only=True):
    self.mount = V1VolumeMount(name=name, mount_path=mount, read_only=read_only)
    self.volume = V1Volume(name=name, host_path=V1HostPathVolumeSource(path=path))
def run(provider, provider_kwargs, cluster=None, job=None, storage=None):
    # TODO, temp fix
    s3 = storage["s3"]
    _validate_fields(
        provider=provider_kwargs, cluster=cluster, job=job, storage=storage, s3=s3
    )
    _required_run_arguments(provider_kwargs, cluster, job, storage, s3)

    response = {"job": {}}
    if "name" not in job["meta"] or not job["meta"]["name"]:
        since_epoch = int(time.time())
        job["meta"]["name"] = "{}-{}".format(JOB_DEFAULT_NAME, since_epoch)

    if "bucket_name" not in s3 or not s3["bucket_name"]:
        s3["bucket_name"] = job["meta"]["name"]

    container_engine_client = new_client(
        ContainerEngineClient,
        composite_class=ContainerEngineClientCompositeOperations,
        name=provider_kwargs["profile"]["name"],
    )
    compute_cluster = get_cluster_by_name(
        container_engine_client,
        provider_kwargs["profile"]["compartment_id"],
        name=cluster["name"],
    )
    if not compute_cluster:
        response["msg"] = "Failed to find a cluster with name: {}".format(
            cluster["name"]
        )
        return False, response

    refreshed = refresh_kube_config(
        compute_cluster.id, name=provider_kwargs["profile"]["name"]
    )
    if not refreshed:
        response["msg"] = "Failed to refresh the kubernetes config"
        return False, response

    node_manager = NodeManager()
    if not node_manager.discover():
        response["msg"] = "Failed to discover any nodes to schedule jobs on"
        return False, response

    node = node_manager.select()
    if not node:
        response["msg"] = "Failed to select a node to schedule on"
        return False, response

    # Ensure we have the newest config
    scheduler = KubenetesScheduler()

    jobio_args = [
        "jobio",
        "run",
    ]
    jobio_args.extend(job["commands"])
    jobio_args.extend(["--job-meta-name", job["meta"]["name"]])

    if "output_path" in job:
        jobio_args.extend(["--job-output-path", job["output_path"]])

    if "capture" in job and job["capture"]:
        jobio_args.append("--job-capture")

    if "debug" in job["meta"]:
        jobio_args.append("--job-meta-debug")

    if "env_override" in job["meta"]:
        jobio_args.append("--job-meta-env-override")

    # Maintained by the pod
    volumes = []
    # Maintained by the container
    volume_mounts = []
    # Environment to pass to the container
    envs = []

    # Prepare config for the scheduler
    scheduler_config = {}

    if storage and storage["enable"]:
        validate_dict_values(storage, required_storage_fields, throw=True)
        jobio_args.append("--storage-enable")

        # Means that results should be exported to the specified storage
        # Create kubernetes secrets
        core_api = client.CoreV1Api()
        # storage_api = client.StorageV1Api()

        # Storage endpoint credentials secret (Tied to a profile and job)
        secret_profile_name = "{}-{}-{}".format(
            STORAGE_CREDENTIALS_NAME, s3["name"], job["meta"]["name"]
        )
        try:
            storage_credentials_secret = core_api.read_namespaced_secret(
                secret_profile_name, KUBERNETES_NAMESPACE
            )
        except ApiException:
            storage_credentials_secret = None

        # volumes
        secret_volume_source = V1SecretVolumeSource(secret_name=secret_profile_name)
        secret_volume = V1Volume(name=secret_profile_name, secret=secret_volume_source)
        volumes.append(secret_volume)

        # Where the storage credentials should be mounted
        # in the compute unit
        secret_mount = V1VolumeMount(
            name=secret_profile_name,
            mount_path=storage["credentials_path"],
            read_only=True,
        )
        volume_mounts.append(secret_mount)

        if s3:
            validate_dict_values(s3, required_staging_values, verbose=True, throw=True)
            jobio_args.append("--storage-s3")
            # S3 storage
            # Look for s3 credentials and config files
            s3_config = load_aws_config(
                s3["config_file"], s3["credentials_file"], profile_name=s3["name"],
            )
            s3_config["endpoint_url"] = storage["endpoint"]

            if not storage_credentials_secret:
                secret_data = dict(
                    aws_access_key_id=s3_config["aws_access_key_id"],
                    aws_secret_access_key=s3_config["aws_secret_access_key"],
                )
                secret_metadata = V1ObjectMeta(name=secret_profile_name)
                secrets_config = dict(metadata=secret_metadata, string_data=secret_data)
                scheduler_config.update(dict(secret_kwargs=secrets_config))

            # TODO, unify argument endpoint with the s3 config endpoint
            s3_resource = boto3.resource("s3", **s3_config)

            bucket = bucket_exists(s3_resource.meta.client, s3["bucket_name"])
            if not bucket:
                bucket = s3_resource.create_bucket(
                    Bucket=s3["bucket_name"],
                    CreateBucketConfiguration={
                        "LocationConstraint": s3_config["region_name"]
                    },
                )

            if "upload_path" in storage and storage["upload_path"]:
                # Upload local path to the bucket as designated input for the job
                uploaded = None
                if os.path.exists(storage["upload_path"]):
                    if os.path.isdir(storage["upload_path"]):
                        uploaded = upload_directory_to_s3(
                            s3_resource.meta.client,
                            storage["upload_path"],
                            s3["bucket_name"],
                            s3_prefix=s3["bucket_input_prefix"],
                        )
                    elif os.path.isfile(storage["upload_path"]):
                        s3_path = os.path.basename(storage["upload_path"])
                        if s3["bucket_input_prefix"]:
                            s3_path = os.path.join(s3["bucket_input_prefix"], s3_path)
                        # Upload
                        uploaded = upload_to_s3(
                            s3_resource.meta.client,
                            storage["upload_path"],
                            s3_path,
                            s3["bucket_name"],
                        )

                if not uploaded:
                    response["msg"] = "Failed to upload local path: {} to s3".format(
                        storage["upload_path"]
                    )
                    return False, response

            jobio_args.extend(
                [
                    "--s3-region-name",
                    s3_config["region_name"],
                    "--storage-secrets-dir",
                    storage["credentials_path"],
                    "--storage-endpoint",
                    storage["endpoint"],
                    "--storage-input-path",
                    storage["input_path"],
                    "--storage-output-path",
                    storage["output_path"],
                    "--bucket-name",
                    s3["bucket_name"],
                    "--bucket-input-prefix",
                    s3["bucket_input_prefix"],
                    "--bucket-output-prefix",
                    s3["bucket_output_prefix"],
                ]
            )

            # Provide a way to allow pod specific output prefixes
            field_ref = client.V1ObjectFieldSelector(field_path="metadata.name")
            env_var_source = client.V1EnvVarSource(field_ref=field_ref)
            # HACK, Set the output prefix in the bucket to the name of the pod
            env_output_prefix = client.V1EnvVar(
                name="JOBIO_BUCKET_OUTPUT_PREFIX", value_from=env_var_source
            )
            envs.append(env_output_prefix)

    if scheduler_config:
        prepared = scheduler.prepare(**scheduler_config)
        if not prepared:
            response["msg"] = "Failed to prepare the scheduler"
            return False, response

    container_spec = dict(
        name=job["meta"]["name"],
        image=cluster["image"],
        env=envs,
        args=jobio_args,
        volume_mounts=volume_mounts,
    )
    # If the working directory does not exist inside the container
    # it will set permissions where it will be unable to expand the
    # s3 bucket if the user doesn't have root permissions
    if "working_dir" in job:
        container_spec.update({"working_dir": job["working_dir"]})

    # If the container requires a specific set of resources
    resources = {}
    if "min_cores" in job:
        resources["requests"] = {"cpu": job["min_cores"]}
    if "max_cores" in job:
        resources["limits"] = {"cpu": job["max_cores"]}
    if "min_memory" in job:
        resources["requests"].update({"memory": job["min_memory"]})
    if "max_memory" in job:
        resources["limits"].update({"memory": job["max_memory"]})
    if resources:
        resource_req = client.V1ResourceRequirements(**resources)
        container_spec.update({"resources": resource_req})

    pod_spec = dict(
        node_name=node.metadata.name, volumes=volumes, dns_policy="Default"
    )
    job_spec = dict(
        backoff_limit=2,
        parallelism=job["meta"]["num_parallel"],
        completions=job["meta"]["num_jobs"],
    )
    task = dict(
        container_kwargs=container_spec,
        pod_spec_kwargs=pod_spec,
        job_spec_kwargs=job_spec,
    )

    job = scheduler.submit(**task)
    if not job:
        response["msg"] = "Failed to submit the job"
        return False, response

    response["job"] = job
    response["msg"] = "Job submitted"
    return True, response
def generate_secrets_server_deployment(
    secrets_server_config: SecretsServerConfig,
):
    service_name = secrets_server_config.service_name()
    secret_name = secrets_server_config.secrets().concourse_secret_name()
    # We need to ensure that the labels and selectors match for both the deployment
    # and the service, therefore we base them on the configured service name.
    labels = {'app': service_name}

    return V1Deployment(
        kind='Deployment',
        metadata=V1ObjectMeta(name=service_name, labels=labels),
        spec=V1DeploymentSpec(
            replicas=1,
            selector=V1LabelSelector(match_labels=labels),
            template=V1PodTemplateSpec(
                metadata=V1ObjectMeta(labels=labels),
                spec=V1PodSpec(
                    containers=[
                        V1Container(
                            image='eu.gcr.io/gardener-project/cc/job-image:latest',
                            image_pull_policy='IfNotPresent',
                            name='secrets-server',
                            resources=V1ResourceRequirements(
                                requests={'cpu': '50m', 'memory': '50Mi'},
                                limits={'cpu': '50m', 'memory': '50Mi'},
                            ),
                            command=['bash'],
                            args=[
                                '-c',
                                '''
# chdir to secrets dir; create if absent
mkdir -p /secrets && cd /secrets
# make Kubernetes serviceaccount secrets available by default
cp -r /var/run/secrets/kubernetes.io/serviceaccount serviceaccount
# store Kubernetes service endpoint env as file for consumer
env | grep KUBERNETES_SERVICE > serviceaccount/env
# launch secrets server serving secrets dir contents on all IFs
python3 -m http.server 8080
                                '''
                            ],
                            ports=[
                                V1ContainerPort(container_port=8080),
                            ],
                            liveness_probe=V1Probe(
                                tcp_socket=V1TCPSocketAction(port=8080),
                                initial_delay_seconds=10,
                                period_seconds=10,
                            ),
                            volume_mounts=[
                                V1VolumeMount(
                                    name=secret_name,
                                    mount_path='/secrets/concourse-secrets',
                                    read_only=True,
                                ),
                            ],
                        ),
                    ],
                    node_selector={
                        "worker.garden.sapcloud.io/group": "cc-control"
                    },
                    volumes=[
                        V1Volume(
                            name=secret_name,
                            secret=V1SecretVolumeSource(
                                secret_name=secret_name,
                            )
                        )
                    ]
                )
            )
        )
    )
def launch(self, name, docker_config: DockerConfig, mounts, env, blocking: bool = True):
    name = (self.prefix + 'update-' + name.lower()).replace('_', '-')

    # If we have been given a username or password for the registry, we have to
    # update it; if we haven't been, make sure it has been cleaned up in the system
    # so we don't leave passwords lying around
    pull_secret_name = f'{name}-job-pull-secret'
    use_pull_secret = False
    try:
        # Check if there is already a username/password defined for this job
        current_pull_secret = self.api.read_namespaced_secret(
            pull_secret_name, self.namespace, _request_timeout=API_TIMEOUT)
    except ApiException as error:
        if error.status != 404:
            raise
        current_pull_secret = None
    if docker_config.registry_username or docker_config.registry_password:
        use_pull_secret = True
        # Build the secret we want to make
        new_pull_secret = V1Secret(
            metadata=V1ObjectMeta(name=pull_secret_name, namespace=self.namespace),
            type='kubernetes.io/dockerconfigjson',
            string_data={
                '.dockerconfigjson': create_docker_auth_config(
                    image=docker_config.image,
                    username=docker_config.registry_username,
                    password=docker_config.registry_password,
                )
            }
        )
        # Send it to the server
        if current_pull_secret:
            self.api.replace_namespaced_secret(
                pull_secret_name, namespace=self.namespace,
                body=new_pull_secret, _request_timeout=API_TIMEOUT)
        else:
            self.api.create_namespaced_secret(
                namespace=self.namespace, body=new_pull_secret,
                _request_timeout=API_TIMEOUT)
    elif current_pull_secret:
        # If there is a password set in kubernetes, but not in our configuration, clear it out
        self.api.delete_namespaced_secret(
            pull_secret_name, self.namespace, _request_timeout=API_TIMEOUT)

    try:
        self.batch_api.delete_namespaced_job(
            name=name, namespace=self.namespace,
            propagation_policy='Background', _request_timeout=API_TIMEOUT)
        while True:
            self.batch_api.read_namespaced_job(
                namespace=self.namespace, name=name, _request_timeout=API_TIMEOUT)
            time.sleep(1)
    except ApiException:
        pass

    volumes = []
    volume_mounts = []

    for index, mnt in enumerate(mounts):
        volumes.append(V1Volume(
            name=f'mount-{index}',
            persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
                claim_name=mnt['volume'],
                read_only=False
            ),
        ))
        volume_mounts.append(V1VolumeMount(
            name=f'mount-{index}',
            mount_path=mnt['dest_path'],
            sub_path=mnt['source_path'],
            read_only=False,
        ))

    if CONFIGURATION_CONFIGMAP:
        volumes.append(V1Volume(
            name='mount-configuration',
            config_map=V1ConfigMapVolumeSource(
                name=CONFIGURATION_CONFIGMAP
            ),
        ))
        volume_mounts.append(V1VolumeMount(
            name='mount-configuration',
            mount_path='/etc/assemblyline/config.yml',
            sub_path="config",
            read_only=True,
        ))

    section = 'service'
    labels = {
        'app': 'assemblyline',
        'section': section,
        'privilege': 'core',
        'component': 'update-script',
    }
    labels.update(self.extra_labels)

    metadata = V1ObjectMeta(
        name=name,
        labels=labels
    )

    environment_variables = [V1EnvVar(name=_e.name, value=_e.value)
                             for _e in docker_config.environment]
    environment_variables.extend([V1EnvVar(name=k, value=v) for k, v in env.items()])
    environment_variables.extend([V1EnvVar(name=k, value=os.environ[k])
                                  for k in INHERITED_VARIABLES if k in os.environ])
    environment_variables.append(V1EnvVar(name="LOG_LEVEL", value=self.log_level))

    cores = docker_config.cpu_cores
    memory = docker_config.ram_mb
    memory_min = min(docker_config.ram_mb_min, memory)

    container = V1Container(
        name=name,
        image=docker_config.image,
        command=docker_config.command,
        env=environment_variables,
        image_pull_policy='Always',
        volume_mounts=volume_mounts,
        resources=V1ResourceRequirements(
            limits={'cpu': cores, 'memory': f'{memory}Mi'},
            requests={'cpu': cores / 4, 'memory': f'{memory_min}Mi'},
        )
    )

    pod = V1PodSpec(
        volumes=volumes,
        restart_policy='Never',
        containers=[container],
        priority_class_name=self.priority_class,
    )

    if use_pull_secret:
        pod.image_pull_secrets = [V1LocalObjectReference(name=pull_secret_name)]

    job = V1Job(
        metadata=metadata,
        spec=V1JobSpec(
            backoff_limit=1,
            completions=1,
            template=V1PodTemplateSpec(
                metadata=metadata,
                spec=pod
            )
        )
    )

    status = self.batch_api.create_namespaced_job(
        namespace=self.namespace, body=job, _request_timeout=API_TIMEOUT).status

    if blocking:
        try:
            while not (status.failed or status.succeeded):
                time.sleep(3)
                status = self.batch_api.read_namespaced_job(
                    namespace=self.namespace, name=name,
                    _request_timeout=API_TIMEOUT).status

            self.batch_api.delete_namespaced_job(
                name=name, namespace=self.namespace,
                propagation_policy='Background', _request_timeout=API_TIMEOUT)
        except ApiException as error:
            if error.status != 404:
                raise
def __init__(self, name, mount, read_only=False):
    self.mount = V1VolumeMount(name=name, mount_path=mount, read_only=read_only)
    self.volume = V1Volume(name=name, empty_dir=V1EmptyDirVolumeSource())
def perform_cloud_ops():
    # set GOOGLE_APPLICATION_CREDENTIALS env to credentials file
    # set GOOGLE_CLOUD_PROJECT env to project id
    domain = os.getenv('DOMAIN')
    assert domain
    logger.info(f'using domain: {domain}')

    static_ip = os.getenv('STATIC_IP')
    assert static_ip
    logger.info(f'using static IP: {static_ip}')

    admin_email = os.getenv('ADMIN_EMAIL')
    assert admin_email
    logger.info(f'using ACME admin email: {admin_email}')

    oauth_client_id = os.getenv('OAUTH_CLIENT_ID')
    assert oauth_client_id
    logger.info(f'using oauth client id: {oauth_client_id}')

    oauth_client_secret = os.getenv('OAUTH_CLIENT_SECRET')
    assert oauth_client_secret
    logger.info(f'using oauth client secret: {oauth_client_secret}')

    oauth_secret = os.getenv('OAUTH_SECRET')
    assert oauth_secret
    logger.info(f'using oauth secret: {oauth_secret}')

    oauth_domain = os.getenv('OAUTH_DOMAIN')
    assert oauth_domain
    logger.info(f'using oauth domain: {oauth_domain}')

    django_secret_key = os.getenv('DJANGO_SECRET_KEY')
    assert django_secret_key
    logger.info(f'using DJANGO_SECRET_KEY: {django_secret_key}')

    credentials, project = google.auth.default()
    gcloud_client = container_v1.ClusterManagerClient(credentials=credentials)

    scan_clusters(gcloud_client, project)

    # FIXME add the k8s cert to a trust store
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    auth_gcloud_k8s(credentials)

    api_core_v1 = client.CoreV1Api()
    api_apps_v1 = client.AppsV1Api()
    api_storage_v1 = client.StorageV1Api()
    api_custom = client.CustomObjectsApi()
    api_extensions_v1_beta1 = client.ExtensionsV1beta1Api()
    api_ext_v1_beta1 = client.ApiextensionsV1beta1Api()
    api_rbac_auth_v1_b1 = client.RbacAuthorizationV1beta1Api()

    ensure_traefik(api_core_v1, api_ext_v1_beta1, api_apps_v1, api_custom,
                   api_rbac_auth_v1_b1, admin_email, domain, static_ip,
                   oauth_client_id, oauth_client_secret, oauth_domain, oauth_secret)

    with open(os.getenv('GOOGLE_APPLICATION_CREDENTIALS'), 'rb') as f:
        gcloud_credentials_b64 = b64encode(f.read()).decode('UTF-8')

    ensure_secret(api=api_core_v1,
                  name='webui-credentials',
                  namespace='default',
                  secret=V1Secret(
                      metadata=client.V1ObjectMeta(name='webui-credentials'),
                      data={'gcloud-credentials': gcloud_credentials_b64}))

    webui_volume_paths = [
        ('data', '/opt/nipyapi/data', '20Gi', 'standard'),
    ]
    webui_volume_mounts = [
        V1VolumeMount(name=path[0], mount_path=path[1])
        for path in webui_volume_paths
    ]
    webui_volume_mounts.append(
        V1VolumeMount(name='webui-credentials', mount_path='/root/webui', read_only=True))

    dind_volume_paths = [
        ('docker', '/var/lib/docker', '200Gi', 'standard'),
    ]
    dind_volume_mounts = [
        V1VolumeMount(name=path[0], mount_path=path[1])
        for path in dind_volume_paths
    ]
    shared_volume_mounts = [
        V1VolumeMount(name='dind-socket', mount_path='/var/run-shared')
    ]
    ensure_statefulset_with_containers(
        api_apps_v1=api_apps_v1,
        name='admin',
        namespace='default',
        replicas=1,
        containers=[
            V1Container(
                name='webui',
                image='aichrist/nipyapi-ds:latest',
                env=[
                    # FIXME use k8s secrets for these values
                    V1EnvVar(name='DOMAIN', value=domain),
                    V1EnvVar(name='STATIC_IP', value=static_ip),
                    V1EnvVar(name='ADMIN_EMAIL', value=admin_email),
                    V1EnvVar(name='OAUTH_CLIENT_ID', value=oauth_client_id),
                    V1EnvVar(name='OAUTH_CLIENT_SECRET', value=oauth_client_secret),
                    V1EnvVar(name='OAUTH_SECRET', value=oauth_secret),
                    V1EnvVar(name='OAUTH_DOMAIN', value=oauth_domain),
                    V1EnvVar(name='DJANGO_SECRET_KEY', value=django_secret_key),
                    V1EnvVar(name='GOOGLE_APPLICATION_CREDENTIALS',
                             value='/root/webui/gcloud_credentials.json'),
                    V1EnvVar(name='CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE',
                             value='/root/webui/gcloud_credentials.json'),
                    V1EnvVar(name='GOOGLE_CLOUD_PROJECT',
                             value=os.getenv('GOOGLE_CLOUD_PROJECT')),
                    V1EnvVar(name='DOCKER_HOST',
                             value='unix:///var/run-shared/docker.sock'),
                ],
                ports=[V1ContainerPort(container_port=8000)],
                volume_mounts=webui_volume_mounts + shared_volume_mounts),
            V1Container(
                name='dind',
                image='docker:19-dind',
                security_context=V1SecurityContext(privileged=True),
                command=['dockerd', '-H', 'unix:///var/run-shared/docker.sock'],
                volume_mounts=dind_volume_mounts + shared_volume_mounts)
        ],
        volumes=[
            V1Volume(name='dind-socket', empty_dir={}),
            V1Volume(name='webui-credentials',
                     projected=V1ProjectedVolumeSource(sources=[
                         V1VolumeProjection(secret=V1SecretProjection(
                             name='webui-credentials',
                             items=[V1KeyToPath(key='gcloud-credentials',
                                                path='gcloud_credentials.json')]))
                     ]))
        ],
        volume_paths=webui_volume_paths + dind_volume_paths)
    ensure_ingress_routed_svc(api_core_v1=api_core_v1,
                              api_custom=api_custom,
                              domain=domain,
                              hostname='admin',
                              name='admin',
                              target_name='admin',
                              namespace='default',
                              port_name='web',
                              svc_port=80,
                              target_port=8000)

    reg_volume_paths = [
        ('database', '/opt/nifi-registry/nifi-registry-current/database',
         '10Gi', 'standard'),
        ('flow-storage', '/opt/nifi-registry/nifi-registry-current/flow_storage',
         '20Gi', 'standard'),
    ]
    reg_volume_mounts = [
        V1VolumeMount(name=path[0], mount_path=path[1])
        for path in reg_volume_paths
    ]
    ensure_statefulset_with_containers(
        api_apps_v1=api_apps_v1,
        name='registry',
        namespace='default',
        replicas=1,
        containers=[
            V1Container(name='registry',
                        image='apache/nifi-registry:latest',
                        env=[
                            V1EnvVar(name='NIFI_REGISTRY_WEB_HTTP_PORT', value='19090'),
                        ],
                        ports=[V1ContainerPort(container_port=19090)],
                        volume_mounts=reg_volume_mounts),
        ],
        init_containers=[
            V1Container(
                name='init-permissions',
                image='busybox',
                command=['sh', '-c',
                         'chown -R 1000:1000 /opt/nifi-registry/nifi-registry-current'],
                volume_mounts=[
                    V1VolumeMount(name=path[0], mount_path=path[1])
                    for path in reg_volume_paths
                ])
        ],
        volumes=[],
        volume_paths=reg_volume_paths)
    ensure_ingress_routed_svc(api_core_v1=api_core_v1,
                              api_custom=api_custom,
                              domain=domain,
                              hostname='registry',
                              name='registry',
                              target_name='registry',
                              namespace='default',
                              port_name='web',
                              svc_port=80,
                              target_port=19090)

    perform_nifi_ops(api_apps_v1, api_core_v1, api_custom, domain)
    perform_build_ops_bg()
    perform_mirror_ops_bg()
name= "host--slash-aslash-bslash-cdslash-eslash-fslash-gslash-hs--f2c8", read_only=False), ]), )) def test_get_kubernetes_volume_mounts(volumes, expected): assert get_kubernetes_volume_mounts(volumes) == expected @pytest.mark.parametrize("volumes,expected", ( (v({ "container_path": "/a", "host_path": "/b", "mode": "RO" }), [V1Volume(name="host--slash-b", host_path=V1HostPathVolumeSource("/b"))]), (v( { "container_path": "/a", "host_path": "/b", "mode": "RO" }, { "container_path": "/b", "host_path": "/a/b/cd/e/f/g/h/u/j/k/l", "mode": "RW" }, ), [ V1Volume(name="host--slash-b", host_path=V1HostPathVolumeSource("/b")), V1Volume( name=