def dockerjson_pv(pull_secret, name=None, filename='.dockerconfigjson',
                  project_to='/kaniko/.docker/config.json'):
    """Create a V1Volume projection from a Kubernetes pull secret."""
    from os.path import basename
    from kubernetes import client as k8sc  # body uses the k8sc alias

    if not name:
        from uuid import uuid1
        name = 'vol-' + str(uuid1())[:12]
    return k8sc.V1Volume(
        name=name,
        projected=k8sc.V1ProjectedVolumeSource(sources=[
            k8sc.V1VolumeProjection(
                secret=k8sc.V1SecretProjection(
                    name=pull_secret,
                    items=[
                        k8sc.V1KeyToPath(key=filename,
                                         path=basename(project_to))
                    ]))
        ]))
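# A minimal usage sketch (assumed, not part of the source): the projected
# volume returned by dockerjson_pv() is typically paired with a mount at the
# directory of `project_to` so Kaniko finds /kaniko/.docker/config.json.
# 'my-pull-secret' is a placeholder secret name.
from os.path import dirname
from kubernetes import client as k8sc

vol = dockerjson_pv('my-pull-secret')
mount = k8sc.V1VolumeMount(name=vol.name,
                           mount_path=dirname('/kaniko/.docker/config.json'))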
def _use_config_map(task):
    # `name`, `key_path_mapper`, and `mount_path` are closure variables
    # supplied by the enclosing factory function.
    config_map = k8s.V1ConfigMapVolumeSource(
        name=name,
        items=[k8s.V1KeyToPath(key=key, path=key) for key in key_path_mapper])
    return (task
            .add_volume(k8s.V1Volume(config_map=config_map, name=name))
            .add_volume_mount(
                k8s.V1VolumeMount(mount_path=mount_path, name=name)))
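# A hedged sketch (assumed, not in the source) of the enclosing factory that
# would supply the closure variables used by _use_config_map above; the
# returned inner function is applied to a task, e.g.
# task.apply(use_config_map(...)) in Kubeflow Pipelines.
from kubernetes import client as k8s

def use_config_map(name, mount_path, key_path_mapper):
    def _use_config_map(task):
        config_map = k8s.V1ConfigMapVolumeSource(
            name=name,
            items=[k8s.V1KeyToPath(key=key, path=key)
                   for key in key_path_mapper])
        return (task
                .add_volume(k8s.V1Volume(config_map=config_map, name=name))
                .add_volume_mount(
                    k8s.V1VolumeMount(mount_path=mount_path, name=name)))
    return _use_config_map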
def create_job_object(job_arguments, size, docker_image, docker_image_tag,
                      affinity):
    user = os.environ['USER']
    job = client.V1Job(
        metadata=client.V1ObjectMeta(
            name='kaml-remote-{}-{}'.format(user, uuid.uuid1())),
        spec=client.V1JobSpec(template=client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(
                name='kaml-remote-{}-{}'.format(user, uuid.uuid1()),
                labels={'type': size}),
            spec=client.V1PodSpec(
                containers=[
                    client.V1Container(
                        name='kaml-remote',
                        args=job_arguments,
                        image='{}:{}'.format(docker_image, docker_image_tag),
                        image_pull_policy='Always',
                        env=[client.V1EnvVar(name='KAML_HOME', value='/app')],
                        volume_mounts=[
                            client.V1VolumeMount(name='kaml-cfg-volume',
                                                 read_only=True,
                                                 mount_path='/app/kaml.cfg',
                                                 sub_path='kaml.cfg'),
                            client.V1VolumeMount(
                                name='gcp-service-account',
                                read_only=True,
                                mount_path='/app/service-key.json',
                                sub_path='service-key.json'),
                        ])
                ],
                affinity=affinity,
                volumes=[
                    client.V1Volume(
                        name='kaml-cfg-volume',
                        config_map=client.V1ConfigMapVolumeSource(
                            name='kaml-cfg')),
                    client.V1Volume(
                        name='gcp-service-account',
                        secret=client.V1SecretVolumeSource(
                            secret_name='gcp-service-account',
                            items=[
                                client.V1KeyToPath(key='service-key.json',
                                                   path='service-key.json')
                            ]))
                ],
                restart_policy='Never'))))
    return job
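# A minimal submission sketch (assumed): the V1Job returned by
# create_job_object() can be submitted with the batch API. The namespace,
# image, size label, and arguments below are placeholders.
import os
import uuid
from kubernetes import client, config

config.load_kube_config()
job = create_job_object(['--epochs', '10'], 'small',
                        'gcr.io/example/kaml', 'latest', affinity=None)
client.BatchV1Api().create_namespaced_job(namespace='default', body=job)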
def _use_pull_secret(task):
    # `secret_name`, `filename`, and `project_to` are closure variables
    # supplied by the enclosing factory function.
    from os.path import basename, dirname
    from kubernetes import client as k8sc
    return (task.add_volume(
        k8sc.V1Volume(
            name='registrycreds',
            projected=k8sc.V1ProjectedVolumeSource(sources=[
                k8sc.V1VolumeProjection(secret=k8sc.V1SecretProjection(
                    name=secret_name,
                    items=[
                        k8sc.V1KeyToPath(key=filename,
                                         path=basename(project_to))
                    ]))
            ]))).add_volume_mount(
                k8sc.V1VolumeMount(name='registrycreds',
                                   mount_path=dirname(project_to))))
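# A hedged usage note (assumed): like _use_config_map above, this modifier is
# meant to be produced by a factory that binds `secret_name`, `filename`, and
# `project_to`, then applied to a Kubeflow Pipelines ContainerOp, e.g.:
#
#   task = some_container_op(...)
#   task = task.apply(use_pull_secret('regcred'))  # hypothetical factory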
def add_acr_config(kube_manager, pod_spec, namespace):
    secret_name = constants.AZURE_ACR_CREDS_SECRET_NAME
    if not kube_manager.secret_exists(secret_name, namespace):
        raise Exception("Secret '{}' not found in namespace '{}'".format(
            secret_name, namespace))

    volume_mount = client.V1VolumeMount(name='acr-config',
                                        mount_path='/kaniko/.docker/',
                                        read_only=True)
    if pod_spec.containers[0].volume_mounts:
        pod_spec.containers[0].volume_mounts.append(volume_mount)
    else:
        pod_spec.containers[0].volume_mounts = [volume_mount]

    items = [client.V1KeyToPath(key='.dockerconfigjson', path='config.json')]
    volume = client.V1Volume(name='acr-config',
                             secret=client.V1SecretVolumeSource(
                                 secret_name=secret_name, items=items))
    if pod_spec.volumes:
        pod_spec.volumes.append(volume)
    else:
        pod_spec.volumes = [volume]
def get_task_manager_boilerplate(
        job: KubernetesFlinkJob) -> client.V1Deployment:
    from ai_flow.application_master.master import GLOBAL_MASTER_CONFIG
    dep_resource_metadata = client.V1ObjectMeta(
        name='flink-task-manager-' + str(job.uuid))
    mount_path = '/opt/ai-flow/project'
    volume_mount = client.V1VolumeMount(name='download-volume',
                                        mount_path=mount_path)
    flink_config_volume_mount = client.V1VolumeMount(
        name="flink-config-volume", mount_path="/opt/flink/conf")
    init_args_default = [
        str(job.job_config.properties),
        str(job.job_context.workflow_execution_id),
        job.job_config.project_path, mount_path
    ]
    init_container = client.V1Container(
        name='init-container',
        image=GLOBAL_MASTER_CONFIG['ai_flow_base_init_image'],
        image_pull_policy='Always',
        command=["python", "/app/download.py"],
        args=init_args_default,
        volume_mounts=[volume_mount, flink_config_volume_mount])
    volume = client.V1Volume(name='download-volume')

    task_manager_args = [
        "task-manager",
        "-Djobmanager.rpc.address=" +
        'flink-job-cluster-{}-svc'.format(job.uuid)
    ]
    try:
        flink_conf = job.job_config.flink_conf
        for key, value in flink_conf.items():
            task_manager_args.extend(["-D{}={}".format(key, value)])
    except KeyError:
        pass

    workflow_id_env = client.V1EnvVar(
        name='WORKFLOW_ID',
        value=str(job.job_context.workflow_execution_id))
    execution_config_env = client.V1EnvVar(name='CONFIG_FILE_NAME',
                                           value=job.config_file)

    # flink_conf.yaml config map volume
    config_name = "flink-config-{}".format(job.uuid)
    key_to_path_list = [
        client.V1KeyToPath(key="flink-conf.yaml", path="flink-conf.yaml"),
        client.V1KeyToPath(key="log4j.properties", path="log4j.properties"),
        client.V1KeyToPath(key="log4j-cli.properties",
                           path="log4j-cli.properties")
    ]
    flink_config_volume = client.V1Volume(
        name="flink-config-volume",
        config_map=client.V1ConfigMapVolumeSource(name=config_name,
                                                  items=key_to_path_list))

    task_manager_container_image = None
    if 'flink_ai_flow_base_image' in GLOBAL_MASTER_CONFIG:
        task_manager_container_image = GLOBAL_MASTER_CONFIG[
            'flink_ai_flow_base_image']
    try:
        if job.job_config.image is not None:
            task_manager_container_image = job.job_config.image
    except KeyError:
        pass
    if task_manager_container_image is None:
        raise Exception("flink_ai_flow_base_image not set")

    tm_container = client.V1Container(
        name='flink-task-manager-' + str(job.uuid),
        image=task_manager_container_image,
        command=['/docker-entrypoint.sh'],
        args=task_manager_args,
        env=[workflow_id_env, execution_config_env],
        volume_mounts=[volume_mount])
    try:
        tm_resource = job.job_config.resources['taskmanager']
        tm_container.resources = client.V1ResourceRequirements(
            requests=tm_resource)
    except KeyError:
        pass

    containers = [tm_container]
    labels = {'app': 'flink', 'component': 'task-manager-' + str(job.uuid)}
    pod_template = client.V1PodTemplateSpec(
        metadata=client.V1ObjectMeta(labels=labels),
        spec=client.V1PodSpec(containers=containers,
                              init_containers=[init_container],
                              volumes=[volume, flink_config_volume]))
    deployment_spec = client.V1DeploymentSpec(
        replicas=job.job_config.parallelism,
        template=pod_template,
        selector={'matchLabels': labels})
    dep_resource = client.V1Deployment(api_version='extensions/v1beta1',
                                       kind='Deployment',
                                       spec=deployment_spec,
                                       metadata=dep_resource_metadata)
    return dep_resource
def get_flink_session_cluster_boilerplate(
        job: KubernetesFlinkJob) -> client.V1Job:
    from ai_flow.application_master.master import GLOBAL_MASTER_CONFIG
    job_master_args_default = [
        "session-cluster", "--job-classname", job.job_config.main_class,
        "-Djobmanager.rpc.address=flink-job-cluster-{}-svc".format(job.uuid),
        "-Dparallelism.default=1", "-Dblob.server.port=6124",
        "-Dqueryable-state.server.ports=6125"
    ]
    rpc_container_port = client.V1ContainerPort(name='rpc',
                                                container_port=6123)
    blob_container_port = client.V1ContainerPort(name='blob',
                                                 container_port=6124)
    query_container_port = client.V1ContainerPort(name='query',
                                                  container_port=6125)
    ui_container_port = client.V1ContainerPort(name='ui', container_port=8081)
    mount_path = '/opt/ai-flow/project'
    volume_mount = client.V1VolumeMount(name='download-volume',
                                        mount_path=mount_path)
    flink_config_volume_mount = client.V1VolumeMount(
        name="flink-config-volume", mount_path="/opt/flink/conf")
    workflow_id_env = client.V1EnvVar(
        name='WORKFLOW_ID',
        value=str(job.job_context.workflow_execution_id))
    execution_config_env = client.V1EnvVar(name='CONFIG_FILE_NAME',
                                           value=job.config_file)
    if job.job_config.language_type == LanguageType.PYTHON:
        language_type_env = client.V1EnvVar(name='LANGUAGE_TYPE',
                                            value='python')
    else:
        language_type_env = client.V1EnvVar(name='LANGUAGE_TYPE',
                                            value='java')
    entry_module_path_env = client.V1EnvVar(
        name='ENTRY_MODULE_PATH',
        value=job.job_config.properties['entry_module_path'])
    flink_job_master_rpc_address_env = client.V1EnvVar(
        name='FLINK_JOB_MASTER_RPC_ADDRESS',
        value="flink-job-cluster-{}-svc".format(job.uuid))

    job_master_container_image = None
    if 'flink_ai_flow_base_image' in GLOBAL_MASTER_CONFIG:
        job_master_container_image = GLOBAL_MASTER_CONFIG[
            'flink_ai_flow_base_image']
    if job.job_config.image is not None:
        job_master_container_image = job.job_config.image
    if job_master_container_image is None:
        raise Exception("flink_ai_flow_base_image not set")

    job_master_container = client.V1Container(
        name='flink-job-master-{}'.format(job.uuid),
        image=job_master_container_image,
        image_pull_policy='Always',
        ports=[
            rpc_container_port, blob_container_port, query_container_port,
            ui_container_port
        ],
        command=['/docker-entrypoint.sh'],
        args=job_master_args_default,
        volume_mounts=[volume_mount, flink_config_volume_mount],
        env=[
            workflow_id_env, execution_config_env,
            flink_job_master_rpc_address_env, entry_module_path_env,
            language_type_env
        ])
    try:
        jm_resources = job.job_config.resources['jobmanager']
        job_master_container.resources = client.V1ResourceRequirements(
            requests=jm_resources)
    except KeyError:
        pass

    init_args_default = [
        str(job.job_config.properties),
        str(job.job_context.workflow_execution_id),
        job.job_config.project_path, mount_path
    ]
    init_container = client.V1Container(
        name='init-container',
        image=GLOBAL_MASTER_CONFIG['ai_flow_base_init_image'],
        image_pull_policy='Always',
        command=["python", "/app/download.py"],
        args=init_args_default,
        volume_mounts=[volume_mount])
    volume = client.V1Volume(name='download-volume')

    # flink_conf.yaml config map volume
    config_name = "flink-config-{}".format(job.uuid)
    key_to_path_list = [
        client.V1KeyToPath(key="flink-conf.yaml", path="flink-conf.yaml"),
        client.V1KeyToPath(key="log4j.properties", path="log4j.properties"),
        client.V1KeyToPath(key="log4j-cli.properties",
                           path="log4j-cli.properties")
    ]
    flink_config_volume = client.V1Volume(
        name="flink-config-volume",
        config_map=client.V1ConfigMapVolumeSource(name=config_name,
                                                  items=key_to_path_list))

    pod_spec = client.V1PodSpec(restart_policy='Never',
                                containers=[job_master_container],
                                init_containers=[init_container],
                                volumes=[volume, flink_config_volume])
    labels = {'app': 'flink', 'component': 'job-cluster-' + str(job.uuid)}
    object_meta = client.V1ObjectMeta(
        labels=labels,
        annotations={
            ANNOTATION_WATCHED: 'True',
            ANNOTATION_JOB_ID: str(job.instance_id),
            ANNOTATION_WORKFLOW_ID:
                str(job.job_context.workflow_execution_id),
            ANNOTATION_JOB_UUID: str(job.uuid)
        })
    template_spec = client.V1PodTemplateSpec(metadata=object_meta,
                                             spec=pod_spec)
    job_spec = client.V1JobSpec(template=template_spec, backoff_limit=0)
    object_meta = client.V1ObjectMeta(labels=labels,
                                      name=generate_job_name(job))
    job = client.V1Job(metadata=object_meta,
                       spec=job_spec,
                       api_version='batch/v1',
                       kind='Job')
    return job
def create_config_map(self):
    items = [client.V1KeyToPath(key=self.source_key, path=self.source_path)]
    return client.V1ConfigMapVolumeSource(name=self.config_map_name,
                                          items=items)
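# A minimal usage sketch (assumed): the returned V1ConfigMapVolumeSource is
# usually wrapped in a V1Volume before being attached to a pod spec.
# `builder` stands for an instance of the class defining create_config_map().
volume = client.V1Volume(name='config',
                         config_map=builder.create_config_map())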
def deploy_fake_cega(deploy_lega):
    """Deploy the Fake CEGA."""
    _here = Path(__file__).parent
    trace_file = Path(_here / 'config/trace.ini')
    assert trace_file.exists(), "No trace file!"
    trace_config = configparser.ConfigParser()
    trace_config.read(trace_file)

    with open(_here / 'extras/server.py') as users_init:
        init_users = users_init.read()
    with open(_here / 'extras/users.html') as user_list:
        users = user_list.read()
    with open(_here / 'extras/cega-mq.sh') as ceg_mq_init:
        cega_init_mq = ceg_mq_init.read()
    with open(_here / 'config/cega.config') as cega_config:
        cega_config_mq = cega_config.read()
    with open(_here / 'config/cega.json') as cega_defs:
        cega_defs_mq = cega_defs.read()

    user_pub = trace_config['secrets']['cega_user_public_key']

    ports_mq_management = [client.V1ServicePort(name="http", protocol="TCP",
                                                port=15672,
                                                target_port=15672)]
    ports_mq = [client.V1ServicePort(name="amqp", protocol="TCP",
                                     port=5672, target_port=5672),
                client.V1ServicePort(name="epmd", protocol="TCP",
                                     port=4369, target_port=4369),
                client.V1ServicePort(name="rabbitmq-dist", protocol="TCP",
                                     port=25672, target_port=25672)]

    deploy_lega.config_map('users-config',
                           {'server.py': init_users,
                            'users.html': users,
                            'ega-box-999.yml': f'---\npubkey: {user_pub}'})

    env_users_inst = client.V1EnvVar(name="LEGA_INSTANCES", value="lega")
    env_users_creds = client.V1EnvVar(
        name="CEGA_REST_lega_PASSWORD",
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(name='cega-creds',
                                                      key="credentials")))
    mount_users = client.V1VolumeMount(name="users-config",
                                       mount_path='/cega')
    users_map = client.V1ConfigMapProjection(
        name="users-config",
        items=[client.V1KeyToPath(key="server.py", path="server.py"),
               client.V1KeyToPath(key="users.html", path="users.html"),
               client.V1KeyToPath(key="ega-box-999.yml",
                                  path="users/ega-box-999.yml"),
               client.V1KeyToPath(key="ega-box-999.yml",
                                  path="users/lega/ega-box-999.yml")])
    users_vol = client.V1VolumeProjection(config_map=users_map)
    volume_users = client.V1Volume(
        name="users-config",
        projected=client.V1ProjectedVolumeSource(sources=[users_vol]))

    deploy_lega.config_map('cega-mq-entrypoint',
                           {'cega-mq.sh': cega_init_mq})
    deploy_lega.config_map('cega-mq-config',
                           {'defs.json': cega_defs_mq,
                            'rabbitmq.config': cega_config_mq})
    deploy_lega.persistent_volume("cega-rabbitmq", "1Gi")
    deploy_lega.persistent_volume_claim("cega-mq-storage", "cega-rabbitmq",
                                        "1Gi")

    mount_cega_temp = client.V1VolumeMount(name="cega-mq-temp",
                                           mount_path='/temp')
    mount_cega_rabbitmq = client.V1VolumeMount(name="cega-rabbitmq",
                                               mount_path='/etc/rabbitmq')
    volume_cega_temp = client.V1Volume(
        name="cega-mq-temp",
        config_map=client.V1ConfigMapVolumeSource(name="cega-mq-config"))
    volume_cega_rabbitmq = client.V1Volume(
        name="cega-rabbitmq",
        persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
            claim_name="cega-mq-storage"))
    mount_mq_cega = client.V1VolumeMount(name="cega-mq-entrypoint",
                                         mount_path='/script')
    volume_mq_cega = client.V1Volume(
        name="cega-mq-entrypoint",
        config_map=client.V1ConfigMapVolumeSource(name="cega-mq-entrypoint",
                                                  default_mode=0o744))

    deploy_lega.stateful_set(
        'cega-mq', 'rabbitmq:3.6.14-management', ["/script/cega-mq.sh"],
        None, [mount_cega_temp, mount_mq_cega, mount_cega_rabbitmq],
        [volume_cega_temp, volume_mq_cega, volume_cega_rabbitmq],
        ports=[15672, 5672, 4369, 25672])
    deploy_lega.deployment('cega-users', 'nbisweden/ega-base:latest',
                           ["python3.6", "/cega/server.py"],
                           [env_users_inst, env_users_creds],
                           [mount_users], [volume_users], ports=[8001])

    ports_users = [client.V1ServicePort(protocol="TCP", port=8001,
                                        target_port=8001)]
    deploy_lega.service('cega-mq', ports_mq, type="NodePort")
    deploy_lega.service('cega-mq-management', ports_mq_management,
                        pod_name="cega-mq", type="NodePort")
    deploy_lega.service('cega-users', ports_users, type="NodePort")
def kubernetes_deployment(_localega, config, ns, fake_cega):
    """Wrap all the Kubernetes settings."""
    _here = Path(__file__).parent
    trace_file = Path(_here / 'config/trace.ini')
    assert trace_file.exists(), "No trace file!"
    trace_config = configparser.ConfigParser()
    trace_config.read(trace_file)

    deploy_lega = LocalEGADeploy(_localega, ns)

    # Setting ENV variables and Volumes
    env_cega_api = client.V1EnvVar(
        name="CEGA_ENDPOINT", value=f"{_localega['cega']['endpoint']}")
    env_inbox_mq = client.V1EnvVar(
        name="BROKER_HOST",
        value=f"{_localega['services']['broker']}.{ns}")
    env_inbox_port = client.V1EnvVar(name="INBOX_PORT", value="2222")
    env_db_data = client.V1EnvVar(name="PGDATA",
                                  value="/var/lib/postgresql/data/pgdata")
    env_cega_mq = client.V1EnvVar(
        name="CEGA_CONNECTION",
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(
                name='cega-connection', key="address")))
    env_cega_creds = client.V1EnvVar(
        name="CEGA_ENDPOINT_CREDS",
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(
                name='cega-creds', key="credentials")))
    env_acc_minio = client.V1EnvVar(
        name="MINIO_ACCESS_KEY",
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(name='s3-keys',
                                                      key="access")))
    env_sec_minio = client.V1EnvVar(
        name="MINIO_SECRET_KEY",
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(name='s3-keys',
                                                      key="secret")))
    env_acc_s3 = client.V1EnvVar(
        name="S3_ACCESS_KEY",
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(name='s3-keys',
                                                      key="access")))
    env_sec_s3 = client.V1EnvVar(
        name="S3_SECRET_KEY",
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(name='s3-keys',
                                                      key="secret")))
    env_db_pass = client.V1EnvVar(
        name="POSTGRES_PASSWORD",
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(
                name='lega-db-secret', key="postgres_password")))
    env_db_user = client.V1EnvVar(
        name="POSTGRES_USER",
        value_from=client.V1EnvVarSource(
            config_map_key_ref=client.V1ConfigMapKeySelector(
                name='lega-db-config', key="user")))
    env_db_name = client.V1EnvVar(
        name="POSTGRES_DB",
        value_from=client.V1EnvVarSource(
            config_map_key_ref=client.V1ConfigMapKeySelector(
                name='lega-db-config', key="dbname")))
    env_lega_pass = client.V1EnvVar(
        name="LEGA_PASSWORD",
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(
                name='lega-password', key="password")))
    env_keys_pass = client.V1EnvVar(
        name="KEYS_PASSWORD",
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(
                name='keys-password', key="password")))

    mount_config = client.V1VolumeMount(name="config", mount_path='/etc/ega')
    mount_inbox = client.V1VolumeMount(name="inbox", mount_path='/ega/inbox')
    mount_mq_temp = client.V1VolumeMount(name="mq-temp", mount_path='/temp')
    mount_mq_rabbitmq = client.V1VolumeMount(name="rabbitmq",
                                             mount_path='/etc/rabbitmq')
    mount_mq_script = client.V1VolumeMount(name="mq-entrypoint",
                                           mount_path='/script')
    mount_db_data = client.V1VolumeMount(
        name="data", mount_path='/var/lib/postgresql/data', read_only=False)
    mound_db_init = client.V1VolumeMount(
        name="initsql", mount_path='/docker-entrypoint-initdb.d')
    mount_minio = client.V1VolumeMount(name="data", mount_path='/data')

    pmap_ini_conf = client.V1VolumeProjection(
        config_map=client.V1ConfigMapProjection(
            name="lega-config",
            items=[client.V1KeyToPath(key="conf.ini", path="conf.ini",
                                      mode=0o744)]))
    pmap_ini_keys = client.V1VolumeProjection(
        config_map=client.V1ConfigMapProjection(
            name="lega-keyserver-config",
            items=[client.V1KeyToPath(key="keys.ini", path="keys.ini",
                                      mode=0o744)]))
    sec_keys = client.V1VolumeProjection(
        secret=client.V1SecretProjection(
            name="keyserver-secret",
            items=[client.V1KeyToPath(key="key1.sec", path="pgp/key.1"),
                   client.V1KeyToPath(key="ssl.cert", path="ssl.cert"),
                   client.V1KeyToPath(key="ssl.key", path="ssl.key")]))

    deploy_lega.create_namespace()

    # Create Secrets
    deploy_lega.config_secret(
        'cega-creds', {'credentials': trace_config['secrets']['cega_creds']})
    deploy_lega.config_secret(
        'cega-connection',
        {'address': trace_config['secrets']['cega_address']})
    deploy_lega.config_secret(
        'lega-db-secret',
        {'postgres_password': trace_config['secrets']['postgres_password']})
    deploy_lega.config_secret(
        's3-keys', {'access': trace_config['secrets']['s3_access'],
                    'secret': trace_config['secrets']['s3_secret']})
    deploy_lega.config_secret(
        'lega-password',
        {'password': trace_config['secrets']['lega_password']})
    deploy_lega.config_secret(
        'keys-password',
        {'password': trace_config['secrets']['keys_password']})

    with open(_here / 'config/key.1.sec') as key_file:
        key1_data = key_file.read()
    with open(_here / 'config/ssl.cert') as cert:
        ssl_cert = cert.read()
    with open(_here / 'config/ssl.key') as key:
        ssl_key = key.read()
    deploy_lega.config_secret('keyserver-secret', {'key1.sec': key1_data,
                                                   'ssl.cert': ssl_cert,
                                                   'ssl.key': ssl_key})

    # Read conf from files
    with open(_here / 'extras/db.sql') as sql_init:
        init_sql = sql_init.read()
    with open(_here / 'extras/mq.sh') as mq_init:
        init_mq = mq_init.read()
    with open(_here / 'config/conf.ini') as conf_file:
        data_conf = conf_file.read()
    with open(_here / 'config/keys.ini') as keys_file:
        data_keys = keys_file.read()
    with open(_here / 'config/rabbitmq.config') as config:
        config_mq = config.read()
    with open(_here / 'config/defs.json') as defs:
        defs_mq = defs.read()

    # secret = deploy_lega.read_secret('keys-password')
    # enc_keys = conf.aes_encrypt(
    #     b64decode(secret.to_dict()['data']['password'].encode('utf-8')),
    #     data_keys.encode('utf-8'), md5)
    # with open(_here / 'config/keys.ini.enc', 'w') as enc_file:
    #     enc_file.write(b64encode(enc_keys).decode('utf-8'))

    # Upload Configuration Maps
    deploy_lega.config_map('initsql', {'db.sql': init_sql})
    deploy_lega.config_map('mq-config', {'defs.json': defs_mq,
                                         'rabbitmq.config': config_mq})
    deploy_lega.config_map('mq-entrypoint', {'mq.sh': init_mq})
    deploy_lega.config_map('lega-config', {'conf.ini': data_conf})
    deploy_lega.config_map('lega-keyserver-config', {'keys.ini': data_keys})
    deploy_lega.config_map('lega-db-config', {'user': '******',
                                              'dbname': 'lega'})

    # Volumes
    deploy_lega.persistent_volume("postgres", "0.5Gi",
                                  accessModes=["ReadWriteMany"])
    deploy_lega.persistent_volume("rabbitmq", "0.5Gi")
    deploy_lega.persistent_volume("inbox", "0.5Gi",
                                  accessModes=["ReadWriteMany"])
    deploy_lega.persistent_volume_claim("db-storage", "postgres", "0.5Gi",
                                        accessModes=["ReadWriteMany"])
    deploy_lega.persistent_volume_claim("mq-storage", "rabbitmq", "0.5Gi")
    deploy_lega.persistent_volume_claim("inbox", "inbox", "0.5Gi",
                                        accessModes=["ReadWriteMany"])

    volume_db = client.V1Volume(
        name="data",
        persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
            claim_name="db-storage"))
    volume_rabbitmq = client.V1Volume(
        name="rabbitmq",
        persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
            claim_name="mq-storage"))
    volume_db_init = client.V1Volume(
        name="initsql",
        config_map=client.V1ConfigMapVolumeSource(name="initsql"))
    volume_mq_temp = client.V1Volume(
        name="mq-temp",
        config_map=client.V1ConfigMapVolumeSource(name="mq-config"))
    volume_mq_script = client.V1Volume(
        name="mq-entrypoint",
        config_map=client.V1ConfigMapVolumeSource(name="mq-entrypoint",
                                                  default_mode=0o744))
    volume_config = client.V1Volume(
        name="config",
        config_map=client.V1ConfigMapVolumeSource(name="lega-config"))
    # volume_ingest = client.V1Volume(
    #     name="ingest-conf",
    #     config_map=client.V1ConfigMapVolumeSource(name="lega-config"))
    volume_inbox = client.V1Volume(
        name="inbox",
        persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
            claim_name="inbox"))
    volume_keys = client.V1Volume(
        name="config",
        projected=client.V1ProjectedVolumeSource(
            sources=[pmap_ini_conf, pmap_ini_keys, sec_keys]))
    pvc_minio = client.V1PersistentVolumeClaim(
        metadata=client.V1ObjectMeta(name="data"),
        spec=client.V1PersistentVolumeClaimSpec(
            access_modes=["ReadWriteOnce"],
            resources=client.V1ResourceRequirements(
                requests={"storage": "10Gi"})))

    # Deploy LocalEGA Pods
    deploy_lega.deployment('mapper', 'nbisweden/ega-base:latest',
                           ["ega-id-mapper"], [], [mount_config],
                           [volume_config], patch=True)
    deploy_lega.deployment('keys', 'nbisweden/ega-base:latest',
                           ["ega-keyserver", "--keys", "/etc/ega/keys.ini"],
                           [env_lega_pass, env_keys_pass], [mount_config],
                           [volume_keys], ports=[8443], patch=True)
    deploy_lega.deployment('db', 'postgres:9.6', None,
                           [env_db_pass, env_db_user, env_db_name,
                            env_db_data],
                           [mount_db_data, mound_db_init],
                           [volume_db, volume_db_init], ports=[5432])
    deploy_lega.deployment('ingest', 'nbisweden/ega-base:latest',
                           ["ega-ingest"],
                           [env_lega_pass, env_acc_s3, env_sec_s3,
                            env_db_pass],
                           [mount_config, mount_inbox],
                           [volume_config, volume_inbox])
    deploy_lega.stateful_set('minio', 'minio/minio:latest', None,
                             [env_acc_minio, env_sec_minio], [mount_minio],
                             None, args=["server", "/data"],
                             vol_claims=[pvc_minio], ports=[9000])
    deploy_lega.stateful_set('verify', 'nbisweden/ega-base:latest',
                             ["ega-verify"],
                             [env_acc_s3, env_sec_s3, env_lega_pass,
                              env_db_pass],
                             [mount_config], [volume_config])
    deploy_lega.stateful_set('mq', 'rabbitmq:3.6.14-management',
                             ["/script/mq.sh"], [env_cega_mq],
                             [mount_mq_temp, mount_mq_script,
                              mount_mq_rabbitmq],
                             [volume_mq_temp, volume_mq_script,
                              volume_rabbitmq],
                             ports=[15672, 5672, 4369, 25672])
    deploy_lega.stateful_set('inbox', 'nbisweden/ega-mina-inbox:latest',
                             None,
                             [env_inbox_mq, env_cega_api, env_cega_creds,
                              env_inbox_port],
                             [mount_inbox], [volume_inbox], ports=[2222])

    # Ports
    ports_db = [client.V1ServicePort(protocol="TCP", port=5432,
                                     target_port=5432)]
    ports_inbox = [client.V1ServicePort(protocol="TCP", port=2222,
                                        target_port=2222)]
    ports_s3 = [client.V1ServicePort(name="web", protocol="TCP", port=9000)]
    ports_keys = [client.V1ServicePort(protocol="TCP", port=8443,
                                       target_port=8443)]
    ports_mq_management = [client.V1ServicePort(name="http", protocol="TCP",
                                                port=15672,
                                                target_port=15672)]
    ports_mq = [client.V1ServicePort(name="amqp", protocol="TCP",
                                     port=5672, target_port=5672),
                client.V1ServicePort(name="epmd", protocol="TCP",
                                     port=4369, target_port=4369),
                client.V1ServicePort(name="rabbitmq-dist", protocol="TCP",
                                     port=25672, target_port=25672)]

    # Deploy Services
    deploy_lega.service('db', ports_db)
    deploy_lega.service('mq-management', ports_mq_management, pod_name="mq",
                        type="NodePort")
    deploy_lega.service('mq', ports_mq)
    deploy_lega.service('keys', ports_keys)
    deploy_lega.service('inbox', ports_inbox, type="NodePort")
    deploy_lega.service('minio', ports_s3)
    # Headless
    deploy_lega.service('minio-service', ports_s3, pod_name="minio",
                        type="LoadBalancer")

    metric_cpu = client.V2beta1MetricSpec(
        type="Resource",
        resource=client.V2beta1ResourceMetricSource(
            name="cpu", target_average_utilization=50))
    deploy_lega.horizontal_scale("ingest", "ingest", "Deployment", 5,
                                 [metric_cpu])

    if fake_cega:
        deploy_fake_cega(deploy_lega)
def get_statefulset_object(cluster_object):
    name = cluster_object['metadata']['name']
    namespace = cluster_object['metadata']['namespace']
    try:
        replicas = cluster_object['spec']['mongodb']['replicas']
    except KeyError:
        replicas = 3
    try:
        mongodb_limit_cpu = \
            cluster_object['spec']['mongodb']['mongodb_limit_cpu']
    except KeyError:
        mongodb_limit_cpu = '100m'
    try:
        mongodb_limit_memory = \
            cluster_object['spec']['mongodb']['mongodb_limit_memory']
    except KeyError:
        mongodb_limit_memory = '64Mi'

    statefulset = client.V1beta1StatefulSet()

    # Metadata
    statefulset.metadata = client.V1ObjectMeta(
        name=name, namespace=namespace, labels=get_default_labels(name=name))

    # Spec
    statefulset.spec = client.V1beta1StatefulSetSpec(replicas=replicas,
                                                     service_name=name)
    statefulset.spec.template = client.V1PodTemplateSpec()
    statefulset.spec.template.metadata = client.V1ObjectMeta(
        labels=get_default_labels(name=name))
    # containers is a required attribute; the list is filled in below
    statefulset.spec.template.spec = client.V1PodSpec(containers=[])
    statefulset.spec.template.spec.affinity = client.V1Affinity(
        pod_anti_affinity=client.V1PodAntiAffinity(
            required_during_scheduling_ignored_during_execution=[
                client.V1PodAffinityTerm(
                    topology_key='kubernetes.io/hostname',
                    label_selector=client.V1LabelSelector(
                        match_expressions=[
                            client.V1LabelSelectorRequirement(
                                key='cluster', operator='In', values=[name])
                        ]))
            ]))

    # MongoDB container
    mongodb_port = client.V1ContainerPort(name='mongodb',
                                          container_port=27017,
                                          protocol='TCP')
    mongodb_tls_volumemount = client.V1VolumeMount(
        name='mongo-tls', read_only=True, mount_path='/etc/ssl/mongod')
    mongodb_data_volumemount = client.V1VolumeMount(name='mongo-data',
                                                    read_only=False,
                                                    mount_path='/data/db')
    mongodb_resources = client.V1ResourceRequirements(
        limits={'cpu': mongodb_limit_cpu, 'memory': mongodb_limit_memory},
        requests={'cpu': mongodb_limit_cpu, 'memory': mongodb_limit_memory})
    mongodb_container = client.V1Container(
        name='mongod',
        command=[
            'mongod', '--auth', '--replSet', name, '--sslMode', 'requireSSL',
            '--clusterAuthMode', 'x509', '--sslPEMKeyFile',
            '/etc/ssl/mongod/mongod.pem', '--sslCAFile',
            '/etc/ssl/mongod/ca.pem'
        ],
        image='mongo:3.4.1',
        ports=[mongodb_port],
        volume_mounts=[mongodb_tls_volumemount, mongodb_data_volumemount],
        resources=mongodb_resources)

    # Metrics container
    metrics_port = client.V1ContainerPort(name='metrics',
                                          container_port=9001,
                                          protocol='TCP')
    metrics_resources = client.V1ResourceRequirements(
        limits={'cpu': '50m', 'memory': '16Mi'},
        requests={'cpu': '50m', 'memory': '16Mi'})
    metrics_secret_name = '{}-monitoring-credentials'.format(name)
    metrics_username_env_var = client.V1EnvVar(
        name='MONGODB_MONITORING_USERNAME',
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(
                name=metrics_secret_name, key='username')))
    metrics_password_env_var = client.V1EnvVar(
        name='MONGODB_MONITORING_PASSWORD',
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(
                name=metrics_secret_name, key='password')))
    metrics_container = client.V1Container(
        name='prometheus-exporter',
        image='quay.io/kubestack/prometheus-mongodb-exporter:latest',
        command=[
            '/bin/sh', '-c',
            '/bin/mongodb_exporter --mongodb.uri mongodb://${MONGODB_MONITORING_USERNAME}:${MONGODB_MONITORING_PASSWORD}@127.0.0.1:27017/admin --mongodb.tls-cert /etc/ssl/mongod/mongod.pem --mongodb.tls-ca /etc/ssl/mongod/ca.pem'  # noqa
        ],
        ports=[metrics_port],
        resources=metrics_resources,
        volume_mounts=[mongodb_tls_volumemount],
        env=[metrics_username_env_var, metrics_password_env_var])
    statefulset.spec.template.spec.containers = [
        mongodb_container, metrics_container
    ]

    ca_volume = client.V1Volume(
        name='mongo-ca',
        secret=client.V1SecretVolumeSource(
            secret_name='{}-ca'.format(name),
            items=[
                client.V1KeyToPath(key='ca.pem', path='ca.pem'),
                client.V1KeyToPath(key='ca-key.pem', path='ca-key.pem')
            ]))
    tls_volume = client.V1Volume(name='mongo-tls',
                                 empty_dir=client.V1EmptyDirVolumeSource())
    data_volume = client.V1Volume(name='mongo-data',
                                  empty_dir=client.V1EmptyDirVolumeSource())
    statefulset.spec.template.spec.volumes = [
        ca_volume, tls_volume, data_volume
    ]

    # Init container
    # For now use the annotation format for init_containers to support
    # K8s >= 1.5
    statefulset.spec.template.metadata.annotations = {
        'pod.beta.kubernetes.io/init-containers': '[{"name": "cert-init","image": "quay.io/kubestack/mongodb-init:latest","volumeMounts": [{"readOnly": true,"mountPath": "/etc/ssl/mongod-ca","name": "mongo-ca"}, {"mountPath": "/etc/ssl/mongod","name": "mongo-tls"}],"env": [{"name": "METADATA_NAME","valueFrom": {"fieldRef": {"apiVersion": "v1","fieldPath": "metadata.name"}}}, {"name": "NAMESPACE","valueFrom": {"fieldRef": {"apiVersion": "v1","fieldPath": "metadata.namespace"}}}],"command": ["ansible-playbook","member-cert.yml"],"imagePullPolicy": "Always"}]'  # noqa
    }
    # tls_init_ca_volumemount = client.V1VolumeMount(
    #     name='mongo-ca',
    #     read_only=True,
    #     mount_path='/etc/ssl/mongod-ca')
    # tls_init_container = client.V1Container(
    #     name="cert-init",
    #     image="quay.io/kubestack/mongodb-init:latest",
    #     volume_mounts=[tls_init_ca_volumemount, mongodb_tls_volumemount],
    #     env=[
    #         client.V1EnvVar(
    #             name='METADATA_NAME',
    #             value_from=client.V1EnvVarSource(
    #                 field_ref=client.V1ObjectFieldSelector(
    #                     api_version='v1',
    #                     field_path='metadata.name'))),
    #         client.V1EnvVar(
    #             name='NAMESPACE',
    #             value_from=client.V1EnvVarSource(
    #                 field_ref=client.V1ObjectFieldSelector(
    #                     api_version='v1',
    #                     field_path='metadata.namespace')))],
    #     command=["ansible-playbook", "member-cert.yml"])
    #
    # statefulset.spec.template.spec.init_containers = [tls_init_container]

    return statefulset
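# A minimal sketch (assumed): submitting the v1beta1 StatefulSet built above
# with the matching apps/v1beta1 API from the same client generation.
# `cluster_object` is a placeholder custom-resource dict.
sts = get_statefulset_object(cluster_object)
client.AppsV1beta1Api().create_namespaced_stateful_set(
    namespace=cluster_object['metadata']['namespace'], body=sts)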
def apply_rekcurd_to_kubernetes(project_id: int, application_id: str,
                                service_level: str, version: str,
                                insecure_host: str, insecure_port: int,
                                replicas_default: int, replicas_minimum: int,
                                replicas_maximum: int,
                                autoscale_cpu_threshold: str,
                                policy_max_surge: int,
                                policy_max_unavailable: int,
                                policy_wait_seconds: int,
                                container_image: str,
                                resource_request_cpu: str,
                                resource_request_memory: str,
                                resource_limit_cpu: str,
                                resource_limit_memory: str,
                                commit_message: str,
                                service_model_assignment: int,
                                service_git_url: str = "",
                                service_git_branch: str = "",
                                service_boot_script: str = "",
                                debug_mode: bool = False,
                                service_id: str = None,
                                is_creation_mode: bool = False,
                                display_name: str = None,
                                description: str = None,
                                kubernetes_models=None,
                                **kwargs) -> str:
    """kubectl apply

    :param project_id:
    :param application_id:
    :param service_level:
    :param version:
    :param insecure_host:
    :param insecure_port:
    :param replicas_default:
    :param replicas_minimum:
    :param replicas_maximum:
    :param autoscale_cpu_threshold:
    :param policy_max_surge:
    :param policy_max_unavailable:
    :param policy_wait_seconds:
    :param container_image:
    :param resource_request_cpu:
    :param resource_request_memory:
    :param resource_limit_cpu:
    :param resource_limit_memory:
    :param commit_message:
    :param service_model_assignment:
    :param service_git_url:
    :param service_git_branch:
    :param service_boot_script:
    :param debug_mode:
    :param service_id:
    :param is_creation_mode:
    :param display_name:
    :param description:
    :param kubernetes_models:
    :param kwargs:
    :return:
    """
    __num_retry = 5
    progress_deadline_seconds = \
        int(__num_retry * policy_wait_seconds * replicas_maximum /
            (policy_max_surge + policy_max_unavailable))
    if service_id is None:
        is_creation_mode = True
        service_id = uuid.uuid4().hex
    if kubernetes_models is None:
        kubernetes_models = db.session.query(KubernetesModel).filter(
            KubernetesModel.project_id == project_id).all()
    data_server_model: DataServerModel = db.session.query(
        DataServerModel).filter(
            DataServerModel.project_id == project_id).first_or_404()
    application_model: ApplicationModel = db.session.query(
        ApplicationModel).filter(
            ApplicationModel.application_id ==
            application_id).first_or_404()
    application_name = application_model.application_name
    model_model: ModelModel = db.session.query(ModelModel).filter(
        ModelModel.model_id == service_model_assignment).first_or_404()

    from kubernetes import client
    try:
        git_secret = load_secret(project_id, application_id, service_level,
                                 GIT_SECRET_PREFIX)
    except Exception:
        git_secret = None
    volume_mounts = dict()
    volumes = dict()
    if git_secret:
        connector_name = "sec-git-name"
        secret_name = "sec-{}-{}".format(GIT_SECRET_PREFIX, application_id)
        volume_mounts = {
            'volume_mounts': [
                client.V1VolumeMount(name=connector_name,
                                     mount_path=GIT_SSH_MOUNT_DIR,
                                     read_only=True)
            ]
        }
        volumes = {
            'volumes': [
                client.V1Volume(
                    name=connector_name,
                    secret=client.V1SecretVolumeSource(
                        secret_name=secret_name,
                        items=[
                            client.V1KeyToPath(key=GIT_ID_RSA,
                                               path=GIT_ID_RSA,
                                               mode=GIT_SSH_MODE),
                            client.V1KeyToPath(key=GIT_CONFIG,
                                               path=GIT_CONFIG,
                                               mode=GIT_SSH_MODE)
                        ]))
            ]
        }

    for kubernetes_model in kubernetes_models:
        full_config_path = get_full_config_path(kubernetes_model.config_path)
        from kubernetes import config
        config.load_kube_config(full_config_path)

        pod_env = [
            client.V1EnvVar(name="REKCURD_SERVICE_UPDATE_FLAG",
                            value=commit_message),
            client.V1EnvVar(name="REKCURD_KUBERNETES_MODE", value="True"),
            client.V1EnvVar(name="REKCURD_DEBUG_MODE",
                            value=str(debug_mode)),
            client.V1EnvVar(name="REKCURD_APPLICATION_NAME",
                            value=application_name),
            client.V1EnvVar(name="REKCURD_SERVICE_INSECURE_HOST",
                            value=insecure_host),
            client.V1EnvVar(name="REKCURD_SERVICE_INSECURE_PORT",
                            value=str(insecure_port)),
            client.V1EnvVar(name="REKCURD_SERVICE_ID", value=service_id),
            client.V1EnvVar(name="REKCURD_SERVICE_LEVEL",
                            value=service_level),
            client.V1EnvVar(name="REKCURD_GRPC_PROTO_VERSION",
                            value=version),
            client.V1EnvVar(name="REKCURD_MODEL_MODE",
                            value=data_server_model.data_server_mode.value),
            client.V1EnvVar(name="REKCURD_MODEL_FILE_PATH",
                            value=model_model.filepath),
            client.V1EnvVar(
                name="REKCURD_CEPH_ACCESS_KEY",
                value=str(data_server_model.ceph_access_key or "xxx")),
            client.V1EnvVar(
                name="REKCURD_CEPH_SECRET_KEY",
                value=str(data_server_model.ceph_secret_key or "xxx")),
            client.V1EnvVar(name="REKCURD_CEPH_HOST",
                            value=str(data_server_model.ceph_host or "xxx")),
            client.V1EnvVar(name="REKCURD_CEPH_PORT",
                            value=str(data_server_model.ceph_port or "1234")),
            client.V1EnvVar(
                name="REKCURD_CEPH_IS_SECURE",
                value=str(data_server_model.ceph_is_secure or "False")),
            client.V1EnvVar(
                name="REKCURD_CEPH_BUCKET_NAME",
                value=str(data_server_model.ceph_bucket_name or "xxx")),
            client.V1EnvVar(
                name="REKCURD_AWS_ACCESS_KEY",
                value=str(data_server_model.aws_access_key or "xxx")),
            client.V1EnvVar(
                name="REKCURD_AWS_SECRET_KEY",
                value=str(data_server_model.aws_secret_key or "xxx")),
            client.V1EnvVar(
                name="REKCURD_AWS_BUCKET_NAME",
                value=str(data_server_model.aws_bucket_name or "xxx")),
            client.V1EnvVar(
                name="REKCURD_GCS_ACCESS_KEY",
                value=str(data_server_model.gcs_access_key or "xxx")),
            client.V1EnvVar(
                name="REKCURD_GCS_SECRET_KEY",
                value=str(data_server_model.gcs_secret_key or "xxx")),
            client.V1EnvVar(
                name="REKCURD_GCS_BUCKET_NAME",
                value=str(data_server_model.gcs_bucket_name or "xxx")),
            client.V1EnvVar(name="REKCURD_SERVICE_GIT_URL",
                            value=service_git_url),
            client.V1EnvVar(name="REKCURD_SERVICE_GIT_BRANCH",
                            value=service_git_branch),
            client.V1EnvVar(name="REKCURD_SERVICE_BOOT_SHELL",
                            value=service_boot_script),
        ]

        """Namespace registration."""
        core_v1_api = client.CoreV1Api()
        try:
            core_v1_api.read_namespace(name=service_level)
        except Exception:
            api.logger.info("\"{}\" namespace created".format(service_level))
            v1_namespace = client.V1Namespace(
                api_version="v1",
                kind="Namespace",
                metadata=client.V1ObjectMeta(name=service_level))
            core_v1_api.create_namespace(body=v1_namespace)

        """Create/patch Deployment."""
        v1_deployment = client.V1Deployment(
            api_version="apps/v1",
            kind="Deployment",
            metadata=client.V1ObjectMeta(
                name="deploy-{0}".format(service_id),
                namespace=service_level,
                labels={
                    "rekcurd-worker": "True",
                    "id": application_id,
                    "name": application_name,
                    "sel": service_id
                }),
            spec=client.V1DeploymentSpec(
                min_ready_seconds=policy_wait_seconds,
                progress_deadline_seconds=progress_deadline_seconds,
                replicas=replicas_default,
                revision_history_limit=3,
                selector=client.V1LabelSelector(
                    match_labels={"sel": service_id}),
                strategy=client.V1DeploymentStrategy(
                    type="RollingUpdate",
                    rolling_update=client.V1RollingUpdateDeployment(
                        max_surge=policy_max_surge,
                        max_unavailable=policy_max_unavailable)),
                template=client.V1PodTemplateSpec(
                    metadata=client.V1ObjectMeta(labels={
                        "rekcurd-worker": "True",
                        "id": application_id,
                        "name": application_name,
                        "sel": service_id
                    }),
                    spec=client.V1PodSpec(
                        affinity=client.V1Affinity(
                            pod_anti_affinity=client.V1PodAntiAffinity(
                                preferred_during_scheduling_ignored_during_execution=[
                                    client.V1WeightedPodAffinityTerm(
                                        pod_affinity_term=client.V1PodAffinityTerm(
                                            label_selector=client.V1LabelSelector(
                                                match_expressions=[
                                                    client.V1LabelSelectorRequirement(
                                                        key="id",
                                                        operator="In",
                                                        values=[service_id])
                                                ]),
                                            topology_key="kubernetes.io/hostname"),
                                        weight=100)
                                ])),
                        containers=[
                            client.V1Container(
                                env=pod_env,
                                image=container_image,
                                image_pull_policy="Always",
                                name=service_id,
                                ports=[
                                    client.V1ContainerPort(
                                        container_port=insecure_port)
                                ],
                                resources=client.V1ResourceRequirements(
                                    limits={
                                        "cpu": str(resource_limit_cpu),
                                        "memory": resource_limit_memory
                                    },
                                    requests={
                                        "cpu": str(resource_request_cpu),
                                        "memory": resource_request_memory
                                    }),
                                security_context=client.V1SecurityContext(
                                    privileged=True),
                                **volume_mounts)
                        ],
                        node_selector={"host": service_level},
                        **volumes))))
        apps_v1_api = client.AppsV1Api()
        if is_creation_mode:
            api.logger.info("Deployment created.")
            apps_v1_api.create_namespaced_deployment(
                body=v1_deployment, namespace=service_level)
        else:
            api.logger.info("Deployment patched.")
            apps_v1_api.patch_namespaced_deployment(
                body=v1_deployment,
                name="deploy-{0}".format(service_id),
                namespace=service_level)

        """Create/patch Service."""
        v1_service = client.V1Service(
            api_version="v1",
            kind="Service",
            metadata=client.V1ObjectMeta(name="svc-{0}".format(service_id),
                                         namespace=service_level,
                                         labels={
                                             "rekcurd-worker": "True",
                                             "id": application_id,
                                             "name": application_name,
                                             "sel": service_id
                                         }),
            spec=client.V1ServiceSpec(
                ports=[
                    client.V1ServicePort(name="grpc-backend",
                                         port=insecure_port,
                                         protocol="TCP",
                                         target_port=insecure_port)
                ],
                selector={"sel": service_id}))
        core_v1_api = client.CoreV1Api()
        if is_creation_mode:
            api.logger.info("Service created.")
            core_v1_api.create_namespaced_service(namespace=service_level,
                                                  body=v1_service)
        else:
            api.logger.info("Service patched.")
            core_v1_api.patch_namespaced_service(
                namespace=service_level,
                name="svc-{0}".format(service_id),
                body=v1_service)

        """Create/patch Autoscaler."""
        v1_horizontal_pod_autoscaler = client.V1HorizontalPodAutoscaler(
            api_version="autoscaling/v1",
            kind="HorizontalPodAutoscaler",
            metadata=client.V1ObjectMeta(
                name="hpa-{0}".format(service_id),
                namespace=service_level,
                labels={
                    "rekcurd-worker": "True",
                    "id": application_id,
                    "name": application_name,
                    "sel": service_id
                }),
            spec=client.V1HorizontalPodAutoscalerSpec(
                max_replicas=replicas_maximum,
                min_replicas=replicas_minimum,
                scale_target_ref=client.V1CrossVersionObjectReference(
                    api_version="apps/v1",
                    kind="Deployment",
                    name="deploy-{0}".format(service_id)),
                target_cpu_utilization_percentage=autoscale_cpu_threshold))
        autoscaling_v1_api = client.AutoscalingV1Api()
        if is_creation_mode:
            api.logger.info("Autoscaler created.")
            autoscaling_v1_api.create_namespaced_horizontal_pod_autoscaler(
                namespace=service_level, body=v1_horizontal_pod_autoscaler)
        else:
            api.logger.info("Autoscaler patched.")
            autoscaling_v1_api.patch_namespaced_horizontal_pod_autoscaler(
                namespace=service_level,
                name="hpa-{0}".format(service_id),
                body=v1_horizontal_pod_autoscaler)

        """Create Istio ingress if this is the first application."""
        custom_object_api = client.CustomObjectsApi()
        try:
            custom_object_api.get_namespaced_custom_object(
                group="networking.istio.io",
                version="v1alpha3",
                namespace=service_level,
                plural="virtualservices",
                name="ing-vs-{0}".format(application_id),
            )
        except Exception:
            ingress_virtual_service_body = {
                "apiVersion": "networking.istio.io/v1alpha3",
                "kind": "VirtualService",
                "metadata": {
                    "labels": {
                        "rekcurd-worker": "True",
                        "id": application_id,
                        "name": application_name
                    },
                    "name": "ing-vs-{0}".format(application_id),
                    "namespace": service_level
                },
                "spec": {
                    "hosts": ["*"],
                    "gateways": ["rekcurd-ingress-gateway"],
                    "http": [{
                        "match": [{
                            "headers": {
                                "x-rekcurd-application-name": {
                                    "exact": application_name
                                },
                                "x-rekcurd-sevice-level": {
                                    "exact": service_level
                                },
                                "x-rekcurd-grpc-version": {
                                    "exact": version
                                },
                            }
                        }],
                        "route": [{
                            "destination": {
                                "port": {
                                    "number": insecure_port
                                },
                                "host": "svc-{0}".format(service_id)
                            },
                            "weight": 100
                        }],
                        "retries": {
                            "attempts": 25,
                            "perTryTimeout": "1s"
                        }
                    }]
                }
            }
            api.logger.info("Istio created.")
            custom_object_api.create_namespaced_custom_object(
                group="networking.istio.io",
                version="v1alpha3",
                namespace=service_level,
                plural="virtualservices",
                body=ingress_virtual_service_body)

    """Add service model."""
    if is_creation_mode:
        if display_name is None:
            display_name = "{0}-{1}".format(service_level, service_id)
        service_model = ServiceModel(service_id=service_id,
                                     application_id=application_id,
                                     display_name=display_name,
                                     description=description,
                                     service_level=service_level,
                                     version=version,
                                     model_id=service_model_assignment,
                                     insecure_host=insecure_host,
                                     insecure_port=insecure_port)
        db.session.add(service_model)
        db.session.flush()

    """Finish."""
    return service_id
def createJob(username, image, replicas, train_data=None, test_data=None,
              train_mode="cpu", is_host_network=False, ssh_port="22"):
    job_name = username + "-horovod"

    # TODO: fix the port number handling
    new_ssh_port = int(ssh_port)
    new_ssh_port += 1
    new_ssh_port = "{}".format(new_ssh_port)

    job = client.V1Job()
    job.metadata = client.V1ObjectMeta(name=job_name,
                                       labels={
                                           "app": "horovod",
                                           "user": username,
                                           "role": "master"
                                       })

    # Define the Job spec
    job_spec = client.V1JobSpec(template="")  # template is filled in below
    # job_spec.metadata = client.V1ObjectMeta(name=job_name, labels={
    #     "app": "horovod",
    #     "user": username,
    #     "role": "master"
    # })

    # Define the pod template for the Job spec
    pod_template_spec = client.V1PodTemplateSpec()
    pod_template_spec.metadata = client.V1ObjectMeta(name=job_name,
                                                     labels={
                                                         "app": "horovod",
                                                         "user": username,
                                                         "role": "master"
                                                     })

    # Define the pod spec
    pod_spec = client.V1PodSpec(containers=[""],  # placeholder, set below
                                restart_policy="OnFailure")

    # Define the container
    container = client.V1Container(name=job_name + "-master")
    container.image = image
    container.image_pull_policy = "IfNotPresent"
    container.env = [
        client.V1EnvVar(name="SSHPORT", value=ssh_port),
        client.V1EnvVar(name="USESECRETS", value="true"),
        # TODO: change this
        client.V1EnvVar(name="ENTRY_POINT", value="train.py"),
        client.V1EnvVar(name="JOB_NAME", value=username)
    ]
    container.ports = [client.V1ContainerPort(container_port=int(ssh_port))]
    container.volume_mounts = [
        client.V1VolumeMount(name=job_name + "-cm",
                             mount_path="/horovod/generated"),
        client.V1VolumeMount(name=job_name + "-secret",
                             mount_path="/etc/secret-volume",
                             read_only=True),
        client.V1VolumeMount(name=job_name + "-data",
                             mount_path="/horovod/data")
    ]
    container.command = ["/horovod/generated/run.sh"]

    cpu_mode_stub = ""
    if train_mode == "cpu":
        cpu_mode_stub = "ldconfig /usr/local/cuda/lib64/stubs;"
    # TODO: cpu, gpu, training code
    container.args = [
        ("ldconfig /usr/local/cuda/lib64/stubs && "
         "mpirun -np {replicas} --hostfile /horovod/generated/hostfile "
         "--mca orte_keep_fqdn_hostnames t --allow-run-as-root "
         "--display-map --tag-output --timestamp-output "
         "sh -c '{cpu_mode_stub} python /horovod/data/train.py'")
        .format(replicas=replicas, cpu_mode_stub=cpu_mode_stub)
    ]

    pod_spec.volumes = [
        client.V1Volume(name=job_name + "-cm",
                        config_map=client.V1ConfigMapVolumeSource(
                            name=job_name,
                            items=[
                                client.V1KeyToPath(key="hostfile.config",
                                                   path="hostfile",
                                                   mode=438),
                                client.V1KeyToPath(
                                    key="master.waitWorkerReady",
                                    path="waitWorkerReady.sh",
                                    mode=365),
                                client.V1KeyToPath(key="master.run",
                                                   path="run.sh",
                                                   mode=365)
                            ])),
        client.V1Volume(name=job_name + "-secret",
                        secret=client.V1SecretVolumeSource(
                            secret_name=job_name,
                            default_mode=448,
                            items=[
                                client.V1KeyToPath(key="host-key",
                                                   path="id_rsa"),
                                client.V1KeyToPath(key="host-key-pub",
                                                   path="authorized_keys")
                            ])),
        client.V1Volume(name=job_name + "-data",
                        empty_dir=client.V1EmptyDirVolumeSource())
    ]

    # TODO: this is a node selector for now; consider removing it and
    # rethinking the scheduling
    # pod_spec.node_selector = {
    #     "node-role": "master"
    # }

    pod_spec.containers = [container]
    if is_host_network:
        pod_spec.host_network = True
        pod_spec.dns_policy = "ClusterFirstWithHostNet"

    # Init containers
    pod_spec.init_containers = [
        client.V1Container(
            name="wait-workers",
            image=image,
            image_pull_policy="IfNotPresent",
            env=[
                client.V1EnvVar(name="SSHPORT", value=ssh_port),
                client.V1EnvVar(name="USESECRETS", value="true")
            ],
            command=[
                "/horovod/generated/waitWorkerReady.sh",
                # TODO: set the S3 address again.
            ],
            args=["/horovod/generated/hostfile"],
            volume_mounts=[
                client.V1VolumeMount(name=job_name + "-cm",
                                     mount_path="/horovod/generated"),
                client.V1VolumeMount(name=job_name + "-secret",
                                     mount_path="/etc/secret-volume",
                                     read_only=True),
                client.V1VolumeMount(name=job_name + "-data",
                                     mount_path="/horovod/data")
            ]),
        client.V1Container(
            name="download-data",
            image=image,
            image_pull_policy="IfNotPresent",
            command=["/bin/bash", "-c"],
            args=[
                "curl http://ywj-horovod.s3.ap-northeast-2.amazonaws.com/horovod/"
                + username + "/train.py > /horovod/data/train.py"
            ],
            volume_mounts=[
                client.V1VolumeMount(name=job_name + "-data",
                                     mount_path="/horovod/data")
            ])
    ]

    init_args = []
    # Download the training code
    init_args.append(
        "curl http://ywj-horovod.s3.ap-northeast-2.amazonaws.com/horovod/" +
        username + "/train.py > /horovod/data/train.py")
    if train_data is not None:
        init_args.append(
            "curl http://ywj-horovod.s3.ap-northeast-2.amazonaws.com/{username}/{train_data} > /horovod/data/{train_data}"
            .format(username=username, train_data=train_data))
    if test_data is not None:
        init_args.append(
            "curl http://ywj-horovod.s3.ap-northeast-2.amazonaws.com/{username}/{test_data} > /horovod/data/{test_data}"
            .format(username=username, test_data=test_data))
    pod_spec.init_containers[1].args = init_args
    print(pod_spec.init_containers[1].args)

    pod_template_spec.spec = pod_spec
    job_spec.template = pod_template_spec
    job.spec = job_spec
    return job
def createStatefulSet(username, replicas, image, is_host_network=False,
                      ssh_port="22"):
    statefulset_name = username + "-horovod"

    statefulset = client.V1StatefulSet()
    # statefulset.api_version = "apps/v1beta2"
    statefulset.metadata = client.V1ObjectMeta(name=statefulset_name,
                                               labels={
                                                   "app": "horovod",
                                                   "user": username,
                                                   "role": "worker"
                                               })
    label_selector = client.V1LabelSelector(match_labels={
        "app": "horovod",
        "user": username,
        "role": "worker"
    })

    # Define the pod template
    pod_template = client.V1PodTemplateSpec()
    pod_template.metadata = client.V1ObjectMeta(labels={
        "app": "horovod",
        "user": username,
        "role": "worker"
    })

    container = client.V1Container(name="worker")
    container.image = image
    container.image_pull_policy = "IfNotPresent"
    container.env = [
        client.V1EnvVar(name="SSHPORT", value=ssh_port),
        client.V1EnvVar(name="USESECRETS", value="true"),
        # TODO: change this
        client.V1EnvVar(name="ENTRY_POINT", value="train.py")
    ]
    container.ports = [client.V1ContainerPort(container_port=22)]
    container.volume_mounts = [
        client.V1VolumeMount(name=statefulset_name + "-cm",
                             mount_path="/horovod/generated"),
        client.V1VolumeMount(name=statefulset_name + "-secret",
                             mount_path="/etc/secret-volume",
                             read_only=True),
        client.V1VolumeMount(name=statefulset_name + "-data",
                             mount_path="/horovod/data")
    ]
    container.command = ["/horovod/generated/run.sh"]
    container.readiness_probe = client.V1Probe(
        _exec=client.V1ExecAction(command=["/horovod/generated/check.sh"]),
        initial_delay_seconds=1,
        period_seconds=2)

    pod_spec = client.V1PodSpec(containers=[container])
    # If host networking is enabled
    if is_host_network:
        pod_spec.host_network = True
        pod_spec.dns_policy = "ClusterFirstWithHostNet"

    pod_spec.volumes = [
        client.V1Volume(name=statefulset_name + "-cm",
                        config_map=client.V1ConfigMapVolumeSource(
                            name=statefulset_name,
                            items=[
                                client.V1KeyToPath(key="hostfile.config",
                                                   path="hostfile",
                                                   mode=438),
                                client.V1KeyToPath(key="ssh.readiness",
                                                   path="check.sh",
                                                   mode=365),
                                client.V1KeyToPath(key="worker.run",
                                                   path="run.sh",
                                                   mode=365)
                            ])),
        client.V1Volume(name=statefulset_name + "-secret",
                        secret=client.V1SecretVolumeSource(
                            secret_name=statefulset_name,
                            default_mode=448,
                            items=[
                                client.V1KeyToPath(key="host-key",
                                                   path="id_rsa"),
                                client.V1KeyToPath(key="host-key-pub",
                                                   path="authorized_keys")
                            ])),
        client.V1Volume(name=statefulset_name + "-data",
                        empty_dir=client.V1EmptyDirVolumeSource())
    ]
    pod_spec.subdomain = statefulset_name
    pod_spec.hostname = statefulset_name
    pod_spec.init_containers = [
        client.V1Container(
            name="download-data",
            image=image,
            image_pull_policy="IfNotPresent",
            command=["/bin/bash", "-c"],
            args=[
                "curl http://ywj-horovod.s3.ap-northeast-2.amazonaws.com/horovod/"
                + username + "/train.py > /horovod/data/train.py"
            ],
            volume_mounts=[
                client.V1VolumeMount(name=statefulset_name + "-data",
                                     mount_path="/horovod/data")
            ])
    ]
    pod_template.spec = pod_spec

    statefulset.spec = client.V1StatefulSetSpec(
        selector=label_selector,
        service_name=statefulset_name + "-worker",
        # https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#pod-identity
        pod_management_policy="Parallel",
        replicas=replicas,
        template=pod_template)
    return statefulset
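# A minimal sketch (assumed): the worker StatefulSet pairs with the master
# Job from createJob() and is submitted through the apps/v1 API. The
# namespace, username, and image below are placeholders.
from kubernetes import client, config

config.load_kube_config()
sts = createStatefulSet('alice', replicas=2, image='horovod/horovod:latest')
client.AppsV1Api().create_namespaced_stateful_set(namespace='default',
                                                  body=sts)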
def __create_job_def(self, post_processing_required=False):
    # initialize the job def body
    self.inst_name = self.name
    if self.job_count > 1:
        self.inst_name = self.inst_name + '-' + str(self.job_count)
    self.job_count += 1
    job_def = client.V1Job(kind="Job")
    job_def.metadata = client.V1ObjectMeta(namespace=self.namespace,
                                           name=self.inst_name)

    # initialize job pieces
    self.job_containers = []
    volume_mounts = []
    volumes = []
    containers = []
    init_containers = []
    env_variables = []

    if not self.volume_name:
        # use the task name so it can be used across multiple jobs
        self.volume_name = self.name + '-pd'

    # build volume mounts
    volume_mounts = []
    volume_mounts.append(
        client.V1VolumeMount(mount_path=self.wrk_dir,
                             name=self.volume_name))

    cpu_request_max = self.nodepool_info['max_cpu'] - self.cpu_reserve
    mem_request_max = self.nodepool_info['max_mem'] - self.mem_reserve

    # define resource limits/requests
    resource_def = client.V1ResourceRequirements(
        limits={
            'cpu': cpu_request_max,
            'memory': str(mem_request_max) + 'G'
        },
        requests={
            'cpu': cpu_request_max * .8,
            'memory': str(mem_request_max - 1) + 'G'
        })

    # update script task with job info
    if self.script_task:
        self.script_task.cpu_request = cpu_request_max * .8
        self.script_task.cpu_max = cpu_request_max
        self.script_task.memory_request = mem_request_max - 1
        self.script_task.memory_max = mem_request_max
        self.script_task.instance_name = self.inst_name
        self.script_task.force_standard = not self.preemptible
        self.script_task.pool_name = str(self.node_label)
        self.script_task.instance_type = str(self.nodepool_info["inst_type"])

    # place the job in the appropriate node pool
    node_label_dict = {'poolName': str(self.node_label)}

    # build volumes
    volumes.append(
        client.V1Volume(
            name=self.volume_name,
            persistent_volume_claim=client.
            V1PersistentVolumeClaimVolumeSource(claim_name=self.pvc_name)))

    # incorporate configured persistent volumes if associated with the
    # current task
    if self.extra_persistent_volumes:
        for pv in self.extra_persistent_volumes:
            if pv['task_prefix'] in self.name:
                claim_name = pv["pvc_name"]
                if 'dynamic' in pv and pv['dynamic']:
                    claim_name = (claim_name[:57] + '-' +
                                  Platform.generate_unique_id(id_len=5))
                # need to add the extra persistent volume
                volume_mounts.append(
                    client.V1VolumeMount(mount_path=pv["path"],
                                         name=pv['volume_name'],
                                         read_only=pv['read_only']))
                volumes.append(
                    client.V1Volume(
                        name=pv['volume_name'],
                        persistent_volume_claim=client.
                        V1PersistentVolumeClaimVolumeSource(
                            claim_name=claim_name)))
                # specify volumes for script task
                if self.script_task:
                    if 'dynamic' in pv and pv['dynamic']:
                        self.script_task.extra_volumes.append({
                            "path": pv["path"],
                            "name": pv["volume_name"],
                            "storage": pv["size"],
                            "read_only": pv["read_only"],
                            "claim_name": claim_name,
                            "command": pv["copy_command"],
                            "dynamic": True
                        })
                    else:
                        self.script_task.extra_volumes.append({
                            "path": pv["path"],
                            "name": pv["volume_name"],
                            "read_only": pv["read_only"],
                            "claim_name": claim_name,
                            "dynamic": False
                        })

    # incorporate configured secrets
    if self.gcp_secret_configured:
        volume_mounts.append(
            client.V1VolumeMount(
                mount_path="/etc/cloud_conductor/gcp.json",
                sub_path="gcp.json",
                name="secret-volume",
                read_only=True))
        volumes.append(
            client.V1Volume(name="secret-volume",
                            secret=client.V1SecretVolumeSource(
                                secret_name="cloud-conductor-config",
                                items=[
                                    client.V1KeyToPath(key="gcp_json",
                                                       path="gcp.json")
                                ])))
        env_variables.append(
            client.V1EnvVar(name='GOOGLE_APPLICATION_CREDENTIALS',
                            value='/etc/cloud_conductor/gcp.json'))
        env_variables.append(
            client.V1EnvVar(name='RCLONE_CONFIG_GS_TYPE',
                            value='google cloud storage'))
        env_variables.append(
            client.V1EnvVar(name='RCLONE_CONFIG_GS_SERVICE_ACCOUNT_FILE',
                            value='$GOOGLE_APPLICATION_CREDENTIALS'))
        env_variables.append(
            client.V1EnvVar(name='RCLONE_CONFIG_GS_OBJECT_ACL',
                            value='projectPrivate'))
        env_variables.append(
            client.V1EnvVar(name='RCLONE_CONFIG_GS_BUCKET_ACL',
                            value='projectPrivate'))
    if self.aws_secret_configured:
        env_variables.append(
            client.V1EnvVar(
                name='AWS_ACCESS_KEY_ID',
                value_from=client.V1EnvVarSource(
                    secret_key_ref=client.V1SecretKeySelector(
                        name='cloud-conductor-config', key='aws_id'))))
        env_variables.append(
            client.V1EnvVar(
                name='AWS_SECRET_ACCESS_KEY',
                value_from=client.V1EnvVarSource(
                    secret_key_ref=client.V1SecretKeySelector(
                        name='cloud-conductor-config', key='aws_access'))))
        env_variables.append(
            client.V1EnvVar(name='RCLONE_CONFIG_S3_TYPE', value='s3'))
        env_variables.append(
            client.V1EnvVar(
                name='RCLONE_CONFIG_S3_ACCESS_KEY_ID',
                value_from=client.V1EnvVarSource(
                    secret_key_ref=client.V1SecretKeySelector(
                        name='cloud-conductor-config', key='aws_id'))))
        env_variables.append(
            client.V1EnvVar(
                name='RCLONE_CONFIG_S3_SECRET_ACCESS_KEY',
                value_from=client.V1EnvVarSource(
                    secret_key_ref=client.V1SecretKeySelector(
                        name='cloud-conductor-config', key='aws_access'))))

    storage_image = 'gcr.io/cloud-builders/gsutil'
    storage_tasks = ['mkdir_', 'grant_']
    container_name_list = []
    for k, v in self.processes.items():
        entrypoint = ["/bin/bash", "-c"]
        # if the process is for storage (i.e. mkdir, etc.)
        if any(x in k for x in storage_tasks) or not v['docker_image']:
            container_image = storage_image
        else:
            container_image = v['docker_image']
            if v['docker_entrypoint'] is not None and \
                    v['original_cmd'].find(v['docker_entrypoint']) == -1:
                v['original_cmd'] = v['docker_entrypoint'] + ' ' + \
                    v['original_cmd']
            if 'rclone' in container_image:
                v['original_cmd'] = v['original_cmd'].replace("|&",
                                                              "2>&1 |")
                entrypoint = ["/bin/sh", "-c"]

        args = v['original_cmd']
        if not isinstance(args, list):
            args = [v['original_cmd'].replace("sudo ", "")]
        args = " && ".join(args)
        args = args.replace("\n", " ")
        args = args.replace("java.io.tmpdir=/tmp/",
                            "java.io.tmpdir=/data/tmp/")
        if "awk " in args:
            args = re.sub("'\"'\"'", "'", args)
        if "gsutil" in args:
            args = ("gcloud auth activate-service-account "
                    "--key-file $GOOGLE_APPLICATION_CREDENTIALS && "
                    "sleep 10; ") + args
        # add in pipe error handling
        # if "copy_input" in k or "copy_output" in k:
        #     args = "set -o pipefail && " + args

        logging.debug(f"({self.name}) Command for task {k} is : {args}")

        # format the container name and roll call to logging
        container_name = k.replace("_", "-").replace(".", "-").lower()
        formatted_container_name = (
            container_name[:57] + '-' +
            Platform.generate_unique_id(id_len=5))
        while formatted_container_name in container_name_list:
            # make sure all container names are unique
            formatted_container_name = (
                container_name[:57] + '-' +
                Platform.generate_unique_id(id_len=5))
        container_name_list.append(formatted_container_name)

        # args = f">&2 echo STARTING TASK {container_name} && " + args

        containers.append(
            client.V1Container(
                # lifecycle=client.V1Lifecycle(post_start=post_start_handler),
                image=container_image,
                command=entrypoint,
                args=[args],
                name=formatted_container_name,
                volume_mounts=volume_mounts,
                env=env_variables,
                resources=resource_def,
                image_pull_policy='IfNotPresent'))

        if self.script_task and \
                container_name not in self.script_task.commands:
            self.script_task.commands[container_name] = ({
                "name": formatted_container_name,
                "docker_image": container_image,
                "entrypoint": entrypoint,
                "args": [args]
            })

    job_spec = dict(backoff_limit=self.default_num_cmd_retries)
    self.job_containers = containers

    # Run jobs in order using init_containers
    # See https://kubernetes.io/docs/concepts/workloads/pods/init-containers/
    if len(containers) > 1:
        init_containers = containers[:-1]
        containers = [containers[-1]]
    else:
        init_containers = None

    # define the pod spec
    job_template = client.V1PodTemplateSpec()
    job_labels = {}
    job_labels[self.inst_name] = 'CC-Job'
    # add annotation to prevent autoscaler from killing nodes running jobs
    annotations = {
        'cluster-autoscaler.kubernetes.io/safe-to-evict': 'false'
    }
    job_template.metadata = client.V1ObjectMeta(labels=job_labels,
                                                annotations=annotations)
    job_template.spec = client.V1PodSpec(
        init_containers=init_containers,
        containers=containers,
        volumes=volumes,
        restart_policy='Never',
        termination_grace_period_seconds=self.termination_seconds,
        node_selector=node_label_dict)
    job_def.spec = client.V1JobSpec(template=job_template, **job_spec)

    if self.script_task:
        self.script_task.num_retries = self.default_num_cmd_retries
        for k, v in job_labels.items():
            self.script_task.labels.append({"key": k, "value": v})
        for k, v in annotations.items():
            self.script_task.annotations.append({"key": k, "value": v})

    return job_def
def create_deployment(apps_v1_api, username, token, gpu):
    name = 'jlab-{}'.format(username)
    try:
        init_container = client.V1Container(
            name='{}-init'.format(name),
            image="ubuntu:18.04",
            image_pull_policy="IfNotPresent",
            command=["/bin/sh"],
            args=["-c", "chown 1001:1001 /persistent_volume"],
            volume_mounts=[
                client.V1VolumeMount(
                    name='persistent-volume',
                    mount_path="/persistent_volume",
                    sub_path='{}/jupyter'.format(username))
            ])
        if gpu:
            limits = {'nvidia.com/gpu': 1}
        else:
            limits = None
        container = client.V1Container(
            name=name,
            image=envvars.DOCKER_IMAGE_JLAB_SERVER,
            resources=client.V1ResourceRequirements(limits=limits),
            image_pull_policy="Always",
            ports=[client.V1ContainerPort(container_port=8888)],
            env=[
                client.V1EnvVar(name='DES_USER', value=username),
                client.V1EnvVar(name='PIP_TARGET',
                                value='/home/jovyan/work/.pip'),
                client.V1EnvVar(name='PYTHONPATH',
                                value='/home/jovyan/work/.pip')
            ],
            volume_mounts=[
                client.V1VolumeMount(name='jupyter-config',
                                     mount_path="/home/jovyan/.jupyter/"),
                client.V1VolumeMount(name='persistent-volume',
                                     mount_path="/home/jovyan/jobs/cutout",
                                     sub_path='{}/cutout'.format(username)),
                client.V1VolumeMount(name='persistent-volume',
                                     mount_path="/home/jovyan/jobs/query",
                                     sub_path='{}/query'.format(username)),
                client.V1VolumeMount(name='persistent-volume',
                                     mount_path="/home/jovyan/work",
                                     sub_path='{}/jupyter'.format(username))
            ])
        volume_config = client.V1Volume(
            name='jupyter-config',
            config_map=client.V1ConfigMapVolumeSource(
                name=name,
                items=[
                    client.V1KeyToPath(key=name,
                                       path="jupyter_notebook_config.py")
                ]))
        volume_persistent = client.V1Volume(
            name='persistent-volume',
            persistent_volume_claim=client.V1PersistentVolumeClaimVolumeSource(
                claim_name=envvars.PVC_NAME_BASE))
        # Template
        template = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(labels={"app": name}),
            spec=client.V1PodSpec(
                image_pull_secrets=[
                    client.V1LocalObjectReference(name='registry-auth')
                ],
                init_containers=[init_container],
                containers=[container],
                volumes=[volume_config, volume_persistent],
                node_selector={'gpu': '{}'.format(gpu).lower()}))
        # Spec
        spec = client.V1DeploymentSpec(
            replicas=1,
            template=template,
            selector=client.V1LabelSelector(match_labels={'app': name}))
        # Deployment
        deployment = client.V1Deployment(
            api_version="apps/v1",
            kind="Deployment",
            metadata=client.V1ObjectMeta(name=name),
            spec=spec)
        # Creation of the Deployment in the specified namespace
        api_response = apps_v1_api.create_namespaced_deployment(
            namespace=namespace, body=deployment)
        # logger.info('Deployment created:\n{}'.format(api_response))
    except ApiException as e:
        error_msg = str(e).strip()
        logger.error(error_msg)
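# Hedged usage sketch for create_deployment above, assuming a reachable
# cluster via kubeconfig and a surrounding module that defines `namespace`,
# `envvars`, and `logger` as the function expects. The username and token
# values are placeholders; note that `token` is accepted but not used inside
# the function as written.
from kubernetes import client, config

config.load_kube_config()  # or config.load_incluster_config() inside a pod
apps_v1_api = client.AppsV1Api()
create_deployment(apps_v1_api, username='jdoe', token='unused-here', gpu=False)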
def get_statefulset_object(cluster_object):
    name = cluster_object['metadata']['name']
    namespace = cluster_object['metadata']['namespace']
    try:
        replicas = cluster_object['spec']['mongodb']['replicas']
    except KeyError:
        replicas = 3
    try:
        mongodb_limit_cpu = \
            cluster_object['spec']['mongodb']['mongodb_limit_cpu']
    except KeyError:
        mongodb_limit_cpu = '100m'
    try:
        mongodb_limit_memory = \
            cluster_object['spec']['mongodb']['mongodb_limit_memory']
    except KeyError:
        mongodb_limit_memory = '64Mi'
    try:
        hard_pod_anti_affinity = \
            cluster_object['spec']['mongodb']['hard_pod_anti_affinity']
    except KeyError:
        hard_pod_anti_affinity = True

    statefulset = client.V1beta1StatefulSet()

    # Metadata
    statefulset.metadata = client.V1ObjectMeta(
        name=name,
        namespace=namespace,
        labels=get_default_labels(name=name))

    # Spec
    statefulset.spec = client.V1beta1StatefulSetSpec(
        replicas=replicas,
        service_name=name,
        template=client.V1PodTemplateSpec())

    statefulset.spec.template.metadata = client.V1ObjectMeta(
        labels=get_default_labels(name=name))
    statefulset.spec.template.spec = client.V1PodSpec(containers=[])

    pod_affinity_term = client.V1PodAffinityTerm(
        topology_key='kubernetes.io/hostname',
        label_selector=client.V1LabelSelector(match_expressions=[
            client.V1LabelSelectorRequirement(
                key='cluster', operator='In', values=[name])
        ]))
    pod_anti_affinity = client.V1PodAntiAffinity(
        required_during_scheduling_ignored_during_execution=[
            pod_affinity_term
        ])
    if not hard_pod_anti_affinity:
        pod_anti_affinity = client.V1PodAntiAffinity(
            preferred_during_scheduling_ignored_during_execution=[
                client.V1WeightedPodAffinityTerm(
                    weight=100, pod_affinity_term=pod_affinity_term)
            ])
    statefulset.spec.template.spec.affinity = client.V1Affinity(
        pod_anti_affinity=pod_anti_affinity)

    # MongoDB container
    mongodb_port = client.V1ContainerPort(
        name='mongodb', container_port=27017, protocol='TCP')
    mongodb_tls_volumemount = client.V1VolumeMount(
        name='mongo-tls', read_only=True, mount_path='/etc/ssl/mongod')
    mongodb_data_volumemount = client.V1VolumeMount(
        name='mongo-data', read_only=False, mount_path='/data/db')
    mongodb_resources = client.V1ResourceRequirements(
        limits={'cpu': mongodb_limit_cpu, 'memory': mongodb_limit_memory},
        requests={'cpu': mongodb_limit_cpu, 'memory': mongodb_limit_memory})
    mongodb_container = client.V1Container(
        name='mongod',
        env=[
            client.V1EnvVar(
                name='POD_IP',
                value_from=client.V1EnvVarSource(
                    field_ref=client.V1ObjectFieldSelector(
                        api_version='v1', field_path='status.podIP')))
        ],
        command=[
            'mongod', '--auth', '--replSet', name, '--sslMode',
            'requireSSL', '--clusterAuthMode', 'x509', '--sslPEMKeyFile',
            '/etc/ssl/mongod/mongod.pem', '--sslCAFile',
            '/etc/ssl/mongod/ca.pem', '--bind_ip', '127.0.0.1,$(POD_IP)'
        ],
        image='mongo:3.6.4',
        ports=[mongodb_port],
        volume_mounts=[mongodb_tls_volumemount, mongodb_data_volumemount],
        resources=mongodb_resources)

    # Metrics container
    metrics_port = client.V1ContainerPort(
        name='metrics', container_port=9001, protocol='TCP')
    metrics_resources = client.V1ResourceRequirements(
        limits={'cpu': '50m', 'memory': '16Mi'},
        requests={'cpu': '50m', 'memory': '16Mi'})
    metrics_secret_name = '{}-monitoring-credentials'.format(name)
    metrics_username_env_var = client.V1EnvVar(
        name='MONGODB_MONITORING_USERNAME',
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(
                name=metrics_secret_name, key='username')))
    metrics_password_env_var = client.V1EnvVar(
        name='MONGODB_MONITORING_PASSWORD',
        value_from=client.V1EnvVarSource(
            secret_key_ref=client.V1SecretKeySelector(
                name=metrics_secret_name, key='password')))
    metrics_container = client.V1Container(
        name='prometheus-exporter',
        image='quay.io/kubestack/prometheus-mongodb-exporter:latest',
        command=[
            '/bin/sh', '-c',
            '/bin/mongodb_exporter '
            '--mongodb.uri mongodb://${MONGODB_MONITORING_USERNAME}:${MONGODB_MONITORING_PASSWORD}@127.0.0.1:27017/admin '
            '--mongodb.tls-cert /etc/ssl/mongod/mongod.pem '
            '--mongodb.tls-ca /etc/ssl/mongod/ca.pem'  # noqa: E501
        ],
        ports=[metrics_port],
        resources=metrics_resources,
        volume_mounts=[mongodb_tls_volumemount],
        env=[metrics_username_env_var, metrics_password_env_var])

    statefulset.spec.template.spec.containers = [
        mongodb_container, metrics_container
    ]

    ca_volume = client.V1Volume(
        name='mongo-ca',
        secret=client.V1SecretVolumeSource(
            secret_name='{}-ca'.format(name),
            items=[
                client.V1KeyToPath(key='ca.pem', path='ca.pem'),
                client.V1KeyToPath(key='ca-key.pem', path='ca-key.pem')
            ]))
    tls_volume = client.V1Volume(
        name='mongo-tls', empty_dir=client.V1EmptyDirVolumeSource())
    data_volume = client.V1Volume(
        name='mongo-data', empty_dir=client.V1EmptyDirVolumeSource())
    statefulset.spec.template.spec.volumes = [
        ca_volume, tls_volume, data_volume
    ]

    # Init container
    tls_init_ca_volumemount = client.V1VolumeMount(
        name='mongo-ca', read_only=True, mount_path='/etc/ssl/mongod-ca')
    tls_init_mongodb_tls_volumemount = client.V1VolumeMount(
        name='mongo-tls', read_only=False, mount_path='/etc/ssl/mongod')
    tls_init_container = client.V1Container(
        name="cert-init",
        image="quay.io/kubestack/mongodb-init:latest",
        volume_mounts=[
            tls_init_ca_volumemount, tls_init_mongodb_tls_volumemount
        ],
        env=[
            client.V1EnvVar(
                name='METADATA_NAME',
                value_from=client.V1EnvVarSource(
                    field_ref=client.V1ObjectFieldSelector(
                        api_version='v1', field_path='metadata.name'))),
            client.V1EnvVar(
                name='NAMESPACE',
                value_from=client.V1EnvVarSource(
                    field_ref=client.V1ObjectFieldSelector(
                        api_version='v1',
                        field_path='metadata.namespace')))
        ],
        command=["ansible-playbook", "member-cert.yml"])
    statefulset.spec.template.spec.init_containers = [tls_init_container]

    return statefulset
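# Hedged usage sketch for get_statefulset_object above: a minimal
# cluster_object carrying only the keys the function reads; any field omitted
# under spec.mongodb falls back to the defaults in the try/except blocks.
# The cluster name and namespace are placeholders. Submitting the result
# assumes a client version that still ships the V1beta1StatefulSet models
# used above; on newer clients/clusters the V1 StatefulSet models and
# AppsV1Api replace them.
from kubernetes import client, config

cluster_object = {
    'metadata': {'name': 'example-mongo', 'namespace': 'default'},
    'spec': {'mongodb': {'replicas': 3, 'mongodb_limit_memory': '128Mi'}},
}
statefulset = get_statefulset_object(cluster_object)

config.load_kube_config()
client.AppsV1beta1Api().create_namespaced_stateful_set(
    namespace='default', body=statefulset)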