def newJob(self, cronjob: str) -> None:
    _suffix = "-manual-{}".format(int(datetime.now().timestamp()))
    _template = self.batch_v1beta1.read_namespaced_cron_job(
        cronjob, self.namespace)
    if _template:
        _annotations = {"cronjob.kubernetes.io/instantiate": "manual"}
        _name = cronjob + _suffix
        _labels = _template.spec.job_template.metadata.labels
        _spec = _template.spec.job_template.spec
        _owner = client.V1OwnerReference(
            api_version="batch/v1beta1",
            block_owner_deletion=True,
            controller=True,
            kind="CronJob",
            name=cronjob,
            uid=_template.metadata.uid,
        )
        _metadata = client.V1ObjectMeta(
            annotations=_annotations,
            labels=_labels,
            name=_name,
            owner_references=[_owner],
        )
        _new_job = client.V1Job(api_version="batch/v1",
                                kind="Job",
                                spec=_spec,
                                metadata=_metadata)
        _ = self.batch_v1.create_namespaced_job(namespace=self.namespace,
                                                body=_new_job)
    return
def testToSwaggerDict(self):
    pod = client.V1Pod(
        metadata=client.V1ObjectMeta(owner_references=[
            client.V1OwnerReference(
                api_version='argoproj.io/v1alpha1',
                kind='Workflow',
                name='wf-1',
                uid='wf-uid-1')
        ]),
        spec=client.V1PodSpec(containers=[], service_account='sa-1'))
    pod_dict = container_common.to_swagger_dict(pod)
    self.assertDictEqual(
        {
            'metadata': {
                'ownerReferences': [{
                    'apiVersion': 'argoproj.io/v1alpha1',
                    'kind': 'Workflow',
                    'name': 'wf-1',
                    'uid': 'wf-uid-1'
                }]
            },
            'spec': {
                'serviceAccount': 'sa-1'
            }
        }, pod_dict)
def __init__(self):
    if os.environ.get("KUBE_CONFIG") == "incluster":
        kubernetes.config.load_incluster_config()
    else:
        kubernetes.config.load_kube_config()
    self.api = kubernetes.client.CoreV1Api()
    self.namespace = os.environ["NAMESPACE"]
    self.image = os.environ["USER_IMAGE"]
    self.args = sys.argv[1:]
    self.cluster_name = os.environ["CLUSTER_NAME"]
    self.resources = json.loads(os.environ.get("USER_RESOURCES", "{}"))
    self.job_info_path = os.environ["JOB_INFO_PATH"]
    # Read the owning Job's name and uid from the files mounted at
    # JOB_INFO_PATH (one file per key).
    job_info = []
    for info_key in "name", "uid":
        with open(os.path.join(self.job_info_path, info_key)) as job_info_fp:
            job_info.append(job_info_fp.read())
    self.job_name, self.job_uid = job_info  # pylint: disable=unbalanced-tuple-unpacking
    self.owner_references = [k8s_client.V1OwnerReference(
        name=self.job_name,
        api_version="batch/v1",
        controller=True,
        uid=self.job_uid,
        kind="Job",  # Kubernetes kinds are case-sensitive; the batch/v1 kind is "Job"
        block_owner_deletion=True,
    )]
def _mock_launcher_pod(self):
    return client.V1Pod(
        metadata=client.V1ObjectMeta(owner_references=[
            client.V1OwnerReference(api_version='argoproj.io/v1alpha1',
                                    kind='Workflow',
                                    name='wf-1',
                                    uid='wf-uid-1')
        ]),
        spec=client.V1PodSpec(containers=[], service_account='sa-1'))
def _BuildPodManifest(self) -> k8s_client.V1Pod:
    if isinstance(self._serving_binary, serving_bins.TensorFlowServing):
        env_vars_dict = self._serving_binary.MakeEnvVars(
            model_path=self._model_path)
        env_vars = [
            k8s_client.V1EnvVar(name=key, value=value)
            for key, value in env_vars_dict.items()
        ]
    else:
        raise NotImplementedError('Unsupported serving binary {}'.format(
            type(self._serving_binary).__name__))

    service_account_name = (self._config.service_account_name or
                            self._executor_pod.spec.service_account_name)
    active_deadline_seconds = (self._config.active_deadline_seconds or
                               _DEFAULT_ACTIVE_DEADLINE_SEC)
    if active_deadline_seconds < 0:
        raise ValueError(
            'active_deadline_seconds should be > 0. Got {}'.format(
                active_deadline_seconds))

    return k8s_client.V1Pod(
        metadata=k8s_client.V1ObjectMeta(
            generate_name=_MODEL_SERVER_POD_NAME_PREFIX,
            labels=self._label_dict,
            # Resources with ownerReferences are automatically deleted once
            # all its owners are deleted.
            owner_references=[
                k8s_client.V1OwnerReference(
                    api_version=self._executor_pod.api_version,
                    kind=self._executor_pod.kind,
                    name=self._executor_pod.metadata.name,
                    uid=self._executor_pod.metadata.uid,
                ),
            ],
        ),
        spec=k8s_client.V1PodSpec(
            containers=[
                k8s_client.V1Container(
                    name=_MODEL_SERVER_CONTAINER_NAME,
                    image=self._serving_binary.image,
                    env=env_vars,
                ),
            ],
            service_account_name=service_account_name,
            # No retry in case model server container failed. Retry will
            # happen at the outermost loop (executor.py).
            restart_policy=_POD_CONTAINER_RESTART_POLICY_NEVER,
            # This is a hard deadline for the model server container to ensure
            # the Pod is properly cleaned up even with an unexpected
            # termination of an infra validator. After the deadline, the
            # container will be removed but the Pod resource won't. This makes
            # the Pod log visible after the termination.
            active_deadline_seconds=active_deadline_seconds,
            # TODO(b/152002076): Add TTL controller once it graduates Beta.
            # ttl_seconds_after_finished=,
        ))
def create_owner_reference(owner_pod):
    owner_ref = ([
        client.V1OwnerReference(
            api_version="v1",
            block_owner_deletion=True,
            kind="Pod",
            name=owner_pod.metadata.name,
            uid=owner_pod.metadata.uid,
        )
    ] if owner_pod else None)
    return owner_ref
def __init__(self, event_type, namespace, hostess_docker_registry,
             hostess_docker_image, hostess_docker_tag,
             docker_certificate_secret, **kwargs):
    self.event_type = event_type
    self.namespace = namespace
    self.hostess_docker_registry = hostess_docker_registry
    self.hostess_docker_image = hostess_docker_image
    self.hostess_docker_tag = hostess_docker_tag
    self.docker_certificate_secret = docker_certificate_secret
    self.kind = kwargs.get("kind")
    self.name = kwargs.get("metadata", {}).get("name")
    self.uid = kwargs.get("metadata", {}).get("uid")
    self.full_name = "registry-mirror-{}".format(self.name)
    self.daemon_set_name = self.full_name + "-utils"
    self.apiVersion = kwargs.get("apiVersion")
    self.upstreamUrl = kwargs.get("spec", {}).get("upstreamUrl")
    self.masqueradeUrl = kwargs.get("spec", {}).get(
        "masqueradeUrl", "mirror-" + self.upstreamUrl)
    self.credentials_secret_name = kwargs.get(
        "spec", {}).get("credentialsSecret")
    self.image_pull_secrets = kwargs["image_pull_secrets"] or ""
    self.ca_certificate_bundle = kwargs["ca_certificate_bundle"]
    self.volume_claim_spec = kwargs.get(
        "spec", {},
    ).get(
        "volumeClaimTemplate", {},
    ).get(
        "spec", {},
    )
    self.labels = {
        "app": "docker-registry",
        "mirror": self.name,
    }
    self.metadata = client.V1ObjectMeta(
        namespace=self.namespace,
        name=self.full_name,
        labels=self.labels,
        owner_references=[
            client.V1OwnerReference(
                api_version=self.apiVersion,
                name=self.name,
                kind=self.kind,
                uid=self.uid,
            )
        ],
    )
    self.core_api = client.CoreV1Api()
    self.apps_api = client.AppsV1beta1Api()
    self.ext_api = client.ExtensionsV1beta1Api()
def _make_owner_references(self):
    try:
        return [
            client.V1OwnerReference(api_version="v1",
                                    block_owner_deletion=True,
                                    controller=True,
                                    kind="Pod",
                                    name=self.pod_name,
                                    uid=self._get_game_uid())
        ]
    except IndexError:
        # Couldn't find the current pod
        return []
def update_configmap(configmap, jobname):
    """Update a ConfigMap with an owner_reference.

    Adds the Job UID as an owner_reference to the ConfigMap. To do so, the UID
    of the Job that the ConfigMap belongs to has to be determined first.
    """
    logger = logging.getLogger(jobname)
    config.load_kube_config()
    namespace = "default"

    # Determine the UID of the Job
    api = client.BatchV1Api()
    try:
        api_response = api.read_namespaced_job(jobname, namespace, pretty=True)
    except ApiException as e:
        logger.debug("Something went wrong while reading the Job")
        logger.debug(
            "Exception when calling BatchV1Api->read_namespaced_job: %s\n" % e)
    logger.info("Job read successfully")
    logger.debug(api_response)
    uid = api_response.metadata.uid

    # Update the ConfigMap with the owner_reference
    api = client.CoreV1Api()
    owner_references = client.V1OwnerReference(api_version="batch/v1",
                                               kind="Job",
                                               name=jobname,
                                               uid=uid)
    owner_references_list = [owner_references]
    configmap.metadata.owner_references = owner_references_list
    try:
        response = api.patch_namespaced_config_map(name=jobname,
                                                   namespace=namespace,
                                                   body=configmap,
                                                   pretty=True)
    except ApiException as e:
        logger.debug("Something went wrong while patching the ConfigMap")
        logger.debug(
            "Exception when calling CoreV1Api->patch_namespaced_config_map: %s\n" % e)
        return 500
    logger.info("ConfigMap was patched successfully")
    logger.debug(response)
    return 200
def _update_config_map_owner_reference(self):
    cm = self.v1_client.read_namespaced_config_map(
        name=self.resources_identifier, namespace=NAMESPACE)
    job = self.batch_client.read_namespaced_job(
        name=self.resources_identifier, namespace=NAMESPACE)
    cm.metadata.owner_references = []
    job_owner_ref = client.V1OwnerReference(api_version=job.api_version,
                                            kind="Job",
                                            name=self.resources_identifier,
                                            uid=job.metadata.uid)
    cm.metadata.owner_references.append(job_owner_ref)
    self.v1_client.patch_namespaced_config_map(
        name=self.resources_identifier, namespace=NAMESPACE, body=cm)
def construct_replica_set(name, owner_deployment=None):
    """Construct a fake ReplicaSet body"""
    if owner_deployment:
        owner_references = [
            client.V1OwnerReference(
                api_version=owner_deployment.api_version,
                uid=uuid.uuid4().hex,
                name=owner_deployment.metadata.name,
                kind='Deployment',
            )
        ]
        match_labels = owner_deployment.spec.selector.match_labels
    else:
        owner_references = []
        match_labels = {'rs-name': name}
    return client.V1ReplicaSet(
        api_version='extensions/v1beta1',
        kind='ReplicaSet',
        metadata=client.V1ObjectMeta(
            name=name,
            # Set owner reference to deployment
            owner_references=owner_references,
        ),
        spec=client.V1ReplicaSetSpec(
            replicas=1,
            selector=client.V1LabelSelector(match_labels=match_labels),
            template=client.V1PodTemplateSpec(
                spec=client.V1PodSpec(containers=[
                    client.V1Container(image="busybox",
                                       name="main",
                                       command=["sleep", "3600"])
                ]),
                metadata=client.V1ObjectMeta(labels=match_labels, name=name),
            ),
        ),
    )
def construct_pod(name, labels=None, owner_replicaset=None):
    """Construct a fake Pod body"""
    if owner_replicaset:
        owner_references = [
            client.V1OwnerReference(
                api_version=owner_replicaset.api_version,
                uid=uuid.uuid4().hex,
                name=owner_replicaset.metadata.name,
                kind='ReplicaSet',
            )
        ]
        labels = owner_replicaset.spec.selector.match_labels
    else:
        owner_references = []
    return client.V1Pod(
        api_version='v1',
        kind='Pod',
        metadata=client.V1ObjectMeta(name=name,
                                     labels=labels,
                                     owner_references=owner_references),
        spec=client.V1PodSpec(
            containers=[client.V1Container(name="main", image="busybox")]),
    )
def trigger_k8s_cronjob(cronjob_name, namespace):
    try:
        config.load_incluster_config()
    except config.ConfigException:
        config.load_kube_config()
    configuration = client.Configuration()
    api = client.BatchV1Api(client.ApiClient(configuration))
    cronjob = get_cronjob(cronjob_name, namespace)
    if cronjob:
        date_str = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
        # Change the name of the job to be created to show that it was
        # manually created at time: date_str
        cronjob.spec.job_template.metadata.name = str(
            date_str + cronjob.metadata.name)[:63]
        try:
            # Create an OwnerReference object and add it to the
            # metadata.owner_references list
            owner_reference = client.V1OwnerReference(
                api_version=cronjob.api_version or 'batch/v1beta1',
                controller=True,
                kind=cronjob.kind or 'CronJob',
                name=cronjob.metadata.name,
                uid=cronjob.metadata.uid)
            cronjob.spec.job_template.metadata.owner_references = [
                owner_reference
            ]
        except ApiException as e:
            logging.critical(
                "Exception when calling BatchV1Api->create_namespaced_job: %s\n" % e)
            logging.critical("owner_reference: %s\n" % owner_reference)
            logging.critical("cronjob: %s\n" % cronjob)
        try:
            # Create a job from the job_template of the cronjob
            created_job = api.create_namespaced_job(
                namespace=namespace, body=cronjob.spec.job_template)
        except ApiException as e:
            logging.critical(
                "Exception when calling BatchV1Api->create_namespaced_job: %s\n" % e)
        # Get the uid from the newly created job
        controllerUid = created_job.metadata.uid
        core_v1 = client.CoreV1Api(client.ApiClient(configuration))
        # Create a label_selector from the job's UID
        pod_label_selector = "controller-uid=" + controllerUid
        try:
            # Wait a bit for the job to be created
            time.sleep(10)
            # Get the pod name for the newly created job
            pods_list = core_v1.list_namespaced_pod(
                namespace,
                label_selector=pod_label_selector,
                timeout_seconds=10)
            pod_name = pods_list.items[0].metadata.name
        except ApiException as e:
            logging.critical(
                "Exception when calling CoreV1Api->list_namespaced_pod: %s\n" % e)
        try:
            # Get the status of the newly created job
            status = core_v1.read_namespaced_pod_status(pod_name,
                                                        namespace).status.phase
        except ApiException as e:
            logging.critical(
                "Exception when calling CoreV1Api->read_namespaced_pod_status: %s\n" % e)
        # Sleep while the pod has not completed, break on Failed or Succeeded status
        pending_statuses = ['Pending', 'Running', 'Unknown']
        while status in pending_statuses:
            try:
                status = core_v1.read_namespaced_pod_status(
                    pod_name, namespace).status.phase
                logging.critical('Current Status: ' + status)
                if status == 'Succeeded' or status == 'Failed':
                    break
                logging.critical('sleeping')
                time.sleep(5)
            except ApiException as e:
                logging.critical(
                    "Exception when calling CoreV1Api->read_namespaced_pod_status: %s\n" % e)
        try:
            # Retrieve and print the log from the finished pod
            pod_log = core_v1.read_namespaced_pod_log(name=pod_name,
                                                      namespace=namespace,
                                                      pretty=True,
                                                      timestamps=True)
            logging.critical(pod_log)
        except ApiException as e:
            logging.critical(
                "Exception when calling CoreV1Api->read_namespaced_pod_log: %s\n" % e)
        logging.critical(status)
        # Return 'Job Succeeded' if the pod succeeded; raise otherwise
        if status == 'Succeeded':
            return 'Job Succeeded'
        raise Exception('Job Failed')
    # get_cronjob() returned False
    else:
        raise Exception("Could not find cronjob")
def _BuildPodManifest(self) -> k8s_client.V1Pod:
    annotations = {}
    env_vars = []
    if isinstance(self._serving_binary, serving_bins.TensorFlowServing):
        env_vars_dict = self._serving_binary.MakeEnvVars(
            model_path=self._model_path)
        env_vars.extend(
            k8s_client.V1EnvVar(name=key, value=value)
            for key, value in env_vars_dict.items())
    if self._config.serving_pod_overrides:
        overrides = self._config.serving_pod_overrides
        if overrides.annotations:
            annotations.update(overrides.annotations)
        if overrides.env:
            env_vars.extend(_convert_to_kube_env(env) for env in overrides.env)

    service_account_name = (self._config.service_account_name or
                            self._executor_pod.spec.service_account_name)
    active_deadline_seconds = (self._config.active_deadline_seconds or
                               _DEFAULT_ACTIVE_DEADLINE_SEC)
    if active_deadline_seconds < 0:
        raise ValueError('active_deadline_seconds should be > 0, but got '
                         f'{active_deadline_seconds}.')

    result = k8s_client.V1Pod(
        metadata=k8s_client.V1ObjectMeta(
            generate_name=_MODEL_SERVER_POD_NAME_PREFIX,
            annotations=annotations,
            labels=self._label_dict,
            # Resources with ownerReferences are automatically deleted once
            # all its owners are deleted.
            owner_references=[
                k8s_client.V1OwnerReference(
                    api_version=self._executor_pod.api_version,
                    kind=self._executor_pod.kind,
                    name=self._executor_pod.metadata.name,
                    uid=self._executor_pod.metadata.uid,
                ),
            ],
        ),
        spec=k8s_client.V1PodSpec(
            containers=[
                k8s_client.V1Container(
                    name=_MODEL_SERVER_CONTAINER_NAME,
                    image=self._serving_binary.image,
                    env=env_vars,
                    volume_mounts=[],
                ),
            ],
            service_account_name=service_account_name,
            # No retry in case model server container failed. Retry will
            # happen at the outermost loop (executor.py).
            restart_policy=_RestartPolicy.NEVER.value,
            # This is a hard deadline for the model server container to ensure
            # the Pod is properly cleaned up even with an unexpected
            # termination of an infra validator. After the deadline, the
            # container will be removed but the Pod resource won't. This makes
            # the Pod log visible after the termination.
            active_deadline_seconds=active_deadline_seconds,
            volumes=[],
            # TODO(b/152002076): Add TTL controller once it graduates Beta.
            # ttl_seconds_after_finished=,
        ))
    self._SetupModelVolumeIfNeeded(result)
    return result
def generate_pod():
    metadata = client.V1ObjectMeta(
        name="platform-app-958795556-2nqgj",
        namespace="production",
        generate_name="platform-app-958795556-",
        labels={
            "app": "platform",
            "chart": "platform",
            "component": "app",
            "heritage": "Helm",
            "pod-template-hash": "958795556",
            "release": "platform-production",
            "version": "1.0.3",
        },
        owner_references=[
            client.V1OwnerReference(
                api_version="apps/v1",
                kind="ReplicaSet",
                name="platform-app-958795556",
                uid="35ba938b-681d-11eb-a74a-16e1a04d726b",
                controller=True,
                block_owner_deletion=True,
            )
        ],
    )
    container = client.V1Container(
        name="app",
        image="platform.azurecr.io/app:master",
        image_pull_policy="Always",
        termination_message_policy="File",
        termination_message_path="/dev/termination-log",
        env=[],
        resources=client.V1ResourceRequirements(
            limits={
                "cpu": "1200m",
                "memory": "1Gi"
            },
            requests={
                "cpu": "1",
                "memory": "768Mi"
            },
        ),
        ports=[client.V1ContainerPort(container_port=3000, protocol="TCP")],
        volume_mounts=[
            client.V1VolumeMount(
                name="default-token-2cg25",
                read_only=True,
                mount_path="/var/run/secrets/kubernetes.io/serviceaccount",
            )
        ],
        liveness_probe=client.V1Probe(
            initial_delay_seconds=10,
            timeout_seconds=5,
            period_seconds=10,
            success_threshold=1,
            failure_threshold=6,
            http_get=client.V1HTTPGetAction(path="/health/liveness",
                                            port=3000,
                                            scheme="HTTP"),
        ),
        readiness_probe=client.V1Probe(
            initial_delay_seconds=10,
            timeout_seconds=5,
            period_seconds=10,
            success_threshold=2,
            failure_threshold=6,
            http_get=client.V1HTTPGetAction(path="/health/readness",
                                            port=3000,
                                            scheme="HTTP"),
        ),
    )
    spec = client.V1PodSpec(
        containers=[container],
        volumes=[
            client.V1Volume(
                name="default-token-2cg25",
                secret=client.V1SecretVolumeSource(
                    secret_name="default-token-2cg25", default_mode=420),
            )
        ],
        restart_policy="Always",
        termination_grace_period_seconds=30,
        dns_policy="ClusterFirst",
        service_account_name="default",
        service_account="default",
        node_name="aks-agentpool-26722002-vmss00039t",
        security_context=client.V1PodSecurityContext(run_as_user=1000,
                                                     fs_group=1000),
        scheduler_name="default-scheduler",
        tolerations=[
            client.V1Toleration(
                key="node.kubernetes.io/not-ready",
                operator="Exists",
                effect="NoExecute",
                toleration_seconds=300,
            ),
            client.V1Toleration(
                key="node.kubernetes.io/unreachable",
                operator="Exists",
                effect="NoExecute",
                toleration_seconds=300,
            ),
        ],
        priority=0,
        enable_service_links=True,
    )
    return client.V1Pod(metadata=metadata, spec=spec)
def create_user_namespace(
    api: client.CoreV1Api,
    userspace_dc: dynamic.DynamicClient,
    user_name: str,
    user_email: str,
    expected_user_namespaces: Dict[str, str],
    namespaces: List[str],
) -> None:
    env = os.environ.get("ORBIT_ENV", "")
    if not env:
        raise ValueError("Orbit Environment ORBIT_ENV is required")
    for team, user_ns in expected_user_namespaces.items():
        try:
            team_namespace = api.read_namespace(name=team).to_dict()
            team_uid = team_namespace.get("metadata", {}).get("uid", None)
            logger.info(f"Retrieved Team Namespace uid: {team_uid}")
        except Exception:
            logger.exception("Error retrieving Team Namespace")
            team_uid = None
        if user_ns not in namespaces:
            logger.info(f"User namespace {user_ns} doesn't exist. Creating...")
            kwargs = {
                "name": user_ns,
                "annotations": {"owner": user_email},
                "labels": {
                    "orbit/efs-id": EFS_FS_ID,
                    "orbit/env": os.environ.get("ORBIT_ENV"),
                    "orbit/space": "user",
                    "orbit/team": team,
                    "orbit/user": user_name,
                    # "istio-injection": "enabled",
                },
            }
            if team_uid:
                kwargs["owner_references"] = [
                    client.V1OwnerReference(api_version="v1",
                                            kind="Namespace",
                                            name=team,
                                            uid=team_uid)
                ]

            body = client.V1Namespace()
            body.metadata = client.V1ObjectMeta(**kwargs)
            try:
                # create userspace namespace resource
                api.create_namespace(body=body)
                logger.info(f"Created namespace {user_ns}")
            except ApiException as ae:
                logger.warning(f"Exception when trying to create user namespace {user_ns}")
                logger.warning(ae.body)

            try:
                # create userspace custom resource for the given user namespace
                logger.info(f"Creating userspace custom resource {user_ns}")
                create_userspace(
                    userspace_dc=userspace_dc,
                    name=user_ns,
                    env=env,
                    space="user",
                    team=team,
                    user=user_name,
                    team_efsid=EFS_FS_ID,
                    user_email=user_email,
                )
                logger.info(f"Created userspace custom resource {user_ns}")
            except ApiException as ae:
                logger.warning(f"Exception when trying to create userspace custom resource {user_ns}")
                logger.warning(ae.body)