def delete_complete_jobs(api: CoreV1Api, batch_api: BatchV1Api, namespace: str):
    """Delete complete jobs.

    For every job in ``namespace`` that carries a true ``Complete`` condition,
    is not already being deleted, and is accepted by ``_is_flush_job``:

    1. patch the job's persistent volume so its reclaim policy is ``Delete``;
    2. patch the persistent volume claim named after the job so the job
       becomes its owner;
    3. delete the job with foreground propagation, guarded by uid and
       resource-version preconditions.

    Args:
        api: core API client used to patch the PV and the PVC.
        batch_api: batch API client used to list and delete jobs.
        namespace: namespace whose jobs are inspected.

    Raises:
        ApiException: when the job delete fails for a reason other than
            ``CONFLICT`` or ``NOT_FOUND``; errors from the two patch calls
            are not handled here and propagate as raised.
    """
    for job in batch_api.list_namespaced_job(namespace).items:
        # Scan every condition instead of only the first one: the list can
        # hold other condition types ahead of "Complete", and a condition
        # only holds when its status is the string "True".
        is_complete = any(
            condition.type == "Complete" and condition.status == "True"
            for condition in (job.status.conditions or [])
        )
        if (
            not is_complete
            or job.metadata.deletion_timestamp
            or not _is_flush_job(job)
        ):
            continue

        job_name = job.metadata.name
        logger.info(f"deleting complete job: {job_name}")

        # configure persistent volume claims to be deleted with the job
        pv_name = _pv_name_from_job(job)
        logger.info(f"including pv in pvc delete: {pv_name}")
        api.patch_persistent_volume(
            name=pv_name,
            body=V1PersistentVolume(
                spec=V1PersistentVolumeSpec(
                    persistent_volume_reclaim_policy="Delete",
                )
            ),
        )
        logger.info(f"including pvc in job delete: {job_name}")
        api.patch_namespaced_persistent_volume_claim(
            name=job_name,
            namespace=namespace,
            body=V1PersistentVolumeClaim(
                metadata=V1ObjectMeta(
                    owner_references=[
                        V1OwnerReference(
                            api_version="batch/v1",
                            kind="Job",
                            name=job_name,
                            uid=job.metadata.uid,
                            block_owner_deletion=True,
                        )
                    ]
                )
            ),
        )
        try:
            batch_api.delete_namespaced_job(
                name=job_name,
                namespace=namespace,
                body=V1DeleteOptions(
                    grace_period_seconds=0,
                    propagation_policy="Foreground",
                    # only delete the exact object revision that was listed
                    preconditions=V1Preconditions(
                        resource_version=job.metadata.resource_version,
                        uid=job.metadata.uid,
                    ),
                ),
            )
        except ApiException as e:
            if e.reason not in (CONFLICT, NOT_FOUND):
                raise
            logger.info(f"job already deleted or updated: {job_name}")
def _create_flush_job(
    batch_api: BatchV1Api,
    command: List[str],
    env: List[V1EnvVar],
    image: str,
    name: str,
    namespace: str,
    service_account_name: str,
) -> V1Job:
    """Create the flush job called ``name``, or return the existing one.

    The job runs a single ``flush`` container with ``image``, ``command`` and
    ``env``, mounting the persistent volume claim named ``name`` at ``/data``.

    Returns:
        The job returned by the create call, or the job read back from the
        API when the create call reports that it already exists.

    Raises:
        ApiException: for any create failure other than "already exists".
    """
    logger.info(f"creating job: {name}")

    flush_container = V1Container(
        image=image,
        command=command,
        name="flush",
        volume_mounts=[V1VolumeMount(mount_path="/data", name="queue")],
        env=env,
    )
    queue_volume = V1Volume(
        name="queue",
        persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
            claim_name=name
        ),
    )
    pod_spec = V1PodSpec(
        containers=[flush_container],
        restart_policy="OnFailure",
        volumes=[queue_volume],
        service_account_name=service_account_name,
    )
    job_body = V1Job(
        api_version="batch/v1",
        kind="Job",
        metadata=V1ObjectMeta(name=name, namespace=namespace),
        spec=V1JobSpec(template=V1PodTemplateSpec(spec=pod_spec)),
    )

    try:
        return batch_api.create_namespaced_job(namespace=namespace, body=job_body)
    except ApiException as e:
        already_exists = (
            e.reason == CONFLICT
            and json.loads(e.body)["reason"] == ALREADY_EXISTS
        )
        if not already_exists:
            raise
        logger.info(f"using existing job: {name}")
        return batch_api.read_namespaced_job(name, namespace)
def delete_completed_jobs(job_labels: Mapping[str, str], client: BatchV1Api = None):
    """Delete every job selected by ``job_labels`` that ``job_complete`` accepts.

    Args:
        job_labels: labels handed to ``list_jobs`` to select candidate jobs.
        client: batch API client; when ``None`` one is obtained from
            ``initialize_batch_client()``.
    """
    if client is None:
        client = initialize_batch_client()
    logger.info("Deleting succesful jobs.")
    for finished_job in filter(job_complete, list_jobs(client, job_labels)):
        meta = finished_job.metadata
        name = meta.name
        logger.info(f"Deleting completed job: {name}")
        client.delete_namespaced_job(name, namespace=meta.namespace)
def apply(manifest_file: str, server: str, namespace: str, user: str, password: str, verbose: int, quiet: int):
    """
    Apply manifest file

    Configures logging, then creates a Job in ``namespace`` from the first
    YAML document found in ``manifest_file`` and pretty-prints the response.

    Args:
        manifest_file: path of a UTF-8 encoded YAML manifest.
        server, user, password: passed to ``ClusterCredentialsContextManager``.
        namespace: namespace the job is created in.
        verbose: each step lowers the log level by 10, starting from WARN.
        quiet: each step raises the log level by 10, starting from WARN.

    Raises:
        IndexError: when the manifest contains no YAML document.
    """
    logging.basicConfig(
        level=logging.WARN + 10 * quiet - 10 * verbose,
        format='[%(asctime)s] %(levelname)-7s [%(name)s:%(module)s - %(funcName)s:%(lineno)s] %(message)s',
        datefmt='%d.%m.%Y %H:%M:%S')
    logger.info("Running `apply`.")
    with ClusterCredentialsContextManager(server=server, user=user, password=password):
        v1_batch = BatchV1Api()
        with io.open(manifest_file, 'r', encoding='utf-8') as the_file:
            # safe_load_all replaces yaml.load_all: calling load_all without
            # a Loader can construct arbitrary Python objects from the file
            # and is rejected outright by recent PyYAML releases.
            documents = list(yaml.safe_load_all(the_file))
        if not documents:
            # Same exception type the bare documents[0] lookup used to raise,
            # but with a message that names the offending file.
            raise IndexError(f"no YAML documents found in {manifest_file}")
        api_response = v1_batch.create_namespaced_job(namespace, documents[0], pretty=True)
        pprint.pprint(api_response)
    logger.info("Done.")
def flush_released_pvs(
    api: CoreV1Api,
    batch_api: BatchV1Api,
    command: List[str],
    env: List[V1EnvVar],
    image: str,
    namespace: str,
    service_account_name: str,
):
    """
    Flush persistent volumes.

    Gracefully handle resuming after an interruption, because this is not atomic.

    A persistent volume is flushed when no job with its derived name exists
    yet, its claim reference points into ``namespace``, its reclaim policy is
    not ``Delete``, and it is either ``Released`` or already claimed under the
    derived name. Volumes that are not ``Bound`` get a new claim bound to
    them before the flush job is created.
    """
    existing_jobs = set()
    for job in batch_api.list_namespaced_job(namespace).items:
        existing_jobs.add(job.metadata.name)

    for pv in api.list_persistent_volume().items:
        name = _job_and_pvc_name_from_pv(pv)
        if name in existing_jobs:
            continue
        claim_ref = pv.spec.claim_ref
        if not claim_ref or claim_ref.namespace != namespace:
            continue
        if pv.spec.persistent_volume_reclaim_policy == "Delete":
            continue
        if not pv.status:
            continue
        if pv.status.phase != "Released" and claim_ref.name != name:
            continue

        logger.info(f"flushing unbound pv: {pv.metadata.name}")
        if pv.status.phase != "Bound":
            pvc = _create_pvc(api, name, namespace, pv)
            _bind_pvc(api, pv, pvc)
        _create_flush_job(
            batch_api, command, env, image, name, namespace, service_account_name
        )
def main():
    """Continuously flush and delete detached persistent volumes.

    Parses the command line, loads the in-cluster configuration, and runs
    three tasks through ``run_task`` on a thread pool with one thread each.
    """
    args = parser.parse_args()
    load_incluster_config()
    api = CoreV1Api()
    batch_api = BatchV1Api()

    flush_task = partial(
        flush_released_pvs_and_delete_complete_jobs,
        api,
        batch_api,
        args.command,
        args.env,
        args.image,
        args.namespace,
        args.service_account_name,
    )
    detached_pvc_cache = {}
    pvc_cleanup_task = partial(
        delete_detached_pvcs,
        api,
        args.namespace,
        args.claim_prefix,
        timedelta(seconds=args.pvc_cleanup_delay_seconds),
        detached_pvc_cache,
    )
    pod_cleanup_task = partial(delete_unschedulable_pods, api, args.namespace)

    tasks = [flush_task, pvc_cleanup_task, pod_cleanup_task]
    with ThreadPool(len(tasks)) as pool:
        pool.map(run_task, tasks, chunksize=1)
def create_job(self, k8s_job_client: client.BatchV1Api):
    """Submit ``self.mlcube_job_manifest`` as a job in ``self.namespace``.

    The status of the creation response is logged at INFO level.
    """
    response = k8s_job_client.create_namespaced_job(
        body=self.mlcube_job_manifest,
        namespace=self.namespace,
    )
    status_text = str(response.status)
    logging.info("MLCommons Box k8s job created. Status='%s'" % status_text)
def get_batch_v1_api(self):
    """
    Build a BatchV1Api on top of this object's API client.

    :return: a ``BatchV1Api`` constructed from ``self.get_api_client()``
    """
    return BatchV1Api(self.get_api_client())
def _inspect_jobs(
    config: Dict[str, Any],
    lock: synchronize.Lock,
    replications_queue: Queue,  # type: ignore
    replication_statuses: Dict[str, str],
) -> None:
    """Patch every job whose container images all report ``"Complete"``.

    For each job in all namespaces, the containers and init containers of the
    pod template are passed to ``_inspect_containers``, which returns a
    ``(containers, statuses)`` pair. When at least one status exists and all
    of them equal ``"Complete"``, the images are removed from
    ``replication_statuses`` (under ``lock``) and the job is patched with the
    returned container lists.

    Args:
        config: forwarded unchanged to ``_inspect_containers``.
        lock: guards mutation of ``replication_statuses``.
        replications_queue: forwarded unchanged to ``_inspect_containers``.
        replication_statuses: image -> status mapping; completed images are
            removed from it.
    """
    # One client serves both the listing and the patch calls.
    batch_api = BatchV1Api()
    for job in batch_api.list_job_for_all_namespaces().items:
        pod_spec = job.spec.template.spec
        containers, statuses = _inspect_containers(
            config, lock, replications_queue, replication_statuses, pod_spec.containers
        )
        if pod_spec.init_containers:
            init_containers, init_statuses = _inspect_containers(
                config, lock, replications_queue, replication_statuses, pod_spec.init_containers
            )
        else:
            init_containers, init_statuses = [], []

        all_statuses = statuses + init_statuses
        if not all_statuses or any(status != "Complete" for status in all_statuses):
            continue

        with lock:
            for container in containers + init_containers:
                # pop() rather than del: two containers of the same job can
                # share an image, and the second removal must not raise
                # KeyError.
                replication_statuses.pop(container["image"], None)

        body = {
            "spec": {
                "template": {
                    "spec": {
                        "containers": containers,
                        "initContainers": init_containers,
                    }
                }
            }
        }
        batch_api.patch_namespaced_job(
            name=job.metadata.name,
            namespace=job.metadata.namespace,
            body=body,
        )
        # The patched object is a Job; the previous message said "Deployment".
        logger.info("Patched Job: %s, Namespace: %s", job.metadata.name, job.metadata.namespace)
def cleanup_jobs(namespace: str, name: str, dry: bool):
    """Delete the jobs yielded by ``find_jobs`` for ``name``, and their pods.

    Args:
        namespace: namespace to clean up.
        name: job name pattern; every ``*`` is turned into ``.*`` and the
            result is anchored with ``^`` and ``$`` before being handed to
            ``find_jobs``.
        dry: when true, only log what would be deleted.
    """
    import re  # local import keeps this block self-contained

    core = CoreV1Api()
    batch = BatchV1Api()
    # NOTE(review): apart from '*', `name` is used as a regular expression
    # as-is, so other metacharacters in it stay active. Left unchanged in
    # case callers rely on passing regex syntax.
    job_name_regex = f'^{name.replace("*", ".*")}$'
    for job_name in find_jobs(batch, namespace, job_name_regex):
        logger.info("delete job: %s", job_name)
        if not dry:
            delete_job(batch, namespace, job_name)
        # job_name is a literal object name, not a pattern: escape it so that
        # a '.' in the name cannot match arbitrary characters and select pods
        # that belong to a different job.
        pod_name_regex = f'^{re.escape(job_name)}-.*$'
        for pod_name in find_pods(core, namespace, pod_name_regex):
            logger.info("delete pod: %s", pod_name)
            if not dry:
                delete_pod(core, namespace, pod_name)
def get_job(self):
    """Summarise the jobs of all namespaces.

    Returns:
        ``{"desc": "jobs", "result": [...]}`` where each entry holds the
        job's namespace (``ns``), ``name``, start time (``start``) and a
        ``status`` of ``"success"`` (``succeeded == 1``), ``"failed"``
        (``failed == 1``) or ``"active"`` (anything else).
    """
    def _classify(job_status):
        if job_status.succeeded == 1:
            return "success"
        if job_status.failed == 1:
            return "failed"
        return "active"

    batch_api = BatchV1Api(self.api_client)
    listing = batch_api.list_job_for_all_namespaces()
    summaries = [
        {
            "ns": job.metadata.namespace,
            "name": job.metadata.name,
            "start": job.status.start_time,
            "status": _classify(job.status),
        }
        for job in listing.items
    ]
    return {"desc": "jobs", "result": summaries}
def __init__(self, path):
    """Initialise kubernetes: load the kube config at ``path`` and create the batch client."""
    config.load_kube_config(path)
    batch_client = BatchV1Api()
    self.batch = batch_client
def create_job(
    namespace: str,
    name: str,
    labels: kopf.Labels,
    annotations: kopf.Annotations,
    spec: kopf.Spec,
    status: kopf.Status,
    patch: kopf.Patch,
    logger: kopf.Logger,
    namespaces_idx: kopf.Index[str, Dict[str, Any]],
    podsettings_idx: kopf.Index[Tuple[str, str], Dict[str, Any]],
    **_: Any,
) -> str:
    """Create a Job, or a CronJob when ``spec`` has a ``schedule``.

    The last entry indexed for ``namespace`` supplies ``env`` and ``team``;
    the environment context is loaded once and cached in the module-level
    ``ENV_CONTEXT``. The outcome is recorded in ``patch`` under
    ``status.orbitJobOperator``.

    Returns:
        ``"JobCreationFailed"`` when no namespace entry exists or the
        environment context cannot be loaded, otherwise ``"CronJobCreated"``
        or ``"JobCreated"``.
    """
    global ENV_CONTEXT  # Caching

    def _fail(error: str) -> str:
        # Record the failure reason on the resource being handled.
        patch["status"] = {
            "orbitJobOperator": {
                "jobStatus": "JobCreationFailed",
                "error": error,
            }
        }
        return "JobCreationFailed"

    def _record_created(job_type: str, created_name: Any, created_node_type: Any) -> None:
        # Label the resource with the created kind and store the job name.
        patch["metadata"] = {"labels": {"k8sJobType": job_type}}
        patch["status"] = {
            "orbitJobOperator": {
                "jobStatus": "JobCreated",
                "jobName": created_name,
                "nodeType": created_node_type,
            }
        }

    # The loop leaves the last indexed entry bound, logging each one.
    namespace_meta: Optional[Dict[str, Any]] = None
    for namespace_meta in namespaces_idx.get(namespace, []):
        logger.debug("ns: %s", namespace_meta)
    if namespace_meta is None:
        return _fail("No Namespace resource found")

    env = namespace_meta["env"]
    team = namespace_meta["team"]

    if ENV_CONTEXT is None:
        loaded_context = _load_env_context_from_ssm(env)
        if loaded_context is None:
            return _fail("Unable to load Env Context from SSM")
        ENV_CONTEXT = loaded_context

    compute = spec.get("compute", {})
    node_type = compute.get("nodeType", "fargate")
    runner_labels = {
        "app": "orbit-runner",
        "orbit/node-type": node_type,
        "notebook-name": spec.get("notebookName", ""),
        "orbit/attach-security-group": "yes" if node_type == "ec2" else "no",
    }

    # Same "last entry wins" lookup for the pod setting of this team.
    podsetting_metadata: Dict[str, Any] = {}
    podsetting_key = (team, compute.get("podSetting", None))
    for podsetting_metadata in podsettings_idx.get(podsetting_key, []):
        logger.debug("PodSetting: %s", podsetting_metadata)

    job_spec = job_utils.construct_job_spec(
        env=env,
        team=team,
        env_context=ENV_CONTEXT,
        podsetting_metadata=podsetting_metadata,
        orbit_job_spec=spec,
        labels=runner_labels,
    )
    logger.debug("spec: %s", spec)

    object_labels = {**runner_labels, **compute.get("labels", {})}

    if not spec.get("schedule"):
        one_off_job = V1Job(
            api_version="batch/v1",
            kind="Job",
            metadata=V1ObjectMeta(labels=object_labels),
            spec=job_spec,
        )
        kopf.adopt(one_off_job, nested="spec.template")
        created_job = BatchV1Api().create_namespaced_job(namespace=namespace, body=one_off_job)
        created_job_meta = created_job.metadata
        logger.debug("Started Job: %s", created_job_meta.name)
        _record_created("Job", created_job_meta.name, node_type)
        return "JobCreated"

    cronjob_id = f"orbit-{namespace}-{spec.get('triggerName')}"
    scheduled_job = V1beta1CronJob(
        api_version="batch/v1beta1",
        kind="CronJob",
        metadata=V1ObjectMeta(name=cronjob_id, labels=object_labels, namespace=namespace),
        status=V1beta1CronJobStatus(),
        spec=V1beta1CronJobSpec(
            job_template=V1beta1JobTemplateSpec(spec=job_spec),
            schedule=spec.get("schedule"),
        ),
    )
    kopf.adopt(scheduled_job, nested="spec.template")
    created_cron_job = BatchV1beta1Api().create_namespaced_cron_job(namespace=namespace, body=scheduled_job)
    created_cron_meta = created_cron_job.metadata
    logger.debug("Started Cron Job: %s", created_cron_meta.name)
    _record_created("CronJob", created_cron_meta.name, node_type)
    return "CronJobCreated"
def delete_job(batch: BatchV1Api, namespace: str, job_name: str):
    """Delete the job ``job_name`` in ``namespace`` with default delete options.

    Args:
        batch: batch API client used for the delete call.
        namespace: namespace that holds the job.
        job_name: name of the job to delete.
    """
    # Pass everything by keyword: newer kubernetes clients accept `body` only
    # as a keyword argument and reject a third positional argument, while
    # clients that take it positionally accept the keyword form as well.
    batch.delete_namespaced_job(
        name=job_name,
        namespace=namespace,
        body=V1DeleteOptions(),
    )
def find_jobs(batch: BatchV1Api, namespace: str, name_regex: str):
    """Yield the names of matching jobs in ``namespace``.

    A job matches when ``re.match(name_regex, name)`` succeeds and its
    ``status.succeeded`` equals 1.
    """
    for candidate in batch.list_namespaced_job(namespace).items:
        if not re.match(name_regex, candidate.metadata.name):
            continue
        if candidate.status.succeeded != 1:
            continue
        yield candidate.metadata.name
def test_flush_manager(options: Dict[str, Any], emulator: str, web: str):
    """End-to-end scenario for flush-manager.

    Steps, as driven by the code below:

    1. wait for the pods in ``default`` to be Running;
    2. create a pubsub subscription against the emulator and post message 0;
    3. delete the pvcs and restart the web pods (expected to fail with a
       Conflict), after which posting message 1 must fail to connect;
    4. deploy the flush-manager kube resources and post message 2 until it
       is accepted;
    5. scale ``web`` to zero and wait for persistent volumes, pvcs and jobs
       to disappear;
    6. assert that exactly messages 0 and 2 were received.

    Args:
        options: test options; ``cluster``, ``project`` and ``topic`` are
            read here and the whole mapping is forwarded to ``kube_resource``.
        emulator: value exported as ``PUBSUB_EMULATOR_HOST``.
        web: URL that messages are posted to.
    """
    print("starting test")
    api = CoreV1Api()
    # max number of loops to run when waiting for kube actions to complete
    max_wait_loops = 20 if options["cluster"] is None else 60
    # server has invalid PUBSUB_EMULATOR, so that only flush can deliver messages
    static_pvs = options["cluster"] is None
    if static_pvs:
        create_static_pvs(api)
    print("waiting for pods to be healthy")
    for _ in range(max_wait_loops):
        if all(pod.status.phase == "Running"
               for pod in api.list_namespaced_pod("default").items):
            break
        time.sleep(1)
    else:
        # for/else: reached only when the loop never hit `break`
        assert False, "pods did not become healthy"
    # create a subscription to the defined topic
    print("creating pubsub subscription")
    os.environ["PUBSUB_EMULATOR_HOST"] = emulator
    sub_client = SubscriberClient()
    topic_path = "projects/{project}/topics/{topic}".format(**options)
    subscription_path = "projects/{project}/subscriptions/{topic}".format(
        **options)
    try:
        sub_client.create_subscription(subscription_path, topic_path, retry=None)
    except AlreadyExists:
        # an existing subscription is fine
        pass
    print("posting message 0")
    requests.post(web, headers={
        "host": "web"
    }, json={
        "id": 0
    }).raise_for_status()
    print("setting up race condition: attached pvc is also deleted")
    delete_pvcs(api)
    print("setting up race condition: pod unschedulable due to missing pvc")
    with pytest.raises(ApiException) as excinfo:
        restart_web_pods(api)
    assert excinfo.value.reason == "Conflict"
    print("posting message 1")
    # message 1 is expected to be lost: the post must fail to connect
    with pytest.raises(requests.exceptions.ConnectionError):
        requests.post(web, headers={
            "host": "web"
        }, json={
            "id": 1
        }).raise_for_status()
    print("starting flush-manager")
    # TODO optionally run flush-manager via subprocess.Popen, to ensure testing
    # current code and enable code coverage
    _sa = kube_resource("kube/flush-manager.sa.yml", **options)
    _cluster_role = kube_resource("kube/flush-manager.clusterrole.yml", **options)
    _cluster_role_binding = kube_resource(
        "kube/flush-manager.clusterrolebinding.yml", **options)
    _role = kube_resource("kube/flush-manager.role.yml", **options)
    _role_binding = kube_resource("kube/flush-manager.rolebinding.yml", **options)
    _deploy = kube_resource("kube/flush-manager.deploy.yml", **options)
    with _sa, _cluster_role, _cluster_role_binding, _role, _role_binding, _deploy:
        print("posting message 2 until successful")
        for i in range(max_wait_loops):
            try:
                requests.post(web, headers={
                    "host": "web"
                }, json={
                    "id": 2
                }).raise_for_status()
            except requests.exceptions.ConnectionError:
                # static pvs are re-created on every retry after the first
                if i > 0 and static_pvs:
                    create_static_pvs(api)
            else:
                # try/else: the post succeeded, stop retrying
                break
            time.sleep(1)
        else:
            assert False, "pod did not recover"
        # scale to 0 pods
        print("scaling web to 0 pods")
        AppsV1Api().patch_namespaced_stateful_set_scale(
            name="web",
            namespace="default",
            body=V1StatefulSet(api_version="apps/v1",
                               kind="StatefulSet",
                               spec=dict(replicas=0)),
        )
        # wait for no pvcs
        print("waiting for cleanup to complete")
        for _ in range(max_wait_loops):
            if not api.list_persistent_volume().items:
                break
            time.sleep(1)
        else:
            print("pvs were not cleaned up")
            assert [] == api.list_persistent_volume().items
        # assert jobs and pvcs also deleted
        assert [] == list_pvcs(api)
        assert [] == BatchV1Api().list_namespaced_job("default").items
    # assert received message id 0 and 2
    assert [b'{"id": 0}', b'{"id": 2}'] == [
        element.message.data
        for element in sub_client.pull(subscription_path, 2).received_messages
    ]
def _job_dispatch(self):
    """Create a Kubernetes Job for every task in ``TASK.SCHEDULED``.

    Runs ``_actual_work`` in an endless loop; when ``self.test`` is truthy
    the loop stops after a single pass. A pass that finds no scheduled task
    sleeps for one second.
    """
    api = BatchV1Api(get_kubernetes_api_client())

    def _actual_work():
        """Process the scheduled tasks once, oldest ``create_time`` first."""
        idle = True
        try:
            for item in Task.objects.filter(
                    status=TASK.SCHEDULED).order_by("create_time"):
                idle = False
                conf = json.loads(item.settings.container_config)
                common_name = "task-exec-{}".format(item.uuid)
                shared_storage_name = "shared-{}".format(item.uuid)
                user_storage_name = "user-{}".format(item.uuid)
                user_dir = "/cloud_scheduler_userspace/"
                create_namespace()
                create_userspace_pvc()
                if not get_userspace_pvc():
                    # without user space storage the task fails immediately
                    item.status = TASK.FAILED
                    item.logs_get = True
                    item.logs = "Failed to get user space storage"
                    item.save(force_update=True)
                else:
                    try:
                        if not config_checker(conf):
                            raise ValueError(
                                "Invalid config for TaskSettings: {}".
                                format(item.settings.uuid))
                        # kubernetes part
                        shell = conf['shell']
                        commands = []
                        mem_limit = conf['memory_limit']
                        time_limit = item.settings.time_limit
                        working_dir = conf['working_path']
                        image = conf['image']
                        shared_pvc = conf['persistent_volume']['name']
                        shared_mount_path = conf['persistent_volume'][
                            'mount_path']
                        script_path = conf['task_script_path']
                        # set-up commands, joined with ';' into one shell
                        # invocation further down
                        commands.append('mkdir -p {}'.format(working_dir))
                        commands.append('cp -r {}/* {}'.format(
                            user_dir, working_dir))  # snapshot
                        commands.append('cp -r {}/* {}'.format(
                            shared_mount_path + '/' + script_path,
                            working_dir))  # overwrite
                        commands.append(
                            'chmod -R +x {}'.format(working_dir))
                        commands.append('cd {}'.format(working_dir))
                        # NOTE(review): the configured commands are wrapped
                        # in single quotes without escaping, so a command
                        # that itself contains a single quote would break
                        # the quoting - consider shlex.quote.
                        commands.append(
                            'timeout --signal TERM {timeout} {shell} -c \'{commands}\''
                            .format(timeout=time_limit,
                                    shell=shell,
                                    commands=';'.join(conf['commands'])))
                        shared_mount = client.V1VolumeMount(
                            mount_path=shared_mount_path,
                            name=shared_storage_name,
                            read_only=True)
                        user_mount = client.V1VolumeMount(
                            mount_path='/cloud_scheduler_userspace/',
                            name=user_storage_name,
                            sub_path="user_{}_task_{}".format(
                                item.user_id, item.settings_id),
                            read_only=True)
                        env_username = client.V1EnvVar(
                            name="CLOUD_SCHEDULER_USER",
                            value=item.user.username)
                        env_user_uuid = client.V1EnvVar(
                            name="CLOUD_SCHEDULER_USER_UUID",
                            value=item.user.uuid)
                        container_settings = {
                            'name': 'task-container',
                            'image': image,
                            'volume_mounts': [shared_mount, user_mount],
                            'command': [shell],
                            'args': ['-c', ';'.join(commands)],
                            'env': [env_username, env_user_uuid]
                        }
                        # a memory limit is only set when one is configured
                        if mem_limit:
                            container_settings[
                                'resources'] = client.V1ResourceRequirements(
                                    limits={'memory': mem_limit})
                        container = client.V1Container(
                            **container_settings)
                        persistent_volume_claim = client.V1PersistentVolumeClaimVolumeSource(
                            claim_name=shared_pvc)
                        user_volume_claim = client.V1PersistentVolumeClaimVolumeSource(
                            claim_name=USERSPACE_NAME)
                        volume = client.V1Volume(
                            name=shared_storage_name,
                            persistent_volume_claim=persistent_volume_claim
                        )
                        user_volume = client.V1Volume(
                            name=user_storage_name,
                            persistent_volume_claim=user_volume_claim)
                        # the "task-exec" label carries the task uuid
                        template = client.V1PodTemplateSpec(
                            metadata=client.V1ObjectMeta(
                                labels={"task-exec": item.uuid}),
                            spec=client.V1PodSpec(
                                restart_policy="Never",
                                containers=[container],
                                volumes=[volume, user_volume]))
                        spec = client.V1JobSpec(
                            template=template,
                            backoff_limit=0,
                            active_deadline_seconds=GLOBAL_TASK_TIME_LIMIT)
                        job = client.V1Job(
                            api_version="batch/v1",
                            kind="Job",
                            metadata=client.V1ObjectMeta(name=common_name),
                            spec=spec)
                        _ = api.create_namespaced_job(
                            namespace=KUBERNETES_NAMESPACE, body=job)
                        item.status = TASK.WAITING
                        item.save(force_update=True)
                    except ApiException as ex:
                        # API errors are only logged; the task status is
                        # left untouched
                        LOGGER.warning("Kubernetes ApiException %d: %s",
                                       ex.status, ex.reason)
                    except ValueError as ex:
                        LOGGER.warning(ex)
                        item.status = TASK.FAILED
                        item.save(force_update=True)
                    except Exception as ex:
                        LOGGER.error(ex)
                        item.status = TASK.FAILED
                        item.save(force_update=True)
        except Exception as ex:
            LOGGER.error(ex)
        if idle:
            time.sleep(1)

    while True:
        _actual_work()
        if self.test:
            break
def _job_monitor(self):
    """Synchronise task status with pod phase and delete finished jobs.

    Runs ``_actual_work`` in an endless loop; when ``self.test`` is truthy
    the loop stops after a single pass. A pass that changes no task status
    sleeps for one second.
    """
    api = CoreV1Api(get_kubernetes_api_client())
    job_api = BatchV1Api(get_kubernetes_api_client())

    def _actual_work():
        """Run one monitoring pass over the active and the deleting tasks."""
        idle = True
        try:
            for item in Task.objects.filter(
                    Q(status=TASK.WAITING) | Q(status=TASK.RUNNING)
                    | Q(status=TASK.PENDING)).order_by("create_time"):
                common_name = "task-exec-{}".format(item.uuid)
                try:
                    # look the pod up through its "task-exec" label
                    response = api.list_namespaced_pod(
                        namespace=KUBERNETES_NAMESPACE,
                        label_selector="task-exec={}".format(item.uuid))
                    if response.items:
                        # only the first pod returned is considered
                        status = response.items[0].status.phase
                        new_status = item.status
                        deleting = response.items[
                            0].metadata.deletion_timestamp
                        if status == 'Running':
                            new_status = TASK.RUNNING
                        elif status == 'Succeeded':
                            new_status = TASK.SUCCEEDED
                        elif status == 'Pending' and not deleting:
                            new_status = TASK.PENDING
                        elif status == 'Failed':
                            new_status = TASK.FAILED
                        if new_status != item.status:
                            if status in ('Succeeded', 'Failed'):
                                exit_code = None
                                detailed_status = response.items[
                                    0].status.container_statuses
                                if detailed_status and detailed_status[
                                        0].state.terminated:
                                    exit_code = detailed_status[
                                        0].state.terminated.exit_code
                                    LOGGER.debug(exit_code)
                                # `response` is rebound to the pod log text
                                response = api.read_namespaced_pod_log(
                                    name=response.items[0].metadata.name,
                                    namespace=KUBERNETES_NAMESPACE)
                                if response:
                                    item.logs = response
                                    item.logs_get = True
                                if exit_code:
                                    item.exit_code = exit_code
                                    if exit_code == 124:  # SIGTERM by TLE
                                        item.logs += "\nTime limit exceeded when executing job."
                                        new_status = TASK.TLE
                                    elif exit_code == 137:  # SIGKILL by MLE
                                        item.logs += "\nMemory limit exceeded when executing job."
                                        new_status = TASK.MLE
                                # the job is deleted once its pod finished
                                job_api.delete_namespaced_job(
                                    name=common_name,
                                    namespace=KUBERNETES_NAMESPACE,
                                    body=client.V1DeleteOptions(
                                        propagation_policy='Foreground',
                                        grace_period_seconds=3))
                            item.status = new_status
                            idle = False
                            item.save(force_update=True)
                    # else wait for a period because it takes time for corresponding pod to be initialized
                except ApiException as ex:
                    LOGGER.warning(ex)
            # second phase: remove the jobs of tasks marked for deletion
            for item in Task.objects.filter(status=TASK.DELETING):
                common_name = "task-exec-{}".format(item.uuid)
                try:
                    _ = job_api.delete_namespaced_job(
                        name=common_name,
                        namespace=KUBERNETES_NAMESPACE,
                        body=client.V1DeleteOptions(
                            propagation_policy='Foreground',
                            grace_period_seconds=5))
                    LOGGER.info(
                        "The kubernetes job of Task: %s deleted successfully",
                        item.uuid)
                    item.delete()
                except ApiException as ex:
                    # a 404 means the job is already gone, so the task
                    # record is removed as well
                    if ex.status == 404:
                        item.delete()
                    else:
                        LOGGER.warning("Kubernetes ApiException %d: %s",
                                       ex.status, ex.reason)
                except Exception as ex:
                    LOGGER.error(ex)
        except Exception as ex:
            LOGGER.error(ex)
        if idle:
            time.sleep(1)

    while True:
        _actual_work()
        if self.test:
            break
def get_kubernetes_api_clients() -> (CoreV1Api, BatchV1Api):
    """Create the core and batch API clients on one shared ``ApiClient``.

    The configuration is loaded through ``load_kubernetes_config`` using the
    ``KUBECONFIG`` environment variable (``None`` when it is unset).

    Returns:
        A ``(CoreV1Api, BatchV1Api)`` pair.
    """
    logger.info("Creating Kubernetes api clients")
    kubeconfig_path = os.environ.get("KUBECONFIG")
    load_kubernetes_config(kubeconfig_path)
    shared_client = ApiClient(kubernetes.client.Configuration())
    core_api = CoreV1Api(api_client=shared_client)
    batch_api = BatchV1Api(api_client=shared_client)
    return core_api, batch_api
def create_register_job(dataset, email, password) -> Optional[str]:
    """Create the ``register-<dataset>`` job in the ``default`` namespace.

    The job runs ``python main.py <dataset> <email> <password>`` in the
    registration pipeline image with the ``blueno-nfs`` claim mounted at
    ``/root``.

    Returns:
        ``None`` on success, otherwise the ``message`` field of the API
        error body.
    """
    config.load_incluster_config()
    batch_v1 = BatchV1Api()

    # job names cannot include underscores
    job_name = f'register-{dataset}'.replace('_', '-')

    # TODO: Handle datasets that require extra dependencies (matplotlib, etc.)
    # Also remove those extra dependencies from requirements.txt
    container = {
        "name": job_name,
        "image": "cyclotomic/blueno-registration-pipelines",
        "imagePullPolicy": "Always",
        "command": ["python"],
        "args": ["main.py", dataset, email, password],
        "env": [
            {"name": "FILESYSTEM_STORE_ROOT", "value": "/root"},
            {"name": "BLUENO_SERVER", "value": "http://blueno-server"},
        ],
        "volumeMounts": [
            {
                # We mount to /root because tensorflow
                # datasets still downloads data to
                # the default (~/tensorflow_datasets)
                "mountPath": "/root",
                "name": "nfs",
            }
        ],
        "resources": {"requests": {"memory": "2Gi"}},
    }
    pod_spec = {
        "containers": [container],
        "volumes": [
            {
                "name": "nfs",
                "persistentVolumeClaim": {"claimName": "blueno-nfs"},
            }
        ],
        # We avoid restarting on failure because a new
        # NFS mount is needed if the NFS server pod fails
        "restartPolicy": "Never",
    }
    manifest = {
        "apiVersion": "batch/v1",
        "kind": "Job",
        "metadata": {"name": job_name},
        "spec": {"template": {"spec": pod_spec}},
    }

    try:
        batch_v1.create_namespaced_job('default', manifest)
    except ApiException as e:
        return json.loads(e.body)['message']
    return None