def delete_deployment(api_instance: AppsV1Api, deployment_names: List[str], namespace: str) -> None:
    """
    Delete our deployments.

    :param api_instance: The api instance.
    :param deployment_names: The names of the deployments.
    :param namespace: The namespace in which the pods are deployed.
    """
    for deployment_name in deployment_names:
        api_response = api_instance.delete_namespaced_deployment(
            name=deployment_name,
            namespace=namespace,
            body=client.V1DeleteOptions(
                propagation_policy='Foreground',
                grace_period_seconds=5))
        print("Deployment deleted. status='%s'" % str(api_response.status))
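# Usage sketch (not part of the original source): it assumes a local kubeconfig and a
# deployment named "example-deployment" in the "default" namespace; both names are
# placeholders.
from typing import List

from kubernetes import client, config
from kubernetes.client import AppsV1Api

config.load_kube_config()
apps_api = AppsV1Api()
delete_deployment(apps_api, ["example-deployment"], "default")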
def get_resource_status(name: str, k8s_context: str, type: str, namespace: str = "orbit-system") -> Dict[str, Any]:
    _logger.debug("Retrieving Status for %s %s in Namespace %s", type, name, namespace)
    config.load_kube_config(context=k8s_context)
    apps = AppsV1Api()

    if type.lower() == "statefulset":
        api = apps.read_namespaced_stateful_set_status
    elif type.lower() == "deployment":
        api = apps.read_namespaced_deployment_status
    else:
        raise Exception("Unknown resource type")

    resp = api(name=name, namespace=namespace)
    resource = resp.to_dict()
    return cast(Dict[str, Any], resource.get("status"))
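# Usage sketch (not part of the original source): the context and resource names below
# are placeholders. The function loads the kubeconfig itself, so only the call is shown;
# the returned dict uses the snake_case keys produced by the client's to_dict().
status = get_resource_status(
    name="cert-manager",        # hypothetical deployment name
    k8s_context="my-cluster",   # hypothetical kubeconfig context
    type="deployment",
    namespace="orbit-system",
)
print("ready replicas:", status.get("ready_replicas"))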
def rollout_status_stateful_set(api: client.AppsV1Api, name: str, namespace: str, replicas_to_update: int) -> Tuple[str, bool]:
    # tbh this is mostly ported from Go into Python from:
    # https://github.com/kubernetes/kubernetes/blob/master/staging/src/k8s.io/kubectl/pkg/polymorphichelpers/rollout_status.go#L119-L152
    ss = api.read_namespaced_stateful_set(name=name, namespace=namespace)
    if ss.metadata.generation > ss.status.observed_generation:
        return (
            f"Waiting for statefulset {repr(name)} spec update to be observed...",
            False,
        )

    spec_replicas = ss.spec.replicas
    updated_replicas = ss.status.updated_replicas or 0
    ready_replicas = ss.status.ready_replicas or 0

    rollout_partition = None
    if ss.spec.update_strategy.rolling_update is not None:
        rollout_partition = ss.spec.update_strategy.rolling_update.partition

    if ready_replicas < spec_replicas:
        return (
            f"Waiting for statefulset {repr(name)} rollout to finish: {spec_replicas - ready_replicas} not ready",
            False,
        )

    if rollout_partition:
        if updated_replicas < replicas_to_update:
            return (
                f"Waiting for statefulset {repr(name)} partitioned rollout to finish: {updated_replicas} out of {replicas_to_update} replicas have been updated",
                False,
            )
        return (
            f"StatefulSet {repr(name)} successfully completed partitioned rollout. {updated_replicas} replicas updated.",
            True,
        )

    if ss.status.update_revision != ss.status.current_revision:
        return (
            f"Waiting for statefulset {repr(name)} rollout to finish: {updated_replicas} out of {replicas_to_update} replicas have been updated",
            False,
        )

    return f"StatefulSet {repr(name)} successfully rolled out", True
def current_image_digest_tag(layer: "Layer") -> dict:
    image_info = {"digest": None, "tag": None}
    load_opta_kube_config()
    apps_client = AppsV1Api()
    deployment_list: V1DeploymentList = apps_client.list_namespaced_deployment(
        namespace=layer.name)
    if len(deployment_list.items) > 0:
        deployment: V1Deployment = deployment_list.items[0]
        image_parts = deployment.spec.template.spec.containers[0].image.split("@")
        if len(image_parts) == 2:
            image_info["digest"] = image_parts[-1]
            return image_info
        image_parts = deployment.spec.template.spec.containers[0].image.split(":")
        if len(image_parts) == 2:
            image_info["tag"] = image_parts[-1]
            return image_info
    return image_info
def rollout_status_stateful_set(
    api: client.AppsV1Api,
    name: str,
    namespace: str,
) -> Tuple[str, bool]:
    # tbh this is mostly ported from Go into Python from:
    # https://github.com/kubernetes/kubernetes/blob/master/pkg/kubectl/rollout_status.go#L76-L92
    ss = api.read_namespaced_stateful_set(name=name, namespace=namespace)
    if ss.metadata.generation > ss.status.observed_generation:
        return (
            f"Waiting for statefulset {repr(name)} spec update to be observed...",
            False,
        )

    # TimedOutReason is added in a deployment when its newest replica set
    # fails to show any progress within the given deadline (progressDeadlineSeconds).
    for condition in ss.status.conditions or []:
        if condition.type == "Progressing":
            if condition.reason == "ProgressDeadlineExceeded":
                return f"statefulset {repr(name)} exceeded its progress deadline", False

    spec_replicas = ss.spec.replicas
    status_replicas = ss.status.replicas or 0
    updated_replicas = ss.status.updated_replicas or 0
    ready_replicas = ss.status.ready_replicas or 0

    if updated_replicas < spec_replicas:
        return (
            f"Waiting for statefulset {repr(name)} rollout to finish: {updated_replicas} out of {spec_replicas} new replicas have been updated...",
            False,
        )

    if status_replicas > updated_replicas:
        return (
            f"Waiting for statefulset {repr(name)} rollout to finish: {status_replicas - updated_replicas} old replicas are pending termination...",
            False,
        )

    if ready_replicas < updated_replicas:
        return (
            f"Waiting for statefulset {repr(name)} rollout to finish: {ready_replicas} of {updated_replicas} updated replicas are available...",
            False,
        )

    return f"StatefulSet {repr(name)} successfully rolled out", True
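# Polling sketch (not part of the original source): it wraps the second
# rollout_status_stateful_set variant above in a simple wait loop. The kubeconfig,
# statefulset name ("web"), and namespace ("default") are placeholders.
import time

from kubernetes import client, config

config.load_kube_config()
apps = client.AppsV1Api()

for _ in range(60):
    message, done = rollout_status_stateful_set(apps, "web", "default")
    print(message)
    if done:
        break
    time.sleep(5)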
def __init__(self, knowledge: Knowledge, kubeconfig_file: str):
    super().__init__(knowledge)
    config.load_kube_config(config_file=kubeconfig_file)
    self.basic_api = CoreV1Api()
    self.extensions_api = AppsV1Api()
def __init__(self, controller: "KubernetesController") -> None:
    super().__init__(controller)
    self.api = AppsV1Api(controller.client)
    self.outpost = self.controller.outpost
def get(client: AppsV1Api, log: BoundLogger, namespace: V1Namespace, dep: V1Deployment) -> Optional[V1Deployment]:
    return common_k8s.get_resource(
        lambda: client.list_namespaced_deployment(namespace=namespace.metadata.name),
        log,
        'deployment',
        dep.metadata.name)
def get_user_vnc_pod(uuid, user):
    extension_api = ExtensionsV1beta1Api(get_kubernetes_api_client())
    app_api = AppsV1Api(get_kubernetes_api_client())
    core_api = CoreV1Api(get_kubernetes_api_client())
    result = {}
    has_deployment = False
    user_vnc = None
    try:
        setting = TaskSettings.objects.get(uuid=uuid)
        user_vnc, _ = TaskVNCPod.objects.get_or_create(
            settings=setting,
            user=user,
            defaults={
                'settings': setting,
                'user': user,
                'pod_name': '',
                'url_path': '',
                'vnc_password': '',
                'expire_time': round(time.time() + USER_SPACE_POD_TIMEOUT)
            })
        _, created = TaskStorage.objects.get_or_create(
            settings=setting,
            user=user,
            defaults={
                'settings': setting,
                'user': user,
                'pod_name': ''
            })
        if user_vnc.pod_name:
            try:
                # check whether deployment is on
                has_deployment = True
                _ = app_api.read_namespaced_deployment(
                    name=user_vnc.pod_name, namespace=KUBERNETES_NAMESPACE)
            except ApiException as ex:
                if ex.status != 404:
                    LOGGER.exception(ex)
                else:
                    has_deployment = False
        selector = "task-{}-user-{}-vnc".format(setting.uuid, user.id)
        if not has_deployment:
            # create a new deployment
            conf = json.loads(setting.container_config)
            user_dir = "user_{}_task_{}".format(user.id, setting.id)
            dep_name = "task-vnc-{}-{}".format(setting.uuid, get_short_uuid())
            shared_pvc_name = "shared-{}".format(setting.uuid)
            shared_mount = client.V1VolumeMount(
                mount_path=conf['persistent_volume']['mount_path'],
                name=shared_pvc_name,
                read_only=True)
            user_storage_name = "user-{}".format(setting.uuid)
            user_mount = client.V1VolumeMount(
                mount_path='/cloud_scheduler_userspace',
                name=user_storage_name,
                sub_path=user_dir)
            username = '******'.format(user.username, setting.id)
            commands = [
                'set +e',
                'ln -s /cloud_scheduler_userspace /headless/Desktop/user_space',
                'useradd -u {uid} {username}'.format(uid=499 + user.id,
                                                     username=username),
                'usermod -d /headless {}'.format(username),
                "su -s /bin/bash -c '/dockerstartup/vnc_startup.sh -w' {}".format(username)
            ]
            if created:
                cp_command = 'cp -r {}/* /headless/Desktop/user_space'.format(
                    conf['persistent_volume']['mount_path'] + '/' +
                    conf['task_initial_file_path'])
                chown = 'chown -R {user}:{user} /headless/Desktop/user_space/*'.format(
                    user=username)
                commands.insert(4, cp_command)
                commands.insert(5, chown)
            vnc_pw = random_password(8)
            env_vnc_pw = client.V1EnvVar(name="VNC_PW", value=vnc_pw)
            container = client.V1Container(
                name='headless-vnc',
                image=config.USER_VNC_DOCKER_IMAGE,
                env=[env_vnc_pw],
                command=['/bin/bash'],
                args=['-c', ';'.join(commands)],
                volume_mounts=[shared_mount, user_mount])
            persistent_volume_claim = client.V1PersistentVolumeClaimVolumeSource(
                claim_name=conf['persistent_volume']['name'])
            user_volume_claim = client.V1PersistentVolumeClaimVolumeSource(
                claim_name=USERSPACE_NAME)
            shared_volume = client.V1Volume(
                name=shared_pvc_name,
                persistent_volume_claim=persistent_volume_claim)
            user_volume = client.V1Volume(
                name=user_storage_name,
                persistent_volume_claim=user_volume_claim)
            template = client.V1PodTemplateSpec(
                metadata=client.V1ObjectMeta(labels={'app': selector}),
                spec=client.V1PodSpec(containers=[container],
                                      volumes=[shared_volume, user_volume]))
            spec = client.V1DeploymentSpec(
                replicas=1,
                template=template,
                selector={'matchLabels': {'app': selector}})
            deployment = client.V1Deployment(
                kind='Deployment',
                metadata=client.V1ObjectMeta(
                    name=dep_name,
                    namespace=KUBERNETES_NAMESPACE,
                    labels={'app': selector}),
                spec=spec)
            app_api.create_namespaced_deployment(
                body=deployment, namespace=KUBERNETES_NAMESPACE)
            user_vnc.pod_name = dep_name
            user_vnc.vnc_password = vnc_pw
        if not user_vnc.url_path:
            # create service
            spec = client.V1ServiceSpec(
                external_name=selector,
                ports=[
                    client.V1ServicePort(name='websocket-port',
                                         port=config.USER_VNC_PORT,
                                         target_port=config.USER_VNC_PORT)
                ],
                selector={'app': selector},
                type='ClusterIP',
            )
            service = client.V1Service(
                spec=spec,
                metadata=client.V1ObjectMeta(
                    labels={'app': selector},
                    name=selector,
                    namespace=KUBERNETES_NAMESPACE))
            try:
                core_api.create_namespaced_service(
                    namespace=KUBERNETES_NAMESPACE, body=service)
            except ApiException as ex:
                if ex.status != 409:  # ignore conflict (duplicate)
                    LOGGER.exception(ex)
                    raise ApiException
            # create ingress
            url_path = str(get_uuid())
            spec = client.ExtensionsV1beta1IngressSpec(
                rules=[
                    client.ExtensionsV1beta1IngressRule(
                        host=config.USER_VNC_HOST,
                        http=client.ExtensionsV1beta1HTTPIngressRuleValue(paths=[
                            client.ExtensionsV1beta1HTTPIngressPath(
                                client.ExtensionsV1beta1IngressBackend(
                                    service_name=selector,
                                    service_port=config.USER_VNC_PORT),
                                path='/' + url_path)
                        ]))
                ],
                tls=[
                    client.ExtensionsV1beta1IngressTLS(
                        hosts=[config.USER_VNC_HOST],
                        secret_name=config.USER_VNC_TLS_SECRET)
                ],
            )
            ingress = client.ExtensionsV1beta1Ingress(
                metadata={
                    'name': selector,
                    'annotations': {
                        'kubernetes.io/ingress.class': 'nginx',
                        'nginx.ingress.kubernetes.io/proxy-read-timeout': '86400',
                        'nginx.ingress.kubernetes.io/proxy-send-timeout': '86400',
                    }
                },
                spec=spec)
            need_patch = False
            try:
                extension_api.create_namespaced_ingress(
                    KUBERNETES_NAMESPACE, ingress)
            except ApiException as ex:
                if ex.status != 409:  # ignore conflict (duplicate)
                    LOGGER.exception(ex)
                    raise ApiException
                else:
                    need_patch = True
            if need_patch:
                extension_api.patch_namespaced_ingress(
                    selector, KUBERNETES_NAMESPACE, ingress)
            user_vnc.url_path = url_path
        user_vnc.expire_time = round(time.time() + USER_SPACE_POD_TIMEOUT)
        result['url_path'] = user_vnc.url_path
        result['vnc_password'] = user_vnc.vnc_password
        result['deployment_name'] = user_vnc.pod_name
        result['vnc_host'] = config.USER_VNC_HOST
        result['vnc_port'] = config.USER_VNC_WS_PORT
        user_vnc.save(force_update=True)
    except ApiException as ex:
        LOGGER.exception(ex)
    except Exception as ex:
        LOGGER.exception(ex)
    finally:
        if user_vnc:
            user_vnc.save(force_update=True)
    return result
def _storage_pod_monitor(self):
    api = CoreV1Api(get_kubernetes_api_client())
    app_api = AppsV1Api(get_kubernetes_api_client())

    def _actual_work():
        idle = True
        try:
            for item in TaskStorage.objects.filter(
                    expire_time__gt=0).order_by('expire_time'):
                try:
                    if item.expire_time <= round(time.time()):
                        # release idled pod
                        idle = False
                        username = '******'.format(item.user.username,
                                                   item.settings.id)
                        pod = api.read_namespaced_pod(
                            name=item.pod_name,
                            namespace=KUBERNETES_NAMESPACE)
                        if pod is not None and pod.status is not None and pod.status.phase == 'Running':
                            response = stream(
                                api.connect_get_namespaced_pod_exec,
                                item.pod_name,
                                KUBERNETES_NAMESPACE,
                                command=[
                                    '/bin/bash', '-c',
                                    'unlink /home/{username};userdel {username}'.format(
                                        username=username)
                                ],
                                stderr=True,
                                stdin=False,
                                stdout=True,
                                tty=False)
                            LOGGER.debug(response)
                            pod.metadata.labels['occupied'] = str(
                                max(int(pod.metadata.labels['occupied']) - 1, 0))
                            api.patch_namespaced_pod(pod.metadata.name,
                                                     KUBERNETES_NAMESPACE, pod)
                        item.pod_name = ''
                        item.expire_time = 0
                        item.save(force_update=True)
                except ApiException as ex:
                    if ex.status == 404:
                        item.pod_name = ''
                        item.expire_time = 0
                        item.save(force_update=True)
                    else:
                        LOGGER.warning(ex)
            for item in TaskVNCPod.objects.filter(
                    expire_time__gt=0).order_by('expire_time'):
                try:
                    if item.expire_time <= round(time.time()):
                        idle = False
                        if item.pod_name:
                            app_api.delete_namespaced_deployment(
                                name=item.pod_name,
                                namespace=KUBERNETES_NAMESPACE)
                        item.pod_name = ''
                        item.expire_time = 0
                        item.url_path = ''
                        item.save(force_update=True)
                except ApiException as ex:
                    if ex.status != 404:
                        LOGGER.exception(ex)
                    else:
                        item.pod_name = ''
                        item.expire_time = 0
                        item.url_path = ''
                        item.save(force_update=True)
        except Exception as ex:
            LOGGER.warning(ex)
        if idle:
            time.sleep(1)

    while True:
        _actual_work()
        if self.test:
            break
finalizer_value = f"{babylon_domain}/lab-ui-manager"
lab_ui_url_annotation = f"{babylon_domain}/labUserInterfaceUrl"
lab_ui_urls_annotation = f"{babylon_domain}/labUserInterfaceUrls"
lab_ui_label = f"{babylon_domain}/labUserInterface"
owner_annotation = f"{babylon_domain}/owner"
owner_uid_label = f"{babylon_domain}/owner-uid"
requester_annotation = f"{babylon_domain}/requester"
resource_claim_name_label = f"{poolboy_domain}/resource-claim-name"
resource_claim_namespace_label = f"{poolboy_domain}/resource-claim-namespace"

if os.path.exists('/run/secrets/kubernetes.io/serviceaccount'):
    kubernetes.config.load_incluster_config()
else:
    kubernetes.config.load_kube_config()

apps_v1_api = AppsV1Api()
core_v1_api = CoreV1Api()
custom_objects_api = CustomObjectsApi()
rbac_authorization_v1_api = RbacAuthorizationV1Api()

openshift_ingress_domain = custom_objects_api.get_cluster_custom_object(
    'config.openshift.io', 'v1', 'ingresses', 'cluster')['spec']['domain']


class BookbagBuild:
    def __init__(
        self,
        definition=None,
        name=None,
        namespace=None,
        spec=None,
def test_flush_manager(options: Dict[str, Any], emulator: str, web: str):
    print("starting test")
    api = CoreV1Api()

    # max number of loops to run when waiting for kube actions to complete
    max_wait_loops = 20 if options["cluster"] is None else 60

    # server has invalid PUBSUB_EMULATOR, so that only flush can deliver messages
    static_pvs = options["cluster"] is None
    if static_pvs:
        create_static_pvs(api)

    print("waiting for pods to be healthy")
    for _ in range(max_wait_loops):
        if all(pod.status.phase == "Running"
               for pod in api.list_namespaced_pod("default").items):
            break
        time.sleep(1)
    else:
        assert False, "pods did not become healthy"

    # create a subscription to the defined topic
    print("creating pubsub subscription")
    os.environ["PUBSUB_EMULATOR_HOST"] = emulator
    sub_client = SubscriberClient()
    topic_path = "projects/{project}/topics/{topic}".format(**options)
    subscription_path = "projects/{project}/subscriptions/{topic}".format(**options)
    try:
        sub_client.create_subscription(subscription_path, topic_path, retry=None)
    except AlreadyExists:
        pass

    print("posting message 0")
    requests.post(web, headers={"host": "web"}, json={"id": 0}).raise_for_status()

    print("setting up race condition: attached pvc is also deleted")
    delete_pvcs(api)

    print("setting up race condition: pod unschedulable due to missing pvc")
    with pytest.raises(ApiException) as excinfo:
        restart_web_pods(api)
    assert excinfo.value.reason == "Conflict"

    print("posting message 1")
    with pytest.raises(requests.exceptions.ConnectionError):
        requests.post(web, headers={"host": "web"}, json={"id": 1}).raise_for_status()

    print("starting flush-manager")
    # TODO optionally run flush-manager via subprocess.Popen, to ensure testing
    # current code and enable code coverage
    _sa = kube_resource("kube/flush-manager.sa.yml", **options)
    _cluster_role = kube_resource("kube/flush-manager.clusterrole.yml", **options)
    _cluster_role_binding = kube_resource(
        "kube/flush-manager.clusterrolebinding.yml", **options)
    _role = kube_resource("kube/flush-manager.role.yml", **options)
    _role_binding = kube_resource("kube/flush-manager.rolebinding.yml", **options)
    _deploy = kube_resource("kube/flush-manager.deploy.yml", **options)

    with _sa, _cluster_role, _cluster_role_binding, _role, _role_binding, _deploy:
        print("posting message 2 until successful")
        for i in range(max_wait_loops):
            try:
                requests.post(web, headers={"host": "web"},
                              json={"id": 2}).raise_for_status()
            except requests.exceptions.ConnectionError:
                if i > 0 and static_pvs:
                    create_static_pvs(api)
            else:
                break
            time.sleep(1)
        else:
            assert False, "pod did not recover"

        # scale to 0 pods
        print("scaling web to 0 pods")
        AppsV1Api().patch_namespaced_stateful_set_scale(
            name="web",
            namespace="default",
            body=V1StatefulSet(api_version="apps/v1",
                               kind="StatefulSet",
                               spec=dict(replicas=0)),
        )

        # wait for no pvcs
        print("waiting for cleanup to complete")
        for _ in range(max_wait_loops):
            if not api.list_persistent_volume().items:
                break
            time.sleep(1)
        else:
            print("pvs were not cleaned up")
            assert [] == api.list_persistent_volume().items

        # assert jobs and pvcs also deleted
        assert [] == list_pvcs(api)
        assert [] == BatchV1Api().list_namespaced_job("default").items

        # assert received message id 0 and 2
        assert [b'{"id": 0}', b'{"id": 2}'] == [
            element.message.data
            for element in sub_client.pull(subscription_path, 2).received_messages
        ]
def apps_api(self) -> AppsV1Api:
    api_client = new_client_from_config(context=self.context())
    return AppsV1Api(api_client)
from github_runner_authorizer.config import get_config

COMPONENT_KEY = 'component'
COMPONENT_VALUE = 'github-runner'
ENV_ACCESS_TOKEN = 'ACCESS_TOKEN'
TIME_FORMAT = '%Y-%m-%dT%H:%M:%S%z'

config = get_config()
APP_ID = config('app_id')
INSTALLATION_ID = config('installation_id')
NAMESPACE = config('namespace')
PRIVATE_KEY = config('private_key')

load_incluster_config()
client = AppsV1Api()

logger = Logger(__name__)
logger.addHandler(StreamHandler(stdout))


def patch_template(patch_container):
    return {
        'spec': {
            'template': {
                'spec': {
                    'containers': [
                        patch_container
                    ]
                }
            }
        }
    }
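# Usage sketch (not part of the original source): patch_template() builds a strategic
# merge patch, so it can be passed straight to patch_namespaced_deployment on the
# AppsV1Api instance created above (named `client` in this module). The deployment
# name, container name, and image tag below are placeholders.
patch = patch_template({
    'name': 'runner',                      # must match the container to update
    'image': 'ghcr.io/example/runner:v2',  # hypothetical image
})
client.patch_namespaced_deployment(
    name='github-runner', namespace=NAMESPACE, body=patch)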
def create_manifest(self, component: str):
    """
    Creates manifests for a component located in mlstack/manifests.

    Uses a clever getattr() trick to avoid hardcoding everything.
    Will create kubernetes apps in the following order:

        - PersistentVolumeClaim
        - PersistentVolume
        - ConfigMap
        - Deployment
        - Secret
        - Service

    Args:
        component: The mlstack component whose manifests should be created.
    """
    warning_message = "KubeApiException on {kind} `{name}`. \n Exception:\n"
    for file in glob.glob(str(Path(self.manifests_dir, component)) + "/*.yaml"):
        generator = read_yaml(file)
        for body in generator:
            if body:
                kind = body.get("kind")
                name = body.get("metadata").get("name")
                method_ext = "_".join(
                    val.lower() for val in re.findall("[A-Z][^A-Z]*", kind))
                warning_message = "KubeApiException on {kind} `{name}`".format(
                    kind=kind, name=name)

                if kind in [
                        "PersistentVolumeClaim",
                        "ConfigMap",
                        "Service",
                        "Secret",
                ]:
                    try:
                        method = "create_namespaced_{ext}".format(ext=method_ext)
                        getattr(self, method)(namespace="mlstack", body=body)
                        logger.info("%s `%s` created", kind, name)
                    except KubeApiException as exception:
                        logger.warning(warning_message, exception)

                if kind in ["PersistentVolume"]:
                    try:
                        getattr(self, "create_persistent_volume")(body=body)
                        logger.info("%s `%s` created", kind, name)
                    except KubeApiException as exception:
                        logger.warning(warning_message, exception)

                if kind in ["Deployment"]:
                    try:
                        AppsV1Api().create_namespaced_deployment(
                            namespace="mlstack", body=body)
                        logger.info("%s `%s` created", kind, name)
                    except KubeApiException as exception:
                        logger.warning(warning_message, exception)
def __init__(self) -> None:
    """Create a new client for the cluster we are running in."""
    self.namespace = Path(NAMESPACE_FILE).read_text().strip()
    self.core_api = core_v1_api.CoreV1Api()
    self.apps_api = AppsV1Api()
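# Construction sketch (not part of the original source): a client like the one above
# only works inside a pod, so the in-cluster config must be loaded before the APIs are
# created; NAMESPACE_FILE conventionally resolves to
# /var/run/secrets/kubernetes.io/serviceaccount/namespace. "InClusterClient" is a
# hypothetical name for the class that defines the __init__ above.
from kubernetes import config

config.load_incluster_config()
k8s = InClusterClient()
for deployment in k8s.apps_api.list_namespaced_deployment(k8s.namespace).items:
    print(deployment.metadata.name)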