def initialize_clients(kubeconfig_path):
    global cli
    config.load_kube_config(kubeconfig_path)
    cli = client.CoreV1Api()
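# Usage sketch (hedged): assumes the snippet above relies on
# `from kubernetes import client, config` and the module-level `cli`; the
# kubeconfig path below is a placeholder, not part of the original code.
initialize_clients("/home/user/.kube/config")
pods = cli.list_pod_for_all_namespaces(watch=False)
for pod in pods.items:
    print(pod.metadata.namespace, pod.metadata.name)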
async def k8s_register(app):
    log.info("k8s_register")
    # TBD - find a more elegant way to avoid this warning
    import urllib3
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    # get the config from within the cluster and set it as the default config
    # for all new clients
    k8s_config.load_incluster_config()
    c = k8s_client.Configuration()  # get a copy of the default config
    c.verify_ssl = False  # disable TLS verification in that config
    k8s_client.Configuration.set_default(c)  # make it the default for all new clients

    v1 = k8s_client.CoreV1Api()
    # TBD - use the async version
    ret = v1.list_pod_for_all_namespaces(watch=False)
    pod_ips = []
    sn_urls = {}
    dn_urls = {}
    for i in ret.items:
        pod_ip = i.status.pod_ip
        if not pod_ip:
            continue
        labels = i.metadata.labels
        if labels and "app" in labels and labels["app"] == "hsds":
            log.info(f"hsds pod - ip: {pod_ip}")
            pod_ips.append(pod_ip)

    if not pod_ips:
        log.error("Expected to find at least one hsds pod")
        return
    pod_ips.sort()  # for assigning node numbers
    node_count = len(pod_ips)
    ready_count = 0
    this_node_id = app["id"]
    sn_port = config.get("sn_port")
    dn_port = config.get("dn_port")

    for node_number in range(node_count):
        for port in (sn_port, dn_port):
            # send an info request to the node
            pod_ip = pod_ips[node_number]
            url = f"http://{pod_ip}:{port}"
            if port == sn_port:
                sn_urls[node_number] = url
            else:
                dn_urls[node_number] = url

            info_rsp = await get_info(app, url)
            if not info_rsp:
                # timeout or other failure
                continue
            if "node" not in info_rsp:
                log.error("expected to find node key in info resp")
                continue
            node_rsp = info_rsp["node"]
            log.debug(f"got info resp: {node_rsp}")
            for key in ("type", "id", "node_number", "node_count"):
                if key not in node_rsp:
                    log.error(f"unexpected node state, expected to find key: {key}")
                    continue
            if node_rsp["type"] not in ("sn", "dn"):
                log.error(f"expected node type to be sn or dn, type is {node_rsp['type']}")
                continue

            node_id = node_rsp["id"]
            if node_id == this_node_id:
                # set node_number and node_count
                log.debug("got info_rsp for this node")
                if app["node_number"] != node_number:
                    old_number = app["node_number"]
                    log.info(f"node_number has changed - old value was {old_number} new number is {node_number}")
                    if app["node_type"] == "dn":
                        meta_cache = app["meta_cache"]
                        chunk_cache = app["chunk_cache"]
                        if meta_cache.dirtyCount > 0 or chunk_cache.dirtyCount > 0:
                            # set the node state to waiting until the caches have been flushed
                            if app["node_state"] == "READY":
                                log.info("setting node_state to waiting while cache is flushing")
                                app["node_state"] = "WAITING"
                        else:
                            meta_cache.clearCache()
                            chunk_cache.clearCache()
                            log.info(f"node number was: {old_number} setting to: {node_number}")
                            app["node_number"] = node_number
                            app["register_time"] = time.time()
                    else:
                        # SN nodes can update node_number immediately
                        log.info(f"node number was: {old_number} setting to: {node_number}")
                        app["node_number"] = node_number
                        app["register_time"] = time.time()
                if app["node_count"] != node_count:
                    old_count = app["node_count"]
                    log.info(f"node count was: {old_count} setting to: {node_count}")
                    app["node_count"] = node_count

            if node_number == node_rsp["node_number"] and node_count == node_rsp["node_count"]:
                ready_count += 1
                log.debug(f"incremented ready_count to {ready_count}")
            else:
                log.info(f"differing node_number/node_count for url: {url}")
                log.info(f"expected node_number: {node_number} actual: {node_rsp['node_number']}")
                log.info(f"expected node_count: {node_count} actual: {node_rsp['node_count']}")

    if ready_count == node_count * 2:
        if app["node_state"] != "READY":
            log.info("setting node state to READY")
            app["node_state"] = "READY"
        app["node_count"] = node_count
        app["sn_urls"] = sn_urls
        app["dn_urls"] = dn_urls
    else:
        log.info(f"not all pods ready - ready_count: {ready_count}/{node_count * 2}")
        if app["node_state"] == "READY":
            log.info("setting node state to SCALING")
            app["node_state"] = "SCALING"
def test_kube_state_metrics(self):
    v1 = client.CoreV1Api()
    pod = v1.list_namespaced_pod(
        "cnvrg", label_selector="app.kubernetes.io/name=kube-state-metrics")
    self.assertEqual(1, len(pod.items))
    self.assertIsNotNone(pod.items[0].status.conditions[0].message)
    self.assertIn("nodes are available", pod.items[0].status.conditions[0].message)
def has_configmap(name, namespace="default"):
    return has_resource(name, "ConfigMap", kube_client.CoreV1Api(), namespace)
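# `has_resource` is not shown above; a minimal sketch of what it might look like
# for the ConfigMap case, assuming `kube_client` is `kubernetes.client` and that
# an HTTP 404 from the API means the resource is absent.
from kubernetes.client.rest import ApiException

def has_resource(name, kind, api, namespace="default"):
    readers = {"ConfigMap": api.read_namespaced_config_map}  # extend per kind as needed
    try:
        readers[kind](name, namespace)
        return True
    except ApiException as exc:
        if exc.status == 404:
            return False
        raise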
def create_namespace(name):
    api = kube_client.CoreV1Api()
    return api.create_namespace(
        body=kube_client.V1Namespace(metadata=kube_client.V1ObjectMeta(name=name)))
def wait():
    try:
        name = environ.get("RD_CONFIG_NAME")
        namespace = environ.get("RD_CONFIG_NAMESPACE")
        retries = int(environ.get("RD_CONFIG_RETRIES"))
        sleep = float(environ.get("RD_CONFIG_SLEEP"))
        show_log = environ.get("RD_CONFIG_SHOW_LOG") == "true"

        # Poll for completion if retries
        retries_count = 0
        completed = False
        while True:
            common.connect()
            batch_v1 = client.BatchV1Api()
            core_v1 = client.CoreV1Api()

            api_response = batch_v1.read_namespaced_job_status(name, namespace, pretty="True")
            log.debug(api_response)
            # for condition in api_response.status.conditions:
            #     log.info(condition.type)

            retries_count = retries_count + 1
            if retries_count > retries:
                log.error("Number of retries exceeded")
                completed = True

            if api_response.status.conditions:
                for condition in api_response.status.conditions:
                    if condition.type == "Failed":
                        completed = True

            if api_response.status.completion_time:
                completed = True

            if show_log:
                log.debug("Searching for pod associated with job")
                schedule_start_time = time.time()
                schedule_timeout = 600
                while True:
                    try:
                        pod_list = core_v1.list_namespaced_pod(
                            namespace, label_selector="job-name==" + name)
                        first_item = pod_list.items[0]
                        pod_name = first_item.metadata.name
                        break
                    except IndexError:
                        log.warning("Still waiting for pod to be scheduled")
                        time.sleep(60)
                    if schedule_timeout and time.time() - schedule_start_time > schedule_timeout:  # pragma: no cover
                        raise TimeoutError

                log.info("Fetching logs from pod: {0}".format(pod_name))
                # time.sleep(15)
                log.info("========================== job log start ==========================")

                start_time = time.time()
                timeout = 300
                while True:
                    try:
                        core_v1.read_namespaced_pod_log(name=pod_name, namespace=namespace)
                        break
                    except ApiException as ex:
                        log.warning("Pod is not ready, status: %d", ex.status)
                        if ex.status == 200:
                            break
                        else:
                            log.info("waiting for log")
                            time.sleep(15)
                    if timeout and time.time() - start_time > timeout:  # pragma: no cover
                        raise TimeoutError

                w = watch.Watch()
                for line in w.stream(core_v1.read_namespaced_pod_log,
                                     name=pod_name,
                                     namespace=namespace):
                    print(line.encode('ascii', 'ignore'))

                log.info("=========================== job log end ===========================")

            if completed:
                break

            log.info("Waiting for job completion")
            show_log = False
            time.sleep(sleep)

        if api_response.status.succeeded:
            log.info("Job succeeded")
            sys.exit(0)
        else:
            log.info("Job failed")
            sys.exit(1)

    except ApiException:
        log.exception("Exception waiting for job:")
        sys.exit(1)
def __init__(self, use_endpoints=False):
    self._api = k8s_client.CoreV1Api()
    self._api.api_client.user_agent = USER_AGENT
    self._api.api_client.rest_client.pool_manager.connection_pool_kw['maxsize'] = 10
    self._request_timeout = None
    self._use_endpoints = use_endpoints
def __init__(
    self,
    image: str,
    namespace: str = "default",
    nodes_per_block: int = 1,
    init_blocks: int = 0,
    min_blocks: int = 0,
    max_blocks: int = 10,
    max_cpu: float = 2,
    max_mem: str = "500Mi",
    init_cpu: float = 1,
    init_mem: str = "250Mi",
    parallelism: float = 1,
    worker_init: str = "",
    pod_name: Optional[str] = None,
    user_id: Optional[str] = None,
    group_id: Optional[str] = None,
    run_as_non_root: bool = False,
    secret: Optional[str] = None,
    incluster_config: Optional[bool] = True,
    persistent_volumes: Optional[List[Tuple[str, str]]] = None,
) -> None:
    if persistent_volumes is None:
        persistent_volumes = []
    if not _kubernetes_enabled:
        raise OptionalModuleMissing(
            ["kubernetes"],
            "Kubernetes provider requires kubernetes module and config.",
        )
    if incluster_config:
        config.load_incluster_config()
    else:
        config.load_kube_config()

    self.namespace = namespace
    self.image = image
    self.nodes_per_block = nodes_per_block
    self.init_blocks = init_blocks
    # The Kubernetes provider doesn't really know which pods by container to
    # initialize, so it is best to set init_blocks to 0.
    assert init_blocks == 0
    self.min_blocks = min_blocks
    self.max_blocks = max_blocks
    self.max_cpu = max_cpu
    self.max_mem = max_mem
    self.init_cpu = init_cpu
    self.init_mem = init_mem
    self.parallelism = parallelism
    self.worker_init = worker_init
    self.secret = secret
    self.incluster_config = incluster_config
    self.pod_name = pod_name
    self.user_id = user_id
    self.group_id = group_id
    self.run_as_non_root = run_as_non_root
    self.persistent_volumes = persistent_volumes

    self.kube_client = client.CoreV1Api()

    # Dictionary that keeps track of jobs, keyed on job_id
    self.resources_by_pod_name = {}
    # Dictionary that keeps track of jobs, keyed on task_type
    self.resources_by_task_type = {}
def list_deployments():
    v1 = client.CoreV1Api()
    print("Listing pods with their IPs:")
    ret = v1.list_pod_for_all_namespaces(watch=False)
    return filter_results(ret, namespace)
def __init__(self):
    config.load_incluster_config()
    self.api = client.CoreV1Api()
#!/usr/bin/python3.5
import socket
import time
import argparse
import os, sys, stat
import json
import logging

from kubernetes import client, config
from kubernetes.stream import stream

config.load_incluster_config()
k8s_coreapi = client.CoreV1Api()

logger = logging.getLogger("Metrics")
formatter = logging.Formatter('%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s')
console_handler = logging.StreamHandler(sys.stdout)
console_handler.formatter = formatter
logger.addHandler(console_handler)
logger.setLevel(logging.DEBUG)


def get_util_memory(pod_name, namespace,
                    command=["/bin/sh", "-c",
                             "ps -aux|awk '{print$3\" \"$4\" \"$11}';nvidia-smi | grep % |awk '{print$9\" \"$11\" \"$13}'"],
                    ps="python"):
    cpu = None
    memory = None
    gpu_util_memory = []
    try:
        resp = stream(k8s_coreapi.connect_get_namespaced_pod_exec,
                      pod_name,
                      namespace,
                      command=command,
                      stderr=True, stdin=False,
                      stdout=True, tty=False)
        resp = resp.splitlines()
        for r in resp:
def test_services(self):
    client.Configuration.set_default(self.configuration)
    v1 = client.CoreV1Api()
    services = v1.list_namespaced_service(self.namespace)
    self.assertGreaterEqual(len(services.items), 6)
def main():
    if os.environ.get('RD_CONFIG_DEBUG') == 'true':
        log.setLevel(logging.DEBUG)
        log.debug("Log level configured for DEBUG")

    data = {}
    data["name"] = os.environ.get('RD_CONFIG_NAME')
    data["type"] = os.environ.get('RD_CONFIG_TYPE')
    data["namespace"] = os.environ.get('RD_CONFIG_NAMESPACE')

    common.connect()

    try:
        if data["type"] == "Deployment":
            apps_v1 = client.AppsV1Api()
            resp = apps_v1.delete_namespaced_deployment(
                name=data["name"],
                namespace=data["namespace"],
                body=client.V1DeleteOptions(propagation_policy='Foreground',
                                            grace_period_seconds=5),
                pretty="true")

        if data["type"] == "ConfigMap":
            apps_v1 = client.CoreV1Api()
            resp = apps_v1.delete_namespaced_config_map(
                name=data["name"],
                namespace=data["namespace"],
                body=client.V1DeleteOptions(propagation_policy='Foreground',
                                            grace_period_seconds=5),
                pretty="true")

        if data["type"] == "StatefulSet":
            apps_v1 = client.AppsV1Api()
            resp = apps_v1.delete_namespaced_stateful_set(
                name=data["name"],
                namespace=data["namespace"],
                body=client.V1DeleteOptions(propagation_policy='Foreground',
                                            grace_period_seconds=5),
                pretty="true")

        if data["type"] == "Service":
            apps_v1 = client.CoreV1Api()
            resp = apps_v1.delete_namespaced_service(
                namespace=data["namespace"],
                name=data["name"],
                body=client.V1DeleteOptions(propagation_policy='Foreground',
                                            grace_period_seconds=5),
                pretty="true")

        if data["type"] == "Ingress":
            apps_v1 = client.ExtensionsV1beta1Api()
            body = client.V1DeleteOptions()
            resp = apps_v1.delete_namespaced_ingress(
                name=data["name"],
                namespace=data["namespace"],
                body=body,
                pretty="true")

        if data["type"] == "Job":
            api_instance = client.BatchV1Api()
            resp = api_instance.delete_namespaced_job(
                name=data["name"],
                namespace=data["namespace"],
                body=client.V1DeleteOptions(api_version='v1',
                                            kind="DeleteOptions",
                                            propagation_policy="Background"),
                pretty="true")

        if data["type"] == "StorageClass":
            api_instance = client.StorageV1Api()
            resp = api_instance.delete_storage_class(
                name=data["name"],
                body=client.V1DeleteOptions(),
                pretty="true")

        if data["type"] == "PersistentVolumeClaim":
            api_instance = client.CoreV1Api()
            resp = api_instance.delete_namespaced_persistent_volume_claim(
                namespace=data["namespace"],
                body=client.V1DeleteOptions(),
                name=data["name"],
                pretty="true")

        if data["type"] == "Secret":
            api_instance = client.CoreV1Api()
            resp = api_instance.delete_namespaced_secret(
                namespace=data["namespace"],
                name=data["name"],
                body=client.V1DeleteOptions(),
                pretty="true")

        if data["type"] == "PersistentVolume":
            api_instance = client.CoreV1Api()
            resp = api_instance.delete_persistent_volume(
                name=data["name"],
                body=client.V1DeleteOptions(),
                pretty="true")

        print(common.parseJson(resp))

    except ApiException as e:
        log.error("Exception deleting resource: %s\n" % e)
        sys.exit(1)
def k8s_api(self):
    if not self._k8s_api:
        self._k8s_api = client.CoreV1Api(self.api_client)
    return self._k8s_api
db_nodes = self.test_instance.env_instance.node_instances[DbNode]
assert len(db_nodes) == 1
mysql_probe = 'mysql -h {} -P 4000 -uroot -e "select tidb_version();"'.format(db_nodes[0].pod_ip)
resp = stream(self.test_instance.env_instance.api_core_v1.connect_get_namespaced_pod_exec,
              namespace=DEFAULT_NAMESPACE,
              name=db_nodes[0].pod_name,
              command=['bash', '-c', mysql_probe],
              stderr=True, stdin=False, stdout=True, tty=False)
self.assert_('tidb_version' in resp, resp)


class FakeTest(Test):
    def test_actions(self) -> List[Type[TestAction]]:
        return [
            partial(SleepAction, sleep_interval=10),
            FakeAction,
        ]

    @staticmethod
    def env() -> Type[TestBed]:
        return FakeEnv

    def _init_env(self, env_init_interval: Optional[int] = 1):
        super()._init_env(env_init_interval)


if __name__ == '__main__':
    config.load_kube_config()
    FakeTest(api_core_v1=client.CoreV1Api(), api_apps_v1=client.AppsV1Api()).start()
def __init__(self,
             kubernetes_proxy_addr=None,
             redis_ip=None,
             redis_port=6379,
             useInternalIP=False,
             namespace='default',
             create_namespace_if_not_exists=False):
    """
    Parameters
    ----------
    kubernetes_proxy_addr : str, optional
        The proxy address if you are proxying connections locally using
        ``kubectl proxy``. If this argument is provided, Clipper will construct
        the appropriate proxy URLs for accessing Clipper's Kubernetes services,
        rather than using the API server address provided in your kube config.
    redis_ip : str, optional
        The address of a running Redis cluster. If set to None, Clipper will
        start a Redis deployment for you.
    redis_port : int, optional
        The Redis port. If ``redis_ip`` is set to None, Clipper will start
        Redis on this port. If ``redis_ip`` is provided, Clipper will connect
        to Redis on this port.
    useInternalIP : bool, optional
        Use the internal IP of the K8s nodes. If ``useInternalIP`` is set to
        False, Clipper will throw an exception if none of the nodes have an
        ExternalDNS. If ``useInternalIP`` is set to True, Clipper will use the
        internal IP of a K8s node when no ExternalDNS exists for any of the
        nodes.
    namespace : str, optional
        The Kubernetes namespace to use. If this argument is provided, all
        Clipper artifacts and resources will be created in this K8s namespace.
        If not provided, the "default" namespace is used.
    create_namespace_if_not_exists : bool, optional
        Create a K8s namespace if the namespace doesn't already exist. If this
        argument is provided and the K8s namespace does not exist, a new K8s
        namespace will be created.

    Note
    ----
    Clipper stores all persistent configuration state (such as registered
    application and model information) in Redis. If you want Clipper to be
    durable and able to recover from failures, we recommend configuring your
    own persistent and replicated Redis cluster rather than letting Clipper
    launch one for you.
    """
    if kubernetes_proxy_addr is not None:
        self.kubernetes_proxy_addr = kubernetes_proxy_addr
        self.use_k8s_proxy = True
    else:
        self.use_k8s_proxy = False

    self.redis_ip = redis_ip
    self.redis_port = redis_port
    self.useInternalIP = useInternalIP

    config.load_kube_config()
    configuration.assert_hostname = False
    self._k8s_v1 = client.CoreV1Api()
    self._k8s_beta = client.ExtensionsV1beta1Api()

    # Check if the namespace exists; if the create flag is set, create the
    # namespace, otherwise raise an error.
    namespaces = []
    for ns in self._k8s_v1.list_namespace().items:
        namespaces.append(ns.metadata.name)

    if namespace in namespaces:
        self.k8s_namespace = namespace
    elif create_namespace_if_not_exists:
        body = client.V1Namespace()
        body.metadata = client.V1ObjectMeta(name=namespace)
        try:
            self._k8s_v1.create_namespace(body)
        except ApiException as e:
            logging.error(
                "Exception creating Kubernetes namespace: {}".format(e))
            raise ClipperException(
                "Could not create Kubernetes namespace. "
                "Reason: {}".format(e.reason))
        self.k8s_namespace = namespace
    else:
        msg = "Error connecting to Kubernetes cluster. Namespace does not exist"
        logger.error(msg)
        raise ClipperException(msg)
def cluster(options: Dict[str, Any]) -> Iterator[Optional[str]]:
    if options["cluster"] is None:
        load_kube_config()
        if options["project"] is None:
            options["project"] = "test"
        yield None
    else:
        credentials, project = google.auth.default()
        if options["project"] is None:
            options["project"] = project
        gke = ClusterManagerClient(credentials=credentials)

        # create cluster
        parent = f"projects/{options['project']}/locations/{options['location']}"
        name = f"{parent}/clusters/{options['cluster']}"
        try:
            operation = gke.create_cluster(
                cluster=Cluster(
                    name=options["cluster"],
                    logging_service=None,
                    monitoring_service=None,
                    node_pools=[
                        NodePool(
                            initial_node_count=1,
                            name="test",
                            config={
                                "preemptible": options["preemptible"],
                                "machine_type": "n1-highcpu-2",
                            },
                        )
                    ],
                ),
                parent=parent,
            )
        except AlreadyExists:
            pass
        else:
            # wait for the create operation to complete
            request = GetOperationRequest(
                name=operation.self_link.split("projects").pop())
            while gke.get_operation(request).status <= Operation.Status.RUNNING:
                time.sleep(15)

        # set kube credentials
        cluster = gke.get_cluster(name=name)
        config = kube.Configuration()
        config.host = f"https://{cluster.endpoint}:443"
        config.verify_ssl = False
        config.api_key = {"authorization": f"Bearer {credentials.token}"}
        kube.Configuration.set_default(config)

        # delete the cluster after the test completes
        try:
            yield options["cluster"]
        finally:
            try:
                # delete persistent volumes because gke cluster delete won't do it
                # https://cloud.google.com/kubernetes-engine/docs/how-to/deleting-a-cluster#overview
                api = kube.CoreV1Api()
                for pv in api.list_persistent_volume().items:
                    try:
                        pv.spec.persistent_volume_reclaim_policy = "Delete"
                        api.patch_persistent_volume(
                            name=pv.metadata.name,
                            body=pv,
                        )
                        api.delete_persistent_volume(
                            name=pv.metadata.name,
                            grace_period_seconds=0,
                            propagation_policy="Foreground",
                        )
                    except ApiException:
                        print_exc()

                # wait for pv deletes to complete
                for _ in range(60):
                    if not api.list_persistent_volume().items:
                        break
                    time.sleep(1)
                else:
                    print("FAILED TO CLEANUP PERSISTENT VOLUMES")
            finally:
                gke.delete_cluster(name=name)
async def watchPods(self, websocket=None, recev_data=None):
    print("watch start")
    config.load_kube_config()
    v1 = client.CoreV1Api()
    v1.api_client.configuration.verify_ssl = False
    w = watch.Watch()
    ingress_apply = False
    for event in w.stream(v1.list_pod_for_all_namespaces, timeout_seconds=30):
        # try:
        ip_minikube = subprocess.run(["minikube ip"],
                                     shell=True,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
        ip_minikube = ip_minikube.stdout
        ip_minikube = ip_minikube.decode("utf-8")
        ip_minikube = str(ip_minikube)
        ip_minikube = ip_minikube.replace("\n", "")

        if ingress_apply == False:
            data_send_ip = self.sendOutput(
                self.cloneData(
                    recev_data, {
                        "output": ip_minikube,
                        "action": "output-ingress",
                        "finish_output": "true"
                    }))
            await websocket.send(data_send_ip)
            ingress_apply = True

        if self.stop_watch_pods == True:
            w.stop()
        else:
            event_object = event['object'].to_dict()
            action = event["type"]
            if not event_object["metadata"]["generate_name"] == None and \
                    not event_object["metadata"]["generate_name"] == "None":
                if 'component' in event_object["metadata"]["labels"]:
                    name = event_object["metadata"]["labels"]["component"]
                else:
                    name = event_object["metadata"]["generate_name"]

                if "name" in event_object["metadata"]:
                    name_hash = event_object["metadata"]["name"]
                else:
                    name_hash = event_object["metadata"]["generate_name"]

                hash_object = ""
                if "pod-template-hash" in event_object["metadata"]["labels"]:
                    hash_object = event_object["metadata"]["labels"]["pod-template-hash"]

                phase = event_object["status"]["phase"]
                conditions = event_object["status"]["conditions"]
                if not conditions == None:
                    l_conditions = len(conditions)
                    message_condition = conditions[l_conditions - 1]["message"]
                    reason_condition = conditions[l_conditions - 1]["reason"]
                    type_condition = conditions[l_conditions - 1]["type"]
                    result_event = {
                        "name": name,
                        "action": action,
                        "name_hash": name_hash,
                        "hash": hash_object,
                        "phase": phase,
                        "message": message_condition,
                        "reason": reason_condition,
                        "type": type_condition
                    }
                    data_send_event = self.sendOutput(
                        self.cloneData(
                            recev_data, {
                                "output": result_event,
                                "action": "output-events",
                                "finish_output": "false"
                            }))
                    await websocket.send(data_send_event)
def init():
    """Initialize and get client"""
    config.load_kube_config()
    v1 = client.CoreV1Api()
    return v1
def get_namespaces():
    config.load_incluster_config()
    v1 = client.CoreV1Api()
    ns = v1.list_namespace()
    return ns
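# Usage sketch (hedged): assumes this runs in-cluster with a service account that
# may list namespaces. `list_namespace()` returns a V1NamespaceList, so the
# individual names live under `.items[].metadata.name`.
for ns in get_namespaces().items:
    print(ns.metadata.name, ns.status.phase)  # e.g. "default Active"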
from kubernetes import client, config


def list_pods(v1):
    print("\nListing pods in namespace: default")
    ret = v1.list_namespaced_pod(namespace="default")
    for i in ret.items:
        print("%s %s %s" % (i.status.pod_ip, i.metadata.namespace, i.metadata.name))


config.load_incluster_config()

pod = client.V1Pod()
v1 = client.CoreV1Api()

container = client.V1Container(name="busybox")
container.image = "busybox"
container.args = ["sleep", "3600"]
container.name = "busybox"

spec = client.V1PodSpec(containers=[container])
pod.metadata = client.V1ObjectMeta(name="busybox")
pod.spec = spec

list_pods(v1)
v1.create_namespaced_pod(namespace="default", body=pod)
list_pods(v1)
def config1():
    config.load_kube_config(config_file='/root/zcc/hello/config')
    v1 = client.CoreV1Api()
    return v1
def delete_configmap(name, namespace="default", timeout=K8S_DELETE_TIMEOUT):
    return delete_resource(name, "ConfigMap", kube_client.CoreV1Api(),
                           namespace=namespace, timeout=timeout)
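# `delete_resource` and K8S_DELETE_TIMEOUT are not shown above; a minimal sketch
# for the ConfigMap case, assuming the helper should block until the object is
# gone (HTTP 404) or the timeout elapses. The names and timeout value below are
# placeholders, not the original implementation.
import time
from kubernetes.client.rest import ApiException

K8S_DELETE_TIMEOUT = 60  # seconds

def delete_resource(name, kind, api, namespace="default", timeout=K8S_DELETE_TIMEOUT):
    handlers = {"ConfigMap": (api.delete_namespaced_config_map, api.read_namespaced_config_map)}
    delete_fn, read_fn = handlers[kind]
    delete_fn(name, namespace)
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            read_fn(name, namespace)  # still present, keep polling
        except ApiException as exc:
            if exc.status == 404:
                return True
            raise
        time.sleep(1)
    raise TimeoutError(f"{kind} {namespace}/{name} was not deleted within {timeout}s")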
def core_v1(load_kube_config):
    return client.CoreV1Api()
def pod_is_ready(name, namespace="default"):
    api = kube_client.CoreV1Api()
    pod = api.read_namespaced_pod(name, namespace=namespace)
    return (pod.status.phase.lower() == "running"
            and all(container.ready for container in pod.status.container_statuses))
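# Usage sketch (hedged): poll `pod_is_ready` until it reports True or a deadline
# passes. Assumes the pod already exists (a missing pod would surface as a 404
# ApiException from read_namespaced_pod); the pod name and timeout are placeholders.
import time

def wait_for_pod_ready(name, namespace="default", timeout=120, interval=2):
    deadline = time.time() + timeout
    while time.time() < deadline:
        if pod_is_ready(name, namespace=namespace):
            return True
        time.sleep(interval)
    return False

wait_for_pod_ready("busybox", timeout=60)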
def __init__(self):
    self.k8s_core_v1 = client.CoreV1Api()
def test_prom_operator(self):
    v1 = client.CoreV1Api()
    pod = v1.list_namespaced_pod(
        "cnvrg", label_selector="app.kubernetes.io/name=prometheus-operator")
    self.assertEqual(1, len(pod.items))
    self.assertIn("nodes are available", pod.items[0].status.conditions[0].message)
def __init__(self, namespace):
    self.namespace = namespace
    config.load_kube_config()
    self.client = client.CoreV1Api()
def test_vpa_updater(self):
    v1 = client.CoreV1Api()
    pod = v1.list_namespaced_pod("cnvrg", label_selector="app=vpa-updater")
    self.assertEqual(1, len(pod.items))
    self.assertIsNotNone(pod.items[0].status.conditions[0].message)
    self.assertIn("nodes are available", pod.items[0].status.conditions[0].message)
def __init__(self, api_client: client.ApiClient) -> None:
    self.connection = client.CoreV1Api(api_client)
    self._nodes: Dict[str, api.Node] = {}
    self._pods: Dict[str, api.Pod] = {}
    self._collect_objects()