def main():
    is_debug = asyncio.run(_is_debug_mode())
    LOGGER_CONFIGURER.configure_logger(is_debug)
    if not EPSAGON_TOKEN:
        logging.error(
            "Missing Epsagon token. "
            "Make sure to configure EPSAGON_TOKEN in cluster_agent_deployment.yaml"
        )
        return
    if not CLUSTER_NAME:
        logging.error(
            "Missing cluster name. "
            "Make sure to configure EPSAGON_CLUSTER_NAME in cluster_agent_deployment.yaml"
        )
        return
    config.load_incluster_config()
    logging.info("Loaded cluster config")
    if is_debug:
        loaded_conf = client.configuration.Configuration.get_default_copy()
        logging.debug(
            "Loaded cluster configuration:\nHost: %s\n"
            "Using SSL Cert? %s\nUsing API token? %s",
            loaded_conf.host,
            bool(loaded_conf.ssl_ca_cert),
            bool(loaded_conf.api_key)
        )
    loop = asyncio.new_event_loop()
    loop.add_signal_handler(signal.SIGHUP, _reload_handler)
    loop.run_until_complete(run(is_debug))
    loop.close()
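The `_reload_handler` registered above is not shown in this snippet. A minimal sketch of what such a SIGHUP callback could look like follows; the `RELOAD_EVENT` flag is an assumption for illustration, not part of the original:

import asyncio
import logging

# Hypothetical module-level flag that the run() coroutine would await in
# order to re-read its configuration after a SIGHUP.
RELOAD_EVENT = asyncio.Event()


def _reload_handler():
    # Callbacks registered via loop.add_signal_handler() execute inside the
    # running event loop, so they must stay synchronous and fast: set a
    # flag and return.
    logging.info("SIGHUP received; signalling configuration reload")
    RELOAD_EVENT.set()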
async def _get_deployment_logs(namespace, name, tail_lines=TAIL_LINES_DEFAULT):
    """Gather logs from all pods of a release, selected via a K8s label selector."""
    pods = []
    config.load_incluster_config()
    k8s_client = client.CoreV1Api()
    try:
        api_response = await k8s_client.list_namespaced_pod(
            namespace, label_selector='release={}'.format(name))
        for api_items in api_response.items:
            pods.append(api_items.metadata.name)
    except ApiException as e:
        logging.error(
            f"Exception when calling CoreV1Api->list_namespaced_pod: {e}")

    # Iterate over the list of pods and concatenate their logs
    logs = ""
    try:
        for pod in pods:
            logs += pod + "\n"
            logs += await k8s_client.read_namespaced_pod_log(
                pod, namespace, tail_lines=tail_lines)
    except ApiException as e:
        logging.error(
            f"Exception when calling CoreV1Api->read_namespaced_pod_log: {e}")
    return logs
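A minimal sketch of how this helper might be driven (inside the cluster, since it loads the in-cluster config); the namespace and release name are placeholder values, not from the original:

import asyncio

async def _print_release_logs():
    # 'default' and 'my-release' are illustrative placeholders.
    logs = await _get_deployment_logs('default', 'my-release', tail_lines=50)
    print(logs)

asyncio.run(_print_release_logs())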
async def log_stream_websocket(ws):
    config.load_incluster_config()
    k8s_client = client.CoreV1Api()
    name = ws.query_params['name']
    namespace = ws.query_params['namespace']
    await ws.accept()
    resp = await k8s_client.read_namespaced_pod_log(
        name, namespace, tail_lines=TAIL_LINES_DEFAULT,
        follow=True, _preload_content=False
    )
    while True:
        try:
            line = await resp.content.readline()
        except asyncio.TimeoutError as e:
            logging.error(
                "Async timeout on the server side; the client is expected "
                f"to reconnect: {e}")
            break
        if not line:
            break
        await ws.send_text(line.decode('utf-8'))
    await ws.close()
async def main():
    # Set up Sentry if configured
    sentry_dsn = os.getenv("SENTRY_DSN")
    if sentry_dsn:
        with open(".version") as f:
            release = f.readline().strip()
        environment = os.getenv("HOSTNAME", "dev").split("-")[0]
        sentry_sdk.init(sentry_dsn, release=release, environment=environment)
        log.info("Sentry initialized with release='%s' and environment='%s'",
                 release, environment)

    try:
        config.load_incluster_config()
    except Exception:
        await config.load_kube_config()
    crds = client.CustomObjectsApi()

    # Give tiller time to start up, if it isn't already running
    log.info("Waiting for tiller to be available ..")
    await run_command("helm version", timeout=30)

    tasks = [
        asyncio.ensure_future(monitor_forever(crds, "global")),
        asyncio.ensure_future(monitor_forever(crds, "production")),
        asyncio.ensure_future(monitor_forever(crds, "staging")),
    ]
    signal.signal(signal.SIGTERM, functools.partial(signal_handler, tasks))
    await asyncio.wait(tasks)
async def setup(self, app):
    await super().setup(app)

    try:
        # Not a coroutine for some reason
        config.load_incluster_config()
    except config.ConfigException:
        await config.load_kube_config()

    self.api_client = client.ApiClient()
    self.core_client = client.CoreV1Api(api_client=self.api_client)
    self.custom_client = client.CustomObjectsApi(api_client=self.api_client)

    self.cluster_waiters = defaultdict(Flag)
    self.clusters = {}
    self.username_to_clusters = defaultdict(dict)
    self.queue = WorkQueue()
    self.informer = Informer(
        parent=self,
        name="cluster",
        client=self.custom_client,
        method="list_cluster_custom_object",
        method_kwargs=dict(
            group="gateway.dask.org",
            version=self.crd_version,
            plural="daskclusters",
            label_selector=self.label_selector,
        ),
        on_update=self.on_cluster_event,
        on_delete=self.on_cluster_event,
    )
    await self.informer.start()
    self.sync_task = asyncio.ensure_future(self.sync_clusters_loop())
async def log_job(request, ws, job, pod_id, namespace, container):
    job_uuid = job.uuid.hex
    if job_uuid in request.app.job_logs_ws_managers:
        ws_manager = request.app.job_logs_ws_managers[job_uuid]
    else:
        ws_manager = SocketManager()
        request.app.job_logs_ws_managers[job_uuid] = ws_manager

    ws_manager.add_socket(ws)

    # Stream phase changes until the job is running or done
    status = None
    while status != JobLifeCycle.RUNNING and not JobLifeCycle.is_done(status):
        job.refresh_from_db()
        if status != job.last_status:
            status = job.last_status
            await notify_ws(ws=ws, message=get_status_message(status))
            if should_disconnect(ws=ws, ws_manager=ws_manager):
                return
        await asyncio.sleep(SOCKET_SLEEP)

    if JobLifeCycle.is_done(status):
        await notify_ws(ws=ws, message=get_status_message(status))
        return

    config.load_incluster_config()
    k8s_api = client.CoreV1Api()
    await log_job_pod(k8s_api=k8s_api,
                      ws=ws,
                      ws_manager=ws_manager,
                      pod_id=pod_id,
                      container=container,
                      namespace=namespace)
async def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('timeout_seconds', type=int)
    parser.add_argument('namespace', type=str)
    subparsers = parser.add_subparsers(dest='kind')

    pod_parser = subparsers.add_parser('Pod')
    pod_parser.add_argument('name', type=str)

    service_parser = subparsers.add_parser('Service')
    service_parser.add_argument('name', type=str)
    service_parser.add_argument('--port', '-p', type=int, default=80)

    args = parser.parse_args()

    if args.kind == 'Pod':
        if 'USE_KUBE_CONFIG' in os.environ:
            await config.load_kube_config()
        else:
            config.load_incluster_config()
        v1 = client.CoreV1Api()
        t = wait_for_pod_complete(v1, args.namespace, args.name)
    else:
        assert args.kind == 'Service'
        t = wait_for_service_alive(args.namespace, args.name, args.port)

    await asyncio.gather(timeout(args.timeout_seconds), t)
async def _watch(self):
    DBSession = self.connector.DBSession
    k8s_config.load_incluster_config()
    async with k8s_client.ApiClient() as api:
        v1 = k8s_client.CoreV1Api(api)
        with open(os.path.join(self.config_dir, 'runner.namespace')) as fp:
            namespace = fp.read().strip()

        # Find existing run pods
        pods = await v1.list_namespaced_pod(
            namespace=namespace,
            label_selector='app=run',
        )
        PROM_RUNS.set(0)
        for pod in pods.items:
            run_id = int(pod.metadata.labels['run'], 10)
            logger.info("Found run pod for %d", run_id)
            PROM_RUNS.inc()
            await self._check_pod(api, run_id, pod)

        # Watch for changes
        watch = k8s_watch.Watch()
        f, kwargs = v1.list_namespaced_pod, dict(
            namespace=namespace,
            label_selector='app=run',
        )
        while True:
            try:
                async for event in watch.stream(f, **kwargs):
                    await self._handle_watch_event(api, DBSession, event)
            except k8s_client.ApiException as e:
                # A 410 "Gone" means our resourceVersion expired; loop
                # around and restart the watch from current state.
                if e.status != 410:
                    raise
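The `e.status != 410` check above is what keeps the watch alive: a 410 "Gone" means the watch's resourceVersion has expired server-side, and the `while True` loop simply starts a fresh watch. The same pattern in isolation, as a sketch against kubernetes_asyncio (the function and label selector are illustrative):

from kubernetes_asyncio import client, watch

async def watch_run_pods(v1: client.CoreV1Api, namespace: str):
    w = watch.Watch()
    while True:
        try:
            async for event in w.stream(v1.list_namespaced_pod,
                                        namespace=namespace,
                                        label_selector='app=run'):
                print(event['type'], event['object'].metadata.name)
        except client.ApiException as e:
            # 410 Gone: the resourceVersion expired; loop and re-watch.
            if e.status != 410:
                raise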
async def main():
    try:
        config.load_incluster_config()
    except Exception:
        await config.load_kube_config()
    crds = client.CustomObjectsApi()
    await monitor(crds)
async def on_startup(app):
    if 'BATCH_USE_KUBE_CONFIG' in os.environ:
        await config.load_kube_config()
    else:
        config.load_incluster_config()
    app['k8s_client'] = client.CoreV1Api()
    app['dbpool'] = await create_database_pool()
async def init(cls):
    logger.info('Init!')
    if Config.IN_CLUSTER:
        config.load_incluster_config()
    else:
        await config.load_kube_config()
    cls.v1 = client.CoreV1Api()
    t = threading.Thread(target=cls.init_watch_all_pods)
    t.daemon = True  # setDaemon() is deprecated
    t.start()
async def login_via_kubernetes_asyncio(
    logger: Union[logging.Logger, logging.LoggerAdapter],
    **kwargs: Any,
) -> ConnectionInfo:
    """
    Authenticate with the Kubernetes cluster.

    Upon startup of the Kopf operator, this function attempts to authenticate
    with a Kubernetes cluster. If the
    :attr:`~crate.operator.config.Config.KUBECONFIG` is defined, an attempt
    will be made to use that config file. In other cases, an in-cluster
    authentication will be tried.
    """
    if config.KUBECONFIG:
        logger.info("Authenticating with KUBECONFIG='%s'", config.KUBECONFIG)
        await load_kube_config(config_file=config.KUBECONFIG)
    else:
        logger.info("Authenticating with in-cluster config")
        load_incluster_config()

    # Below follows a copy of Kopf's `kopf.utilities.piggybacking.login_via_client`
    # We do not even try to understand how it works and why. Just load it,
    # and extract the results.
    k8s_config = Configuration.get_default_copy()

    # For auth-providers, this method is monkey-patched with the
    # auth-provider's one. We need the actual auth-provider's token, so we
    # call it instead of accessing api_key.
    # Other keys (token, tokenFile) also end up being retrieved via this
    # method.
    header: Optional[str] = k8s_config.get_api_key_with_prefix("authorization")
    parts: Sequence[str] = header.split(" ", 1) if header else []
    scheme, token = ((None, None) if len(parts) == 0 else
                     (None, parts[0]) if len(parts) == 1 else
                     (parts[0], parts[1]))  # RFC-7235, Appendix C.

    # Interpret the k8s_config object for our own minimalistic credentials.
    # Note: the kubernetes client has no concept of a "current" context's
    # namespace.
    c = ConnectionInfo(
        server=k8s_config.host,
        ca_path=k8s_config.ssl_ca_cert,  # can be a temporary file
        insecure=not k8s_config.verify_ssl,
        username=k8s_config.username or None,  # an empty string when not defined
        password=k8s_config.password or None,  # an empty string when not defined
        scheme=scheme,
        token=token,
        certificate_path=k8s_config.cert_file,  # can be a temporary file
        private_key_path=k8s_config.key_file,  # can be a temporary file
        priority=30,  # The priorities for `client` and `pykube-ng` are 10 and 20.
    )
    return c
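The scheme/token split above is compact but easy to misread; here is the same RFC-7235 parsing applied to sample header values, as a purely illustrative, self-contained snippet:

def split_auth_header(header):
    # Mirrors the parsing above: "Bearer abc" -> ("Bearer", "abc"),
    # a bare token -> (None, token), and no header -> (None, None).
    parts = header.split(" ", 1) if header else []
    return ((None, None) if len(parts) == 0 else
            (None, parts[0]) if len(parts) == 1 else
            (parts[0], parts[1]))

assert split_auth_header("Bearer abc123") == ("Bearer", "abc123")
assert split_auth_header("abc123") == (None, "abc123")
assert split_auth_header(None) == (None, None)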
async def _get_all_namespaces():
    """Get all namespaces"""
    config.load_incluster_config()
    k8s_client = client.CoreV1Api()
    namespaces = []
    ret = await k8s_client.list_namespace(watch=False)
    for i in ret.items:
        namespaces.append(i.metadata.name)
    return namespaces
async def _describe_pod(pod, namespace):
    """Describe a pod"""
    config.load_incluster_config()
    k8s_client = client.CoreV1Api()
    ret = None  # returned as-is if the API call fails
    try:
        ret = await k8s_client.read_namespaced_pod(
            pod, namespace, pretty='true')
    except ApiException as e:
        logging.error(
            f"Exception when calling CoreV1Api->read_namespaced_pod: {e}")
    return ret
def main():
    args = parser.parse_args()
    loop = asyncio.get_event_loop()

    # Load the in-cluster service-account config. To run outside the
    # cluster instead, swap in the commented load_kube_config() call, which
    # reads the file named by KUBECONFIG or falls back to `~/.kube/config`.
    config.load_incluster_config()
    # loop.run_until_complete(config.load_kube_config())

    loop.run_until_complete(simple_watch_clusters())
    loop.run_until_complete(simple_watch_nodepools())
    loop.run_until_complete(simple_watch_nodes(args.preemptible))
    loop.close()
async def _get_pod_logs(pod, namespace, tail_lines=TAIL_LINES_DEFAULT):
    """Read pod logs"""
    config.load_incluster_config()
    k8s_client = client.CoreV1Api()
    ret = None  # returned as-is if the API call fails
    try:
        ret = await k8s_client.read_namespaced_pod_log(
            pod, namespace, tail_lines=tail_lines)
    except ApiException as e:
        logging.error(
            f"Exception when calling CoreV1Api->read_namespaced_pod_log: {e}")
    return ret
async def main():
    logging.basicConfig(format="%(asctime)s %(message)s", level=logging.INFO)
    try:
        config.load_incluster_config()
        logging.debug('Acquired credentials from service account')
    except config.ConfigException:
        await config.load_kube_config()
        logging.debug('Acquired credentials from kubeconfig')
    v1 = client.CoreV1Api()
    while True:
        await label_newest_node(v1, namespace, user_node_selector,
                                attractor_label)
        await asyncio.sleep(10)
async def get(cls) -> CustomObjectsApi:
    if cls.k8s_custom_object_api:
        return cls.k8s_custom_object_api
    else:
        try:
            try:
                await config.load_kube_config()
            except FileNotFoundError:
                config.load_incluster_config()
            cls.k8s_custom_object_api = client.CustomObjectsApi(
                client.ApiClient())
            return cls.k8s_custom_object_api
        except Exception:
            logger.exception(f'Failed to initialize {cls.__name__}')
            raise
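A usage sketch for this cached accessor; the owning class name and the CRD coordinates are assumptions for illustration, not from the original:

async def list_example_objects():
    # `K8sCustomObjectApi` stands in for whatever class defines get();
    # group/version/plural are placeholder CRD coordinates.
    api = await K8sCustomObjectApi.get()
    return await api.list_cluster_custom_object(
        group='example.com', version='v1', plural='examples')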
async def setup(self, k8s_config=None):
    if not k8s_config:
        if self.in_cluster:
            config.load_incluster_config()
        else:
            await config.load_kube_config()
        self.api_client = client.api_client.ApiClient()
    else:
        self.api_client = client.api_client.ApiClient(
            configuration=k8s_config)
    self.k8s_api = client.CoreV1Api(self.api_client)
    self.k8s_batch_api = client.BatchV1Api(self.api_client)
    self.k8s_beta_api = client.ExtensionsV1beta1Api(self.api_client)
    self.k8s_custom_object_api = client.CustomObjectsApi(self.api_client)
    self.k8s_version_api = client.VersionApi(self.api_client)
async def _get_all_pods(namespace=None):
    """Get all pods, as a mapping of pod name to namespace"""
    pods = {}
    config.load_incluster_config()
    k8s_client = client.CoreV1Api()
    if namespace:
        ret = await k8s_client.list_namespaced_pod(namespace, watch=False)
    else:
        ret = await k8s_client.list_pod_for_all_namespaces(watch=False)
    for i in ret.items:
        pods[i.metadata.name] = i.metadata.namespace
    return pods
async def init(cls, in_cluster: bool,
               task_runner_service: TaskRunnerService) -> K8sClient:
    if in_cluster:
        # Auth inside the k8s cluster (service-account credentials)
        config.load_incluster_config()
        configuration = client.Configuration()
    else:
        # Local auth (from the kubectl config)
        configuration = client.Configuration()
        await config.load_kube_config(client_configuration=configuration)
    api_client = client.ApiClient(configuration)
    core_client = client.CoreApi(api_client)
    v1_client = client.CoreV1Api(api_client)
    return cls(core_client=core_client,
               v1_client=v1_client,
               task_runner_service=task_runner_service)
async def initialize_kubernetes() -> None:
    """Load the Kubernetes configuration.

    This has to be run once per process and should be run during application
    startup. This function handles Kubernetes configuration independent of
    any given Kubernetes client so that clients can be created for each
    request.

    Notes
    -----
    If ``KUBERNETES_PORT`` is set in the environment, this will use
    ``load_incluster_config`` to get configuration information from the
    local pod metadata. Otherwise, it will use ``load_kube_config`` to read
    configuration from the user's home directory.
    """
    if "KUBERNETES_PORT" in os.environ:
        config.load_incluster_config()
    else:
        await config.load_kube_config()
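With configuration loaded once at startup, per-request clients are cheap to create. A minimal sketch of the pattern this function enables (the helper name is illustrative):

from kubernetes_asyncio import client

async def list_namespace_names():
    # Relies on initialize_kubernetes() having been called at startup;
    # each request gets its own short-lived ApiClient.
    async with client.ApiClient() as api:
        v1 = client.CoreV1Api(api)
        ret = await v1.list_namespace()
        return [ns.metadata.name for ns in ret.items]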
async def log_experiment(request, ws, experiment, namespace, container):
    experiment_uuid = experiment.uuid.hex
    if experiment_uuid in request.app.experiment_logs_ws_managers:
        ws_manager = request.app.experiment_logs_ws_managers[experiment_uuid]
    else:
        ws_manager = SocketManager()
        request.app.experiment_logs_ws_managers[experiment_uuid] = ws_manager

    ws_manager.add_socket(ws)

    # Stream phase changes until the experiment is running or done
    status = None
    while (status != ExperimentLifeCycle.RUNNING
           and not ExperimentLifeCycle.is_done(status)):
        experiment.refresh_from_db()
        if status != experiment.last_status:
            status = experiment.last_status
            await notify_ws(ws=ws, message=get_status_message(status))
            if should_disconnect(ws=ws, ws_manager=ws_manager):
                return
        await asyncio.sleep(SOCKET_SLEEP)

    if ExperimentLifeCycle.is_done(status):
        await notify_ws(ws=ws, message=get_status_message(status))
        return

    config.load_incluster_config()
    k8s_api = client.CoreV1Api()
    log_requests = []
    for job in experiment.jobs.all():
        pod_id = job.pod_id
        log_requests.append(
            log_job_pod(k8s_api=k8s_api,
                        ws=ws,
                        ws_manager=ws_manager,
                        pod_id=pod_id,
                        container=container,
                        namespace=namespace,
                        task_type=job.role,
                        task_idx=job.sequence))
    await asyncio.wait(log_requests)
def main():
    loop = asyncio.get_event_loop()

    # loop.run_until_complete(config.load_kube_config())
    config.load_incluster_config()

    # Start a Prometheus HTTP endpoint for exposing metrics
    start_http_server(8000)

    # Build the task list from the enabled resources
    tasks = []
    for resource in tasklist:
        # Look up the coroutine function by name and schedule it
        task = asyncio.ensure_future(globals()[resource]())
        tasks.append(task)

    # Start the loop
    loop.run_until_complete(asyncio.wait(tasks))
    loop.close()
async def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('timeout_seconds', type=int)
    parser.add_argument('namespace', type=str)
    subparsers = parser.add_subparsers(dest='kind')

    pod_parser = subparsers.add_parser('Pod')
    pod_parser.add_argument('name', type=str)

    service_parser = subparsers.add_parser('Service')
    service_parser.add_argument('name', type=str)
    service_parser.add_argument('--port', '-p', type=int, default=80)
    service_parser.add_argument('--endpoint', '-e', type=str,
                                default='/healthcheck')
    service_parser.add_argument('--header', action='append', type=str,
                                nargs=2)

    args = parser.parse_args()

    if args.kind == 'Pod':
        if 'USE_KUBE_CONFIG' in os.environ:
            await config.load_kube_config()
        else:
            config.load_incluster_config()
        v1 = client.CoreV1Api()
        t = wait_for_pod_complete(v1, args.namespace, args.name)
    else:
        assert args.kind == 'Service'
        headers = None if args.header is None else {
            flag: val for flag, val in args.header
        }
        t = wait_for_service_alive(args.namespace, args.name, args.port,
                                   args.endpoint, headers)

    await asyncio.gather(timeout(args.timeout_seconds), t)
async def main():
    while True:
        try:
            # This works only when the script runs inside a pod on K8s
            config.load_incluster_config()
            v1 = client.CoreV1Api()
            print("Listing pods with their IPs:")
            ret = await v1.list_pod_for_all_namespaces()
            for i in ret.items:
                print(i.status.pod_ip, i.metadata.namespace, i.metadata.name)
        except Exception:
            traceback.print_exc(file=sys.stdout)
        finally:
            print("sleep 10s")
            await asyncio.sleep(10)
async def poll():
    print('info: in poll', file=sys.stderr)
    if 'USE_KUBE_CONFIG' in os.environ:
        await config.load_kube_config()
    else:
        config.load_incluster_config()
    v1 = client.CoreV1Api()
    while True:
        try:
            try:
                pod = await v1.read_namespaced_pod(
                    name, namespace, _request_timeout=5.0)
                if pod and pod.status and pod.status.container_statuses:
                    container_statuses = pod.status.container_statuses
                    if all(cs.state and cs.state.terminated
                           for cs in container_statuses):
                        if all(cs.state.terminated.exit_code == 0
                               for cs in container_statuses):
                            print('info: success')
                            sys.exit(0)
                        else:
                            print('error: a container failed')
                            sys.exit(1)
            except client.rest.ApiException as exc:
                if exc.status == 404:
                    print('info: 404', file=sys.stderr)
                else:
                    raise
        except concurrent.futures.CancelledError:
            print('info: CancelledError', file=sys.stderr)
            raise
        except Exception:
            # format_exc() already includes the exception message
            print(f'poll failed due to exception {traceback.format_exc()}',
                  file=sys.stderr)
        await asyncio.sleep(1)
import asyncio
import os
import urllib.parse

import aiobotocore

from kubernetes_asyncio import client, config


# ============================================================================
if os.environ.get("IN_CLUSTER"):
    print("Cluster Init")
    config.load_incluster_config()
else:
    # load_kube_config() is a coroutine in kubernetes_asyncio, so it must be
    # driven by an event loop even at import time
    asyncio.get_event_loop().run_until_complete(config.load_kube_config())

DEFAULT_NAMESPACE = os.environ.get("BROWSER_NAMESPACE") or "browsers"


# ============================================================================
class K8SManager:
    def __init__(self, namespace=DEFAULT_NAMESPACE):
        self.core_api = client.CoreV1Api()
        self.batch_api = client.BatchV1Api()
        self.namespace = namespace

    async def get_job(self, name):
        try:
            return await self.batch_api.read_namespaced_job(
                name=name, namespace=self.namespace)
        except Exception as exc:
            print(exc)
async def setup(self):
    # Register signal handlers
    loop = asyncio.get_event_loop()
    for s in (signal.SIGTERM, signal.SIGINT):
        loop.add_signal_handler(s, self.handle_shutdown_signal, s)

    # Rate limiter for k8s api calls
    self.rate_limiter = RateLimiter(rate=self.k8s_api_rate_limit,
                                    burst=self.k8s_api_rate_limit_burst)

    # Initialize the kubernetes clients
    try:
        config.load_incluster_config()
    except config.ConfigException:
        await config.load_kube_config()
    self.api_client = client.ApiClient()
    self.core_client = RateLimitedClient(
        client.CoreV1Api(api_client=self.api_client), self.rate_limiter)
    self.custom_client = RateLimitedClient(
        client.CustomObjectsApi(api_client=self.api_client),
        self.rate_limiter)

    # Local state
    self.cluster_info = collections.defaultdict(ClusterInfo)
    self.stopped_clusters = {}

    # Initialize queue and informers
    self.queue = WorkQueue(
        backoff=Backoff(base_delay=self.backoff_base_delay,
                        max_delay=self.backoff_max_delay))
    endpoints_selector = (self.label_selector
                          + ",app.kubernetes.io/component=dask-scheduler")
    self.informers = {
        "cluster": Informer(
            parent=self,
            name="cluster",
            client=self.custom_client,
            method="list_cluster_custom_object",
            method_kwargs=dict(
                group="gateway.dask.org",
                version=self.crd_version,
                plural="daskclusters",
                label_selector=self.label_selector,
            ),
            on_update=self.on_cluster_update,
            on_delete=self.on_cluster_delete,
        ),
        "pod": Informer(
            parent=self,
            name="pod",
            client=self.core_client,
            method="list_pod_for_all_namespaces",
            method_kwargs=dict(label_selector=self.label_selector),
            on_update=self.on_pod_update,
            on_delete=self.on_pod_delete,
        ),
        "endpoints": Informer(
            parent=self,
            name="endpoints",
            client=self.core_client,
            method="list_endpoints_for_all_namespaces",
            method_kwargs=dict(label_selector=endpoints_selector),
            on_update=self.on_endpoints_update,
            on_delete=self.on_endpoints_delete,
        ),
    }
    await asyncio.wait([i.start() for i in self.informers.values()])
    self.log.debug("All informers started")

    # Initialize reconcilers
    self.reconcilers = [
        asyncio.ensure_future(self.reconciler_loop())
        for _ in range(self.parallelism)
    ]

    # Start background tasks
    self.task_pool = TaskPool()
    self.task_pool.spawn(self.cleanup_expired_cluster_records_loop())

    # Start the aiohttp application
    self.runner = web.AppRunner(
        self.app,
        handle_signals=False,
        access_log_class=AccessLogger,
        access_log=self.log,
    )
    await self.runner.setup()
    host, port = self.address.split(":")
    port = int(port)
    site = web.TCPSite(self.runner, host, port,
                       shutdown_timeout=15.0, backlog=128)
    await site.start()
    self.log.info("%s started!", self.name)
    self.log.info("API listening at http://%s", self.address)
async def run_inner(self, run_info):
    run_id = run_info['id']
    del run_info

    # This does not run the experiment; it schedules a runner pod by
    # talking to the Kubernetes API. That pod will run the experiment and
    # update the database directly
    k8s_config.load_incluster_config()

    name = self._pod_name(run_id)

    # Load configuration from the configmap volume
    with open(os.path.join(self.config_dir, 'runner.pod_spec')) as fp:
        pod_spec = yaml.safe_load(fp)
    with open(os.path.join(self.config_dir, 'runner.namespace')) as fp:
        namespace = fp.read().strip()

    # Make the required changes
    for container in pod_spec['containers']:
        if container['name'] == 'runner':
            container['args'] += [str(run_id)]
            # This is mostly used by Tilt
            if os.environ.get('OVERRIDE_RUNNER_IMAGE'):
                container['image'] = os.environ['OVERRIDE_RUNNER_IMAGE']

    async with k8s_client.ApiClient() as api:
        # Create a Kubernetes pod to run
        v1 = k8s_client.CoreV1Api(api)
        pod = k8s_client.V1Pod(
            api_version='v1',
            kind='Pod',
            metadata=k8s_client.V1ObjectMeta(
                name=name,
                labels={
                    'app': 'run',
                    'run': str(run_id),
                },
            ),
            spec=pod_spec,
        )
        await v1.create_namespaced_pod(
            namespace=namespace,
            body=pod,
        )
        logger.info("Pod created: %s", name)
        PROM_RUNS.inc()

        # Create a service for proxy connections
        svc = k8s_client.V1Service(
            api_version='v1',
            kind='Service',
            metadata=k8s_client.V1ObjectMeta(
                name=name,
                labels={
                    'app': 'run',
                    'run': str(run_id),
                },
            ),
            spec=k8s_client.V1ServiceSpec(
                selector={
                    'app': 'run',
                    'run': str(run_id),
                },
                ports=[
                    k8s_client.V1ServicePort(
                        protocol='TCP',
                        port=5597,
                    ),
                ],
            ),
        )
        await v1.create_namespaced_service(
            namespace=namespace,
            body=svc,
        )
        logger.info("Service created: %s", name)
async def on_startup(app):
    if 'BATCH_USE_KUBE_CONFIG' in os.environ:
        await config.load_kube_config()
    else:
        config.load_incluster_config()
    app['k8s_client'] = client.CoreV1Api()