Example #1
    def log_generator(self, container, timeout=300):
        pod = self._get_pod(container)

        self.taclib_log.info(f"Waiting for job {container.metadata.name} to run...")
        self._wait_for_status(container, "Running")
        w = Watch()

        self.taclib_log.info(f"Start watching {pod.metadata.name} logs")

        while True:
            try:
                for e in w.stream(
                    self._c.read_namespaced_pod_log,
                    name=pod.metadata.name,
                    namespace=self.namespace,
                    follow=True,
                    _request_timeout=config["request_log_timeout"].get(int),
                ):
                    yield e.encode()
            except ReadTimeoutError:
                self.taclib_log.info("Failed to read pod log - timeout error")
                job = self._get_job(container.metadata.name)
                status = self._get_pod(job).status.phase
                if status == "Running":
                    self.taclib_log.info(
                        "Pod is still running after failing to fetch logs"
                    )
                    self.taclib_log.info("Retrying to fetch pod logs")
                    continue
            break

        self.taclib_log.info(f"Stop watching {pod.metadata.name} logs")
Example #2
 def watch_operator(self,
                    deployment_id: Optional[str] = None,
                    experiment_id: Optional[str] = None):
     GROUP = "argoproj.io"
     VERSION = "v1alpha1"
     PLURAL = "workflows"
     load_kube_config()
     api = client.CustomObjectsApi()
     while True:
         run_id = get_latest_run_id(experiment_id or deployment_id)
         if not run_id:
             yield "operator not running"
             time.sleep(5)
         else:
             resource_version = list_resource_version(
                 group=GROUP,
                 version=VERSION,
                 namespace=KF_PIPELINES_NAMESPACE,
                 plural=PLURAL,
             )
             w = Watch()
             stream = w.stream(
                 api.list_namespaced_custom_object,
                 group=GROUP,
                 version=VERSION,
                 namespace=KF_PIPELINES_NAMESPACE,
                 plural=PLURAL,
                 resource_version=resource_version,
                 label_selector=f"pipeline/runid={run_id}",
                 pretty="true",
             )
             for streamline in stream:
                 yield f"Event: {streamline['type']} {streamline['object']['metadata']['name']}"
Example #3
def watch_logs(kube_client, pod_info):
    """Stream a single pod's logs

    pod logs are streamed directly to sys.stderr,
    so that pytest capture can deal with it.

    Blocking, should be run in a thread.

    Called for each new pod from watch_kubernetes
    """
    watch = Watch()
    while True:
        try:
            for event in watch.stream(
                func=kube_client.read_namespaced_pod_log,
                namespace=pod_info.namespace,
                name=pod_info.name,
            ):
                print(f"[{pod_info.name}]: {event}")
        except ApiException as e:
            if e.status == 400:
                # 400 can occur if the container is not yet ready
                # wait and retry
                time.sleep(1)
                continue
            elif e.status == 404:
                # pod is gone, we are done
                return
            else:
                # unexpected error
                print(f"Error watching logs for {pod_info.name}: {e}", file=sys.stderr)
                raise
        else:
            break
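Per its docstring, watch_logs blocks, so callers run it on its own thread. A hedged usage sketch; pod_info here stands in for any object exposing .name and .namespace, which is all the function reads:

from threading import Thread
from types import SimpleNamespace
from kubernetes import client, config

config.load_kube_config()
kube_client = client.CoreV1Api()
# Hypothetical pod reference; only .name and .namespace are used above.
pod_info = SimpleNamespace(name="my-pod", namespace="test-ns")
Thread(target=watch_logs, args=(kube_client, pod_info), daemon=True).start()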
Example #4
def watch_kubernetes(kube_client, kube_ns):
    """Stream kubernetes events to stdout

    so that pytest io capturing can include k8s events and logs

    All events are streamed to stdout

    When a new pod is started, spawn an additional thread to watch its logs
    """
    log_threads = {}
    watch = Watch()
    for event in watch.stream(
        func=kube_client.list_namespaced_event,
        namespace=kube_ns,
    ):
        resource = event['object']
        obj = resource.involved_object
        print(f"k8s event ({event['type']} {obj.kind}/{obj.name}): {resource.message}")

        # new pod appeared, start streaming its logs
        if (
            obj.kind == "Pod"
            and event["type"] == "ADDED"
            and obj.name not in log_threads
        ):
            log_threads[obj.name] = t = Thread(
                target=watch_logs, args=(kube_client, obj), daemon=True
            )
            t.start()
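watch_kubernetes itself blocks on the event stream, so a test suite would typically start it once on a daemon thread, mirroring how it spawns per-pod log threads. A hedged usage sketch:

from threading import Thread
from kubernetes import client, config

config.load_kube_config()
Thread(
    target=watch_kubernetes,
    args=(client.CoreV1Api(), "test-namespace"),
    daemon=True,
).start()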
Example #5
def cluster_cr_stream() -> Iterator:
    w = Watch()
    return w.stream(custom_objects_api().list_namespaced_custom_object,
                    namespace=RAY_NAMESPACE,
                    group="cluster.ray.io",
                    version="v1",
                    plural="rayclusters")
Example #6
def namespaced_cr_stream(namespace) -> Iterator:
    w = Watch()
    return w.stream(custom_objects_api().list_namespaced_custom_object,
                    namespace=namespace,
                    group=RAY_API_GROUP,
                    version=RAY_API_VERSION,
                    plural=RAYCLUSTER_PLURAL)
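Both helpers return the watch stream unconsumed; events only flow once a caller iterates. Custom-object events carry plain dicts rather than typed models, so a consumer might look like this (a sketch, assuming the constants above and a "ray" namespace):

for event in namespaced_cr_stream("ray"):
    cluster = event["object"]  # a plain dict for custom resources
    print(event["type"], cluster["metadata"]["name"])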
Example #7
    def start(self):
        # Update all deployments to status 'Stopped'.
        for deployment in models.Deployment.query.all():
            deployment.status = 'Stopped'
            deployment.on_status_changed()
        db.session.commit()

        # Create a watcher for kube events. Sync deployment status on each event.
        watch = Watch()
        for event in watch.stream(Apps.API.list_namespaced_deployment,
                                  'default',
                                  watch=True):
            print('[DEBUG]', event['type'], event['object'].metadata.name,
                  event['object'].status.available_replicas)

            deployment = models.Deployment.query.get_by_ref(
                event['object'].metadata.name)

            if event['type'] == 'DELETED':
                deployment.status = 'Stopped'
            elif not event['object'].status.available_replicas:
                deployment.status = 'Creating'
            elif event['type'] == 'MODIFIED':
                deployment.status = 'Created'
            else:
                deployment.status = 'Running'

            db.session.commit()

            deployment.on_status_changed()
Example #8
def cluster_scoped_cr_stream() -> Iterator:
    w = Watch()
    return w.stream(
        custom_objects_api().list_cluster_custom_object,
        group=RAY_API_GROUP,
        version=RAY_API_VERSION,
        plural=RAYCLUSTER_PLURAL)
Example #9
    def log_stream(self, pod, container):
        """
        Generates a log stream for the given pod's container.

        Whenever the event source is called, a new thread is started for each
        pod to listen for new logs, and another thread watches for new pods
        being created. A known limitation of the log generation: when the
        client disconnects from the event source, the allocated threads are
        not deallocated, so the memory and processes they use are never
        released.

        Parameters
        ----------
            pod: V1Pod
            container: V1Container

        Yields
        ------
            str
        """
        load_kube_config()
        v1 = client.CoreV1Api()
        w = Watch()
        pod_name = pod.metadata.name
        namespace = pod.metadata.namespace
        container_name = container.name
        try:
            for streamline in w.stream(
                v1.read_namespaced_pod_log,
                name=pod_name,
                namespace=namespace,
                container=container_name,
                pretty="true",
                tail_lines=0,
                timestamps=True,
            ):
                self.queue.put_nowait(streamline)

        except RuntimeError as e:
            logging.exception(e)
            return

        except asyncio.CancelledError as e:
            logging.exception(e)
            return

        except ApiException as e:
            # Expected behavior when trying to connect to a container that isn't ready yet.
            logging.exception(e)

        except CancelledError as e:
            # Expected behavior when the task is cancelled.
            logging.exception(e)
            return
Example #10
 def wait_pod_event(self, name, cond):
     w = Watch()
     for event in w.stream(self.core_api.list_namespaced_pod,
                           self.namespace,
                           timeout_seconds=120):
         object = event['object']
         etype = event['type']
         if object.metadata.name != name: continue
         if cond(etype, object):
             w.stop()
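The cond callback receives the event type and the pod object, so callers express the wait condition as a lambda. A hypothetical call, assuming an instance named harness:

# Block (for up to the 120 s watch timeout) until the pod is Running or deleted.
harness.wait_pod_event(
    "my-pod",
    lambda etype, pod: etype == "DELETED" or pod.status.phase == "Running",
)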
Example #11
def tail_one(
    api_v1: client.CoreV1Api, pod_name: str, namespace: str, tail_lines: int
) -> NoReturn:
    watch = Watch()
    color_name = io.capture(f"[dim white]{pod_name}[/]", end="")
    for line in watch.stream(
        api_v1.read_namespaced_pod_log,
        pod_name,
        namespace,
        tail_lines=tail_lines,
    ):
        print(f"{color_name}  {line}")
Example #12
def _thread_tail(
    queue: Queue,
    api_v1: client.CoreV1Api,
    pod_name: str,
    pad_width: int,
    namespace: str,
) -> NoReturn:
    watch = Watch()
    padded_name = ("{:<" + str(pad_width) + "}").format(pod_name)
    left_col = io.capture(f"[dim white]{padded_name}[/]", end="")
    for line in watch.stream(
        api_v1.read_namespaced_pod_log, pod_name, namespace, tail_lines=0
    ):
        queue.put(left_col + line)
Example #13
File: pod.py Project: keichi/kbench
def wait_for_startup(v1, pods):
    pending = set(pods.keys())
    watch = Watch()

    for event in watch.stream(v1.list_namespaced_pod, namespace=NAMESPACE):
        pod = event["object"]
        name = pod.metadata.name

        if name in pending and pod.status.phase == "Running":
            pods[name].started_at = time.monotonic()
            logger.trace("Pod {} started in {:.3f} [s]", name,
                         pods[name].started_at - pods[name].created_at)

            pending.remove(name)

            if not pending:
                return
Example #14
 def watch_job(self, job_name, namespace):
     """
     监控job是否完成 bool
     """
     watcher = Watch()
     try:
         for event in watcher.stream(self.batch.list_namespaced_job,
                                     namespace=namespace,
                                     label_selector=f'job-name={job_name}'):
             succeed = event['object'].status.succeeded
             active = event['object'].status.active
             if succeed == 1 and active is None:
                 watcher.stop()
                 return True
     except Exception as e:
         print(e)
         return False
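As written, the watcher only recognizes success; a failed job keeps the stream open until the watch ends. A variant sketch that also stops on failure, using the job status's failed counter (assumes a freshly configured BatchV1Api rather than the example's self.batch):

from kubernetes import client, config, watch

def wait_for_job(job_name, namespace="default"):
    """Return True when the job succeeds, False as soon as it reports failures."""
    config.load_kube_config()
    batch = client.BatchV1Api()
    w = watch.Watch()
    for event in w.stream(batch.list_namespaced_job,
                          namespace=namespace,
                          label_selector=f"job-name={job_name}"):
        status = event["object"].status
        if status.succeeded:
            w.stop()
            return True
        if status.failed:
            w.stop()
            return False
    return False  # watch ended without a verdict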
Example #15
def wait_for_deployment_rescale(v1, name, target_replicas):
    watch = Watch()
    for event in watch.stream(v1.list_namespaced_deployment,
                              namespace=NAMESPACE):
        deployment = event["object"]

        if deployment.metadata.name != name:
            continue

        ready_replicas = deployment.status.ready_replicas

        if ready_replicas is None:
            ready_replicas = 0

        logger.trace("Deployment {} has {} replicas", name, ready_replicas)

        if ready_replicas == target_replicas:
            return
Example #16
File: pod.py Project: keichi/kbench
def wait_for_cleanup(v1, pods):
    pending = set(pods)
    watch = Watch()

    for event in watch.stream(v1.list_namespaced_pod, namespace=NAMESPACE):
        type = event["type"]
        pod = event["object"]
        name = pod.metadata.name

        if name in pending and type == "DELETED":
            pods[name].exited_at = time.monotonic()
            logger.trace("Pod {} exited in {:.3f} [s]", name,
                         pods[name].exited_at - pods[name].deleted_at)

            pending.remove(name)

            if not pending:
                return
Example #17
 def execute(self, context):
     # initialize config
     try:
         config = kube_config.load_incluster_config()
     except Exception:
         config = kube_config.load_kube_config()
     # create an instance of the API class
     api_instance = CustomObjectsApi(ApiClient(config))
     # params to create custom object
     params = [self.group, self.version, self.namespace, self.plural]
     crd_created = self.create_custom_definition(api_instance, *params)
     if crd_created:
         w = Watch()
         for event in w.stream(api_instance.list_namespaced_custom_object, *params, timeout_seconds=self.timeout):
             job_name = event.get('object', {}).get('metadata', {}).get('name')
             job_state = event.get('object', {}).get('status', {}).get('applicationState', {}).get('state')
             if job_name == self.job_name and job_state == "COMPLETED":
                 break
Example #18
 def watch_deployment_pods(self, deployment_id):
     load_kube_config()
     v1 = client.CoreV1Api()
     w = Watch()
     try:
         for pod in w.stream(
             v1.list_namespaced_pod,
             namespace=KF_PIPELINES_NAMESPACE,
             label_selector=f"seldon-deployment-id={deployment_id}",
         ):
             if pod["type"] == "ADDED":
                 pod = pod["object"]
                 for container in pod.spec.containers:
                     if container.name not in EXCLUDE_CONTAINERS:
                         self.loop.run_in_executor(self.pool, self.log_stream, pod, container)
     except CancelledError:
         # Expected behavior when the task is cancelled.
         w.stop()
         return
Example #19
 def watch_workflow_pods(self, experiment_id: str):
     # Known bug:
     # A pod found by the pod worker may not be found by the log worker in the case of experiments
     load_kube_config()
     v1 = client.CoreV1Api()
     w = Watch()
     try:
         for pod in w.stream(v1.list_namespaced_pod,
                             namespace=KF_PIPELINES_NAMESPACE,
                             label_selector=f"experiment-id={experiment_id}"):
             if pod["type"] == "ADDED":
                 pod = pod["object"]
                 for container in pod.spec.containers:
                     if container.name not in EXCLUDE_CONTAINERS and "name" in pod.metadata.annotations:
                         self.loop.run_in_executor(self.pool, self.log_stream, pod, container)
     except CancelledError:
         # Expected behavior when the task is cancelled.
         w.stop()
         return
Example #20
    def streamEvents(self) -> None:
        """
        Watches for changes to the mongo objects in Kubernetes and processes any changes immediately.
        """
        event_watcher = Watch()

        # start watching from the latest version that we have
        if self.cluster_versions:
            event_watcher.resource_version = max(
                self.cluster_versions.values())

        for event in event_watcher.stream(
                self.kubernetes_service.listMongoObjects,
                _request_timeout=self.STREAM_REQUEST_TIMEOUT):
            logging.info("Received event %s", event)

            if event["type"] in ("ADDED", "MODIFIED"):
                cluster_object = self._parseConfiguration(event["object"])
                if cluster_object:
                    self.checkCluster(cluster_object)
                else:
                    logging.warning(
                        "Could not validate cluster object, stopping event watcher."
                    )
                    event_watcher.stop()
            elif event["type"] in ("DELETED", ):
                self.collectGarbage()

            else:
                logging.warning(
                    "Could not parse event, stopping event watcher.")
                event_watcher.stop()

            # Change the resource version manually because of a bug fixed in a later version of the K8s client:
            # https://github.com/kubernetes-client/python-base/pull/64
            if isinstance(event.get('object'),
                          dict) and 'resourceVersion' in event['object'].get(
                              'metadata', {}):
                event_watcher.resource_version = event['object']['metadata'][
                    'resourceVersion']
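The manual resourceVersion bookkeeping above compensates for older clients that did not advance Watch.resource_version themselves; on reconnect, the watch then resumes where it stopped instead of replaying history. A minimal sketch of the same idea; list_fn and handle are placeholders for any list_* API call and event handler:

from kubernetes import watch

w = watch.Watch()
for event in w.stream(list_fn, _request_timeout=30):
    handle(event)
    obj = event["object"]
    # Custom-object watches yield dicts; record their resourceVersion manually.
    if isinstance(obj, dict) and "resourceVersion" in obj.get("metadata", {}):
        w.resource_version = obj["metadata"]["resourceVersion"]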
Example #21
def tail_module_log(
        layer: "Layer",
        module_name: str,
        since_seconds: Optional[int] = None,
        earliest_pod_start_time: Optional[datetime.datetime] = None,
        start_color_idx: int = 15,  # White Color
) -> None:
    current_pods_monitored: Set[str] = set()
    load_opta_kube_config()
    v1 = CoreV1Api()
    watch = Watch()
    count = 0
    """Using the UTC Time stamp as the Kubernetes uses the UTC Timestamps."""
    for event in watch.stream(
            v1.list_namespaced_pod,
            namespace=layer.name,
            label_selector=
            f"app.kubernetes.io/instance={layer.name}-{module_name}",
    ):
        pod: V1Pod = event["object"]
        color_idx = count % (256 - start_color_idx) + start_color_idx
        if color_idx in REDS:
            count += 1
            color_idx = count % (256 - start_color_idx) + start_color_idx
        if (earliest_pod_start_time is not None
                and pod.metadata.creation_timestamp < earliest_pod_start_time):
            continue

        if pod.metadata.name not in current_pods_monitored:
            current_pods_monitored.add(pod.metadata.name)
            new_thread = Thread(
                target=tail_pod_log,
                args=(layer.name, pod, color_idx, since_seconds),
                daemon=True,
            )
            new_thread.start()
            count += 1
Example #22
def tail_pod_log(namespace: str, pod: V1Pod, color_idx: int,
                 seconds: Optional[int]) -> None:
    v1 = CoreV1Api()
    watch = Watch()
    print(
        f"{fg(color_idx)}Showing the logs for server {pod.metadata.name} of your service{attr(0)}"
    )
    retry_count = 0
    while True:
        try:
            for logline in watch.stream(
                    v1.read_namespaced_pod_log,
                    name=pod.metadata.name,
                    namespace=namespace,
                    container="k8s-service",
                    since_seconds=seconds,
            ):
                print(f"{fg(color_idx)}{pod.metadata.name} {logline}{attr(0)}")
        except Exception as e:
            if isinstance(e, ApiException):
                if e.status == 404:  # type: ignore
                    print(
                        f"{fg(color_idx)}Server {pod.metadata.name} has been terminated{attr(0)}"
                    )
                    return

            if retry_count < 15:
                print(
                    f"{fg(color_idx)}Couldn't get logs, waiting a bit and retrying{attr(0)}"
                )
                time.sleep(retry_count)
                retry_count += 1
            else:
                logger.error(
                    f"Got the following error while trying to fetch the logs for pod {pod.metadata.name} in namespace {namespace}: {e}"
                )
                return
Example #23
    def delete_all(self):
        # We must pass a new default API client to avoid urllib conn pool warnings
        start_time = datetime.now()
        print("Deleting items")
        for uid in self.resources["pods"]:
            print("  - Pod %s:%s" % uid)
            try:
                res = self.core_api.delete_namespaced_pod(
                    namespace=uid[0], name=uid[1], body=V1DeleteOptions())
            except:
                print("    (issue cleaning up, ignored)")

        for uid in self.resources["services"]:
            print("  - Service %s:%s" % uid)
            try:
                res = self.core_api.delete_namespaced_service(namespace=uid[0],
                                                              name=uid[1])
            except:
                print("    (issue cleaning up, ignored)")

        for uid in self.resources["ingress"]:
            print("  - Ingress %s:%s" % uid)
            try:
                res = self.beta1_api.delete_namespaced_ingress(
                    namespace=uid[0], name=uid[1], body=V1DeleteOptions())
            except:
                print("    (issue cleaning up, ignored)")
        self.resources["ingress"] = {}

        for uid in self.resources["config_maps"]:
            print("  - ConfigMap %s:%s" % uid)
            try:
                res = self.core_api.delete_namespaced_config_map(
                    namespace=uid[0], name=uid[1], body=V1DeleteOptions())
            except:
                print("    (issue cleaning up, ignored)")
        self.resources["config_maps"] = {}

        for uid in self.resources["role_bindings"]:
            print("  - RoleBinding %s:%s" % uid)
            try:
                res = self.rbac_api.delete_namespaced_role_binding(
                    namespace=uid[0], name=uid[1], body=V1DeleteOptions())
            except:
                print("    (issue cleaning up, ignored)")
        self.resources["role_bindings"] = {}

        for uid in self.resources["roles"]:
            print("  - Role %s:%s" % uid)
            try:
                res = self.rbac_api.delete_namespaced_role(
                    namespace=uid[0], name=uid[1], body=V1DeleteOptions())
            except:
                print("    (issue cleaning up, ignored)")
        self.resources["roles"] = {}

        for uid in self.resources["service_accounts"]:
            print("  - ServiceAccount %s:%s" % uid)
            try:
                res = self.rbac_api.delete_namespaced_service_account(
                    namespace=uid[0], name=uid[1], body=V1DeleteOptions())
            except:
                print("    (issue cleaning up, ignored)")
        self.resources["service_accounts"] = {}

        # Not checking for possibly deleted pods, pods take a while to
        # delete and they will not be listed anymore

        print("Waiting for pod and service deletion")
        #print("Waiting for pods to be deleted: %s" % ', '.join(["%s:%s" % uid for uid in self.resources["pods"]]))
        while self.resources["pods"]:
            current_pods = [(i.metadata.namespace, i.metadata.name)
                            for i in self.core_api.list_namespaced_pod(
                                self.namespace).items]
            #print("Current pods: %s" % ', '.join(["%s:%s" % uid for uid in current_pods]))
            deleted_pods = [
                uid for uid in self.resources["pods"]
                if uid not in current_pods
            ]
            #print("Deleted pods: %s" % ', '.join(["%s:%s" % uid for uid in deleted_pods]))
            for uid in deleted_pods:
                print("  - Pod %s:%s*" % uid)
                del self.resources["pods"][uid]
            if not self.resources["pods"]: break

            #print("Remaining: %s" % ', '.join(["%s:%s" % uid for uid in self.resources["pods"]]))
            w = Watch()
            for event in w.stream(self.core_api.list_namespaced_pod,
                                  self.namespace,
                                  timeout_seconds=30):
                object = event['object']
                etype = event['type']
                uid = (object.metadata.namespace, object.metadata.name)
                if etype == "DELETED" and uid in self.resources["pods"]:
                    print("  - Pod %s:%s" % uid)
                    del self.resources["pods"][uid]
                    if not self.resources["pods"]: w.stop()
        #print("Done deleting pods")

        #print("Waiting for services to be deleted: %s" % ', '.join(["%s:%s" % uid for uid in self.resources["services"]]))
        while self.resources["services"]:
            current_services = [(i.metadata.namespace, i.metadata.name)
                                for i in self.core_api.list_namespaced_service(
                                    self.namespace).items]
            #print("Current services: %s" % ', '.join(["%s:%s" % uid for uid in current_services]))
            deleted_services = [
                uid for uid in self.resources["services"]
                if uid not in current_services
            ]
            #print("Deleted services: %s" % ', '.join(["%s:%s" % uid for uid in deleted_services]))
            for uid in deleted_services:
                print("  - Service %s:%s*" % uid)
                del self.resources["services"][uid]
            if not self.resources["services"]: break

            # There is a short gap here that could trigger a race condition
            # but there seems to be no "query and keep watching" API that could
            # prevent that.

            #print("Remaining: %s" % ', '.join(["%s:%s" % uid for uid in self.resources["services"]]))
            w = Watch()
            for event in w.stream(self.core_api.list_namespaced_service,
                                  self.namespace,
                                  timeout_seconds=30):
                object = event['object']
                etype = event['type']
                uid = (object.metadata.namespace, object.metadata.name)
                if etype == "DELETED" and uid in self.resources["services"]:
                    print("  - Service %s:%s" % uid)
                    del self.resources["services"][uid]
                    if not self.resources["services"]: w.stop()
        #print("Done deleting services")

        all_deleted_time = datetime.now()
        print("All items deleted (deletion took %s)" %
              str(all_deleted_time - start_time))
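The race-condition comment in the middle of this example can be closed with the standard list-then-watch pattern: take resourceVersion from the list response and start the watch from it, so deletions that land between the two calls are not missed. A sketch, assuming a configured CoreV1Api and the "default" namespace:

from kubernetes import client, config, watch

config.load_kube_config()
v1 = client.CoreV1Api()
listing = v1.list_namespaced_service("default")
remaining = {i.metadata.name for i in listing.items}
w = watch.Watch()
# Resuming from the list's resourceVersion leaves no gap between list and watch.
for event in w.stream(v1.list_namespaced_service,
                      "default",
                      resource_version=listing.metadata.resource_version,
                      timeout_seconds=30):
    if event["type"] == "DELETED":
        remaining.discard(event["object"].metadata.name)
        if not remaining:
            w.stop()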
Example #24
    def monitor_pods(self):
        # Wrap watch in outer loop, it might get interrupted before we
        # are finished looking
        printed_all_up = False
        start_time = datetime.now()
        while self.resources["pods"]:
            try:
                w = Watch()
                for event in w.stream(self.core_api.list_namespaced_pod,
                                      self.namespace):
                    object = event['object']
                    etype = event['type']
                    uid = (object.metadata.namespace, object.metadata.name)
                    if uid in self.resources["pods"]:
                        if etype == "MODIFIED":

                            #print("************************************\n%s %s\n%s" \
                            #      % (etype, object.metadata.name, object))

                            ready = 0
                            total = len(object.spec.containers)
                            pod_name_ip = "n/a"
                            status = object.status.phase
                            if object.status.reason is not None:
                                status = object.status.reason
                            if object.spec.node_name and object.spec.node_name != "":
                                pod_name_ip = object.spec.node_name
                            if object.status.pod_ip and object.status.pod_ip != "":
                                pod_name_ip += "/" + object.status.pod_ip

                            initializing = False

                            # On Kubernetes 1.5, get init container status out of the annotation manually
                            if not object.status.init_container_statuses \
                               and object.metadata.annotations \
                               and "pod.alpha.kubernetes.io/init-container-statuses" in object.metadata.annotations:
                                jp = json.loads(object.metadata.annotations[
                                    "pod.alpha.kubernetes.io/init-containers"])
                                js = json.loads(object.metadata.annotations[
                                    "pod.alpha.kubernetes.io/init-container-statuses"]
                                                )
                                a = ApiClient()
                                object.spec.init_containers = \
                                 a._ApiClient__deserialize(jp, "list[V1Container]")
                                object.status.init_container_statuses = \
                                 a._ApiClient__deserialize(js, "list[V1ContainerStatus]")

                            if object.status.init_container_statuses is not None:
                                for i, cs in enumerate(
                                        object.status.init_container_statuses):
                                    if cs.state.terminated and cs.state.terminated.exit_code == 0:
                                        continue
                                    elif cs.state.terminated:
                                        if len(cs.state.terminated.reason
                                               ) == 0:
                                            if cs.state.terminated.signal != 0:
                                                status = "Init:Signal:%d" % cs.state.terminated.signal
                                            else:
                                                status = "Init:ExitCode:%d" % cs.state.terminated.exit_code
                                        else:
                                            status = "Init:" + cs.state.terminated.reason
                                        initializing = True
                                    elif cs.state.waiting and len(cs.state.waiting.reason) > 0 \
                                      and cs.state.waiting.reason != "PodInitializing":
                                        status = "Init:" + cs.state.waiting.reason
                                        initializing = True
                                    else:
                                        status = "Init:%d/%d" % (
                                            i, len(
                                                object.spec.init_containers))
                                        initializing = True
                                    break

                            if not initializing and object.status.container_statuses is not None:
                                for cs in object.status.container_statuses:
                                    if cs.ready: ready += 1
                                    if cs.state.waiting and cs.state.waiting.reason != "":
                                        status = cs.state.waiting.reason
                                    elif cs.state.terminated and cs.state.terminated.reason != "":
                                        status = cs.state.terminated.reason
                                    elif cs.state.terminated and cs.state.terminated.reason == "":
                                        if cs.state.terminated.signal != 0:
                                            status = "Signal:%d" % cs.state.terminated.signal
                                        else:
                                            statis = "ExitCode:%d" % cs.state.terminated.exit_code

                            print(" - %-24s %-18s %d/%d  %s" \
                               % (object.metadata.name, status, ready, total, pod_name_ip))

                            self.resources["pods"][uid][
                                "phase"] = object.status.phase
                            self.resources["pods"][uid]["status"] = status
                            self.resources["pods"][uid]["ready"] = ready
                            self.resources["pods"][uid]["total"] = total
                            if ((object.status.phase == "Succeeded"
                                 or object.status.phase == "Failed")
                                    and object.metadata.deletion_timestamp
                                    is None):

                                if object.status.phase == "Failed":
                                    return False

                                #print("Pod %s/%s is finished" % (object.metadata.namespace, object.metadata.name))
                                #self.delete_all()

                            if object.status.container_statuses is not None:
                                for c in filter(
                                        lambda c: c.state.terminated,
                                        object.status.container_statuses):

                                    # If any container failed, assume overall failure
                                    if c.state.terminated.exit_code != 0:
                                        print(
                                            "Container '%s' of pod '%s:%s' failed"
                                            % (c.name, uid[0], uid[1]))
                                        return False

                                    # If a sufficient container completed, assume overall completion
                                    elif c.name in self.resources["pods"][uid][
                                            "sufficient_containers"]:
                                        print(
                                            "Container '%s' of pod '%s:%s' succeeded, finishing"
                                            % (c.name, uid[0], uid[1]))
                                        return True

                        if etype == "DELETED":
                            print("Pod %s/%s has been deleted" %
                                  (object.metadata.namespace,
                                   object.metadata.name))
                            del self.resources["pods"][uid]
                            if not self.resources["pods"]:
                                w.stop()
                                print("Done watching events")

                    if not printed_all_up:
                        all_up = True
                        for k, p in self.resources["pods"].items():
                            if p["status"] != "Running":
                                all_up = False
                            if p["ready"] != p["total"]:
                                all_up = False
                        if all_up:
                            printed_all_up = True
                            all_up_time = datetime.now()
                            print("All pods up and running (setup took %s)" %
                                  str(all_up_time - start_time))

            except Exception as e:
                if str(e) != "TERM":
                    print("Exception while monitoring pods")
                    print(traceback.format_exc())
                return False

        return True
Example #25
def tail_namespace_events(
        layer: "Layer",
        earliest_event_start_time: Optional[datetime.datetime] = None,
        color_idx: int = 15,  # White Color
) -> None:
    load_opta_kube_config()
    v1 = EventsV1Api()
    watch = Watch()
    print(f"{fg(color_idx)}Showing events for namespace {layer.name}{attr(0)}")
    retry_count = 0
    old_events: List[EventsV1Event] = v1.list_namespaced_event(
        namespace=layer.name).items
    # Filter by time
    if earliest_event_start_time is not None:
        # Redefine so mypy doesn't complain about earliest_event_start_time being Optional during lambda call
        filter_start_time = earliest_event_start_time

        old_events = list(
            filter(
                lambda x: _event_last_observed(x) > filter_start_time,
                old_events,
            ))
    # Sort by timestamp
    old_events = sorted(old_events, key=lambda x: _event_last_observed(x))
    event: EventsV1Event
    for event in old_events:
        if do_not_show_event(event):
            continue
        earliest_event_start_time = _event_last_observed(event)
        print(
            f"{fg(color_idx)}{earliest_event_start_time} Namespace {layer.name} event: {event.note}{attr(0)}"
        )
    deleted_pods = set()
    while True:
        try:
            for stream_obj in watch.stream(
                    v1.list_namespaced_event,
                    namespace=layer.name,
            ):
                event = stream_obj["object"]
                event_time = _event_last_observed(event)
                if (earliest_event_start_time is None
                        or event_time > earliest_event_start_time):
                    if "Deleted pod:" in event.note:
                        deleted_pods.add(event.note.split(" ")[-1])
                    involved_object: Optional[
                        V1ObjectReference] = event.regarding
                    if (involved_object is not None
                            and involved_object.kind == "Pod"
                            and involved_object.name in deleted_pods):
                        continue
                    if do_not_show_event(event):
                        continue
                    print(
                        f"{fg(color_idx)}{event_time} Namespace {layer.name} event: {event.note}{attr(0)}"
                    )
        except ApiException as e:
            if retry_count < 5:
                print(
                    f"{fg(color_idx)}Couldn't get logs, waiting a bit and retrying{attr(0)}"
                )
                time.sleep(1 << retry_count)
                retry_count += 1
            else:
                logger.error(
                    f"{fg(color_idx)}Got the following error while trying to fetch the events in namespace {layer.name}: {e}"
                )
                return
        except Exception as e:
            # print(sys.exc_info()[2])
            logger.error(
                f"{fg(color_idx)}Got the following error while trying to fetch the events in namespace {layer.name}: {e}{attr(0)}"
            )
            logger.debug("Event watch exception", exc_info=True)
            return