Exemple #1
0
async def pod_info(
    pod: V1Pod,
    client: kubernetes_tools.KubeClient,
    num_tail_lines: int,
):
    """Assemble a summary dictionary for a single pod.

    The pod's event messages are fetched first; then, for each entry in the
    pod's container statuses, the tail of that container's output is fetched
    one container at a time. These are combined with fields read from the
    pod's metadata and status.

    :param pod: the pod to describe
    :param client: Kubernetes client handed to the lookup helpers
    :param num_tail_lines: forwarded to get_tail_lines_for_kubernetes_container
    :returns: dict with the keys name, host, deployed_timestamp, phase, ready,
        containers, reason, message, events, git_sha and config_sha
    """
    # A falsy container_statuses (e.g. None) is treated as "no containers".
    statuses = pod.status.container_statuses or []
    events = await get_pod_event_messages(client, pod)

    container_details = []
    for status in statuses:
        container_name = status.name
        tail = await get_tail_lines_for_kubernetes_container(
            client, pod, status, num_tail_lines,
        )
        container_details.append({"name": container_name, "tail_lines": tail})

    summary = {
        "name": pod.metadata.name,
        "host": kubernetes_tools.get_pod_hostname(client, pod),
        "deployed_timestamp": pod.metadata.creation_timestamp.timestamp(),
        "phase": pod.status.phase,
        "ready": kubernetes_tools.is_pod_ready(pod),
        "containers": container_details,
        "reason": pod.status.reason,
        "message": pod.status.message,
        "events": events,
        # .get() yields None when the label is not set on the pod.
        "git_sha": pod.metadata.labels.get("paasta.yelp.com/git_sha"),
        "config_sha": pod.metadata.labels.get("paasta.yelp.com/config_sha"),
    }
    return summary
Exemple #2
0
def get_pod_status(
    pod: V1Pod,
    backends: Optional[Set[str]],
) -> Dict[str, Any]:
    """Build a status dictionary for a single pod.

    :param pod: the pod to describe
    :param backends: set of addresses the pod's IP is looked up in; when it is
        not None and the pod is ready, the reported "ready" value is replaced
        by whether ``pod.status.pod_ip`` is a member of this set. Pass None to
        report the pod's own readiness unchanged.
    :returns: dict with the keys name, ip, host, phase, reason, message,
        scheduled, ready, containers, create_timestamp and delete_timestamp
        (delete_timestamp is None when the pod has no deletion timestamp)
    """
    reason = pod.status.reason
    message = pod.status.message
    scheduled = kubernetes_tools.is_pod_scheduled(pod)
    ready = kubernetes_tools.is_pod_ready(pod)
    delete_timestamp = (pod.metadata.deletion_timestamp.timestamp()
                        if pod.metadata.deletion_timestamp else None)

    if not scheduled:
        sched_condition = kubernetes_tools.get_pod_condition(
            pod, "PodScheduled")
        # The condition may not be available yet (e.g. pod not fully created);
        # in that case keep the reason/message taken from the pod status
        # instead of failing on a missing condition object.
        if sched_condition:
            reason = sched_condition.reason
            message = sched_condition.message

    if ready and backends is not None:
        # Replace readiness with whether or not it is actually registered in the mesh
        # TODO: Replace this once k8s readiness reflects mesh readiness, PAASTA-17266
        ready = pod.status.pod_ip in backends

    return {
        "name": pod.metadata.name,
        "ip": pod.status.pod_ip,
        "host": pod.status.host_ip,
        "phase": pod.status.phase,
        "reason": reason,
        "message": message,
        "scheduled": scheduled,
        "ready": ready,
        "containers": get_pod_containers(pod),
        "create_timestamp": pod.metadata.creation_timestamp.timestamp(),
        "delete_timestamp": delete_timestamp,
    }
Exemple #3
0
async def get_pod_status(
    pod: V1Pod,
    backends_task: "asyncio.Future[Dict[str, Any]]",
    client: Any,
    num_tail_lines: int,
) -> Dict[str, Any]:
    """Build a status dictionary for a single pod, fetching details concurrently.

    Event messages and container details are requested as two tasks that run
    side by side; the remaining fields are read from the pod object.

    :param pod: the pod to describe
    :param backends_task: awaitable whose result is checked for membership of
        the pod's IP to compute "mesh_ready"; when None, "mesh_ready" is None
    :param client: Kubernetes client handed to the lookup helpers
    :param num_tail_lines: forwarded to get_pod_containers
    :returns: dict with the keys name, ip, host, phase, reason, message,
        scheduled, ready, mesh_ready, containers, create_timestamp,
        delete_timestamp and events
    """
    # Only events from the last 15 minutes (900 seconds) are requested.
    fetch_events = asyncio.create_task(
        get_pod_event_messages(client, pod, max_age_in_seconds=900)
    )
    fetch_containers = asyncio.create_task(
        get_pod_containers(pod, client, num_tail_lines)
    )

    # return_exceptions=True: wait for both tasks without raising here; any
    # failure surfaces later when the individual task's result() is read.
    await asyncio.gather(fetch_events, fetch_containers, return_exceptions=True)

    reason, message = pod.status.reason, pod.status.message
    scheduled = kubernetes_tools.is_pod_scheduled(pod)
    ready = kubernetes_tools.is_pod_ready(pod)
    if pod.metadata.deletion_timestamp:
        delete_timestamp = pod.metadata.deletion_timestamp.timestamp()
    else:
        delete_timestamp = None

    try:
        events = fetch_events.result()
    except asyncio.TimeoutError:
        # A timeout is reported to the caller as a placeholder entry.
        events = [{"error": "Could not retrieve events. Please try again."}]

    if reason != "Evicted" and not scheduled:
        condition = kubernetes_tools.get_pod_condition(pod, "PodScheduled")
        # If the condition is not yet available (e.g. pod not fully created
        # yet), keep the reason/message taken from the pod status.
        if condition:
            reason, message = condition.reason, condition.message

    if backends_task is None:
        mesh_ready = None
    else:
        # TODO: Remove this once k8s readiness reflects mesh readiness, PAASTA-17266
        mesh_ready = pod.status.pod_ip in (await backends_task)

    pod_status = {
        "name": pod.metadata.name,
        "ip": pod.status.pod_ip,
        "host": pod.status.host_ip,
        "phase": pod.status.phase,
        "reason": reason,
        "message": message,
        "scheduled": scheduled,
        "ready": ready,
        "mesh_ready": mesh_ready,
        "containers": fetch_containers.result(),
        "create_timestamp": pod.metadata.creation_timestamp.timestamp(),
        "delete_timestamp": delete_timestamp,
        "events": events,
    }
    return pod_status
def healthy_flink_containers_cnt(si_pods: Sequence[V1Pod],
                                 container_type: str) -> int:
    """Count the healthy Flink pods of the given container type.

    A pod is counted when its "flink-container-type" label equals
    ``container_type``, is_pod_ready() accepts it, and container_lifetime()
    reports more than 60 seconds for it. The checks run in that order and
    stop at the first one that fails.
    """
    healthy = 0
    for candidate in si_pods:
        if candidate.metadata.labels["flink-container-type"] != container_type:
            continue
        if not is_pod_ready(candidate):
            continue
        if container_lifetime(candidate).total_seconds() > 60:
            healthy += 1
    return healthy
def get_cr_name(si_pods: Sequence[V1Pod]) -> str:
    """Derive the Flink custom resource name from the jobmanager pod's name.

    Candidate pods are those labelled "flink.yelp.com/container-type" ==
    "jobmanager" that is_pod_ready() accepts and whose container_lifetime()
    exceeds 60 seconds. When exactly one candidate exists, the part of its
    name before "-jobmanager-" is returned; otherwise an empty string.
    This change is related to FLINK-3129.
    """
    def _is_settled_jobmanager(pod) -> bool:
        # Same check order as the label/readiness/lifetime chain: cheap label
        # comparison first, so non-jobmanager pods are rejected immediately.
        if pod.metadata.labels["flink.yelp.com/container-type"] != "jobmanager":
            return False
        if not is_pod_ready(pod):
            return False
        return container_lifetime(pod).total_seconds() > 60

    candidates = [pod for pod in si_pods if _is_settled_jobmanager(pod)]
    if len(candidates) != 1:
        return ""
    pod_name = candidates[0].metadata.name
    return pod_name.split("-jobmanager-")[0]
Exemple #6
0
def get_version_for_controller_revision(
    cr: V1ControllerRevision,
    pods: Sequence[V1Pod],
    backends: Optional[Set[str]],
) -> KubernetesVersionDict:
    """Describe a ControllerRevision together with the given pods.

    :param cr: the controller revision whose metadata is reported
    :param pods: pods to report under this revision
    :param backends: forwarded unchanged to get_pod_status for every pod
    :returns: dict with the revision's name, creation timestamp and
        git/config sha labels (None when a label is absent), the total and
        ready pod counts, and one get_pod_status() entry per pod
    """
    # Readiness here is kubernetes_tools.is_pod_ready only; backends is not
    # consulted for the ready count.
    ready_count = sum(1 for pod in pods if kubernetes_tools.is_pod_ready(pod))
    metadata = cr.metadata
    return {
        "name": metadata.name,
        "type": "ControllerRevision",
        "replicas": len(pods),
        "ready_replicas": ready_count,
        "create_timestamp": metadata.creation_timestamp.timestamp(),
        "git_sha": metadata.labels.get("paasta.yelp.com/git_sha"),
        "config_sha": metadata.labels.get("paasta.yelp.com/config_sha"),
        "pods": [get_pod_status(each_pod, backends) for each_pod in pods],
    }
Exemple #7
0
def check_healthy_kubernetes_tasks_for_service_instance(
    instance_config: KubernetesDeploymentConfig,
    expected_count: int,
    all_pods: Sequence[V1Pod],
) -> None:
    """Report replication for a service instance based on its ready pods.

    Narrows ``all_pods`` down to the pods of this service instance, counts
    those that is_pod_ready() accepts, and passes that count together with
    ``expected_count`` to
    monitoring_tools.send_replication_event_if_under_replication.

    :param instance_config: config identifying the service and instance
    :param expected_count: number of instances expected to be available
    :param all_pods: pods to search for this service instance
    """
    service = instance_config.service
    instance = instance_config.instance
    matching_pods = filter_pods_by_service_instance(
        pod_list=all_pods, service=service, instance=instance,
    )
    ready_total = sum(1 for pod in matching_pods if is_pod_ready(pod))
    log.info(
        f"Checking {instance_config.service}.{instance_config.instance} in kubernetes as it is not in smartstack"
    )
    monitoring_tools.send_replication_event_if_under_replication(
        instance_config=instance_config,
        expected_count=expected_count,
        num_available=ready_total,
    )
Exemple #8
0
async def get_pod_status_tasks_by_sha_and_readiness(
    pods_task: "asyncio.Future[V1Pod]",
    backends_task: "asyncio.Future[Dict[str, Any]]",
    client: kubernetes_tools.KubeClient,
    verbose: int,
) -> DefaultDict[
    Tuple[str, str], DefaultDict[bool, List["asyncio.Future[Dict[str, Any]]"]]
]:
    """Start one get_pod_status task per pod and group the tasks.

    Tasks are keyed first by the pod's (git_sha, config_sha) label pair and
    then by whether kubernetes_tools.is_pod_ready() accepts the pod. The
    tasks are created but not awaited here.

    :param pods_task: awaitable yielding the pods to iterate over
    :param backends_task: forwarded unchanged to every get_pod_status call
    :param client: Kubernetes client forwarded to get_pod_status
    :param verbose: verbosity level, converted via calculate_tail_lines
    :returns: nested defaultdict: (git_sha, config_sha) -> is_ready -> tasks
    """
    num_tail_lines = calculate_tail_lines(verbose)
    grouped: DefaultDict[
        Tuple[str, str], DefaultDict[bool, List["asyncio.Future[Dict[str, Any]]"]]
    ] = defaultdict(lambda: defaultdict(list))

    pods = await pods_task
    for pod in pods:
        labels = pod.metadata.labels
        # Direct indexing: a pod missing either label raises KeyError.
        version_key = (
            labels["paasta.yelp.com/git_sha"],
            labels["paasta.yelp.com/config_sha"],
        )
        readiness = kubernetes_tools.is_pod_ready(pod)
        status_task = asyncio.create_task(
            get_pod_status(pod, backends_task, client, num_tail_lines)
        )
        grouped[version_key][readiness].append(status_task)

    return grouped
Exemple #9
0
async def job_status(
    kstatus: MutableMapping[str, Any],
    client: kubernetes_tools.KubeClient,
    job_config: LongRunningServiceConfig,
    pod_list: Sequence[V1Pod],
    replicaset_list: Sequence[V1ReplicaSet],
    verbose: int,
    namespace: str,
) -> None:
    """Fill ``kstatus`` in place with the status of a long-running service.

    Always written: app_id, pods, replicasets, expected_instance_count,
    deploy_status, running_instance_count, create_timestamp and namespace.
    The "pods" and "replicasets" lists stay empty unless ``verbose`` > 0.

    :param kstatus: mapping that receives the status fields
    :param client: Kubernetes client used for the lookups
    :param job_config: service config providing the deployment name and the
        expected instance count
    :param pod_list: pods reported when verbose > 0
    :param replicaset_list: replicasets reported when verbose > 0
    :param verbose: verbosity level; also converted via calculate_tail_lines
    :param namespace: namespace used to look up the app by name
    """
    app_id = job_config.get_sanitised_deployment_name()
    kstatus["app_id"] = app_id
    kstatus["pods"] = []
    kstatus["replicasets"] = []

    if verbose > 0:
        num_tail_lines = calculate_tail_lines(verbose)

        for pod in pod_list:
            # A falsy container_statuses (e.g. None) means no containers.
            container_details = []
            for status in pod.status.container_statuses or []:
                container_name = status.name
                tail = await get_tail_lines_for_kubernetes_container(
                    client, pod, status, num_tail_lines,
                )
                container_details.append(
                    {"name": container_name, "tail_lines": tail}
                )

            pod_entry = {
                "name": pod.metadata.name,
                "host": kubernetes_tools.get_pod_hostname(client, pod),
                "deployed_timestamp": pod.metadata.creation_timestamp.timestamp(),
                "phase": pod.status.phase,
                "ready": kubernetes_tools.is_pod_ready(pod),
                "containers": container_details,
                "reason": pod.status.reason,
                "message": pod.status.message,
            }
            kstatus["pods"].append(pod_entry)

        for replicaset in replicaset_list:
            # Both a failed attribute lookup and an explicit None count as
            # zero ready replicas.
            try:
                ready_replicas = replicaset.status.ready_replicas
            except AttributeError:
                ready_replicas = None
            if ready_replicas is None:
                ready_replicas = 0

            replicaset_entry = {
                "name": replicaset.metadata.name,
                "replicas": replicaset.spec.replicas,
                "ready_replicas": ready_replicas,
                "create_timestamp": replicaset.metadata.creation_timestamp.timestamp(),
            }
            kstatus["replicasets"].append(replicaset_entry)

    kstatus["expected_instance_count"] = job_config.get_instances()

    app = kubernetes_tools.get_kubernetes_app_by_name(
        name=app_id, kube_client=client, namespace=namespace
    )
    deploy_status = kubernetes_tools.get_kubernetes_app_deploy_status(
        app=app, desired_instances=job_config.get_instances()
    )
    deploy_status_text = kubernetes_tools.KubernetesDeployStatus.tostring(
        deploy_status
    )
    kstatus["deploy_status"] = deploy_status_text
    # A falsy ready_replicas (None or 0) is reported as 0.
    kstatus["running_instance_count"] = app.status.ready_replicas or 0
    kstatus["create_timestamp"] = app.metadata.creation_timestamp.timestamp()
    kstatus["namespace"] = app.metadata.namespace