def bounce_status(
    service: str,
    instance: str,
    settings: Any,
):
    """Collect the bounce-related state of one Kubernetes service instance.

    Loads the service config for ``settings.cluster``, looks up the matching
    Kubernetes app and reports instance counts, deploy status and the set of
    active shas.

    :param service: service name passed to the config loader
    :param instance: instance name passed to the config loader
    :param settings: object exposing ``cluster``, ``soa_dir`` and
        ``kubernetes_client``
    :returns: dict with the keys ``expected_instance_count``,
        ``desired_state``, ``running_instance_count``, ``deploy_status``,
        ``active_shas`` and ``app_count``
    :raises RuntimeError: if ``settings.kubernetes_client`` is ``None``
    """
    job_config = kubernetes_tools.load_kubernetes_service_config(
        service=service,
        instance=instance,
        cluster=settings.cluster,
        soa_dir=settings.soa_dir,
        load_deployments=True,
    )
    expected_instance_count = job_config.get_instances()
    desired_state = job_config.get_desired_state()

    kube_client = settings.kubernetes_client
    if kube_client is None:
        raise RuntimeError("Could not load Kubernetes client!")

    app = kubernetes_tools.get_kubernetes_app_by_name(
        name=job_config.get_sanitised_deployment_name(),
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )
    # ready_replicas may be falsy (e.g. None); report that as 0.
    running_instance_count = app.status.ready_replicas or 0

    # A service whose desired state is "stop" is expected to run 0 instances.
    if desired_state != "stop":
        desired_instances = expected_instance_count
    else:
        desired_instances = 0
    # Only the status half of the returned pair is reported here.
    deploy_status, _ = kubernetes_tools.get_kubernetes_app_deploy_status(
        app=app,
        desired_instances=desired_instances,
    )
    deploy_status_name = kubernetes_tools.KubernetesDeployStatus.tostring(
        deploy_status
    )

    # With persistent volumes the versions are read from controller
    # revisions; otherwise from the replicasets that are actually running.
    has_persistent_volumes = job_config.get_persistent_volumes()
    lookup_kwargs = dict(
        service=job_config.service,
        instance=job_config.instance,
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )
    if has_persistent_volumes:
        version_objects = kubernetes_tools.controller_revisions_for_service_instance(
            **lookup_kwargs
        )
    else:
        version_objects = filter_actually_running_replicasets(
            kubernetes_tools.replicasets_for_service_instance(**lookup_kwargs)
        )

    active_shas = kubernetes_tools.get_active_shas_for_service(
        [app, *version_objects],
    )

    status: Dict[str, Any] = {
        "expected_instance_count": expected_instance_count,
        "desired_state": desired_state,
        "running_instance_count": running_instance_count,
        "deploy_status": deploy_status_name,
        "active_shas": list(active_shas),
        "app_count": len(active_shas),
    }
    return status
async def job_status(
    kstatus: MutableMapping[str, Any],
    client: kubernetes_tools.KubeClient,
    job_config: LongRunningServiceConfig,
    pod_list: Sequence[V1Pod],
    replicaset_list: Sequence[V1ReplicaSet],
    verbose: int,
    namespace: str,
) -> None:
    """Fill ``kstatus`` in place with job-level details for one deployment.

    Writes the keys ``app_id``, ``pods``, ``replicasets``,
    ``expected_instance_count``, ``deploy_status``, ``deploy_status_message``,
    ``running_instance_count``, ``create_timestamp`` and ``namespace``.

    :param kstatus: mapping that is mutated with the collected status
    :param client: Kubernetes client used for the app and deploy-status lookups
    :param job_config: config object of the service instance
    :param pod_list: pods of the instance; only inspected when ``verbose > 0``
    :param replicaset_list: replicasets summarised under ``replicasets``
    :param verbose: verbosity level; also drives the number of tail lines
    :param namespace: namespace in which the app is looked up
    """
    app_id = job_config.get_sanitised_deployment_name()
    kstatus["app_id"] = app_id
    kstatus["pods"] = []
    kstatus["replicasets"] = []

    if verbose > 0:
        num_tail_lines = calculate_tail_lines(verbose)
        # Per-pod details are gathered concurrently.
        pod_tasks = [pod_info(pod, client, num_tail_lines) for pod in pod_list]
        kstatus["pods"] = await asyncio.gather(*pod_tasks)

    for rs in replicaset_list:
        # A replicaset without a status object, or with ready_replicas unset,
        # is reported as having 0 ready replicas.
        try:
            ready = rs.status.ready_replicas
        except AttributeError:
            ready = None
        if ready is None:
            ready = 0
        rs_summary = {
            "name": rs.metadata.name,
            "replicas": rs.spec.replicas,
            "ready_replicas": ready,
            "create_timestamp": rs.metadata.creation_timestamp.timestamp(),
        }
        kstatus["replicasets"].append(rs_summary)

    kstatus["expected_instance_count"] = job_config.get_instances()
    app = kubernetes_tools.get_kubernetes_app_by_name(
        name=app_id, kube_client=client, namespace=namespace
    )

    # A stopped service is expected to run nothing.
    if job_config.get_desired_state() != "stop":
        desired_instances = job_config.get_instances()
    else:
        desired_instances = 0
    status_and_message = await kubernetes_tools.get_kubernetes_app_deploy_status(
        app=app,
        kube_client=client,
        desired_instances=desired_instances,
    )
    deploy_status, message = status_and_message

    kstatus["deploy_status"] = kubernetes_tools.KubernetesDeployStatus.tostring(
        deploy_status
    )
    kstatus["deploy_status_message"] = message
    kstatus["running_instance_count"] = app.status.ready_replicas or 0
    kstatus["create_timestamp"] = app.metadata.creation_timestamp.timestamp()
    kstatus["namespace"] = app.metadata.namespace
def kubernetes_job_status(
    kstatus: MutableMapping[str, Any],
    client: kubernetes_tools.KubeClient,
    job_config: kubernetes_tools.KubernetesDeploymentConfig,
    pod_list: Sequence[V1Pod],
    replicaset_list: Sequence[V1ReplicaSet],
    verbose: int,
) -> None:
    """Fill ``kstatus`` in place with job-level details for one deployment.

    Writes the keys ``app_id``, ``pods``, ``replicasets``,
    ``expected_instance_count``, ``deploy_status``,
    ``running_instance_count``, ``create_timestamp`` and ``namespace``.

    :param kstatus: mapping that is mutated with the collected status
    :param client: Kubernetes client used for the app and deploy-status lookups
    :param job_config: deployment config of the service instance
    :param pod_list: pods of the instance; only reported when ``verbose > 0``
    :param replicaset_list: replicasets summarised under ``replicasets``
    :param verbose: verbosity level; pods are listed only above 0
    """
    app_id = job_config.get_sanitised_deployment_name()
    kstatus["app_id"] = app_id
    kstatus["pods"] = []
    kstatus["replicasets"] = []

    if verbose > 0:
        for pod in pod_list:
            kstatus["pods"].append(
                {
                    "name": pod.metadata.name,
                    "host": pod.spec.node_name,
                    "deployed_timestamp": pod.metadata.creation_timestamp.timestamp(),
                    "phase": pod.status.phase,
                }
            )

    for replicaset in replicaset_list:
        # ready_replicas is unset (None) until at least one replica is ready,
        # and the status object itself may be missing. Report 0 in both cases
        # so consumers always get an integer, matching how
        # running_instance_count is normalised below.
        ready_replicas = getattr(replicaset.status, "ready_replicas", None) or 0
        kstatus["replicasets"].append(
            {
                "name": replicaset.metadata.name,
                "replicas": replicaset.spec.replicas,
                "ready_replicas": ready_replicas,
                "create_timestamp": replicaset.metadata.creation_timestamp.timestamp(),
            }
        )

    kstatus["expected_instance_count"] = job_config.get_instances()
    app = kubernetes_tools.get_kubernetes_app_by_name(app_id, client)
    deploy_status = kubernetes_tools.get_kubernetes_app_deploy_status(
        client, app, job_config.get_instances()
    )
    kstatus["deploy_status"] = kubernetes_tools.KubernetesDeployStatus.tostring(
        deploy_status
    )
    kstatus["running_instance_count"] = app.status.ready_replicas or 0
    kstatus["create_timestamp"] = app.metadata.creation_timestamp.timestamp()
    kstatus["namespace"] = app.metadata.namespace
def kubernetes_job_status(
    kstatus: MutableMapping[str, Any],
    client: kubernetes_tools.KubeClient,
    job_config: kubernetes_tools.KubernetesDeploymentConfig,
    pod_list: Sequence[V1Pod],
    verbose: bool,
) -> None:
    """Fill ``kstatus`` in place with the basic status of one deployment.

    Writes ``app_id``, ``expected_instance_count``, ``deploy_status`` and
    ``running_instance_count``; ``slaves`` is added only when ``verbose`` is
    exactly ``True``.

    :param kstatus: mapping that is mutated with the collected status
    :param client: Kubernetes client used for the app and deploy-status lookups
    :param job_config: deployment config of the service instance
    :param pod_list: pods whose node names are listed in verbose mode
    :param verbose: when ``True``, include the node name of every pod
    """
    app_id = job_config.get_sanitised_deployment_name()
    kstatus["app_id"] = app_id

    # Identity check on purpose: only the literal True enables this output.
    if verbose is True:
        node_names = []
        for pod in pod_list:
            node_names.append(pod.spec.node_name)
        kstatus["slaves"] = node_names

    kstatus["expected_instance_count"] = job_config.get_instances()

    app = kubernetes_tools.get_kubernetes_app_by_name(app_id, client)
    deploy_status = kubernetes_tools.get_kubernetes_app_deploy_status(
        client,
        app,
        job_config.get_instances(),
    )
    kstatus["deploy_status"] = kubernetes_tools.KubernetesDeployStatus.tostring(
        deploy_status
    )
    # ready_replicas may be falsy (e.g. None); report that as 0.
    kstatus["running_instance_count"] = app.status.ready_replicas or 0
def kubernetes_status(
    service: str,
    instance: str,
    verbose: int,
    include_smartstack: bool,
    instance_type: str,
    settings: Any,
) -> Mapping[str, Any]:
    """Build the Kubernetes status mapping for one service instance.

    :param service: service name passed to the config loader
    :param instance: instance name passed to the config loader
    :param verbose: verbosity level, forwarded to ``job_status``; values above
        0 also request individual smartstack backends
    :param include_smartstack: when true, add a ``smartstack`` section if the
        service namespace config contains ``proxy_port``
    :param instance_type: key into ``LONG_RUNNING_INSTANCE_TYPE_HANDLERS``
        selecting the config loader
    :param settings: object exposing ``cluster``, ``soa_dir`` and
        ``kubernetes_client``
    :returns: the status mapping; empty when ``settings.kubernetes_client`` is
        ``None``
    """
    kstatus: Dict[str, Any] = {}
    config_loader = LONG_RUNNING_INSTANCE_TYPE_HANDLERS[instance_type].loader
    job_config = config_loader(
        service=service,
        instance=instance,
        cluster=settings.cluster,
        soa_dir=settings.soa_dir,
        load_deployments=True,
    )
    kube_client = settings.kubernetes_client
    if kube_client is None:
        return kstatus

    app = kubernetes_tools.get_kubernetes_app_by_name(
        name=job_config.get_sanitised_deployment_name(),
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )

    # bouncing status can be inferred from app_count, ref get_bouncing_status
    pod_list = kubernetes_tools.pods_for_service_instance(
        service=job_config.service,
        instance=job_config.instance,
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )
    replicaset_list = kubernetes_tools.replicasets_for_service_instance(
        service=job_config.service,
        instance=job_config.instance,
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )
    active_shas = kubernetes_tools.get_active_shas_for_service(
        [app, *pod_list, *replicaset_list]
    )
    kstatus["app_count"] = max(
        len(active_shas["config_sha"]), len(active_shas["git_sha"])
    )
    kstatus["desired_state"] = job_config.get_desired_state()
    kstatus["bounce_method"] = job_config.get_bounce_method()

    # NOTE(review): job_status is called without ``await``; presumably it is
    # wrapped by a blocking adapter that is not visible here - confirm.
    job_status(
        kstatus=kstatus,
        client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
        job_config=job_config,
        verbose=verbose,
        pod_list=pod_list,
        replicaset_list=replicaset_list,
    )

    if job_config.is_autoscaling_enabled() is True:
        try:
            kstatus["autoscaling_status"] = autoscaling_status(
                kube_client, job_config, job_config.get_kubernetes_namespace()
            )
        except ApiException as e:
            # Use getResponseBody() when the exception offers it, otherwise
            # fall back to the ``body`` attribute (or the exception itself),
            # so building the message cannot raise a second error.
            get_body = getattr(e, "getResponseBody", None)
            detail = get_body() if callable(get_body) else getattr(e, "body", e)
            # "error_message" is never initialised in this function, so the
            # message is stored directly instead of being appended to it.
            kstatus[
                "error_message"
            ] = f"Error while reading autoscaling information: {detail}"

    kstatus["evicted_count"] = sum(
        1 for pod in pod_list if pod.status.reason == "Evicted"
    )

    if include_smartstack:
        service_namespace_config = kubernetes_tools.load_service_namespace_config(
            service=job_config.get_service_name_smartstack(),
            namespace=job_config.get_nerve_namespace(),
            soa_dir=settings.soa_dir,
        )
        # The smartstack section is only reported when proxy_port is configured.
        if "proxy_port" in service_namespace_config:
            kstatus["smartstack"] = smartstack_status(
                service=job_config.get_service_name_smartstack(),
                instance=job_config.get_nerve_namespace(),
                job_config=job_config,
                service_namespace_config=service_namespace_config,
                pods=pod_list,
                should_return_individual_backends=verbose > 0,
                settings=settings,
            )
    return kstatus
async def job_status(
    kstatus: MutableMapping[str, Any],
    client: kubernetes_tools.KubeClient,
    job_config: LongRunningServiceConfig,
    pod_list: Sequence[V1Pod],
    replicaset_list: Sequence[V1ReplicaSet],
    verbose: int,
    namespace: str,
) -> None:
    """Fill ``kstatus`` in place with job-level details for one deployment.

    Writes the keys ``app_id``, ``pods``, ``replicasets``,
    ``expected_instance_count``, ``deploy_status``,
    ``running_instance_count``, ``create_timestamp`` and ``namespace``.

    :param kstatus: mapping that is mutated with the collected status
    :param client: Kubernetes client used for hostname, log-tail and app lookups
    :param job_config: config object of the service instance
    :param pod_list: pods of the instance; only reported when ``verbose > 0``
    :param replicaset_list: replicasets summarised under ``replicasets``
    :param verbose: verbosity level; also drives the number of tail lines
    :param namespace: namespace in which the app is looked up
    """
    app_id = job_config.get_sanitised_deployment_name()
    kstatus["app_id"] = app_id
    kstatus["pods"] = []
    kstatus["replicasets"] = []

    if verbose > 0:
        num_tail_lines = calculate_tail_lines(verbose)
        for pod in pod_list:
            # Tail lines are awaited one container at a time, in order.
            container_entries = []
            for container in pod.status.container_statuses or []:
                container_name = container.name
                tail_lines = await get_tail_lines_for_kubernetes_container(
                    client,
                    pod,
                    container,
                    num_tail_lines,
                )
                container_entries.append(
                    {"name": container_name, "tail_lines": tail_lines}
                )

            pod_entry = {
                "name": pod.metadata.name,
                "host": kubernetes_tools.get_pod_hostname(client, pod),
                "deployed_timestamp": pod.metadata.creation_timestamp.timestamp(),
                "phase": pod.status.phase,
                "ready": kubernetes_tools.is_pod_ready(pod),
                "containers": container_entries,
                "reason": pod.status.reason,
                "message": pod.status.message,
            }
            kstatus["pods"].append(pod_entry)

    for rs in replicaset_list:
        # A replicaset without a status object, or with ready_replicas unset,
        # is reported as having 0 ready replicas.
        try:
            ready = rs.status.ready_replicas
        except AttributeError:
            ready = None
        if ready is None:
            ready = 0
        rs_entry = {
            "name": rs.metadata.name,
            "replicas": rs.spec.replicas,
            "ready_replicas": ready,
            "create_timestamp": rs.metadata.creation_timestamp.timestamp(),
        }
        kstatus["replicasets"].append(rs_entry)

    kstatus["expected_instance_count"] = job_config.get_instances()

    app = kubernetes_tools.get_kubernetes_app_by_name(
        name=app_id,
        kube_client=client,
        namespace=namespace,
    )
    deploy_status = kubernetes_tools.get_kubernetes_app_deploy_status(
        app=app,
        desired_instances=job_config.get_instances(),
    )
    deploy_status_name = kubernetes_tools.KubernetesDeployStatus.tostring(
        deploy_status
    )
    kstatus["deploy_status"] = deploy_status_name
    # ready_replicas may be falsy (e.g. None); report that as 0.
    kstatus["running_instance_count"] = app.status.ready_replicas or 0
    kstatus["create_timestamp"] = app.metadata.creation_timestamp.timestamp()
    kstatus["namespace"] = app.metadata.namespace
def kubernetes_status(
    service: str,
    instance: str,
    verbose: int,
    include_smartstack: bool,
    include_envoy: bool,
    instance_type: str,
    settings: Any,
) -> Mapping[str, Any]:
    """Build the Kubernetes status mapping for one service instance.

    :param service: service name, used for the config and mesh lookups
    :param instance: instance name passed to the config loader
    :param verbose: verbosity level, forwarded to ``job_status``; values above
        0 also request individual mesh backends
    :param include_smartstack: when true, add a ``smartstack`` section
    :param include_envoy: when true, add an ``envoy`` section
    :param instance_type: key into ``LONG_RUNNING_INSTANCE_TYPE_HANDLERS``
        selecting the config loader
    :param settings: object exposing ``cluster``, ``soa_dir`` and
        ``kubernetes_client``
    :returns: the status mapping; empty when ``settings.kubernetes_client`` is
        ``None``
    """
    kstatus: Dict[str, Any] = {}
    load_config = LONG_RUNNING_INSTANCE_TYPE_HANDLERS[instance_type].loader
    job_config = load_config(
        service=service,
        instance=instance,
        cluster=settings.cluster,
        soa_dir=settings.soa_dir,
        load_deployments=True,
    )

    kube_client = settings.kubernetes_client
    if kube_client is None:
        return kstatus

    app = kubernetes_tools.get_kubernetes_app_by_name(
        name=job_config.get_sanitised_deployment_name(),
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )

    # bouncing status can be inferred from app_count, ref get_bouncing_status
    pod_list = kubernetes_tools.pods_for_service_instance(
        service=job_config.service,
        instance=job_config.instance,
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )
    replicaset_list = kubernetes_tools.replicasets_for_service_instance(
        service=job_config.service,
        instance=job_config.instance,
        kube_client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
    )

    # For the purpose of active_shas/app_count, don't count replicasets that
    # are at 0/0.
    running_replicasets = filter_actually_running_replicasets(replicaset_list)
    versioned_objects = [app, *pod_list, *running_replicasets]
    active_shas = kubernetes_tools.get_active_shas_for_service(versioned_objects)

    kstatus["app_count"] = len(active_shas)
    kstatus["desired_state"] = job_config.get_desired_state()
    kstatus["bounce_method"] = job_config.get_bounce_method()
    kstatus["active_shas"] = list(active_shas)

    # NOTE(review): job_status is called without ``await``; presumably it is
    # wrapped by a blocking adapter that is not visible here - confirm.
    job_status(
        kstatus=kstatus,
        client=kube_client,
        namespace=job_config.get_kubernetes_namespace(),
        job_config=job_config,
        verbose=verbose,
        pod_list=pod_list,
        replicaset_list=replicaset_list,
    )

    # Autoscaling status is skipped when the decision policy is "bespoke".
    if job_config.is_autoscaling_enabled() is True:
        decision_policy = job_config.get_autoscaling_params().get(  # type: ignore
            "decision_policy", ""
        )
        if decision_policy != "bespoke":
            try:
                kstatus["autoscaling_status"] = autoscaling_status(
                    kube_client,
                    job_config,
                    job_config.get_kubernetes_namespace(),
                )
            except Exception as e:
                # Best effort: any failure is reported in the payload instead
                # of aborting the whole status call.
                kstatus[
                    "error_message"
                ] = f"Unknown error occurred while fetching autoscaling status. Please contact #compute-infra for help: {e}"

    kstatus["evicted_count"] = sum(
        1 for pod in pod_list if pod.status.reason == "Evicted"
    )

    if not (include_smartstack or include_envoy):
        return kstatus

    service_namespace_config = kubernetes_tools.load_service_namespace_config(
        service=service,
        namespace=job_config.get_nerve_namespace(),
        soa_dir=settings.soa_dir,
    )
    # Mesh sections are only reported when proxy_port is configured.
    if "proxy_port" not in service_namespace_config:
        return kstatus

    def _mesh_section(service_mesh):
        # Same arguments for either mesh; only the mesh selector differs.
        return mesh_status(
            service=service,
            service_mesh=service_mesh,
            instance=job_config.get_nerve_namespace(),
            job_config=job_config,
            service_namespace_config=service_namespace_config,
            pods=pod_list,
            should_return_individual_backends=verbose > 0,
            settings=settings,
        )

    if include_smartstack:
        kstatus["smartstack"] = _mesh_section(ServiceMesh.SMARTSTACK)
    if include_envoy:
        kstatus["envoy"] = _mesh_section(ServiceMesh.ENVOY)
    return kstatus