def perform_command(command, service, instance, cluster, verbose, soa_dir, app_id=None, delta=None):
    """Performs a start/stop/restart/status on an instance
    :param command: String of start, stop, restart, status
    :param service: service name
    :param instance: instance name, like "main" or "canary"
    :param cluster: cluster name
    :param verbose: int verbosity level
    :returns: A unix-style return code
    """
    system_config = load_system_paasta_config()
    marathon_config = marathon_tools.load_marathon_config()
    job_config = marathon_tools.load_marathon_service_config(service, instance, cluster, soa_dir=soa_dir)
    if not app_id:
        try:
            app_id = job_config.format_marathon_app_dict()['id']
        except NoDockerImageError:
            job_id = compose_job_id(service, instance)
            print "Docker image for %s not in deployments.json. Exiting. Has Jenkins deployed it?" % job_id
            return 1

    normal_instance_count = job_config.get_instances()
    normal_smartstack_count = marathon_tools.get_expected_instance_count_for_namespace(service, instance, cluster)
    proxy_port = marathon_tools.get_proxy_port_for_instance(service, instance, cluster, soa_dir=soa_dir)

    client = marathon_tools.get_marathon_client(
        marathon_config.get_url(),
        marathon_config.get_username(),
        marathon_config.get_password(),
    )
    if command == 'restart':
        restart_marathon_job(service, instance, app_id, client, cluster)
    elif command == 'status':
        print status_desired_state(service, instance, client, job_config)
        print status_marathon_job(service, instance, app_id, normal_instance_count, client)
        tasks, out = status_marathon_job_verbose(service, instance, client)
        if verbose > 0:
            print out
        print status_mesos_tasks(service, instance, normal_instance_count)
        if verbose > 0:
            tail_lines = calculate_tail_lines(verbose_level=verbose)
            print status_mesos_tasks_verbose(
                job_id=app_id,
                get_short_task_id=get_short_task_id,
                tail_lines=tail_lines,
            )
        if proxy_port is not None:
            print status_smartstack_backends(
                service=service,
                instance=instance,
                cluster=cluster,
                job_config=job_config,
                tasks=tasks,
                expected_count=normal_smartstack_count,
                soa_dir=soa_dir,
                verbose=verbose > 0,
                synapse_port=system_config.get_synapse_port(),
                synapse_haproxy_url_format=system_config.get_synapse_haproxy_url_format(),
            )
    else:
        # The command parser shouldn't have let us get this far...
        raise NotImplementedError("Command %s is not implemented!" % command)
    return 0
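# Every example in this listing funnels the CLI verbosity level through
# calculate_tail_lines to decide how many log lines to tail per task or
# container. A minimal sketch of such a helper, assuming a simple
# "each extra -v multiplies the tail length by ten" policy; the actual
# implementation in paasta_tools may differ:
def calculate_tail_lines(verbose_level: int) -> int:
    # verbosity 0 or 1: no tailing; 2 -> 10 lines, 3 -> 100 lines, ...
    if verbose_level <= 1:
        return 0
    return 10 ** (verbose_level - 1)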
async def job_status(
    kstatus: MutableMapping[str, Any],
    client: kubernetes_tools.KubeClient,
    job_config: LongRunningServiceConfig,
    pod_list: Sequence[V1Pod],
    replicaset_list: Sequence[V1ReplicaSet],
    verbose: int,
    namespace: str,
) -> None:
    app_id = job_config.get_sanitised_deployment_name()
    kstatus["app_id"] = app_id
    kstatus["pods"] = []
    kstatus["replicasets"] = []
    if verbose > 0:
        num_tail_lines = calculate_tail_lines(verbose)
        kstatus["pods"] = await asyncio.gather(
            *[pod_info(pod, client, num_tail_lines) for pod in pod_list]
        )

    for replicaset in replicaset_list:
        try:
            ready_replicas = replicaset.status.ready_replicas
            if ready_replicas is None:
                ready_replicas = 0
        except AttributeError:
            ready_replicas = 0

        kstatus["replicasets"].append(
            {
                "name": replicaset.metadata.name,
                "replicas": replicaset.spec.replicas,
                "ready_replicas": ready_replicas,
                "create_timestamp": replicaset.metadata.creation_timestamp.timestamp(),
            }
        )

    kstatus["expected_instance_count"] = job_config.get_instances()

    app = kubernetes_tools.get_kubernetes_app_by_name(
        name=app_id, kube_client=client, namespace=namespace
    )
    desired_instances = (
        job_config.get_instances() if job_config.get_desired_state() != "stop" else 0
    )
    deploy_status, message = await kubernetes_tools.get_kubernetes_app_deploy_status(
        app=app,
        kube_client=client,
        desired_instances=desired_instances,
    )
    kstatus["deploy_status"] = kubernetes_tools.KubernetesDeployStatus.tostring(
        deploy_status
    )
    kstatus["deploy_status_message"] = message
    kstatus["running_instance_count"] = (
        app.status.ready_replicas if app.status.ready_replicas else 0
    )
    kstatus["create_timestamp"] = app.metadata.creation_timestamp.timestamp()
    kstatus["namespace"] = app.metadata.namespace
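# The job_status variant above gathers per-pod dicts via a pod_info coroutine that
# is not shown in this listing. A plausible sketch of that helper, reconstructed
# from the inline pod loop in the older job_status variant at the end of this
# listing (the real implementation may differ):
async def pod_info(
    pod: V1Pod, client: kubernetes_tools.KubeClient, num_tail_lines: int
) -> Dict[str, Any]:
    # Tail the last num_tail_lines of logs for each container in the pod.
    container_statuses = pod.status.container_statuses or []
    containers = [
        dict(
            name=container.name,
            tail_lines=await get_tail_lines_for_kubernetes_container(
                client, pod, container, num_tail_lines,
            ),
        )
        for container in container_statuses
    ]
    return {
        "name": pod.metadata.name,
        "host": kubernetes_tools.get_pod_hostname(client, pod),
        "deployed_timestamp": pod.metadata.creation_timestamp.timestamp(),
        "phase": pod.status.phase,
        "ready": kubernetes_tools.is_pod_ready(pod),
        "containers": containers,
        "reason": pod.status.reason,
        "message": pod.status.message,
    }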
async def marathon_mesos_status(
    service: str, instance: str, verbose: int
) -> MutableMapping[str, Any]:
    mesos_status: MutableMapping[str, Any] = {}

    job_id = marathon_tools.format_job_id(service, instance)
    job_id_filter_string = f"{job_id}{marathon_tools.MESOS_TASK_SPACER}"

    try:
        running_and_active_tasks = select_tasks_by_id(
            await get_cached_list_of_running_tasks_from_frameworks(),
            job_id=job_id_filter_string,
        )
    except (ReadTimeout, asyncio.TimeoutError):
        return {"error_message": "Talking to Mesos timed out. It may be overloaded."}

    mesos_status["running_task_count"] = len(running_and_active_tasks)

    if verbose > 0:
        num_tail_lines = calculate_tail_lines(verbose)

        running_task_dict_futures = []
        for task in running_and_active_tasks:
            running_task_dict_futures.append(
                asyncio.ensure_future(get_mesos_running_task_dict(task, num_tail_lines))
            )

        non_running_tasks = select_tasks_by_id(
            await get_cached_list_of_not_running_tasks_from_frameworks(),
            job_id=job_id_filter_string,
        )
        non_running_tasks.sort(key=lambda task: get_first_status_timestamp(task) or 0)
        non_running_tasks = list(reversed(non_running_tasks[-10:]))

        non_running_task_dict_futures = []
        for task in non_running_tasks:
            non_running_task_dict_futures.append(
                asyncio.ensure_future(get_mesos_non_running_task_dict(task, num_tail_lines))
            )

        all_task_dict_futures = running_task_dict_futures + non_running_task_dict_futures
        if len(all_task_dict_futures):
            await asyncio.wait(all_task_dict_futures)

        mesos_status["running_tasks"] = [
            task_future.result() for task_future in running_task_dict_futures
        ]
        mesos_status["non_running_tasks"] = [
            task_future.result() for task_future in non_running_task_dict_futures
        ]

    return mesos_status
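# Hypothetical synchronous driver showing how an async status helper such as
# marathon_mesos_status above could be invoked from CLI code. The function name
# print_mesos_status and the output formatting are illustrative only; note that
# the "running_tasks" / "non_running_tasks" keys are only present when verbose > 0.
import asyncio


def print_mesos_status(service: str, instance: str, verbose: int) -> None:
    mesos_status = asyncio.run(marathon_mesos_status(service, instance, verbose))
    if "error_message" in mesos_status:
        print(mesos_status["error_message"])
        return
    print(f"Running tasks: {mesos_status['running_task_count']}")
    for task in mesos_status.get("running_tasks", []):
        print(task)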
def format_chronos_job_status(client, job, running_tasks, verbose=0):
    """Given a job, returns a pretty-printed human readable output regarding the status of the job.

    :param job: dictionary of the job status
    :param running_tasks: a list of Mesos tasks associated with ``job``, e.g. the
                          result of ``mesos_tools.get_running_tasks_from_frameworks()``.
    :param verbose: int verbosity level
    """
    job_name = _format_job_name(job)
    is_temporary = chronos_tools.is_temporary_job(job) if 'name' in job else 'UNKNOWN'
    job_name = modify_string_for_rerun_status(job_name, is_temporary)
    disabled_state = _format_disabled_status(job)
    service, instance = chronos_tools.decompose_job_id(job['name'])
    chronos_state = chronos_tools.get_chronos_status_for_job(client, service, instance)

    (last_result, formatted_time) = _format_last_result(job)

    job_type = chronos_tools.get_job_type(job)
    schedule_type = _get_schedule_field_for_job_type(job_type)
    schedule_formatter = get_schedule_formatter(job_type, verbose)
    schedule_value = schedule_formatter(job)

    command = _format_command(job)
    mesos_status = _format_mesos_status(running_tasks)
    if verbose > 0:
        tail_lines = calculate_tail_lines(verbose_level=verbose)
        mesos_status_verbose = status_mesos_tasks_verbose(
            job_id=job["name"],
            get_short_task_id=get_short_task_id,
            tail_lines=tail_lines,
        )
        mesos_status = "%s\n%s" % (mesos_status, mesos_status_verbose)
    return (
        "Job: %(job_name)s\n"
        " Status: %(disabled_state)s (%(chronos_state)s)"
        " Last: %(last_result)s (%(formatted_time)s)\n"
        " %(schedule_type)s: %(schedule_value)s\n"
        " Command: %(command)s\n"
        " Mesos: %(mesos_status)s" % {
            "job_name": job_name,
            "is_temporary": is_temporary,
            "schedule_type": schedule_type,
            "chronos_state": PaastaColors.grey(chronos_state),
            "disabled_state": disabled_state,
            "last_result": last_result,
            "formatted_time": formatted_time,
            "schedule_value": schedule_value,
            "command": command,
            "mesos_status": mesos_status,
        }
    )
def format_chronos_job_status(client, job, running_tasks, verbose=0):
    """Given a job, returns a pretty-printed human readable output regarding the status of the job.

    :param job: dictionary of the job status
    :param running_tasks: a list of Mesos tasks associated with ``job``, e.g. the
                          result of ``mesos_tools.get_running_tasks_from_active_frameworks()``.
    :param verbose: int verbosity level
    """
    job_name = _format_job_name(job)
    is_temporary = chronos_tools.is_temporary_job(job) if 'name' in job else 'UNKNOWN'
    job_name = modify_string_for_rerun_status(job_name, is_temporary)
    disabled_state = _format_disabled_status(job)
    service, instance = chronos_tools.decompose_job_id(job['name'])
    chronos_state = chronos_tools.get_chronos_status_for_job(client, service, instance)

    (last_result, formatted_time) = _format_last_result(job)

    job_type = chronos_tools.get_job_type(job)
    schedule_type = _get_schedule_field_for_job_type(job_type)
    schedule_formatter = get_schedule_formatter(job_type, verbose)
    schedule_value = schedule_formatter(job)

    command = _format_command(job)
    mesos_status = _format_mesos_status(job, running_tasks)
    if verbose > 0:
        tail_lines = calculate_tail_lines(verbose_level=verbose)
        mesos_status_verbose = status_mesos_tasks_verbose(
            job_id=job["name"],
            get_short_task_id=get_short_task_id,
            tail_lines=tail_lines,
        )
        mesos_status = "%s\n%s" % (mesos_status, mesos_status_verbose)
    return (
        "Job: %(job_name)s\n"
        " Status: %(disabled_state)s (%(chronos_state)s)"
        " Last: %(last_result)s (%(formatted_time)s)\n"
        " %(schedule_type)s: %(schedule_value)s\n"
        " Command: %(command)s\n"
        " Mesos: %(mesos_status)s" % {
            "job_name": job_name,
            "is_temporary": is_temporary,
            "schedule_type": schedule_type,
            "chronos_state": PaastaColors.grey(chronos_state),
            "disabled_state": disabled_state,
            "last_result": last_result,
            "formatted_time": formatted_time,
            "schedule_value": schedule_value,
            "command": command,
            "mesos_status": mesos_status,
        }
    )
def perform_command(command, service, instance, cluster, verbose, soa_dir):
    tail_lines = calculate_tail_lines(verbose_level=verbose)

    # We have to add a spacer at the end to make sure we only return
    # things for service.main and not service.main_foo
    task_id_prefix = "{}{}".format(compose_job_id(service, instance), MESOS_TASK_SPACER)

    if command == 'status':
        paasta_print(status_mesos_tasks_verbose(
            job_id=task_id_prefix,
            get_short_task_id=lambda x: x,
            tail_lines=tail_lines,
        ))
def status_mesos_tasks(
    service: str,
    instance: str,
    normal_instance_count: int,
    verbose: int,
) -> str:
    job_id = marathon_tools.format_job_id(service, instance)
    # We have to add a spacer at the end to make sure we only return
    # things for service.main and not service.main_foo
    filter_string = f"{job_id}{marathon_tools.MESOS_TASK_SPACER}"

    try:
        count = len(
            select_tasks_by_id(
                a_sync.block(get_cached_list_of_running_tasks_from_frameworks),
                filter_string,
            )
        )
        if count >= normal_instance_count:
            status = PaastaColors.green("Healthy")
            count_str = PaastaColors.green("(%d/%d)" % (count, normal_instance_count))
        elif count == 0:
            status = PaastaColors.red("Critical")
            count_str = PaastaColors.red("(%d/%d)" % (count, normal_instance_count))
        else:
            status = PaastaColors.yellow("Warning")
            count_str = PaastaColors.yellow("(%d/%d)" % (count, normal_instance_count))
        running_string = PaastaColors.bold('TASK_RUNNING')
        output = f"Mesos: {status} - {count_str} tasks in the {running_string} state."
    except ReadTimeout:
        return "Error: talking to Mesos timed out. It may be overloaded."

    if verbose > 0:
        tail_lines = calculate_tail_lines(verbose_level=verbose)
        output += '\n' + status_mesos_tasks_verbose(
            filter_string=filter_string,
            get_short_task_id=get_short_task_id,
            tail_lines=tail_lines,
        )

    return output
async def get_pod_status_tasks_by_replicaset(
    pods_task: "asyncio.Future[V1Pod]",
    backends_task: "asyncio.Future[Dict[str, Any]]",
    client: kubernetes_tools.KubeClient,
    verbose: int,
) -> Dict[str, List["asyncio.Future[Dict[str, Any]]"]]:
    num_tail_lines = calculate_tail_lines(verbose)
    pods = await pods_task
    tasks_by_replicaset: DefaultDict[
        str, List["asyncio.Future[Dict[str, Any]]"]
    ] = defaultdict(list)
    for pod in pods:
        for owner_reference in pod.metadata.owner_references:
            if owner_reference.kind == "ReplicaSet":
                pod_status_task = asyncio.create_task(
                    get_pod_status(pod, backends_task, client, num_tail_lines)
                )
                tasks_by_replicaset[owner_reference.name].append(pod_status_task)

    return tasks_by_replicaset
async def get_pod_status_tasks_by_sha_and_readiness(
    pods_task: "asyncio.Future[V1Pod]",
    backends_task: "asyncio.Future[Dict[str, Any]]",
    client: kubernetes_tools.KubeClient,
    verbose: int,
) -> DefaultDict[
    Tuple[str, str], DefaultDict[bool, List["asyncio.Future[Dict[str, Any]]"]]
]:
    num_tail_lines = calculate_tail_lines(verbose)
    tasks_by_sha_and_readiness: DefaultDict[
        Tuple[str, str], DefaultDict[bool, List["asyncio.Future[Dict[str, Any]]"]]
    ] = defaultdict(lambda: defaultdict(list))
    for pod in await pods_task:
        git_sha = pod.metadata.labels["paasta.yelp.com/git_sha"]
        config_sha = pod.metadata.labels["paasta.yelp.com/config_sha"]
        is_ready = kubernetes_tools.is_pod_ready(pod)
        pod_status_task = asyncio.create_task(
            get_pod_status(pod, backends_task, client, num_tail_lines)
        )
        tasks_by_sha_and_readiness[(git_sha, config_sha)][is_ready].append(
            pod_status_task
        )

    return tasks_by_sha_and_readiness
async def job_status(
    kstatus: MutableMapping[str, Any],
    client: kubernetes_tools.KubeClient,
    job_config: LongRunningServiceConfig,
    pod_list: Sequence[V1Pod],
    replicaset_list: Sequence[V1ReplicaSet],
    verbose: int,
    namespace: str,
) -> None:
    app_id = job_config.get_sanitised_deployment_name()
    kstatus["app_id"] = app_id
    kstatus["pods"] = []
    kstatus["replicasets"] = []
    if verbose > 0:
        num_tail_lines = calculate_tail_lines(verbose)
        for pod in pod_list:
            container_statuses = pod.status.container_statuses or []
            containers = [
                dict(
                    name=container.name,
                    tail_lines=await get_tail_lines_for_kubernetes_container(
                        client, pod, container, num_tail_lines,
                    ),
                )
                for container in container_statuses
            ]
            kstatus["pods"].append(
                {
                    "name": pod.metadata.name,
                    "host": kubernetes_tools.get_pod_hostname(client, pod),
                    "deployed_timestamp": pod.metadata.creation_timestamp.timestamp(),
                    "phase": pod.status.phase,
                    "ready": kubernetes_tools.is_pod_ready(pod),
                    "containers": containers,
                    "reason": pod.status.reason,
                    "message": pod.status.message,
                }
            )
        for replicaset in replicaset_list:
            try:
                ready_replicas = replicaset.status.ready_replicas
                if ready_replicas is None:
                    ready_replicas = 0
            except AttributeError:
                ready_replicas = 0

            kstatus["replicasets"].append(
                {
                    "name": replicaset.metadata.name,
                    "replicas": replicaset.spec.replicas,
                    "ready_replicas": ready_replicas,
                    "create_timestamp": replicaset.metadata.creation_timestamp.timestamp(),
                }
            )

    kstatus["expected_instance_count"] = job_config.get_instances()

    app = kubernetes_tools.get_kubernetes_app_by_name(
        name=app_id, kube_client=client, namespace=namespace
    )
    deploy_status = kubernetes_tools.get_kubernetes_app_deploy_status(
        app=app, desired_instances=job_config.get_instances()
    )
    kstatus["deploy_status"] = kubernetes_tools.KubernetesDeployStatus.tostring(
        deploy_status
    )
    kstatus["running_instance_count"] = (
        app.status.ready_replicas if app.status.ready_replicas else 0
    )
    kstatus["create_timestamp"] = app.metadata.creation_timestamp.timestamp()
    kstatus["namespace"] = app.metadata.namespace