async def marathon_mesos_status(
    service: str,
    instance: str,
    verbose: int,
) -> MutableMapping[str, Any]:
    """Collect Mesos task information for one marathon service instance.

    :param service: service name, combined with ``instance`` into a job id
    :param instance: instance name
    :param verbose: verbosity level; values above 0 add per-task details
    :returns: a mapping with ``running_task_count`` and, when ``verbose`` is
        above 0, ``running_tasks`` and ``non_running_tasks``. If fetching the
        running tasks raises ``ReadTimeout`` or ``asyncio.TimeoutError``, a
        mapping holding only ``error_message`` is returned instead.
    """
    job_id = marathon_tools.format_job_id(service, instance)
    # Task ids are matched on the job id followed by the spacer.
    task_id_prefix = f"{job_id}{marathon_tools.MESOS_TASK_SPACER}"

    try:
        live_tasks = select_tasks_by_id(
            await get_cached_list_of_running_tasks_from_frameworks(),
            job_id=task_id_prefix,
        )
    except (ReadTimeout, asyncio.TimeoutError):
        return {
            "error_message": "Talking to Mesos timed out. It may be overloaded."
        }

    result: MutableMapping[str, Any] = {"running_task_count": len(live_tasks)}
    if verbose <= 0:
        return result

    tail_lines = calculate_tail_lines(verbose)

    # Schedule the running-task lookups before awaiting the non-running list.
    running_futures = [
        asyncio.ensure_future(get_mesos_running_task_dict(task, tail_lines))
        for task in live_tasks
    ]

    stopped_tasks = select_tasks_by_id(
        await get_cached_list_of_not_running_tasks_from_frameworks(),
        job_id=task_id_prefix,
    )
    # Tasks without a first-status timestamp sort as 0.
    stopped_tasks.sort(key=lambda task: get_first_status_timestamp(task) or 0)
    # Keep the last ten entries of the ascending sort, highest timestamp first.
    recent_stopped = stopped_tasks[-10:][::-1]
    stopped_futures = [
        asyncio.ensure_future(get_mesos_non_running_task_dict(task, tail_lines))
        for task in recent_stopped
    ]

    pending = running_futures + stopped_futures
    if pending:
        await asyncio.wait(pending)

    result["running_tasks"] = [future.result() for future in running_futures]
    result["non_running_tasks"] = [
        future.result() for future in stopped_futures
    ]
    return result
def status_chronos_jobs(client, jobs, job_config, verbose):
    """Returns a formatted string of the status of a list of chronos jobs

    :param client: chronos client, passed through to format_chronos_job_status
    :param jobs: list of dicts of chronos job info as returned by the chronos
        client; each dict must have a "name" key
    :param job_config: dict containing configuration about these jobs as
        provided by chronos_tools.load_chronos_job_config().
    :param verbose: int verbosity level
    :returns: a warning string when ``jobs`` is empty (or None), otherwise the
        desired state followed by one status entry per job, newline-joined
    """
    if not jobs:
        return "%s: chronos job is not set up yet" % PaastaColors.yellow(
            "Warning")

    desired_state = job_config.get_desired_state_human()
    output = ["Desired: %s" % desired_state]

    # The task fetcher is awaited elsewhere in this module, so it has to be
    # driven to completion here rather than called directly. The call takes no
    # per-job arguments, so it is done once, outside the loop.
    running_tasks = a_sync.block(
        get_cached_list_of_running_tasks_from_frameworks)

    for job in jobs:
        running_task_count = len(
            select_tasks_by_id(running_tasks, job["name"]))
        output.append(
            format_chronos_job_status(
                client, job, running_task_count, verbose))
    return "\n".join(output)
def status_mesos_tasks(service, instance, normal_instance_count):
    """Summarize how many Mesos tasks are running for a service instance.

    :param service: service name, combined with ``instance`` into a job id
    :param instance: instance name
    :param normal_instance_count: number of running tasks at or above which
        the instance is reported as Healthy
    :returns: a one-line colorized status string, or an error string if
        fetching the task list raises ReadTimeout
    """
    job_id = marathon_tools.format_job_id(service, instance)
    # We have to add a spacer at the end to make sure we only return
    # things for service.main and not service.main_foo
    filter_string = "%s%s" % (job_id, marathon_tools.MESOS_TASK_SPACER)

    # Only the task lookup sits in the try; the formatting below cannot
    # time out. The fetcher is awaited elsewhere in this module, so it is
    # run through a_sync.block here instead of being called directly.
    try:
        count = len(
            select_tasks_by_id(
                a_sync.block(get_cached_list_of_running_tasks_from_frameworks),
                filter_string))
    except ReadTimeout:
        return "Error: talking to Mesos timed out. It may be overloaded."

    # Keep the integer count and its colorized rendering in separate names.
    if count >= normal_instance_count:
        status = PaastaColors.green("Healthy")
        count_str = PaastaColors.green(
            "(%d/%d)" % (count, normal_instance_count))
    elif count == 0:
        status = PaastaColors.red("Critical")
        count_str = PaastaColors.red(
            "(%d/%d)" % (count, normal_instance_count))
    else:
        status = PaastaColors.yellow("Warning")
        count_str = PaastaColors.yellow(
            "(%d/%d)" % (count, normal_instance_count))

    running_string = PaastaColors.bold('TASK_RUNNING')
    return f"Mesos: {status} - {count_str} tasks in the {running_string} state."
def chronos_instance_status(instance_status, service, instance, verbose):
    """Build a status dictionary for one chronos job instance.

    :param instance_status: not read by this function
    :param service: service name used to load the job configuration
    :param instance: instance name used to load the job configuration
    :param verbose: when truthy, also report whether any running Mesos task
        matches the job name
    :returns: dict with ``desired_state``, either ``schedule`` or ``parents``
        (depending on the job type), ``schedule_type``, ``status``,
        ``command`` and ``last_status``
    """
    chronos_config = chronos_tools.load_chronos_config()
    client = chronos_tools.get_chronos_client(chronos_config)
    job_config = chronos_tools.load_chronos_job_config(
        service=service,
        instance=instance,
        cluster=settings.cluster,
        soa_dir=settings.soa_dir,
    )

    cstatus = {'desired_state': job_config.get_desired_state()}

    job_type = chronos_tools.get_job_type(job_config.config_dict)
    if job_type == chronos_tools.JobType.Scheduled:
        schedule = job_config.get_schedule()
        epsilon = job_config.get_epsilon()
        time_zone = job_config.get_schedule_time_zone()
        # A missing time zone (None or the literal string 'null') means UTC.
        if time_zone is None or time_zone == 'null':
            time_zone = 'UTC'
        cstatus['schedule'] = {
            'schedule': schedule,
            'epsilon': epsilon,
            'time_zone': time_zone,
        }
        cstatus['schedule_type'] = 'schedule'
    elif job_type == chronos_tools.JobType.Dependent:
        cstatus['parents'] = job_config.get_parents()
        cstatus['schedule_type'] = 'parents'
    else:
        cstatus['schedule_type'] = 'unknown'

    job_state = {}
    cstatus['status'] = job_state
    if verbose:
        all_running = a_sync.block(
            get_cached_list_of_running_tasks_from_frameworks)
        matching = select_tasks_by_id(all_running, job_config.get_job_name())
        job_state['mesos_state'] = 'running' if len(matching) else 'not_running'
    job_state['disabled_state'] = (
        'not_scheduled' if job_config.get_disabled() else 'scheduled'
    )
    job_state['chronos_state'] = chronos_tools.get_chronos_status_for_job(
        client, service, instance)

    cstatus['command'] = job_config.get_cmd()

    last_time, last_state = chronos_tools.get_status_last_run(
        job_config.config_dict)
    # First matching state wins; anything unrecognized maps to ''.
    state_labels = (
        (chronos_tools.LastRunState.Success, 'success'),
        (chronos_tools.LastRunState.Fail, 'fail'),
        (chronos_tools.LastRunState.NotRun, 'not_run'),
    )
    last_result = next(
        (label for state, label in state_labels if last_state == state),
        '',
    )
    # A job that has not run (or whose state is unknown) has no last-run time.
    if last_result in ('not_run', ''):
        last_time = 'never'

    cstatus['last_status'] = {'result': last_result, 'time': last_time}
    return cstatus
def status_mesos_tasks(
    service: str,
    instance: str,
    normal_instance_count: int,
    verbose: int,
) -> str:
    """Describe how many Mesos tasks are running for a service instance.

    :param service: service name, combined with ``instance`` into a job id
    :param instance: instance name
    :param normal_instance_count: number of running tasks at or above which
        the instance is reported as Healthy
    :param verbose: verbosity level; values above 0 append the output of
        status_mesos_tasks_verbose
    :returns: a colorized status line (plus verbose details when requested),
        or an error string if a ReadTimeout is raised while building it
    """
    job_id = marathon_tools.format_job_id(service, instance)
    # The trailing spacer restricts matches to service.main and keeps out
    # look-alikes such as service.main_foo.
    task_filter = f"{job_id}{marathon_tools.MESOS_TASK_SPACER}"

    try:
        matching = select_tasks_by_id(
            a_sync.block(get_cached_list_of_running_tasks_from_frameworks),
            task_filter,
        )
        count = len(matching)

        # Choose the color and label once; both text pieces share the color.
        if count >= normal_instance_count:
            colorize, label = PaastaColors.green, "Healthy"
        elif count == 0:
            colorize, label = PaastaColors.red, "Critical"
        else:
            colorize, label = PaastaColors.yellow, "Warning"

        status = colorize(label)
        count_str = colorize("(%d/%d)" % (count, normal_instance_count))
        running_string = PaastaColors.bold('TASK_RUNNING')
        output = f"Mesos: {status} - {count_str} tasks in the {running_string} state."
    except ReadTimeout:
        return "Error: talking to Mesos timed out. It may be overloaded."

    if verbose > 0:
        tail_lines = calculate_tail_lines(verbose_level=verbose)
        details = status_mesos_tasks_verbose(
            filter_string=task_filter,
            get_short_task_id=get_short_task_id,
            tail_lines=tail_lines,
        )
        output = output + '\n' + details

    return output