def status_mesos_tasks_verbose(job_id, get_short_task_id):
    """Render a verbose report of the Mesos tasks belonging to a job.

    The report holds a "Running Tasks" table followed by a greyed-out
    "Non-Running Tasks" table. The latter is limited to the last ten entries
    of the non-running task list, shown in reverse order.

    :param job_id: An id used for looking up Mesos tasks
    :param get_short_task_id: A function which given a task_id returns a
                              short task_id suitable for printing.
    :returns: The whole report as a single newline-joined string.
    """
    grey = PaastaColors.grey

    running_table = [[
        "Mesos Task ID",
        "Host deployed to",
        "Ram",
        "CPU",
        "Deployed at what localtime",
    ]]
    for running_task in get_running_tasks_from_active_frameworks(job_id):
        running_table.append(
            format_running_mesos_task_row(running_task, get_short_task_id)
        )
    report = [" Running Tasks:"]
    report.extend(" %s" % line for line in format_table(running_table))

    # Keep only the ten last entries and walk them back to front.
    recent_non_running = get_non_running_tasks_from_active_frameworks(job_id)[-10:]
    report.append(grey(" Non-Running Tasks"))
    non_running_table = [[
        grey("Mesos Task ID"),
        grey("Host deployed to"),
        grey("Deployed at what localtime"),
        grey("Status"),
    ]]
    for stopped_task in reversed(recent_non_running):
        non_running_table.append(
            format_non_running_mesos_task_row(stopped_task, get_short_task_id)
        )
    report.extend(" %s" % line for line in format_table(non_running_table))

    return "\n".join(report)
def status_mesos_tasks_verbose(job_id, get_short_task_id):
    """Return detailed, printable information about a job's Mesos tasks.

    :param job_id: An id used for looking up Mesos tasks
    :param get_short_task_id: A function which given a task_id returns a
                              short task_id suitable for printing.
    :returns: A newline-joined string with a table of running tasks and a
              greyed table of up to ten non-running tasks (the tail of the
              non-running list, reversed).
    """
    def indented(table_rows):
        # Every formatted table line is prefixed with the same indent.
        return [" %s" % row for row in format_table(table_rows)]

    running_header = [
        "Mesos Task ID",
        "Host deployed to",
        "Ram",
        "CPU",
        "Deployed at what localtime",
    ]
    running_rows = [running_header] + [
        format_running_mesos_task_row(task, get_short_task_id)
        for task in get_running_tasks_from_active_frameworks(job_id)
    ]
    lines = [" Running Tasks:"] + indented(running_rows)

    last_non_running = get_non_running_tasks_from_active_frameworks(job_id)[-10:]
    lines.append(PaastaColors.grey(" Non-Running Tasks"))
    non_running_header = [
        PaastaColors.grey("Mesos Task ID"),
        PaastaColors.grey("Host deployed to"),
        PaastaColors.grey("Deployed at what localtime"),
        PaastaColors.grey("Status"),
    ]
    non_running_rows = [non_running_header] + [
        format_non_running_mesos_task_row(task, get_short_task_id)
        for task in reversed(last_non_running)
    ]
    lines += indented(non_running_rows)

    return "\n".join(lines)
def get_verbose_status_of_marathon_app(marathon_client, app, service, instance, cluster, soa_dir):
    """Describe a marathon app in detail: id, creation time, autoscaling and tasks.

    :param marathon_client: client handed to get_autoscaling_info
    :param app: marathon app object; its id, version and tasks are read
    :param service: service name, handed to get_autoscaling_info
    :param instance: instance name, handed to get_autoscaling_info
    :param cluster: cluster name, handed to get_autoscaling_info
    :param soa_dir: soa directory, handed to get_autoscaling_info
    :returns: a tuple of (app.tasks, report) where report is a
              newline-joined string
    """
    created_at = datetime_from_utc_to_local(isodate.parse_datetime(app.version))
    report = [
        " Marathon app ID: %s" % PaastaColors.bold(app.id),
        " App created: %s (%s)" % (str(created_at), humanize.naturaltime(created_at)),
    ]

    autoscaling_info = get_autoscaling_info(marathon_client, service, instance, cluster, soa_dir)
    if autoscaling_info:
        report.append(" Autoscaling Info:")
        # Column titles are derived from the namedtuple field names.
        titles = []
        for field in ServiceAutoscalingInfo._fields:
            titles.append(field.replace("_", " ").capitalize())
        autoscaling_lines = [" %s" % line for line in format_table([titles, autoscaling_info])]
        report.append('\n'.join(autoscaling_lines))

    report.append(" Tasks:")
    task_rows = [("Mesos Task ID", "Host deployed to", "Deployed at what localtime", "Health")]
    for task in app.tasks:
        staged_local = datetime_from_utc_to_local(task.staged_at)

        if task.host is None:
            host_column = "Unknown"
        else:
            # First label of the hostname plus the task's first port.
            host_column = "%s:%s" % (task.host.split(".")[0], task.ports[0])

        if not task.health_check_results:
            health_column = PaastaColors.grey("N/A")
        elif marathon_tools.is_task_healthy(task):
            health_column = PaastaColors.green("Healthy")
        else:
            health_column = PaastaColors.red("Unhealthy")

        deployed_column = '%s (%s)' % (
            staged_local.strftime("%Y-%m-%dT%H:%M"),
            humanize.naturaltime(staged_local),
        )
        task_rows.append((get_short_task_id(task.id), host_column, deployed_column, health_column))

    report.append('\n'.join([" %s" % line for line in format_table(task_rows)]))
    if len(app.tasks) == 0:
        report.append(" No tasks associated with this marathon app")
    return app.tasks, "\n".join(report)
def test_format_table():
    """format_table lays out a 3x3 table and honours min_spacing."""
    table = [
        ['looooong', 'y', 'z'],
        ['a', 'looooong', 'c'],
        ['j', 'k', 'looooong'],
    ]
    wanted = ['looooong y z', 'a looooong c', 'j k looooong']

    got = utils.format_table(table)

    assert got == wanted
    # A single row with an explicit minimum spacing between columns.
    assert ["a b c"] == utils.format_table([['a', 'b', 'c']], min_spacing=5)
def test_format_table():
    """Check utils.format_table on a small table and with min_spacing."""
    first_row = ['looooong', 'y', 'z']
    second_row = ['a', 'looooong', 'c']
    third_row = ['j', 'k', 'looooong']
    actual = utils.format_table([first_row, second_row, third_row])

    expected = [
        'looooong y z',
        'a looooong c',
        'j k looooong',
    ]
    assert actual == expected

    single_row = utils.format_table([['a', 'b', 'c']], min_spacing=5)
    assert ["a b c"] == single_row
def format_task_list(tasks, list_title, table_header, get_short_task_id, format_task_row, grey, tail_stdstreams):
    """Turn a list of tasks into printable output lines.

    :param tasks: List of tasks as returned by get_*_tasks_from_active_frameworks.
    :param list_title: 'Running Tasks:' or 'Non-Running Tasks'.
    :param table_header: List of column names used in the tasks table.
    :param get_short_task_id: A function which given a task_id returns a short
                              task_id suitable for printing.
    :param format_task_row: Formatting function, called with a task and the
                            get_short_task_id function.
    :param grey: If True, the title and column names are passed through
                 PaastaColors.grey.
    :param tail_stdstreams: If True, each task row is combined with that
                            task's stdout/stderr tail.
    :return output: Formatted output (list of output lines).
    """
    # Identity when no greying is requested.
    colorize = PaastaColors.grey if grey else (lambda text: text)

    output = [colorize(" %s" % list_title)]

    table_rows = [[colorize(column) for column in table_header]]
    table_rows.extend(format_task_row(task, get_short_task_id) for task in tasks)
    tasks_table = [" %s" % row for row in format_table(table_rows)]

    if not tail_stdstreams:
        output.extend(tasks_table)
        return output

    stdstreams = [
        format_stdstreams_tail_for_task(task, get_short_task_id)
        for task in tasks
    ]
    header_line, task_lines = tasks_table[0], tasks_table[1:]
    output.append(header_line)
    output.extend(zip_tasks_verbose_output(task_lines, stdstreams))
    return output
def get_verbose_status_of_marathon_app(app):
    """Describe a marathon app in detail: id, creation time and a task table.

    :param app: marathon app object; its id, version and tasks are read
    :returns: a tuple of (app.tasks, report) where report is a
              newline-joined string
    """
    created_at = datetime_from_utc_to_local(isodate.parse_datetime(app.version))
    report = [
        " Marathon app ID: %s" % PaastaColors.bold(app.id),
        " App created: %s (%s)" % (str(created_at), humanize.naturaltime(created_at)),
        " Tasks:",
    ]

    table = [("Mesos Task ID", "Host deployed to", "Deployed at what localtime")]
    for task in app.tasks:
        staged_local = datetime_from_utc_to_local(task.staged_at)
        if task.host is None:
            host_column = "Unknown"
        else:
            # First label of the hostname plus the task's first port.
            host_column = "%s:%s" % (task.host.split(".")[0], task.ports[0])
        deployed_column = '%s (%s)' % (
            staged_local.strftime("%Y-%m-%dT%H:%M"),
            humanize.naturaltime(staged_local),
        )
        table.append((get_short_task_id(task.id), host_column, deployed_column))

    report.append('\n'.join([" %s" % line for line in format_table(table)]))
    if len(app.tasks) == 0:
        report.append(" No tasks associated with this marathon app")
    return app.tasks, "\n".join(report)
def get_verbose_status_of_marathon_app(app):
    """Build the verbose status text (tasks, times, hosts) for a marathon app.

    :param app: marathon app object; its id, version and tasks are read
    :returns: (app.tasks, text) with text being the newline-joined report
    """
    def describe(task):
        # One table row: short task id, host:port (or "Unknown"), deploy time.
        local_time = datetime_from_utc_to_local(task.staged_at)
        where = (
            "%s:%s" % (task.host.split(".")[0], task.ports[0])
            if task.host is not None
            else "Unknown"
        )
        when = '%s (%s)' % (
            local_time.strftime("%Y-%m-%dT%H:%M"),
            humanize.naturaltime(local_time),
        )
        return (get_short_task_id(task.id), where, when)

    app_created = datetime_from_utc_to_local(isodate.parse_datetime(app.version))

    text = []
    text.append(" Marathon app ID: %s" % PaastaColors.bold(app.id))
    text.append(" App created: %s (%s)" % (str(app_created), humanize.naturaltime(app_created)))
    text.append(" Tasks:")

    rows = [("Mesos Task ID", "Host deployed to", "Deployed at what localtime")]
    for task in app.tasks:
        rows.append(describe(task))
    text.append('\n'.join([" %s" % line for line in format_table(rows)]))

    if len(app.tasks) == 0:
        text.append(" No tasks associated with this marathon app")
    return app.tasks, "\n".join(text)
def assert_extra_slave_data(mesos_state, humanize_output=False):
    """Tabulate free/total CPU, RAM and disk for every mesos slave.

    :param mesos_state: mesos state handed to slaves_registered and
                        get_extra_mesos_slave_data
    :param humanize_output: if True, RAM and disk are rendered with
                            naturalsize instead of two-decimal numbers
    :returns: a (message, ok) tuple; ok is False only when no slaves are
              registered
    """
    if not slaves_registered(mesos_state):
        return (' No mesos slaves registered on this cluster!', False)

    def size_column(free, total):
        if humanize_output:
            # Values are scaled by 1024 * 1024 before being humanized.
            return '%s/%s' % (
                naturalsize(free * 1024 * 1024, gnu=True),
                naturalsize(total * 1024 * 1024, gnu=True),
            )
        return '%.2f/%.2f' % (free, total)

    rows = [('Hostname', 'CPU (free/total)', 'RAM (free/total)', 'Disk (free/total)')]
    for slave in get_extra_mesos_slave_data(mesos_state):
        rows.append((
            slave['hostname'],
            '%.2f/%.2f' % (slave['free_resources']['cpus'], slave['total_resources']['cpus']),
            size_column(slave['free_resources']['mem'], slave['total_resources']['mem']),
            size_column(slave['free_resources']['disk'], slave['total_resources']['disk']),
        ))

    table_text = '\n'.join(' %s' % row for row in format_table(rows))
    return (table_text[2:], True)
def pretty_print_smartstack_backends_for_locations(service_instance, tasks, locations, expected_count, verbose):
    """Format the smartstack backend status of a service instance per location.

    :param service_instance: service instance to query backends for
    :param tasks: tasks to match against the backends
    :param locations: mapping of location name to a list of hosts
    :param expected_count: total expected backends, split evenly across
                           the locations
    :param verbose: if True, also add one row per individual backend
    :returns: the lines produced by format_table
    """
    table = [(" Name", "LastCheck", "LastChange", "Status")]
    per_location_expected = int(expected_count / len(locations))

    for location in sorted(locations):
        # arbitrarily choose the first host with a given attribute to query for replication stats
        query_host = locations[location][0]
        backends = get_backends(
            service_instance,
            synapse_host=query_host,
            synapse_port=DEFAULT_SYNAPSE_PORT,
        )
        # Reverse order so that backends in 'UP' are placed above 'MAINT'
        backends_by_status = sorted(backends, key=lambda backend: backend['status'], reverse=True)
        matched = match_backends_and_tasks(backends_by_status, tasks)

        up_count = 0
        for backend, _task in matched:
            if backend and backend_is_up(backend):
                up_count += 1
        summary = haproxy_backend_report(per_location_expected, up_count)
        table.append(" %s - %s" % (location, summary))

        # Individual backends are only listed in verbose mode.
        if not verbose:
            continue
        for backend, task in matched:
            if backend is not None:
                table.append(format_haproxy_backend_row(backend, task is not None))

    return format_table(table)
def create_mesos_non_running_tasks_table(non_running_tasks):
    """Build a greyed-out table describing non-running Mesos tasks.

    :param non_running_tasks: iterable of task objects, or a falsy value
                              for "no tasks"
    :returns: list of formatted table lines, each wrapped in
              PaastaColors.grey
    """
    table_rows = [[
        "Mesos Task ID",
        "Host deployed to",
        "Deployed at what localtime",
        "Status",
    ]]

    for task in non_running_tasks or []:
        if task.deployed_timestamp is not None:
            deployed_at = datetime.fromtimestamp(task.deployed_timestamp)
            when = "{} ({})".format(
                deployed_at.strftime("%Y-%m-%dT%H:%M"),
                humanize.naturaltime(deployed_at),
            )
        else:
            when = "Unknown"
        table_rows.append([task.id, task.hostname, when, task.state])
        # The task's tail lines follow directly after its row.
        table_rows.extend(format_tail_lines_for_mesos_task(task.tail_lines, task.id))

    return [PaastaColors.grey(line) for line in format_table(table_rows)]
def pretty_print_smartstack_backends_for_locations(service_instance, tasks, locations, expected_count, verbose,
                                                   synapse_port, synapse_haproxy_url_format):
    """Format the smartstack backend status of a service instance per location.

    :param service_instance: service instance to query backends for
    :param tasks: tasks to match against the backends
    :param locations: mapping of location name to a list of hosts
    :param expected_count: total expected backends, split evenly across
                           the locations
    :param verbose: if True, add a header row and one row per backend
    :param synapse_port: port handed to get_backends
    :param synapse_haproxy_url_format: url format handed to get_backends
    :returns: the lines produced by format_table
    """
    table = []
    if verbose:
        table.append((" Name", "LastCheck", "LastChange", "Status"))
    per_location_expected = int(expected_count / len(locations))

    for location in sorted(locations):
        # arbitrarily choose the first host with a given attribute to query for replication stats
        query_host = locations[location][0]
        backends = get_backends(
            service_instance,
            synapse_host=query_host,
            synapse_port=synapse_port,
            synapse_haproxy_url_format=synapse_haproxy_url_format,
        )
        # Reverse order so that backends in 'UP' are placed above 'MAINT'
        backends_by_status = sorted(backends, key=lambda backend: backend['status'], reverse=True)
        matched = match_backends_and_tasks(backends_by_status, tasks)

        up_count = sum(1 for backend, _task in matched if backend and backend_is_up(backend))
        summary = haproxy_backend_report(per_location_expected, up_count)
        table.append(" %s - %s" % (location, summary))

        # Individual backends are only listed in verbose mode.
        if verbose:
            table.extend(
                format_haproxy_backend_row(backend, task is not None)
                for backend, task in matched
                if backend is not None
            )

    return format_table(table)
def list_previous_commits(service, deploy_groups, any_given_deploy_groups, git_shas):
    """Print a table of recent commits and an example rollback command.

    :param service: service name used in the example command
    :param deploy_groups: deploy groups joined into the example's -l argument
    :param any_given_deploy_groups: if truthy, the -l argument is included
    :param git_shas: mapping of sha to a (timestamp, deploy_group) pair
    """
    def format_timestamp(tstamp):
        return naturaltime(datetime_from_utc_to_local(parse_timestamp(tstamp)))

    print("Below is a list of recent commits:")

    # Ten entries with the greatest (timestamp, deploy_group) values, descending.
    recent = sorted(git_shas.items(), key=lambda item: item[1], reverse=True)[:10]

    table = [("Timestamp -- UTC", "Human time", "deploy_group", "Git SHA")]
    for commit, (stamp, group) in recent:
        table.append((stamp, format_timestamp(stamp), group, commit))
    for line in format_table(table):
        print(line)

    if len(recent) < 2:
        return

    sha, (timestamp, deploy_group) = recent[1]
    if any_given_deploy_groups:
        deploy_groups_arg_line = "-l %s " % ",".join(deploy_groups)
    else:
        deploy_groups_arg_line = ""
    print(
        "\nFor example, to use the second to last commit from {} used on {}, run:".format(
            format_timestamp(timestamp), PaastaColors.bold(deploy_group)
        )
    )
    print(PaastaColors.bold(f" paasta rollback -s {service} {deploy_groups_arg_line}-k {sha}"))
def assert_extra_attribute_data(mesos_state):
    """Tabulate free CPU, RAM and disk per value of each slave attribute.

    :param mesos_state: mesos state handed to slaves_registered and
                        get_extra_mesos_attribute_data
    :returns: a (message, ok) tuple; ok is False only when no slaves are
              registered
    """
    if not slaves_registered(mesos_state):
        return (' No mesos slaves registered on this cluster!', False)

    table = []
    for attribute, by_location in list(get_extra_mesos_attribute_data(mesos_state)):
        # filter out attributes that apply to every slave in the cluster
        if len(by_location.keys()) < 2:
            continue
        table.append((attribute.capitalize(), 'CPU free', 'RAM free', 'Disk free'))
        for location, remaining in by_location.items():
            table.append((
                location,
                '%.2f' % remaining['cpus'],
                '%.2f' % remaining['mem'],
                '%.2f' % remaining['disk'],
            ))

    if not table:
        return (" No slave attributes that apply to more than one slave were detected.", True)

    text = '\n'.join(' %s' % row for row in format_table(table))
    return (text[2:], True)
def list_previous_versions(
    service: str,
    deploy_groups: Collection[str],
    any_given_deploy_groups: bool,
    versions: Mapping[DeploymentVersion, Tuple],
) -> None:
    """Print a table of recent versions and an example rollback command.

    :param service: service name used in the example command
    :param deploy_groups: deploy groups joined into the example's -l argument
    :param any_given_deploy_groups: if True, the -l argument is included
    :param versions: mapping of version to a (timestamp, deploy_group) pair
    """
    def format_timestamp(tstamp: str) -> str:
        return naturaltime(datetime_from_utc_to_local(parse_timestamp(tstamp)))

    print("Below is a list of recent commits:")

    # Latest 10 versions sorted by deployment time
    latest = sorted(versions.items(), key=lambda item: item[1], reverse=True)[:10]

    table = [("Timestamp -- UTC", "Human time", "deploy_group", "Version")]
    for entry, (stamp, group) in latest:
        table.append((stamp, format_timestamp(stamp), group, repr(entry)))
    for line in format_table(table):
        print(line)

    if len(latest) < 2:
        return

    version, (timestamp, deploy_group) = latest[1]
    deploy_groups_arg_line = ""
    if any_given_deploy_groups:
        deploy_groups_arg_line = "-l %s " % ",".join(deploy_groups)
    # The image version flag is only added when the version carries one.
    version_arg = ""
    if version.image_version:
        version_arg = f" --image-version {version.image_version}"

    print(
        "\nFor example, to use the second to last version from {} used on {}, run:".format(
            format_timestamp(timestamp), PaastaColors.bold(deploy_group)
        )
    )
    print(
        PaastaColors.bold(
            f" paasta rollback -s {service} {deploy_groups_arg_line}-k {version.sha}{version_arg}"
        )
    )
def format_kubernetes_replicaset_table(replicasets):
    """Build a table of replicasets with ready/desired counts and creation time.

    :param replicasets: iterable of objects exposing name, ready_replicas,
                        replicas and create_timestamp
    :returns: the lines produced by format_table
    """
    table = [("ReplicaSet Name", "Ready / Desired", "Created at what localtime")]
    for rs in replicasets:
        created_local = datetime_from_utc_to_local(
            datetime.fromtimestamp(rs.create_timestamp)
        )

        counts = f"{rs.ready_replicas}/{rs.replicas}"
        # Green once at least the desired number of replicas is ready.
        if rs.ready_replicas >= rs.replicas:
            counts = PaastaColors.green(counts)
        else:
            counts = PaastaColors.red(counts)

        created_column = "{} ({})".format(
            created_local.strftime("%Y-%m-%dT%H:%M"),
            humanize.naturaltime(created_local),
        )
        table.append((rs.name, counts, created_column))
    return format_table(table)
def build_smartstack_backends_table(backends):
    """Build a table with one row per smartstack backend.

    The status column is coloured by status; rows of backends without an
    associated task are greyed out entirely.

    :param backends: iterable of backend objects
    :returns: the lines produced by format_table
    """
    table = [("Name", "LastCheck", "LastChange", "Status")]
    for backend in backends:
        state = backend.status
        if state == "UP":
            paint = PaastaColors.default
        elif state == "DOWN":
            paint = PaastaColors.red
        elif state == "MAINT":
            paint = PaastaColors.grey
        else:
            paint = PaastaColors.yellow
        status_column = paint(state)

        duration = "" if backend.check_duration is None else str(backend.check_duration)

        columns = (
            f"{backend.hostname}:{backend.port}",
            f"{backend.check_status}/{backend.check_code} in {duration}ms",
            humanize.naturaltime(timedelta(seconds=backend.last_change)),
            status_column,
        )
        if not backend.has_associated_task:
            # Strip any existing colour codes before greying the whole row.
            columns = tuple(
                PaastaColors.grey(remove_ansi_escape_sequences(cell)) for cell in columns
            )
        table.append(columns)
    return format_table(table)
def format_marathon_task_table(tasks):
    """Build a table of marathon tasks with host, deploy time and health.

    :param tasks: iterable of objects exposing id, host, port,
                  deployed_timestamp and is_healthy
    :returns: the lines produced by format_table
    """
    table = [
        ("Mesos Task ID", "Host deployed to", "Deployed at what localtime", "Health")
    ]
    for task in tasks:
        deployed_local = datetime_from_utc_to_local(
            datetime.fromtimestamp(task.deployed_timestamp)
        )

        host_column = "Unknown" if task.host is None else f"{task.host}:{task.port}"

        # is_healthy is tri-state: None means no health information.
        if task.is_healthy is None:
            health_column = PaastaColors.grey("N/A")
        elif task.is_healthy:
            health_column = PaastaColors.green("Healthy")
        else:
            health_column = PaastaColors.red("Unhealthy")

        deployed_column = "{} ({})".format(
            deployed_local.strftime("%Y-%m-%dT%H:%M"),
            humanize.naturaltime(deployed_local),
        )
        table.append((task.id, host_column, deployed_column, health_column))
    return format_table(table)
def format_kubernetes_pod_table(pods):
    """Build a table of kubernetes pods with host, deploy time and health.

    :param pods: iterable of objects exposing name, host, phase and
                 deployed_timestamp
    :returns: the lines produced by format_table
    """
    table = [("Pod ID", "Host deployed to", "Deployed at what localtime", "Health")]
    for pod in pods:
        deployed_local = datetime_from_utc_to_local(
            datetime.fromtimestamp(pod.deployed_timestamp)
        )

        if pod.host is not None:
            host_column = f"{pod.host}"
        else:
            host_column = "Unknown"

        # No phase or "Pending" gives N/A; only "Running" counts as healthy.
        if pod.phase is None or pod.phase == "Pending":
            health_column = PaastaColors.grey("N/A")
        elif pod.phase == "Running":
            health_column = PaastaColors.green("Healthy")
        else:
            health_column = PaastaColors.red("Unhealthy")

        deployed_column = "{} ({})".format(
            deployed_local.strftime("%Y-%m-%dT%H:%M"),
            humanize.naturaltime(deployed_local),
        )
        table.append((pod.name, host_column, deployed_column, health_column))
    return format_table(table)
def format_task_list(tasks, list_title, table_header, get_short_task_id, format_task_row, grey, tail_lines):
    """Turn a list of tasks into printable output lines.

    :param tasks: List of tasks as returned by get_*_tasks_from_all_frameworks.
    :param list_title: 'Running Tasks:' or 'Non-Running Tasks'.
    :param table_header: List of column names used in the tasks table.
    :param get_short_task_id: A function which given a task_id returns a short
                              task_id suitable for printing.
    :param format_task_row: Formatting function, called with a task and the
                            get_short_task_id function.
    :param grey: If True, the title and column names are passed through
                 PaastaColors.grey.
    :param tail_lines (int): number of lines of stdout/stderr to tail for
                             each task; 0 disables tailing.
    :return output: Formatted output (list of output lines).
    """
    # Identity when no greying is requested.
    colorize = PaastaColors.grey if grey else (lambda text: text)

    output = [colorize(" %s" % list_title)]

    table_rows = [[colorize(column) for column in table_header]]
    table_rows.extend(format_task_row(task, get_short_task_id) for task in tasks)
    tasks_table = [" %s" % row for row in format_table(table_rows)]

    if tail_lines == 0:
        output.extend(tasks_table)
        return output

    stdstreams = [
        format_stdstreams_tail_for_task(task, get_short_task_id, nlines=tail_lines)
        for task in tasks
    ]
    header_line, task_lines = tasks_table[0], tasks_table[1:]
    output.append(header_line)
    output.extend(zip_tasks_verbose_output(task_lines, stdstreams))
    return output
def list_previous_commits(service, deploy_groups, any_given_deploy_groups, soa_dir):
    """Print a table of recent commits and an example rollback command.

    Every print passes a single parenthesized argument, so the function
    parses on Python 3 and prints the same text on Python 2. The stray
    debug print of each raw timestamp before the table has been removed.

    :param service: service name to look up git shas for
    :param deploy_groups: deploy groups to look up git shas for; also joined
                          into the example's -d argument
    :param any_given_deploy_groups: if truthy, the -d argument is included
    :param soa_dir: soa directory handed to get_git_shas_for_service
    """
    def format_timestamp(tstamp):
        return naturaltime(datetime_from_utc_to_local(parse_timestamp(tstamp)))

    print("Please specify a commit to mark for rollback (-k, --commit). Below is a list of recent commits:")

    # Ten entries with the greatest (timestamp, deploy_group) values, descending.
    git_shas = sorted(
        get_git_shas_for_service(service, deploy_groups, soa_dir),
        key=lambda x: x[1],
        reverse=True,
    )[:10]

    rows = [('Timestamp -- UTC', 'Human time', 'deploy_group', 'Git SHA')]
    for sha, (timestamp, deploy_group) in git_shas:
        rows.append((timestamp, format_timestamp(timestamp), deploy_group, sha))
    for line in format_table(rows):
        print(line)

    if len(git_shas) >= 2:
        print("")
        # The second entry is the "second to last" commit used in the example.
        sha, (timestamp, deploy_group) = git_shas[1]
        deploy_groups_arg_line = '-d %s ' % ','.join(deploy_groups) if any_given_deploy_groups else ''
        print("For example, to use the second to last commit from %s used on %s, run:" % (
            format_timestamp(timestamp), PaastaColors.bold(deploy_group)))
        print(PaastaColors.bold(" paasta rollback -s %s %s-k %s" % (service, deploy_groups_arg_line, sha)))
async def format_task_list(
    tasks: Sequence[Task],
    list_title: str,
    table_header: Sequence[str],
    get_short_task_id: Callable[[str], str],
    format_task_row: Callable[[Task, Callable[[str], str]], Awaitable[Union[Sequence[str], str]]],
    grey: bool,
    tail_lines: int,
) -> List[str]:
    """Turn a list of tasks into printable output lines.

    :param tasks: List of tasks as returned by get_*_tasks_from_all_frameworks.
    :param list_title: 'Running Tasks:' or 'Non-Running Tasks'.
    :param table_header: List of column names used in the tasks table.
    :param get_short_task_id: A function which given a task_id returns a short
                              task_id suitable for printing.
    :param format_task_row: Awaitable formatting function, called with a task
                            and the get_short_task_id function.
    :param grey: If True, the title and column names are passed through
                 PaastaColors.grey.
    :param tail_lines (int): number of lines of stdout/stderr to tail for
                             each task; 0 disables tailing.
    :return output: Formatted output (list of output lines).
    """
    # Identity when no greying is requested.
    colorize = PaastaColors.grey if grey else (lambda text: text)

    output = [colorize(" %s" % list_title)]
    table_rows: List[Union[str, Sequence[str]]] = [
        [colorize(column) for column in table_header]
    ]

    if tasks:
        # Schedule every row at once, then collect results in task order.
        row_futures = [
            asyncio.ensure_future(format_task_row(task, get_short_task_id))
            for task in tasks
        ]
        await asyncio.wait(row_futures)
        table_rows.extend(future.result() for future in row_futures)

    tasks_table = [" %s" % row for row in format_table(table_rows)]

    if tail_lines == 0:
        output.extend(tasks_table)
        return output

    # Tails are fetched one task after another.
    stdstreams = [
        await format_stdstreams_tail_for_task(task, get_short_task_id, nlines=tail_lines)
        for task in tasks
    ]
    header_line, task_lines = tasks_table[0], tasks_table[1:]
    output.append(header_line)
    output.extend(zip_tasks_verbose_output(task_lines, stdstreams))
    return output
def test_format_table_with_interjected_lines():
    """Plain strings between table rows pass through format_table unchanged."""
    table = [
        ['looooong', 'y', 'z'],
        'interjection',
        ['a', 'looooong', 'c'],
        u'unicode interjection',
        ['j', 'k', 'looooong'],
    ]
    wanted = [
        'looooong y z',
        'interjection',
        'a looooong c',
        u'unicode interjection',
        'j k looooong',
    ]

    got = utils.format_table(table)

    assert got == wanted
def assert_extra_slave_data(mesos_state):
    """Tabulate free CPU and RAM for every mesos slave.

    :param mesos_state: mesos state handed to get_extra_mesos_slave_data
    :returns: a (message, ok) tuple; ok is False when there is no slave data
    """
    slaves = get_extra_mesos_slave_data(mesos_state)
    if not slaves:
        return (' No mesos slaves registered on this cluster!', False)

    table = [('Hostname', 'CPU free', 'RAM free')]
    for slave in slaves:
        free = slave['free_resources']
        table.append((slave['hostname'], '%.2f' % free['cpus'], '%.2f' % free['mem']))

    text = '\n'.join(' %s' % row for row in format_table(table))
    return (text[2:], True)
def assert_extra_slave_data(mesos_state):
    """Tabulate free CPU, RAM and disk for every mesos slave.

    :param mesos_state: mesos state handed to slaves_registered and
                        get_extra_mesos_slave_data
    :returns: a (message, ok) tuple; ok is False only when no slaves are
              registered
    """
    if not slaves_registered(mesos_state):
        return (' No mesos slaves registered on this cluster!', False)

    table = [('Hostname', 'CPU free', 'RAM free', 'Disk free')]
    for slave in get_extra_mesos_slave_data(mesos_state):
        hostname = slave['hostname']
        cpus = '%.2f' % slave['free_resources']['cpus']
        mem = '%.2f' % slave['free_resources']['mem']
        disk = '%.2f' % slave['free_resources']['disk']
        table.append((hostname, cpus, mem, disk))

    text = '\n'.join(' %s' % row for row in format_table(table))
    return (text[2:], True)
def list_previous_commits(service, deploy_groups, any_given_deploy_groups, soa_dir):
    """Print a table of recent commits and an example rollback command.

    Every print passes a single parenthesized argument, so the function
    parses on Python 3 and prints the same text on Python 2.

    :param service: service name to look up git shas for
    :param deploy_groups: deploy groups to look up git shas for; also joined
                          into the example's -d argument
    :param any_given_deploy_groups: if truthy, the -d argument is included
    :param soa_dir: soa directory handed to get_git_shas_for_service
    """
    def format_timestamp(tstamp):
        return naturaltime(datetime_from_utc_to_local(parse_timestamp(tstamp)))

    print("Please specify a commit to mark for rollback (-k, --commit). Below is a list of recent commits:")

    # Ten (sha, timestamp) entries with the greatest timestamps, descending.
    git_shas = sorted(
        get_git_shas_for_service(service, deploy_groups, soa_dir),
        key=lambda x: x[1],
        reverse=True,
    )[:10]

    rows = [('Timestamp -- UTC', 'Git SHA')]
    rows.extend(
        ('%s (%s)' % (timestamp, format_timestamp(timestamp)), sha)
        for sha, timestamp in git_shas
    )
    for line in format_table(rows):
        print(line)

    if len(git_shas) >= 2:
        # The second entry is the "second to last" commit used in the example.
        sha, tstamp = git_shas[1]
        deploy_groups_arg_line = '-d %s ' % ','.join(deploy_groups) if any_given_deploy_groups else ''
        print("For example, to roll back to the second to last commit from %s, run:" % format_timestamp(tstamp))
        print(PaastaColors.bold(" paasta rollback -s %s %s-k %s" % (service, deploy_groups_arg_line, sha)))
def status_marathon_job_verbose(
    service: str,
    instance: str,
    clients: marathon_tools.MarathonClients,
    cluster: str,
    soa_dir: str,
    job_config: marathon_tools.MarathonServiceConfig,
    dashboards: Dict[marathon_tools.MarathonClient, str],
) -> Tuple[List[MarathonTask], str]:
    """Return detailed information about the marathon apps of a service instance.

    No assumption is made about the *exact* app id; every marathon app
    matching the given service.instance is reported.

    :returns: a tuple of (all tasks of the matching apps, newline-joined
              report text)
    """
    collected_tasks: List[MarathonTask] = []
    sections: List[str] = []

    # For verbose mode, we want to see *any* matching app. As it may
    # not be the one that we think should be deployed. For example
    # during a bounce we want to see the old and new ones.
    apps_with_clients = marathon_tools.get_marathon_apps_with_clients(
        clients=clients.get_all_clients_for_service(job_config),
        embed_tasks=True,
    )

    autoscaling_info = get_autoscaling_info(clients, job_config)
    if autoscaling_info:
        sections.append(" Autoscaling Info:")
        # Column titles are derived from the namedtuple field names.
        titles = []
        for field in ServiceAutoscalingInfo._fields:
            titles.append(field.replace("_", " ").capitalize())
        autoscaling_lines = [" %s" % line for line in format_table([titles, autoscaling_info])]
        sections.append('\n'.join(autoscaling_lines))

    matching = marathon_tools.get_matching_apps_with_clients(service, instance, apps_with_clients)
    for app, client in matching:
        app_tasks, app_text = get_verbose_status_of_marathon_app(
            marathon_client=client,
            app=app,
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=soa_dir,
            dashboards=dashboards,
        )
        collected_tasks.extend(app_tasks)
        sections.append(app_text)

    return collected_tasks, "\n".join(sections)
def test_format_table_with_interjected_lines():
    """Check format_table on a table with plain strings between its rows."""
    rows_and_interjections = []
    rows_and_interjections.append(['looooong', 'y', 'z'])
    rows_and_interjections.append('interjection')
    rows_and_interjections.append(['a', 'looooong', 'c'])
    rows_and_interjections.append(u'unicode interjection')
    rows_and_interjections.append(['j', 'k', 'looooong'])

    actual = utils.format_table(rows_and_interjections)

    expected = [
        'looooong y z',
        'interjection',
        'a looooong c',
        u'unicode interjection',
        'j k looooong',
    ]
    assert actual == expected
def test_format_table_with_interjected_lines():
    """Interjected string lines are kept as-is between formatted rows."""
    top = ["looooong", "y", "z"]
    middle = ["a", "looooong", "c"]
    bottom = ["j", "k", "looooong"]
    result = utils.format_table(
        [top, "interjection", middle, u"unicode interjection", bottom]
    )

    assert result == [
        "looooong y z",
        "interjection",
        "a looooong c",
        u"unicode interjection",
        "j k looooong",
    ]
def list_previous_commits(service, deploy_groups, any_given_deploy_groups, git_shas):
    """Print a table of recent commits and an example rollback command.

    :param service: service name used in the example command
    :param deploy_groups: deploy groups joined into the example's -l argument
    :param any_given_deploy_groups: if truthy, the -l argument is included
    :param git_shas: mapping of sha to a (timestamp, deploy_group) pair
    """
    def format_timestamp(tstamp):
        return naturaltime(datetime_from_utc_to_local(parse_timestamp(tstamp)))

    paasta_print('Below is a list of recent commits:')

    # Ten entries with the greatest (timestamp, deploy_group) values, descending.
    recent = sorted(git_shas.items(), key=lambda item: item[1], reverse=True)[:10]

    table = [('Timestamp -- UTC', 'Human time', 'deploy_group', 'Git SHA')]
    for commit, (stamp, group) in recent:
        table.append((stamp, format_timestamp(stamp), group, commit))
    for line in format_table(table):
        paasta_print(line)

    if len(recent) < 2:
        return

    sha, (timestamp, deploy_group) = recent[1]
    if any_given_deploy_groups:
        deploy_groups_arg_line = '-l %s ' % ','.join(deploy_groups)
    else:
        deploy_groups_arg_line = ''
    example_intro = "\nFor example, to use the second to last commit from %s used on %s, run:" % (
        format_timestamp(timestamp),
        PaastaColors.bold(deploy_group),
    )
    paasta_print(example_intro)
    example_command = " paasta rollback -s %s %s-k %s" % (service, deploy_groups_arg_line, sha)
    paasta_print(PaastaColors.bold(example_command))
def create_autoscaling_info_table(autoscaling_info):
    """Build the "Autoscaling Info:" section as a list of output lines.

    The display values for current_utilization and target_instances are
    computed locally and autoscaling_info is left untouched. Writing the
    display strings back onto the object meant a second call with the same
    object tried to format an already-formatted string as a float and failed.

    :param autoscaling_info: object exposing one attribute per key of
                             autoscaling_fields_to_headers, including
                             current_utilization and target_instances
    :returns: list of lines: the title followed by the indented table
    """
    output = ["Autoscaling Info:"]

    # Display-only replacements, keyed by field name.
    display_overrides = {}
    utilization = autoscaling_info.current_utilization
    if utilization is not None:
        display_overrides["current_utilization"] = "{:.1f}%".format(utilization * 100)
    else:
        display_overrides["current_utilization"] = "Exception"
    if autoscaling_info.target_instances is None:
        display_overrides["target_instances"] = "Exception"

    headers = list(autoscaling_fields_to_headers.values())
    row = [
        str(display_overrides[field])
        if field in display_overrides
        else str(getattr(autoscaling_info, field))
        for field in autoscaling_fields_to_headers
    ]

    output.extend(f" {line}" for line in format_table([headers, row]))
    return output
def assert_extra_attribute_data(mesos_state):
    """Tabulate free CPU, RAM and disk per value of each slave attribute.

    :param mesos_state: mesos state handed to slaves_registered and
                        get_extra_mesos_attribute_data
    :returns: a (message, ok) tuple; ok is False only when no slaves are
              registered
    """
    if not slaves_registered(mesos_state):
        return (' No mesos slaves registered on this cluster!', False)

    attribute_data = list(get_extra_mesos_attribute_data(mesos_state))

    rows = []
    for attribute, resource_dict in attribute_data:
        # filter out attributes that apply to every slave in the cluster
        if len(resource_dict.keys()) >= 2:
            rows.append((attribute.capitalize(), 'CPU free', 'RAM free', 'Disk free'))
            rows.extend(
                (
                    location,
                    '%.2f' % remaining['cpus'],
                    '%.2f' % remaining['mem'],
                    '%.2f' % remaining['disk'],
                )
                for location, remaining in resource_dict.items()
            )

    if rows:
        joined = '\n'.join(' %s' % row for row in format_table(rows))
        result = (joined[2:], True)
    else:
        result = (" No slave attributes that apply to more than one slave were detected.", True)
    return result
def create_queue_entries_table(service_instances) -> List[str]:
    """Build a table describing the entries of a service instance queue.

    Overdue bounce deadlines, failure counts above 10 and processed counts
    above 50 are highlighted in red.

    :param service_instances: sized collection of queue entry objects
    :returns: the lines produced by format_table, or a single greyed
              "Empty" line when there are no entries
    """
    if len(service_instances) == 0:
        return [PaastaColors.grey("Empty")]

    def red_if(text, alarming):
        return PaastaColors.red(text) if alarming else text

    table = [[
        "Service Instance",
        "Bounce by",
        "Wait until",
        "Enqueue time",
        "Bounce Start Time",
        "Processed Count",
        "Failures",
        "Watcher",
    ]]
    for entry in service_instances:
        now = time.time()
        # A bounce deadline in the past is highlighted.
        bounce_by = red_if(format_timestamp(entry.bounce_by), entry.bounce_by < now)
        failures = red_if(str(entry.failures), entry.failures > 10)
        processed_count = red_if(str(entry.processed_count), entry.processed_count > 50)

        table.append([
            f"{entry.service}.{entry.instance}",
            bounce_by,
            format_timestamp(entry.wait_until),
            format_timestamp(entry.enqueue_time),
            format_timestamp(entry.bounce_start_time),
            processed_count,
            failures,
            entry.watcher,
        ])
    return format_table(table)
def assert_extra_attribute_data(mesos_state, humanize_output=False):
    """Tabulate free/total CPU, RAM and disk per value of each slave attribute.

    :param mesos_state: mesos state handed to slaves_registered and
                        get_extra_mesos_attribute_data
    :param humanize_output: if True, RAM and disk are rendered with
                            naturalsize instead of two-decimal numbers
    :returns: a (message, ok) tuple; ok is False only when no slaves are
              registered
    """
    if not slaves_registered(mesos_state):
        return (' No mesos slaves registered on this cluster!', False)

    def size_column(free, total):
        if humanize_output:
            # Values are scaled by 1024 * 1024 before being humanized.
            return '%s/%s' % (
                naturalsize(free * 1024 * 1024, gnu=True),
                naturalsize(total * 1024 * 1024, gnu=True),
            )
        return '%.2f/%.2f' % (free, total)

    table = []
    for attribute, resource_dict in list(get_extra_mesos_attribute_data(mesos_state)):
        free_by_location = resource_dict["free"]
        available_by_location = resource_dict["availability"]
        # filter out attributes that apply to every slave in the cluster
        if len(free_by_location.keys()) < 2:
            continue
        table.append((attribute.capitalize(), 'CPU (free/total)', 'RAM (free/total)', 'Disk (free/total)'))
        for location in sorted(free_by_location.keys()):
            free = free_by_location[location]
            available = available_by_location[location]
            table.append((
                location,
                '%.2f/%.2f' % (free['cpus'], available['cpus']),
                size_column(free['mem'], available['mem']),
                size_column(free['disk'], available['disk']),
            ))

    if not table:
        return (" No slave attributes that apply to more than one slave were detected.", True)

    text = '\n'.join(' %s' % row for row in format_table(table))
    return (text[2:], True)
def assert_extra_attribute_data(mesos_state, humanize_output=False):
    """Summarise free/total resources per attribute value as a text table.

    :param mesos_state: mesos state handed to ``slaves_registered`` and
        ``get_extra_mesos_attribute_data``
    :param humanize_output: render RAM and disk through
        ``naturalsize(value * 1024 * 1024, gnu=True)`` instead of as
        two-decimal numbers
    :returns: ``(text, healthy)``; ``healthy`` is False only when no slaves
        are registered
    """
    if not slaves_registered(mesos_state):
        return (' No mesos slaves registered on this cluster!', False)

    def describe(free, total, humanize):
        # One "free/total" cell, optionally as human-readable sizes.
        if humanize:
            return '%s/%s' % (
                naturalsize(free * 1024 * 1024, gnu=True),
                naturalsize(total * 1024 * 1024, gnu=True),
            )
        return '%.2f/%.2f' % (free, total)

    extra_attribute_data = list(get_extra_mesos_attribute_data(mesos_state))
    lines = []
    for attribute, resource_dict in extra_attribute_data:
        free_by_value = resource_dict['free']
        total_by_value = resource_dict['total']
        # filter out attributes that apply to every slave in the cluster
        if len(free_by_value.keys()) < 2:
            continue
        header = (attribute.capitalize(), 'CPU (free/total)', 'RAM (free/total)', 'Disk (free/total)')
        lines.append(header)
        for attribute_value in sorted(free_by_value.keys()):
            free = free_by_value[attribute_value]
            total = total_by_value[attribute_value]
            lines.append((
                attribute_value,
                describe(free['cpus'], total['cpus'], False),
                describe(free['mem'], total['mem'], humanize_output),
                describe(free['disk'], total['disk'], humanize_output),
            ))

    if lines:
        # The first two characters of the joined text are dropped.
        text = '\n'.join(' %s' % row for row in format_table(lines))[2:]
    else:
        text = " No slave attributes that apply to more than one slave were detected."
    return (text, True)
def create_mesos_running_tasks_table(running_tasks):
    """Render running Mesos tasks as the lines of a formatted table.

    :param running_tasks: iterable of tasks, or a falsy value for "none".
        Each task is read for ``id``, ``hostname``, ``deployed_timestamp``
        and ``tail_lines``.
    :returns: the output of ``format_table``; the header row comes first and
        each task row is followed by the rows produced by
        ``format_tail_lines_for_mesos_task`` for that task.
    """
    table = [
        [
            "Mesos Task ID",
            "Host deployed to",
            "Ram",
            "CPU",
            "Deployed at what localtime",
        ]
    ]
    for task in running_tasks or []:
        memory_cell = get_mesos_task_memory_string(task)
        cpu_cell = get_mesos_task_cpu_string(task)
        deployed_at = datetime.fromtimestamp(task.deployed_timestamp)
        absolute_time = deployed_at.strftime("%Y-%m-%dT%H:%M")
        relative_time = humanize.naturaltime(deployed_at)
        deployed_cell = "{} ({})".format(absolute_time, relative_time)
        table.append([task.id, task.hostname, memory_cell, cpu_cell, deployed_cell])
        table.extend(format_tail_lines_for_mesos_task(task.tail_lines, task.id))
    return format_table(table)
def main():
    """Print a Mesos/Marathon/Chronos health report and exit 0 or 2.

    Runs the Mesos state, metrics and framework checks, then the Marathon
    and Chronos checks when their configuration is present, and prints one
    summary per system. With ``-vv`` a utilization table is printed for each
    entry of ``args.groupings``; with ``-vvv`` or more a per-slave table
    follows.

    Exits with status 2 when any system check fails or any grouped
    utilization check is unhealthy, and 0 otherwise. Per-slave utilization
    never affects the exit status. The process also exits 2 immediately when
    the Mesos master, Marathon or Chronos cannot be contacted.
    """
    args = parse_args()

    master = get_mesos_master()
    try:
        mesos_state = master.state
    except MasterNotAvailableException as e:
        # if we can't connect to master at all,
        # then bomb out early
        print(PaastaColors.red("CRITICAL: %s" % e.message))
        sys.exit(2)

    mesos_state_status = metastatus_lib.get_mesos_state_status(
        mesos_state=mesos_state,
    )

    metrics = master.metrics_snapshot()
    mesos_metrics_status = metastatus_lib.get_mesos_resource_utilization_health(
        mesos_metrics=metrics,
        mesos_state=mesos_state,
    )
    framework_metrics_healthchecks = metastatus_lib.get_framework_metrics_status(metrics=metrics)

    all_mesos_results = mesos_state_status + mesos_metrics_status + framework_metrics_healthchecks

    # Check to see if Marathon should be running here by checking for config
    marathon_config = marathon_tools.load_marathon_config()

    # Check to see if Chronos should be running here by checking for config
    chronos_config = load_chronos_config()

    if marathon_config:
        marathon_client = metastatus_lib.get_marathon_client(marathon_config)
        try:
            marathon_results = metastatus_lib.get_marathon_status(marathon_client)
        except MarathonError as e:
            print(PaastaColors.red("CRITICAL: Unable to contact Marathon! Error: %s" % e))
            sys.exit(2)
    else:
        marathon_results = [metastatus_lib.HealthCheckResult(
            message='Marathon is not configured to run here',
            healthy=True,
        )]

    if chronos_config:
        chronos_client = get_chronos_client(chronos_config)
        try:
            chronos_results = metastatus_lib.get_chronos_status(chronos_client)
        except (chronos.ChronosAPIError) as e:
            print(PaastaColors.red("CRITICAL: Unable to contact Chronos! Error: %s" % e))
            sys.exit(2)
    else:
        chronos_results = [metastatus_lib.HealthCheckResult(
            message='Chronos is not configured to run here',
            healthy=True,
        )]

    mesos_ok = all(metastatus_lib.status_for_results(all_mesos_results))
    marathon_ok = all(metastatus_lib.status_for_results(marathon_results))
    chronos_ok = all(metastatus_lib.status_for_results(chronos_results))

    mesos_summary = metastatus_lib.generate_summary_for_check("Mesos", mesos_ok)
    marathon_summary = metastatus_lib.generate_summary_for_check("Marathon", marathon_ok)
    chronos_summary = metastatus_lib.generate_summary_for_check("Chronos", chronos_ok)

    healthy_exit = mesos_ok and marathon_ok and chronos_ok

    print("Master paasta_tools version: {0}".format(__version__))
    metastatus_lib.print_results_for_healthchecks(mesos_summary, mesos_ok, all_mesos_results, args.verbose)

    if args.verbose > 1:
        for grouping in args.groupings:
            print_with_indent('Resources Grouped by %s' % grouping, 2)
            grouping_function = metastatus_lib.key_func_for_attribute(grouping)
            resource_info_by_value = metastatus_lib.get_resource_utilization_by_grouping(
                grouping_function,
                mesos_state,
            )
            all_rows = [[grouping.capitalize(), 'CPU (free/total)', 'RAM (free/total)', 'Disk (free/total)']]
            table_rows = []
            for attribute_value, resource_info in resource_info_by_value.items():
                resource_utilizations = metastatus_lib.resource_utillizations_from_resource_info(
                    total=resource_info['total'],
                    free=resource_info['free'],
                )
                healthcheck_utilization_pairs = [
                    metastatus_lib.healthcheck_result_resource_utilization_pair_for_resource_utilization(
                        utilization,
                        args.threshold,
                    )
                    for utilization in resource_utilizations
                ]
                # Only ever lower healthy_exit: overwriting it here would let
                # the last group hide earlier failures, including failed
                # Mesos/Marathon/Chronos checks.
                if not all(pair[0].healthy for pair in healthcheck_utilization_pairs):
                    healthy_exit = False
                table_rows.append(metastatus_lib.get_table_rows_for_resource_info_dict(
                    attribute_value,
                    healthcheck_utilization_pairs,
                    args.humanize,
                ))
            all_rows.extend(sorted(table_rows, key=lambda x: x[0]))
            for line in format_table(all_rows):
                print_with_indent(line, 4)

    # ">= 3" so that -vvvv does not show less than -vvv.
    if args.verbose >= 3:
        print_with_indent('Per Slave Utilization', 2)
        slave_resource_dict = metastatus_lib.get_resource_utilization_by_grouping(
            lambda slave: slave['hostname'],
            mesos_state,
        )
        all_rows = [['Hostname', 'CPU (free/total)', 'RAM (free/total)', 'Disk (free/total)']]

        # print info about slaves here. Note that we don't make modifications to
        # the healthy_exit variable here, because we don't care about a single slave
        # having high usage.
        slave_rows = []
        for attribute_value, resource_info in slave_resource_dict.items():
            resource_utilizations = metastatus_lib.resource_utillizations_from_resource_info(
                total=resource_info['total'],
                free=resource_info['free'],
            )
            healthcheck_utilization_pairs = [
                metastatus_lib.healthcheck_result_resource_utilization_pair_for_resource_utilization(
                    utilization,
                    args.threshold,
                )
                for utilization in resource_utilizations
            ]
            slave_rows.append(metastatus_lib.get_table_rows_for_resource_info_dict(
                attribute_value,
                healthcheck_utilization_pairs,
                args.humanize,
            ))
        # Sort once over every slave; sorting inside the loop only ever saw
        # a single row and therefore ordered nothing.
        all_rows.extend(sorted(slave_rows, key=lambda x: x[0]))
        for line in format_table(all_rows):
            print_with_indent(line, 4)

    metastatus_lib.print_results_for_healthchecks(marathon_summary, marathon_ok, marathon_results, args.verbose)
    metastatus_lib.print_results_for_healthchecks(chronos_summary, chronos_ok, chronos_results, args.verbose)

    sys.exit(0 if healthy_exit else 2)
def test_format_table_all_strings():
    """A flat list of strings is returned by format_table as the same lines."""
    plain_lines = ["foo", "bar", "baz"]
    assert utils.format_table(plain_lines) == ["foo", "bar", "baz"]
def test_format_table():
    """Check format_table output for a 3x3 grid and for a single row with min_spacing=5."""
    grid = [
        ["looooong", "y", "z"],
        ["a", "looooong", "c"],
        ["j", "k", "looooong"],
    ]
    rendered = utils.format_table(grid)
    assert rendered == ["looooong y z", "a looooong c", "j k looooong"]

    single_row = utils.format_table([["a", "b", "c"]], min_spacing=5)
    assert ["a b c"] == single_row
def main():
    """Print a Mesos/Marathon/Chronos health report and exit 0 or 2.

    Verbosity levels:

    * 0: one summary line per system.
    * 1: summaries plus the individual health check results.
    * 2: additionally a utilization table for each entry of
      ``args.groupings``.
    * 3 and above: additionally a per-slave utilization table.

    Exits with status 2 when any system check fails or any grouped
    utilization check is unhealthy, and 0 otherwise. Per-slave utilization
    never affects the exit status. The process also exits 2 immediately when
    the Mesos master, Marathon or Chronos cannot be contacted.
    """
    args = parse_args()

    try:
        mesos_state = get_mesos_state_from_leader()
    except MasterNotAvailableException as e:
        # if we can't connect to master at all,
        # then bomb out early
        print(PaastaColors.red("CRITICAL: %s" % e.message))
        sys.exit(2)

    mesos_state_status = get_mesos_state_status(mesos_state=mesos_state)
    metrics = get_mesos_stats()
    mesos_metrics_status = get_mesos_metrics_health(mesos_metrics=metrics)
    all_mesos_results = mesos_state_status + mesos_metrics_status

    # Check to see if Marathon should be running here by checking for config
    try:
        marathon_config = marathon_tools.load_marathon_config()
    except MarathonNotConfigured:
        marathon_config = None

    # Check to see if Chronos should be running here by checking for config
    try:
        chronos_config = load_chronos_config()
    except PaastaNotConfiguredError:
        chronos_config = None

    # The "not configured" result is produced in the else branch so that the
    # results are always bound, even if a loader returns an empty config
    # instead of raising.
    if marathon_config:
        marathon_client = get_marathon_client(marathon_config)
        try:
            marathon_results = get_marathon_status(marathon_client)
        except MarathonError as e:
            print(PaastaColors.red("CRITICAL: Unable to contact Marathon! Error: %s" % e))
            sys.exit(2)
    else:
        marathon_results = [HealthCheckResult(message="Marathon is not configured to run here", healthy=True)]

    if chronos_config:
        chronos_client = get_chronos_client(chronos_config)
        try:
            chronos_results = get_chronos_status(chronos_client)
        except ServerNotFoundError as e:
            print(PaastaColors.red("CRITICAL: Unable to contact Chronos! Error: %s" % e))
            sys.exit(2)
    else:
        chronos_results = [HealthCheckResult(message="Chronos is not configured to run here", healthy=True)]

    mesos_ok = all(status_for_results(all_mesos_results))
    marathon_ok = all(status_for_results(marathon_results))
    chronos_ok = all(status_for_results(chronos_results))

    mesos_summary = generate_summary_for_check("Mesos", mesos_ok)
    marathon_summary = generate_summary_for_check("Marathon", marathon_ok)
    chronos_summary = generate_summary_for_check("Chronos", chronos_ok)

    healthy_exit = mesos_ok and marathon_ok and chronos_ok

    print(mesos_summary)
    if args.verbose >= 1:
        print_results_for_healthchecks(mesos_ok, all_mesos_results, args.verbose)

    if args.verbose >= 2:
        for grouping in args.groupings:
            print_with_indent("Resources Grouped by %s" % grouping, 2)
            resource_info_by_value = get_resource_utilization_by_grouping(
                key_func_for_attribute(grouping),
                mesos_state,
            )
            all_rows = [[grouping.capitalize(), "CPU (free/total)", "RAM (free/total)", "Disk (free/total)"]]
            table_rows = []
            for attribute_value, resource_info in resource_info_by_value.items():
                resource_utilizations = resource_utillizations_from_resource_info(
                    total=resource_info["total"],
                    free=resource_info["free"],
                )
                healthcheck_utilization_pairs = [
                    healthcheck_result_resource_utilization_pair_for_resource_utilization(utilization, args.threshold)
                    for utilization in resource_utilizations
                ]
                # Only ever lower healthy_exit: overwriting it here would let
                # the last group hide earlier failures, including failed
                # Mesos/Marathon/Chronos checks.
                if not all(pair[0].healthy for pair in healthcheck_utilization_pairs):
                    healthy_exit = False
                table_rows.append(
                    get_table_rows_for_resource_info_dict(attribute_value, healthcheck_utilization_pairs, args.humanize)
                )
            all_rows.extend(sorted(table_rows, key=lambda x: x[0]))
            for line in format_table(all_rows):
                print_with_indent(line, 4)

    if args.verbose >= 3:
        print_with_indent("Per Slave Utilization", 2)
        slave_resource_dict = get_resource_utilization_by_grouping(lambda slave: slave["hostname"], mesos_state)
        all_rows = [["Hostname", "CPU (free/total)", "RAM (free/total)", "Disk (free/total)"]]

        # print info about slaves here. Note that we don't make modifications to
        # the healthy_exit variable here, because we don't care about a single slave
        # having high usage.
        slave_rows = []
        for attribute_value, resource_info in slave_resource_dict.items():
            resource_utilizations = resource_utillizations_from_resource_info(
                total=resource_info["total"],
                free=resource_info["free"],
            )
            healthcheck_utilization_pairs = [
                healthcheck_result_resource_utilization_pair_for_resource_utilization(utilization, args.threshold)
                for utilization in resource_utilizations
            ]
            slave_rows.append(
                get_table_rows_for_resource_info_dict(attribute_value, healthcheck_utilization_pairs, args.humanize)
            )
        # Sort once over every slave; sorting inside the loop only ever saw
        # a single row and therefore ordered nothing.
        all_rows.extend(sorted(slave_rows, key=lambda x: x[0]))
        for line in format_table(all_rows):
            print_with_indent(line, 4)

    # Marathon and Chronos are reported at every verbosity level; previously
    # the most verbose branch stopped after the per-slave table.
    print(marathon_summary)
    if args.verbose >= 1:
        print_results_for_healthchecks(marathon_ok, marathon_results, args.verbose)
    print(chronos_summary)
    if args.verbose >= 1:
        print_results_for_healthchecks(chronos_ok, chronos_results, args.verbose)

    sys.exit(0 if healthy_exit else 2)
def print_output(argv: Optional[Sequence[str]] = None) -> None:
    """Print the Mesos, Marathon and Kubernetes health report.

    Each system's checks run only when ``is_mesos_available()`` /
    ``is_kubernetes_available()`` say so; otherwise a healthy "not configured
    to run here" result is reported. With ``-vv`` a grouped utilization table
    is printed per available system (plus autoscaling info when requested),
    and with ``-vvv`` or more a per-slave / per-node table follows.

    :param argv: command line arguments forwarded to ``parse_args``
    :raises FatalError: with code 2 when the Mesos master is unavailable, or
        when the Mesos/Marathon checks or any grouped utilization check is
        unhealthy. Per-slave and per-node utilization never affect this.
    """
    mesos_available = is_mesos_available()
    kube_available = is_kubernetes_available()

    args = parse_args(argv)

    system_paasta_config = load_system_paasta_config()

    if mesos_available:
        master_kwargs = {}
        # we don't want to be passing False to not override a possible True
        # value from system config
        if args.use_mesos_cache:
            master_kwargs["use_mesos_cache"] = True

        master = get_mesos_master(**master_kwargs)

        marathon_servers = get_marathon_servers(system_paasta_config)
        marathon_clients = all_marathon_clients(
            get_marathon_clients(marathon_servers))

        try:
            mesos_state = a_sync.block(master.state)
            all_mesos_results = _run_mesos_checks(mesos_master=master,
                                                  mesos_state=mesos_state)
        except MasterNotAvailableException as e:
            # if we can't connect to master at all,
            # then bomb out early
            paasta_print(PaastaColors.red("CRITICAL: %s" % "\n".join(e.args)))
            raise FatalError(2)

        marathon_results = _run_marathon_checks(marathon_clients)
    else:
        marathon_results = [
            metastatus_lib.HealthCheckResult(
                message="Marathon is not configured to run here", healthy=True)
        ]
        all_mesos_results = [
            metastatus_lib.HealthCheckResult(
                message="Mesos is not configured to run here", healthy=True)
        ]

    if kube_available:
        kube_client = KubeClient()
        kube_results = _run_kube_checks(kube_client)
    else:
        kube_results = [
            metastatus_lib.HealthCheckResult(
                message="Kubernetes is not configured to run here",
                healthy=True)
        ]

    mesos_ok = all(metastatus_lib.status_for_results(all_mesos_results))
    marathon_ok = all(metastatus_lib.status_for_results(marathon_results))
    kube_ok = all(metastatus_lib.status_for_results(kube_results))

    mesos_summary = metastatus_lib.generate_summary_for_check(
        "Mesos", mesos_ok)
    marathon_summary = metastatus_lib.generate_summary_for_check(
        "Marathon", marathon_ok)
    kube_summary = metastatus_lib.generate_summary_for_check(
        "Kubernetes", kube_ok)

    # NOTE(review): kube_ok is not part of the exit status here; that is kept
    # as found - confirm whether it is intentional.
    healthy_exit = mesos_ok and marathon_ok

    paasta_print(f"Master paasta_tools version: {__version__}")
    paasta_print("Mesos leader: %s" % get_mesos_leader())
    metastatus_lib.print_results_for_healthchecks(mesos_summary, mesos_ok,
                                                  all_mesos_results,
                                                  args.verbose)
    if args.verbose > 1 and mesos_available:
        print_with_indent(
            "Resources Grouped by %s" % ", ".join(args.groupings), 2)
        all_rows, mesos_groupings_healthy = utilization_table_by_grouping_from_mesos_state(
            groupings=args.groupings,
            threshold=args.threshold,
            mesos_state=mesos_state)
        # Only ever lower healthy_exit: assigning the table result directly
        # would hide failed Mesos/Marathon checks whenever -vv is used.
        if not mesos_groupings_healthy:
            healthy_exit = False
        for line in format_table(all_rows):
            print_with_indent(line, 4)

        if args.autoscaling_info:
            print_with_indent("Autoscaling resources:", 2)
            headers = [
                field.replace("_", " ").capitalize()
                for field in AutoscalingInfo._fields
            ]
            table = [headers] + [[
                str(x) for x in asi
            ] for asi in get_autoscaling_info_for_all_resources(mesos_state)]

            for line in format_table(table):
                print_with_indent(line, 4)

        if args.verbose >= 3:
            print_with_indent("Per Slave Utilization", 2)
            cluster = system_paasta_config.get_cluster()
            service_instance_stats = get_service_instance_stats(
                args.service, args.instance, cluster)
            if service_instance_stats:
                print_with_indent(
                    "Service-Instance stats:" + str(service_instance_stats), 2)
            # print info about slaves here. Note that we don't make modifications to
            # the healthy_exit variable here, because we don't care about a single slave
            # having high usage.
            all_rows, _ = utilization_table_by_grouping_from_mesos_state(
                groupings=args.groupings + ["hostname"],
                threshold=args.threshold,
                mesos_state=mesos_state,
                service_instance_stats=service_instance_stats,
            )
            # The last column from utilization_table_by_grouping_from_mesos_state is "Agent count", which will always be
            # 1 for per-slave resources, so delete it.
            for row in all_rows:
                row.pop()

            for line in format_table(all_rows):
                print_with_indent(line, 4)
    metastatus_lib.print_results_for_healthchecks(marathon_summary,
                                                  marathon_ok,
                                                  marathon_results,
                                                  args.verbose)
    metastatus_lib.print_results_for_healthchecks(kube_summary, kube_ok,
                                                  kube_results, args.verbose)
    if args.verbose > 1 and kube_available:
        print_with_indent(
            "Resources Grouped by %s" % ", ".join(args.groupings), 2)
        all_rows, kube_groupings_healthy = utilization_table_by_grouping_from_kube(
            groupings=args.groupings,
            threshold=args.threshold,
            kube_client=kube_client)
        # Same as for Mesos: fold the result in instead of overwriting, so an
        # earlier failure is never reset by a healthy Kubernetes table.
        if not kube_groupings_healthy:
            healthy_exit = False
        for line in format_table(all_rows):
            print_with_indent(line, 4)

        if args.autoscaling_info:
            print_with_indent("No autoscaling resources for Kubernetes", 2)

        if args.verbose >= 3:
            print_with_indent("Per Node Utilization", 2)
            cluster = system_paasta_config.get_cluster()
            service_instance_stats = get_service_instance_stats(
                args.service, args.instance, cluster)
            if service_instance_stats:
                print_with_indent(
                    "Service-Instance stats:" + str(service_instance_stats), 2)
            # print info about nodes here. Note that we don't make
            # modifications to the healthy_exit variable here, because we don't
            # care about a single node having high usage.
            all_rows, _ = utilization_table_by_grouping_from_kube(
                groupings=args.groupings + ["hostname"],
                threshold=args.threshold,
                kube_client=kube_client,
                service_instance_stats=service_instance_stats,
            )
            # The last column from utilization_table_by_grouping_from_kube is "Agent count", which will always be
            # 1 for per-node resources, so delete it.
            for row in all_rows:
                row.pop()

            for line in format_table(all_rows):
                print_with_indent(line, 4)

    if not healthy_exit:
        raise FatalError(2)
def main():
    """Print a Mesos/Marathon/Chronos health report and exit 0 or 2.

    Verbosity levels:

    * 0: one summary line per system.
    * 1: summaries plus the individual health check results.
    * 2: additionally a utilization table for each entry of
      ``args.groupings``.
    * 3 and above: additionally a per-slave utilization table.

    Exits with status 2 when any system check fails or any grouped
    utilization check is unhealthy, and 0 otherwise. Per-slave utilization
    never affects the exit status. The process also exits 2 immediately when
    the Mesos master, Marathon or Chronos cannot be contacted.
    """
    args = parse_args()

    try:
        mesos_state = get_mesos_state_from_leader()
    except MasterNotAvailableException as e:
        # if we can't connect to master at all,
        # then bomb out early
        print(PaastaColors.red("CRITICAL: %s" % e.message))
        sys.exit(2)

    mesos_state_status = get_mesos_state_status(
        mesos_state=mesos_state,
    )
    metrics = get_mesos_stats()
    mesos_metrics_status = get_mesos_metrics_health(mesos_metrics=metrics)

    all_mesos_results = mesos_state_status + mesos_metrics_status

    # Check to see if Marathon should be running here by checking for config
    marathon_config = marathon_tools.load_marathon_config()

    # Check to see if Chronos should be running here by checking for config
    chronos_config = load_chronos_config()

    if marathon_config:
        marathon_client = get_marathon_client(marathon_config)
        try:
            marathon_results = get_marathon_status(marathon_client)
        except MarathonError as e:
            print(PaastaColors.red("CRITICAL: Unable to contact Marathon! Error: %s" % e))
            sys.exit(2)
    else:
        marathon_results = [HealthCheckResult(message='Marathon is not configured to run here', healthy=True)]

    if chronos_config:
        chronos_client = get_chronos_client(chronos_config)
        try:
            chronos_results = get_chronos_status(chronos_client)
        except (ServerNotFoundError, socket_error) as e:
            print(PaastaColors.red("CRITICAL: Unable to contact Chronos! Error: %s" % e))
            sys.exit(2)
    else:
        chronos_results = [HealthCheckResult(message='Chronos is not configured to run here', healthy=True)]

    mesos_ok = all(status_for_results(all_mesos_results))
    marathon_ok = all(status_for_results(marathon_results))
    chronos_ok = all(status_for_results(chronos_results))

    mesos_summary = generate_summary_for_check("Mesos", mesos_ok)
    marathon_summary = generate_summary_for_check("Marathon", marathon_ok)
    chronos_summary = generate_summary_for_check("Chronos", chronos_ok)

    healthy_exit = mesos_ok and marathon_ok and chronos_ok

    print(mesos_summary)
    if args.verbose >= 1:
        print_results_for_healthchecks(mesos_ok, all_mesos_results, args.verbose)

    if args.verbose >= 2:
        for grouping in args.groupings:
            print_with_indent('Resources Grouped by %s' % grouping, 2)
            resource_info_by_value = get_resource_utilization_by_grouping(
                key_func_for_attribute(grouping),
                mesos_state,
            )
            all_rows = [[grouping.capitalize(), 'CPU (free/total)', 'RAM (free/total)', 'Disk (free/total)']]
            table_rows = []
            for attribute_value, resource_info in resource_info_by_value.items():
                resource_utilizations = resource_utillizations_from_resource_info(
                    total=resource_info['total'],
                    free=resource_info['free'],
                )
                healthcheck_utilization_pairs = [
                    healthcheck_result_resource_utilization_pair_for_resource_utilization(utilization, args.threshold)
                    for utilization in resource_utilizations
                ]
                # Only ever lower healthy_exit: overwriting it here would let
                # the last group hide earlier failures, including failed
                # Mesos/Marathon/Chronos checks.
                if not all(pair[0].healthy for pair in healthcheck_utilization_pairs):
                    healthy_exit = False
                table_rows.append(get_table_rows_for_resource_info_dict(
                    attribute_value,
                    healthcheck_utilization_pairs,
                    args.humanize,
                ))
            all_rows.extend(sorted(table_rows, key=lambda x: x[0]))
            for line in format_table(all_rows):
                print_with_indent(line, 4)

    # ">= 3" so that -vvvv does not show less than -vvv.
    if args.verbose >= 3:
        print_with_indent('Per Slave Utilization', 2)
        slave_resource_dict = get_resource_utilization_by_grouping(lambda slave: slave['hostname'], mesos_state)
        all_rows = [['Hostname', 'CPU (free/total)', 'RAM (free/total)', 'Disk (free/total)']]

        # print info about slaves here. Note that we don't make modifications to
        # the healthy_exit variable here, because we don't care about a single slave
        # having high usage.
        slave_rows = []
        for attribute_value, resource_info in slave_resource_dict.items():
            resource_utilizations = resource_utillizations_from_resource_info(
                total=resource_info['total'],
                free=resource_info['free'],
            )
            healthcheck_utilization_pairs = [
                healthcheck_result_resource_utilization_pair_for_resource_utilization(utilization, args.threshold)
                for utilization in resource_utilizations
            ]
            slave_rows.append(get_table_rows_for_resource_info_dict(
                attribute_value,
                healthcheck_utilization_pairs,
                args.humanize,
            ))
        # Sort once over every slave; sorting inside the loop only ever saw
        # a single row and therefore ordered nothing.
        all_rows.extend(sorted(slave_rows, key=lambda x: x[0]))
        for line in format_table(all_rows):
            print_with_indent(line, 4)

    print(marathon_summary)
    if args.verbose >= 1:
        print_results_for_healthchecks(marathon_ok, marathon_results, args.verbose)
    print(chronos_summary)
    if args.verbose >= 1:
        print_results_for_healthchecks(chronos_ok, chronos_results, args.verbose)

    # NOTE(review): printed at every verbosity level - confirm this matches
    # the intended placement of the version line.
    print("Master paasta_tools version: {0}".format(__version__))

    sys.exit(0 if healthy_exit else 2)
def test_format_table_all_strings():
    """format_table given only plain strings yields those same lines."""
    lines = ['foo', 'bar', 'baz']
    result = utils.format_table(lines)
    assert result == ['foo', 'bar', 'baz']
def main(argv=None):
    """Print a Mesos/Marathon/Chronos health report and exit 0 or 2.

    With ``-vv`` a utilization table is printed for each entry of
    ``args.groupings`` (plus autoscaling info when ``args.autoscaling_info``
    is set); with ``-vvv`` or more a per-slave table follows.

    Exits with status 2 when any system check fails or any grouped
    utilization check is unhealthy, and 0 otherwise. Per-slave utilization
    never affects the exit status. The process also exits 2 immediately when
    the Mesos master or Chronos cannot be contacted.

    :param argv: command line arguments forwarded to ``parse_args``
    """
    args = parse_args(argv)

    system_paasta_config = load_system_paasta_config()

    master_kwargs = {}
    # we don't want to be passing False to not override a possible True
    # value from system config
    if args.use_mesos_cache:
        master_kwargs['use_mesos_cache'] = True
    master = get_mesos_master(**master_kwargs)

    marathon_servers = get_marathon_servers(system_paasta_config)
    marathon_clients = all_marathon_clients(get_marathon_clients(marathon_servers))

    try:
        mesos_state = master.state
        all_mesos_results = _run_mesos_checks(
            mesos_master=master,
            mesos_state=mesos_state,
            marathon_clients=marathon_clients,
        )
    except MasterNotAvailableException as e:
        # if we can't connect to master at all,
        # then bomb out early
        paasta_print(PaastaColors.red("CRITICAL: %s" % e.message))
        sys.exit(2)

    # Check to see if Chronos should be running here by checking for config
    chronos_config = load_chronos_config()

    if chronos_config:
        chronos_client = get_chronos_client(chronos_config, cached=True)
        try:
            chronos_results = metastatus_lib.get_chronos_status(chronos_client)
        except (chronos.ChronosAPIError) as e:
            paasta_print(PaastaColors.red("CRITICAL: Unable to contact Chronos! Error: %s" % e))
            sys.exit(2)
    else:
        chronos_results = [metastatus_lib.HealthCheckResult(
            message='Chronos is not configured to run here',
            healthy=True,
        )]

    marathon_results = _run_marathon_checks(marathon_clients)

    mesos_ok = all(metastatus_lib.status_for_results(all_mesos_results))
    marathon_ok = all(metastatus_lib.status_for_results(marathon_results))
    chronos_ok = all(metastatus_lib.status_for_results(chronos_results))

    mesos_summary = metastatus_lib.generate_summary_for_check("Mesos", mesos_ok)
    marathon_summary = metastatus_lib.generate_summary_for_check("Marathon", marathon_ok)
    chronos_summary = metastatus_lib.generate_summary_for_check("Chronos", chronos_ok)

    healthy_exit = mesos_ok and marathon_ok and chronos_ok

    paasta_print("Master paasta_tools version: {}".format(__version__))
    metastatus_lib.print_results_for_healthchecks(mesos_summary, mesos_ok, all_mesos_results, args.verbose)
    if args.verbose > 1:
        for grouping in args.groupings:
            print_with_indent('Resources Grouped by %s' % grouping, 2)
            grouping_function = metastatus_lib.key_func_for_attribute(grouping)
            resource_info_by_value = metastatus_lib.get_resource_utilization_by_grouping(
                grouping_function,
                mesos_state,
            )
            all_rows = [[
                grouping.capitalize(), 'CPU (used/total)', 'RAM (used/total)', 'Disk (used/total)',
                'GPU (used/total)', 'Agent count',
            ]]
            table_rows = []
            for attribute_value, resource_info in resource_info_by_value.items():
                resource_utilizations = metastatus_lib.resource_utillizations_from_resource_info(
                    total=resource_info['total'],
                    free=resource_info['free'],
                )
                healthcheck_utilization_pairs = [
                    metastatus_lib.healthcheck_result_resource_utilization_pair_for_resource_utilization(
                        utilization,
                        args.threshold,
                    )
                    for utilization in resource_utilizations
                ]
                # Only ever lower healthy_exit: overwriting it here would let
                # the last group hide earlier failures, including failed
                # Mesos/Marathon/Chronos checks.
                if not all(pair[0].healthy for pair in healthcheck_utilization_pairs):
                    healthy_exit = False
                table_rows.append(metastatus_lib.get_table_rows_for_resource_info_dict(
                    attribute_value,
                    healthcheck_utilization_pairs,
                    args.humanize,
                ) + [str(resource_info['slave_count'])])
            all_rows.extend(sorted(table_rows, key=lambda x: x[0]))
            for line in format_table(all_rows):
                print_with_indent(line, 4)

        if args.autoscaling_info:
            print_with_indent("Autoscaling resources:", 2)
            headers = [field.replace("_", " ").capitalize() for field in AutoscalingInfo._fields]
            # Header row followed by one row per autoscaling resource.
            table = [headers] + list(get_autoscaling_info_for_all_resources(mesos_state))

            for line in format_table(table):
                print_with_indent(line, 4)

        if args.verbose >= 3:
            print_with_indent('Per Slave Utilization', 2)
            slave_resource_dict = metastatus_lib.get_resource_utilization_by_grouping(
                lambda slave: slave['hostname'],
                mesos_state,
            )
            all_rows = [['Hostname', 'CPU (used/total)', 'RAM (used/total)', 'Disk (used/total)', 'GPU (used/total)']]

            # print info about slaves here. Note that we don't make modifications to
            # the healthy_exit variable here, because we don't care about a single slave
            # having high usage.
            slave_rows = []
            for attribute_value, resource_info in slave_resource_dict.items():
                resource_utilizations = metastatus_lib.resource_utillizations_from_resource_info(
                    total=resource_info['total'],
                    free=resource_info['free'],
                )
                healthcheck_utilization_pairs = [
                    metastatus_lib.healthcheck_result_resource_utilization_pair_for_resource_utilization(
                        utilization,
                        args.threshold,
                    )
                    for utilization in resource_utilizations
                ]
                slave_rows.append(metastatus_lib.get_table_rows_for_resource_info_dict(
                    attribute_value,
                    healthcheck_utilization_pairs,
                    args.humanize,
                ))
            # Sort once over every slave; sorting inside the loop only ever
            # saw a single row and therefore ordered nothing.
            all_rows.extend(sorted(slave_rows, key=lambda x: x[0]))
            for line in format_table(all_rows):
                print_with_indent(line, 4)
    metastatus_lib.print_results_for_healthchecks(marathon_summary, marathon_ok, marathon_results, args.verbose)
    metastatus_lib.print_results_for_healthchecks(chronos_summary, chronos_ok, chronos_results, args.verbose)

    sys.exit(0 if healthy_exit else 2)
def status_marathon_job(
    service: str,
    instance: str,
    cluster: str,
    soa_dir: str,
    dashboards: Dict[marathon_tools.MarathonClient, str],
    normal_instance_count: int,
    clients: marathon_tools.MarathonClients,
    job_config: marathon_tools.MarathonServiceConfig,
    desired_app_id: str,
    verbose: int,
) -> Tuple[List[MarathonTask], str]:
    """Collect the tasks and a printable status for one marathon job.

    Every marathon app matching ``service``/``instance`` is rendered through
    ``status_marathon_app``. The text starts with the summary produced by
    ``status_marathon_job_human``, followed by autoscaling info (only when
    ``verbose > 0`` and such info exists) and then one section per app.

    :returns: ``(tasks, text)`` where ``tasks`` holds the tasks of all
        matching apps and ``text`` is the newline-joined status output.
    """
    apps_with_clients = marathon_tools.get_marathon_apps_with_clients(
        clients=clients.get_all_clients_for_service(job_config),
        embed_tasks=True,
        service_name=service,
    )

    # Slot 0 is reserved for the job summary, which needs totals that are
    # only known once every matching app has been visited.
    sections = [""]
    tasks = []
    total_running = 0

    if verbose > 0:
        autoscaling_info = get_autoscaling_info(apps_with_clients, job_config)
        if autoscaling_info:
            sections.append(" Autoscaling Info:")
            header_row = [
                field.replace("_", " ").capitalize()
                for field in ServiceAutoscalingInfo._fields
            ]
            table_lines = format_table(
                [header_row, humanize_autoscaling_info(autoscaling_info)]
            )
            sections.append("\n".join([" %s" % line for line in table_lines]))

    desired_status = "Waiting for bounce"
    matching = marathon_tools.get_matching_apps_with_clients(
        service, instance, apps_with_clients
    )
    for app, client in matching:
        tasks.extend(app.tasks)
        app_status, app_running, app_text = status_marathon_app(
            marathon_client=client,
            app=app,
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=soa_dir,
            dashboards=dashboards,
            verbose=verbose,
        )
        # Only the desired app decides the deploy status shown in the summary.
        if app.id.lstrip("/") == desired_app_id.lstrip("/"):
            desired_status = marathon_tools.MarathonDeployStatus.tostring(app_status)
        total_running += app_running
        sections.append(app_text)

    sections[0] = status_marathon_job_human(
        service=service,
        instance=instance,
        deploy_status=desired_status,
        desired_app_id=desired_app_id,
        app_count=len(matching),
        running_instances=total_running,
        normal_instance_count=normal_instance_count,
    )
    return tasks, "\n".join(sections)
def status_marathon_app(
    marathon_client: marathon_tools.MarathonClient,
    app: marathon_tools.MarathonApp,
    service: str,
    instance: str,
    cluster: str,
    soa_dir: str,
    dashboards: Dict[marathon_tools.MarathonClient, str],
    verbose: int,
) -> Tuple[int, int, str]:
    """Render a status report for a single marathon app.

    The report contains the dashboard line, task counts, creation time,
    deploy status and (if any) a summary of unused offers. With
    ``verbose > 0`` a per-task table is added as well.

    :param marathon_client: client used to query deploy status and app queue
    :param app: the marathon app to describe
    :param service: accepted for interface compatibility; not read here
    :param instance: accepted for interface compatibility; not read here
    :param cluster: accepted for interface compatibility; not read here
    :param soa_dir: accepted for interface compatibility; not read here
    :param dashboards: passed to ``get_marathon_dashboard`` together with the
        client and the app id
    :param verbose: verbosity level; values above 0 add the task table
    :returns: a tuple of (deploy status, ``app.tasks_running``, report text)
    """
    output = []
    create_datetime = datetime_from_utc_to_local(isodate.parse_datetime(app.version))
    output.append(get_marathon_dashboard(marathon_client, dashboards, app.id))
    output.append(
        " "
        + " ".join(
            [
                f"{app.tasks_running} running,",
                f"{app.tasks_healthy} healthy,",
                f"{app.tasks_staged} staged",
                f"out of {app.instances}",
            ]
        )
    )
    output.append(
        " App created: {} ({})".format(
            str(create_datetime), humanize.naturaltime(create_datetime)
        )
    )

    deploy_status = marathon_tools.get_marathon_app_deploy_status(marathon_client, app)
    app_queue = marathon_tools.get_app_queue(marathon_client, app.id)
    unused_offers_summary = marathon_tools.summarize_unused_offers(app_queue)
    if deploy_status == marathon_tools.MarathonDeployStatus.Delayed:
        # Only a delayed app has a backoff worth showing next to the status.
        _, backoff_seconds = marathon_tools.get_app_queue_status_from_queue(app_queue)
        deploy_status_human = marathon_app_deploy_status_human(
            deploy_status, backoff_seconds
        )
    else:
        deploy_status_human = marathon_app_deploy_status_human(deploy_status)
    output.append(f" Status: {deploy_status_human}")

    if unused_offers_summary:
        output.append(" Possibly stalled for:")
        output.append(
            " ".join([f"{k}: {n} times" for k, n in unused_offers_summary.items()])
        )

    if verbose > 0:
        output.append(" Tasks:")
        rows = [
            (
                "Mesos Task ID",
                "Host deployed to",
                "Deployed at what localtime",
                "Health",
            )
        ]
        for task in app.tasks:
            local_deployed_datetime = datetime_from_utc_to_local(task.staged_at)
            if task.host is None:
                hostname = "Unknown"
            elif task.ports:
                hostname = "{}:{}".format(task.host.split(".")[0], task.ports[0])
            else:
                # A task can have a host but no (or an empty) port list;
                # indexing ports[0] would abort the whole report, so show
                # the short host name on its own instead.
                hostname = task.host.split(".")[0]

            if not task.health_check_results:
                health_check_status = PaastaColors.grey("N/A")
            elif marathon_tools.is_task_healthy(task):
                health_check_status = PaastaColors.green("Healthy")
            else:
                health_check_status = PaastaColors.red("Unhealthy")

            rows.append(
                (
                    get_short_task_id(task.id),
                    hostname,
                    "{} ({})".format(
                        local_deployed_datetime.strftime("%Y-%m-%dT%H:%M"),
                        humanize.naturaltime(local_deployed_datetime),
                    ),
                    health_check_status,
                )
            )
        output.append("\n".join([" %s" % line for line in format_table(rows)]))

    if not app.tasks:
        output.append(" No tasks associated with this marathon app")
    return deploy_status, app.tasks_running, "\n".join(output)