def get_verbose_status_of_marathon_app(app):
    """Build the verbose, human-readable status report for one marathon app.

    The report lists the app ID, the app's creation time (parsed from
    ``app.version``) and a table with one row per task in ``app.tasks``.

    :param app: marathon app object exposing ``id``, ``version`` and ``tasks``
    :returns: tuple of (``app.tasks``, report lines joined with newlines)
    """
    created_at = datetime_from_utc_to_local(isodate.parse_datetime(app.version))
    report = [
        " Marathon app ID: %s" % PaastaColors.bold(app.id),
        " App created: %s (%s)" % (str(created_at), humanize.naturaltime(created_at)),
        " Tasks:",
    ]

    table = [("Mesos Task ID", "Host deployed to", "Deployed at what localtime")]
    for task in app.tasks:
        deployed_at = datetime_from_utc_to_local(task.staged_at)
        # Tasks without a host are shown as "Unknown"; otherwise only the
        # first label of the hostname and the first port are displayed.
        host_port = (
            "Unknown"
            if task.host is None
            else "%s:%s" % (task.host.split(".")[0], task.ports[0])
        )
        short_id = get_short_task_id(task.id)
        deployed_text = '%s (%s)' % (
            deployed_at.strftime("%Y-%m-%dT%H:%M"),
            humanize.naturaltime(deployed_at),
        )
        table.append((short_id, host_port, deployed_text))

    report.append('\n'.join(" %s" % row for row in format_table(table)))
    if not app.tasks:
        report.append(" No tasks associated with this marathon app")
    return app.tasks, "\n".join(report)
def get_verbose_status_of_marathon_app(app):
    """Build the verbose, human-readable status report for one marathon app.

    The report lists the app ID, the app's creation time (parsed from
    ``app.version``) and a table with one row per task in ``app.tasks``.

    :param app: marathon app object exposing ``id``, ``version`` and ``tasks``
    :returns: tuple of (``app.tasks``, report lines joined with newlines)
    """
    created_at = datetime_from_utc_to_local(isodate.parse_datetime(app.version))
    report = [
        " Marathon app ID: %s" % PaastaColors.bold(app.id),
        " App created: %s (%s)" % (str(created_at), humanize.naturaltime(created_at)),
        " Tasks:",
    ]

    table = [("Mesos Task ID", "Host deployed to", "Deployed at what localtime")]
    for task in app.tasks:
        deployed_at = datetime_from_utc_to_local(task.staged_at)
        # Tasks without a host are shown as "Unknown"; otherwise only the
        # first label of the hostname and the first port are displayed.
        host_port = (
            "Unknown"
            if task.host is None
            else "%s:%s" % (task.host.split(".")[0], task.ports[0])
        )
        short_id = get_short_task_id(task.id)
        deployed_text = '%s (%s)' % (
            deployed_at.strftime("%Y-%m-%dT%H:%M"),
            humanize.naturaltime(deployed_at),
        )
        table.append((short_id, host_port, deployed_text))

    report.append('\n'.join(" %s" % row for row in format_table(table)))
    if not app.tasks:
        report.append(" No tasks associated with this marathon app")
    return app.tasks, "\n".join(report)
def get_verbose_status_of_marathon_app(marathon_client, app, service, instance, cluster, soa_dir):
    """Build the verbose, human-readable status report for one marathon app.

    The report lists the app ID, its creation time (parsed from
    ``app.version``), an autoscaling table when ``get_autoscaling_info``
    returns something truthy, and a table with one row per task that
    includes a colored health column.

    :param marathon_client: passed through to ``get_autoscaling_info``
    :param app: marathon app object exposing ``id``, ``version`` and ``tasks``
    :param service: passed through to ``get_autoscaling_info``
    :param instance: passed through to ``get_autoscaling_info``
    :param cluster: passed through to ``get_autoscaling_info``
    :param soa_dir: passed through to ``get_autoscaling_info``
    :returns: tuple of (``app.tasks``, report lines joined with newlines)
    """
    created_at = datetime_from_utc_to_local(isodate.parse_datetime(app.version))
    report = [
        " Marathon app ID: %s" % PaastaColors.bold(app.id),
        " App created: %s (%s)" % (str(created_at), humanize.naturaltime(created_at)),
    ]

    autoscaling_info = get_autoscaling_info(marathon_client, service, instance, cluster, soa_dir)
    if autoscaling_info:
        report.append(" Autoscaling Info:")
        # Column titles are derived from ServiceAutoscalingInfo's field names.
        column_titles = [
            name.replace("_", " ").capitalize()
            for name in ServiceAutoscalingInfo._fields
        ]
        autoscaling_lines = format_table([column_titles, autoscaling_info])
        report.append('\n'.join(" %s" % line for line in autoscaling_lines))

    report.append(" Tasks:")
    table = [("Mesos Task ID", "Host deployed to", "Deployed at what localtime", "Health")]
    for task in app.tasks:
        deployed_at = datetime_from_utc_to_local(task.staged_at)
        # Tasks without a host are shown as "Unknown"; otherwise only the
        # first label of the hostname and the first port are displayed.
        host_port = (
            "Unknown"
            if task.host is None
            else "%s:%s" % (task.host.split(".")[0], task.ports[0])
        )
        # No health check results at all is reported as N/A, not as a failure.
        if task.health_check_results:
            if marathon_tools.is_task_healthy(task):
                health = PaastaColors.green("Healthy")
            else:
                health = PaastaColors.red("Unhealthy")
        else:
            health = PaastaColors.grey("N/A")
        short_id = get_short_task_id(task.id)
        deployed_text = '%s (%s)' % (
            deployed_at.strftime("%Y-%m-%dT%H:%M"),
            humanize.naturaltime(deployed_at),
        )
        table.append((short_id, host_port, deployed_text, health))

    report.append('\n'.join(" %s" % row for row in format_table(table)))
    if not app.tasks:
        report.append(" No tasks associated with this marathon app")
    return app.tasks, "\n".join(report)
def filter_healthy_marathon_instances_for_short_app_id(all_tasks, app_id):
    """Count the tasks of one app that are healthy and started over a minute ago.

    A task is counted when its ``app_id`` starts with ``'/<app_id>'``,
    ``marathon_tools.is_task_healthy(task, default_healthy=True)`` is truthy,
    ``started_at`` is set, and the local-time start is more than one minute
    before now.

    :param all_tasks: iterable of task objects exposing ``app_id`` and
        ``started_at``
    :param app_id: short app id, without the leading slash
    :returns: number of matching tasks
    """
    app_prefix = '/%s' % app_id
    one_minute_ago = datetime.now() - timedelta(minutes=1)

    healthy_count = 0
    for task in all_tasks:
        if not task.app_id.startswith(app_prefix):
            continue
        # The stray debug print of every start time was removed: this
        # function only computes a count and should not write to stdout.
        if (
            marathon_tools.is_task_healthy(task, default_healthy=True)
            and task.started_at is not None
            and datetime_from_utc_to_local(task.started_at) < one_minute_ago
        ):
            healthy_count += 1
    return healthy_count
def prettify_timestamp(timestamp):
    """Render ``timestamp`` as ``YYYY-MM-DD HH:MM:SS`` in local time.

    :param timestamp: string in ``%Y-%m-%dT%H:%M:%S.%f`` form; it is handed
        to ``datetime_from_utc_to_local`` after parsing
    :returns: the converted time formatted without microseconds
    """
    parsed = datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%f")
    return datetime_from_utc_to_local(parsed).strftime("%Y-%m-%d %H:%M:%S")
def test_format_parents_verbose():
    """_format_parents_verbose names the parent job and shows its last run."""
    parent_name = "testservice testinstance"
    job = {"name": "myexamplejob", "parents": [parent_name]}
    last_run_dt = datetime.datetime(2007, 4, 1, 17, 52, 58, 908000, pytz.utc)
    last_run_status = ("2007-04-01T17:52:58.908Z", chronos_tools.LastRunState.Success)

    patch_jobs = mock.patch(
        "paasta_tools.chronos_tools.get_jobs_for_service_instance",
        autospec=True,
        return_value=[{"name": parent_name}],
    )
    patch_last_run = mock.patch(
        "paasta_tools.chronos_tools.get_status_last_run",
        autospec=True,
        return_value=last_run_status,
    )
    with patch_jobs, patch_last_run:
        expected_time = humanize.naturaltime(datetime_from_utc_to_local(last_run_dt))
        rendered = chronos_serviceinit._format_parents_verbose(job)
        assert parent_name in rendered
        expected_line = " Last Run: {} (2007-04-01T17:52, {})".format(
            PaastaColors.green("OK"), expected_time
        )
        assert expected_line in rendered
def format_kubernetes_replicaset_table(replicasets):
    """Build the table of ReplicaSets: name, ready/desired count, creation time.

    The ``ready/desired`` cell is green once ``ready_replicas`` reaches
    ``replicas`` and red otherwise. The creation time is shown in local time
    together with a humanized age.

    :param replicasets: iterable of objects exposing ``name``, ``replicas``,
        ``ready_replicas`` and ``create_timestamp`` (a POSIX timestamp)
    :returns: the result of ``format_table`` for the header row plus one row
        per ReplicaSet
    """
    rows = [("ReplicaSet Name", "Ready / Desired", "Created at what localtime")]
    for replicaset in replicasets:
        # Build a naive *UTC* datetime from the epoch value before converting.
        # datetime.fromtimestamp() already returns local time, so feeding it
        # to the UTC->local helper applied the zone offset twice.
        # NOTE(review): assumes datetime_from_utc_to_local expects naive UTC,
        # as its name and its other utcfromtimestamp callers indicate.
        local_created_datetime = datetime_from_utc_to_local(
            datetime.utcfromtimestamp(replicaset.create_timestamp)
        )

        replica_status = f"{replicaset.ready_replicas}/{replicaset.replicas}"
        if replicaset.ready_replicas >= replicaset.replicas:
            replica_status = PaastaColors.green(replica_status)
        else:
            replica_status = PaastaColors.red(replica_status)

        rows.append(
            (
                replicaset.name,
                replica_status,
                "{} ({})".format(
                    local_created_datetime.strftime("%Y-%m-%dT%H:%M"),
                    humanize.naturaltime(local_created_datetime),
                ),
            )
        )
    return format_table(rows)
def format_kubernetes_pod_table(pods):
    """Build the table of pods: name, host, deployment time and health.

    Health is derived from ``pod.phase``: ``None`` or ``"Pending"`` is shown
    as N/A, ``"Running"`` as Healthy, anything else as Unhealthy.

    :param pods: iterable of objects exposing ``name``, ``host``, ``phase``
        and ``deployed_timestamp`` (a POSIX timestamp)
    :returns: the result of ``format_table`` for the header row plus one row
        per pod
    """
    rows = [("Pod ID", "Host deployed to", "Deployed at what localtime", "Health")]
    for pod in pods:
        # Build a naive *UTC* datetime from the epoch value before converting.
        # datetime.fromtimestamp() already returns local time, so feeding it
        # to the UTC->local helper applied the zone offset twice.
        # NOTE(review): assumes datetime_from_utc_to_local expects naive UTC,
        # as its name and its other utcfromtimestamp callers indicate.
        local_deployed_datetime = datetime_from_utc_to_local(
            datetime.utcfromtimestamp(pod.deployed_timestamp)
        )

        hostname = f"{pod.host}" if pod.host is not None else "Unknown"

        if pod.phase is None or pod.phase == "Pending":
            health_check_status = PaastaColors.grey("N/A")
        elif pod.phase == "Running":
            health_check_status = PaastaColors.green("Healthy")
        else:
            health_check_status = PaastaColors.red("Unhealthy")

        rows.append(
            (
                pod.name,
                hostname,
                "{} ({})".format(
                    local_deployed_datetime.strftime("%Y-%m-%dT%H:%M"),
                    humanize.naturaltime(local_deployed_datetime),
                ),
                health_check_status,
            )
        )
    return format_table(rows)
def format_marathon_task_table(tasks):
    """Build the table of marathon tasks: id, host:port, deployment time, health.

    Health comes from ``task.is_healthy``: ``None`` is shown as N/A, a truthy
    value as Healthy and any other value as Unhealthy.

    :param tasks: iterable of objects exposing ``id``, ``host``, ``port``,
        ``is_healthy`` and ``deployed_timestamp`` (a POSIX timestamp)
    :returns: the result of ``format_table`` for the header row plus one row
        per task
    """
    rows = [
        ("Mesos Task ID", "Host deployed to", "Deployed at what localtime", "Health")
    ]
    for task in tasks:
        # Build a naive *UTC* datetime from the epoch value before converting.
        # datetime.fromtimestamp() already returns local time, so feeding it
        # to the UTC->local helper applied the zone offset twice.
        # NOTE(review): assumes datetime_from_utc_to_local expects naive UTC,
        # as its name and its other utcfromtimestamp callers indicate.
        local_deployed_datetime = datetime_from_utc_to_local(
            datetime.utcfromtimestamp(task.deployed_timestamp)
        )

        if task.host is not None:
            hostname = f"{task.host}:{task.port}"
        else:
            hostname = "Unknown"

        if task.is_healthy is None:
            health_check_status = PaastaColors.grey("N/A")
        elif task.is_healthy:
            health_check_status = PaastaColors.green("Healthy")
        else:
            health_check_status = PaastaColors.red("Unhealthy")

        rows.append(
            (
                task.id,
                hostname,
                "{} ({})".format(
                    local_deployed_datetime.strftime("%Y-%m-%dT%H:%M"),
                    humanize.naturaltime(local_deployed_datetime),
                ),
                health_check_status,
            )
        )
    return format_table(rows)
def test_format_parents_verbose():
    """_format_parents_verbose names the parent job and shows its last run."""
    parent_name = 'testservice testinstance'
    job = {'name': 'myexamplejob', 'parents': [parent_name]}
    last_run_dt = datetime.datetime(2007, 4, 1, 17, 52, 58, 908000, pytz.utc)
    last_run_status = ('2007-04-01T17:52:58.908Z', chronos_tools.LastRunState.Success)

    patch_jobs = mock.patch(
        'paasta_tools.chronos_tools.get_jobs_for_service_instance',
        autospec=True,
        return_value=[{'name': parent_name}],
    )
    patch_last_run = mock.patch(
        'paasta_tools.chronos_tools.get_status_last_run',
        autospec=True,
        return_value=last_run_status,
    )
    with patch_jobs, patch_last_run:
        expected_time = humanize.naturaltime(datetime_from_utc_to_local(last_run_dt))
        rendered = chronos_serviceinit._format_parents_verbose(job)
        assert parent_name in rendered
        expected_line = " Last Run: %s (2007-04-01T17:52, %s)" % (
            PaastaColors.green("OK"),
            expected_time,
        )
        assert expected_line in rendered
def prettify_timestamp(timestamp):
    """Render ``timestamp`` as ``YYYY-MM-DD HH:MM:SS`` in local time.

    :param timestamp: string in ``%Y-%m-%dT%H:%M:%S.%f`` form; it is handed
        to ``datetime_from_utc_to_local`` after parsing
    :returns: the converted time formatted without microseconds
    """
    parsed = datetime.datetime.strptime(timestamp, "%Y-%m-%dT%H:%M:%S.%f")
    return datetime_from_utc_to_local(parsed).strftime("%Y-%m-%d %H:%M:%S")
def _prettify_time(time):
    """Format an ISO 8601 time string as local time plus a humanized age.

    :param time: string parseable by ``isodate.parse_datetime``
    :returns: ``"<YYYY-MM-DDTHH:MM>, <humanized time>"`` for the time after
        conversion with ``datetime_from_utc_to_local``
    :raises isodate.isoerror.ISO8601Error: re-raised after printing a
        message when ``time`` cannot be parsed
    """
    try:
        dt = isodate.parse_datetime(time)
    except isodate.isoerror.ISO8601Error:
        # Parenthesised single-argument form: prints the same text under
        # Python 2 and is valid syntax under Python 3.
        print("unable to parse datetime %s" % time)
        raise
    dt_localtime = datetime_from_utc_to_local(dt)
    pretty_dt = "%s, %s" % (
        dt_localtime.strftime("%Y-%m-%dT%H:%M"),
        humanize.naturaltime(dt_localtime),
    )
    return pretty_dt
def get_healthy_marathon_instances_for_short_app_id(client, app_id):
    """Count an app's tasks that pass every health check and started over a minute ago.

    A task is counted when its ``app_id`` starts with ``'/<app_id>'``, every
    entry in ``health_check_results`` is ``alive`` (an empty list passes),
    ``started_at`` is set, and the local-time start is more than one minute
    before now.

    :param client: object whose ``list_tasks()`` returns the tasks to inspect
    :param app_id: short app id, without the leading slash
    :returns: number of matching tasks
    """
    tasks = client.list_tasks()
    tasks_for_app = [task for task in tasks if task.app_id.startswith('/%s' % app_id)]
    one_minute_ago = datetime.now() - timedelta(minutes=1)

    healthy_tasks = []
    for task in tasks_for_app:
        # started_at may be None (presumably a task that has not started
        # yet); such a task must be skipped rather than passed to the
        # timezone helper, which would fail on None.
        if all([health_check_result.alive for health_check_result in task.health_check_results]) \
                and task.started_at is not None \
                and datetime_from_utc_to_local(task.started_at) < one_minute_ago:
            healthy_tasks.append(task)
    return len(healthy_tasks)
def _prettify_time(time):
    """Format an ISO 8601 time string as local time plus a humanized age.

    :param time: string parseable by ``isodate.parse_datetime``
    :returns: ``"<YYYY-MM-DDTHH:MM>, <humanized time>"`` for the time after
        conversion with ``datetime_from_utc_to_local``
    :raises isodate.isoerror.ISO8601Error: re-raised after printing a
        message when ``time`` cannot be parsed
    """
    try:
        dt = isodate.parse_datetime(time)
    except isodate.isoerror.ISO8601Error:
        # Parenthesised single-argument form: prints the same text under
        # Python 2 and is valid syntax under Python 3.
        print("unable to parse datetime %s" % time)
        raise
    dt_localtime = datetime_from_utc_to_local(dt)
    pretty_dt = "%s, %s" % (
        dt_localtime.strftime("%Y-%m-%dT%H:%M"),
        humanize.naturaltime(dt_localtime),
    )
    return pretty_dt
def get_healthy_marathon_instances_for_short_app_id(client, app_id):
    """Count an app's tasks that pass every health check and started over a minute ago.

    :param client: object whose ``list_tasks()`` returns the tasks to inspect
    :param app_id: short app id, without the leading slash
    :returns: number of tasks whose ``app_id`` starts with ``'/<app_id>'``,
        whose health check results are all ``alive``, and whose
        ``started_at`` is set and lies more than one minute in the past
    """
    app_prefix = '/%s' % app_id
    matching = [t for t in client.list_tasks() if t.app_id.startswith(app_prefix)]
    cutoff = datetime.now() - timedelta(minutes=1)

    def _counts_as_healthy(task):
        # Same checks, in the same order, as a chained ``and``.
        if not all([result.alive for result in task.health_check_results]):
            return False
        if task.started_at is None:
            return False
        return datetime_from_utc_to_local(task.started_at) < cutoff

    return sum(1 for t in matching if _counts_as_healthy(t))
def get_healthy_marathon_instances_for_short_app_id(client, app_id):
    """Count an app's healthy tasks that started more than a minute ago.

    :param client: object whose ``list_tasks()`` returns the tasks to inspect
    :param app_id: short app id, without the leading slash
    :returns: number of tasks whose ``app_id`` starts with ``'/<app_id>'``,
        for which ``marathon_tools.is_task_healthy(task,
        default_healthy=True)`` is truthy, and whose ``started_at`` is set
        and lies more than one minute in the past
    """
    app_prefix = '/%s' % app_id
    matching = [t for t in client.list_tasks() if t.app_id.startswith(app_prefix)]
    cutoff = datetime.now() - timedelta(minutes=1)

    def _counts_as_healthy(task):
        # Same checks, in the same order, as a chained ``and``.
        if not marathon_tools.is_task_healthy(task, default_healthy=True):
            return False
        if task.started_at is None:
            return False
        return datetime_from_utc_to_local(task.started_at) < cutoff

    return sum(1 for t in matching if _counts_as_healthy(t))
def status_marathon_app(
    marathon_client: marathon_tools.MarathonClient,
    app: marathon_tools.MarathonApp,
    service: str,
    instance: str,
    cluster: str,
    soa_dir: str,
    dashboards: Dict[marathon_tools.MarathonClient, str],
    verbose: int,
) -> Tuple[int, int, str]:
    """Build the status report for one marathon app.

    The report contains the dashboard line, running/healthy/staged task
    counts, the app creation time, the human-readable deploy status, a
    summary of unused offers when there is one, and, when ``verbose > 0``,
    a per-task table.

    :param marathon_client: client used for the dashboard lookup and the
        deploy status / app queue queries
    :param app: marathon app to describe
    :param service: not used in this function body
    :param instance: not used in this function body
    :param cluster: not used in this function body
    :param soa_dir: not used in this function body
    :param dashboards: passed through to ``get_marathon_dashboard``
    :param verbose: the task table is included when greater than 0
    :returns: tuple of (deploy status, ``app.tasks_running``, report lines
        joined with newlines)
    """
    output = []
    # app.version is parsed as an ISO 8601 datetime and shown as the
    # creation time of the app.
    create_datetime = datetime_from_utc_to_local(isodate.parse_datetime(app.version))
    output.append(get_marathon_dashboard(marathon_client, dashboards, app.id))
    output.append(
        " "
        + " ".join(
            [
                f"{app.tasks_running} running,",
                f"{app.tasks_healthy} healthy,",
                f"{app.tasks_staged} staged",
                f"out of {app.instances}",
            ]
        )
    )
    output.append(
        " App created: {} ({})".format(
            str(create_datetime), humanize.naturaltime(create_datetime)
        )
    )

    deploy_status = marathon_tools.get_marathon_app_deploy_status(marathon_client, app)
    app_queue = marathon_tools.get_app_queue(marathon_client, app.id)
    unused_offers_summary = marathon_tools.summarize_unused_offers(app_queue)
    # Only a Delayed deployment passes the backoff time taken from the app
    # queue to the human-readable status.
    if deploy_status == marathon_tools.MarathonDeployStatus.Delayed:
        _, backoff_seconds = marathon_tools.get_app_queue_status_from_queue(app_queue)
        deploy_status_human = marathon_app_deploy_status_human(
            deploy_status, backoff_seconds
        )
    else:
        deploy_status_human = marathon_app_deploy_status_human(deploy_status)
    output.append(f" Status: {deploy_status_human}")

    if unused_offers_summary is not None and len(unused_offers_summary) > 0:
        output.append(" Possibly stalled for:")
        output.append(
            " ".join([f"{k}: {n} times" for k, n in unused_offers_summary.items()])
        )

    if verbose > 0:
        output.append(" Tasks:")
        rows = [
            (
                "Mesos Task ID",
                "Host deployed to",
                "Deployed at what localtime",
                "Health",
            )
        ]
        for task in app.tasks:
            local_deployed_datetime = datetime_from_utc_to_local(task.staged_at)
            # Only the first label of the hostname and the first port are
            # shown; tasks without a host are listed as "Unknown".
            if task.host is not None:
                hostname = "{}:{}".format(task.host.split(".")[0], task.ports[0])
            else:
                hostname = "Unknown"
            # No health check results at all is reported as N/A, not as a
            # failure.
            if not task.health_check_results:
                health_check_status = PaastaColors.grey("N/A")
            elif marathon_tools.is_task_healthy(task):
                health_check_status = PaastaColors.green("Healthy")
            else:
                health_check_status = PaastaColors.red("Unhealthy")
            rows.append(
                (
                    get_short_task_id(task.id),
                    hostname,
                    "{} ({})".format(
                        local_deployed_datetime.strftime("%Y-%m-%dT%H:%M"),
                        humanize.naturaltime(local_deployed_datetime),
                    ),
                    health_check_status,
                )
            )
        output.append("\n".join([" %s" % line for line in format_table(rows)]))
        # NOTE(review): this check is assumed to belong to the verbose
        # branch, next to the task table it annotates - confirm.
        if len(app.tasks) == 0:
            output.append(" No tasks associated with this marathon app")
    return deploy_status, app.tasks_running, "\n".join(output)
def print_flink_status(
    cluster: str,
    service: str,
    instance: str,
    output: List[str],
    flink: Mapping[str, Any],
    verbose: int,
) -> int:
    """Append a human-readable status summary of a Flink cluster to ``output``.

    :param cluster: not used in this function body
    :param service: not used in this function body
    :param instance: not used in this function body
    :param output: list that the status lines are appended to (mutated)
    :param flink: mapping read for its ``"status"`` and ``"metadata"`` entries
    :param verbose: when truthy, adds the Flink revision, job IDs, dashboard
        URLs per job and job exceptions
    :returns: 1 when ``flink`` has no status, 0 otherwise
    :raises ValueError: when the ``yelp.com/paasta_config_sha`` label is
        missing from the metadata
    """
    status = flink.get("status")
    if status is None:
        output.append(PaastaColors.red(" Flink cluster is not available yet"))
        return 1

    # Since metadata should be available no matter the state, we show it first. If this errors out
    # then we cannot really do much to recover, because cluster is not in usable state anyway
    metadata = flink.get("metadata")
    config_sha = metadata.labels.get("yelp.com/paasta_config_sha")
    if config_sha is None:
        raise ValueError(f"expected config sha on Flink, but received {metadata}")
    # Strip the 6-character "config" prefix so only the sha itself is shown.
    if config_sha.startswith("config"):
        config_sha = config_sha[6:]

    output.append(f" Config SHA: {config_sha}")

    if status.state != "running":
        output.append(
            " State: {state}".format(state=PaastaColors.yellow(status.state))
        )
        output.append(f" No other information available in non-running state")
        return 0

    dashboard_url = metadata.annotations.get("yelp.com/dashboard_url")
    if verbose:
        output.append(
            f" Flink version: {status.config['flink-version']} {status.config['flink-revision']}"
        )
    else:
        output.append(f" Flink version: {status.config['flink-version']}")
    output.append(f" URL: {dashboard_url}/")
    output.append(f" State: {status.state}")
    output.append(
        " Jobs:"
        f" {status.overview['jobs-running']} running,"
        f" {status.overview['jobs-finished']} finished,"
        f" {status.overview['jobs-failed']} failed,"
        f" {status.overview['jobs-cancelled']} cancelled"
    )
    output.append(
        " "
        f" {status.overview['taskmanagers']} taskmanagers,"
        f" {status.overview['slots-available']}/{status.overview['slots-total']} slots available"
    )

    output.append(f" Jobs:")
    if verbose:
        output.append(
            f" Job Name State Job ID Started"
        )
    else:
        output.append(f" Job Name State Started")

    # Use only the most recent jobs
    # Jobs without a name or start time are dropped; the rest are sorted by
    # name (groupby needs sorted input) and the entry with the largest
    # start-time in each name group is kept.
    unique_jobs = (
        sorted(jobs, key=lambda j: -j["start-time"])[0]
        for _, jobs in groupby(
            sorted(
                (j for j in status.jobs if j.get("name") and j.get("start-time")),
                key=lambda j: j["name"],
            ),
            lambda j: j["name"],
        )
    )
    for job in unique_jobs:
        job_id = job["jid"]
        if verbose:
            fmt = """ {job_name: <32.32} {state: <11} {job_id} {start_time} {dashboard_url}"""
        else:
            fmt = " {job_name: <32.32} {state: <11} {start_time}"
        # start-time is divided by 1000 before being read as an epoch value
        # (presumably it is in milliseconds - TODO confirm).
        start_time = datetime_from_utc_to_local(
            datetime.utcfromtimestamp(int(job["start-time"]) // 1000)
        )
        output.append(
            fmt.format(
                job_id=job_id,
                # Only the part of the name after the second "." is shown.
                job_name=job["name"].split(".", 2)[2],
                state=(job.get("state") or "unknown"),
                start_time=f"{str(start_time)} ({humanize.naturaltime(start_time)})",
                dashboard_url=PaastaColors.grey(f"{dashboard_url}/#/jobs/{job_id}"),
            )
        )
        if verbose and job_id in status.exceptions:
            exceptions = status.exceptions[job_id]
            root_exception = exceptions["root-exception"]
            if root_exception is not None:
                output.append(f" Exception: {root_exception}")
                # NOTE(review): the timestamp line is assumed to be a
                # sub-detail of the exception line - confirm the nesting.
                ts = exceptions["timestamp"]
                if ts is not None:
                    exc_ts = datetime_from_utc_to_local(
                        datetime.utcfromtimestamp(int(ts) // 1000)
                    )
                    output.append(
                        f" {str(exc_ts)} ({humanize.naturaltime(exc_ts)})"
                    )
    return 0
def print_flinkcluster_status(
    cluster: str,
    service: str,
    instance: str,
    output: List[str],
    status,
    verbose: int,
) -> int:
    """Append a human-readable status summary of a Flink cluster to ``output``.

    :param cluster: passed to ``get_dashboard_url``
    :param service: passed to ``get_dashboard_url``
    :param instance: passed to ``get_dashboard_url``
    :param output: list that the status lines are appended to (mutated)
    :param status: cluster status object, or None when not available; read
        for ``state``, ``config``, ``overview``, ``jobs`` and ``exceptions``
    :param verbose: when truthy, adds the Flink revision plus job IDs and
        dashboard URLs per job
    :returns: 1 when ``status`` is None, 0 otherwise
    """
    if status is None:
        output.append(
            PaastaColors.red(" Flink cluster is not available yet"))
        return 1

    if status.state != "running":
        output.append(" State: {state}".format(state=PaastaColors.yellow(
            status.state), ))
        output.append(
            f" No other information available in non-running state")
        return 0

    dashboard_url = get_dashboard_url(
        cluster=cluster,
        service=service,
        instance=instance,
    )
    if verbose:
        output.append(
            f" Flink version: {status.config['flink-version']} {status.config['flink-revision']}"
        )
    else:
        output.append(f" Flink version: {status.config['flink-version']}")
    output.append(f" URL: {dashboard_url}/")
    output.append(f" State: {status.state}")
    output.append(
        " Jobs:"
        f" {status.overview['jobs-running']} running,"
        f" {status.overview['jobs-finished']} finished,"
        f" {status.overview['jobs-failed']} failed,"
        f" {status.overview['jobs-cancelled']} cancelled",
    )
    output.append(
        " "
        f" {status.overview['taskmanagers']} taskmanagers,"
        f" {status.overview['slots-available']}/{status.overview['slots-total']} slots available",
    )

    output.append(f" Jobs:")
    if verbose:
        output.append(
            f" Job Name State Job ID Started"
        )
    else:
        output.append(
            f" Job Name State Started")

    # Use only the most recent jobs
    # Jobs are sorted by name (groupby needs sorted input) and the entry
    # with the largest start-time in each name group is kept.
    unique_jobs = (sorted(jobs, key=lambda j: -j['start-time'])[0]
                   for _, jobs in groupby(
                       sorted(status.jobs, key=lambda j: j['name']),
                       lambda j: j['name'],
                   ))
    for job in unique_jobs:
        job_id = job['jid']
        if verbose:
            fmt = """ {job_name: <32.32} {state: <11} {job_id} {start_time} {dashboard_url}"""
        else:
            fmt = " {job_name: <32.32} {state: <11} {start_time}"
        # start-time is divided by 1000 before being read as an epoch value
        # (presumably it is in milliseconds - TODO confirm).
        start_time = datetime_from_utc_to_local(
            datetime.utcfromtimestamp(int(job['start-time']) // 1000))
        output.append(
            fmt.format(
                job_id=job_id,
                # Only the part of the name after the second "." is shown.
                job_name=job['name'].split('.', 2)[2],
                state=job['state'],
                start_time=
                f'{str(start_time)} ({humanize.naturaltime(start_time)})',
                dashboard_url=PaastaColors.grey(
                    f'{dashboard_url}/#/jobs/{job_id}', ),
            ))
        # Exceptions are shown regardless of the verbose setting here.
        if job_id in status.exceptions:
            exceptions = status.exceptions[job_id]
            root_exception = exceptions['root-exception']
            if root_exception is not None:
                output.append(f" Exception: {root_exception}")
                # NOTE(review): the timestamp line is assumed to be a
                # sub-detail of the exception line - confirm the nesting.
                ts = exceptions['timestamp']
                if ts is not None:
                    exc_ts = datetime_from_utc_to_local(
                        datetime.utcfromtimestamp(int(ts) // 1000))
                    output.append(
                        f" {str(exc_ts)} ({humanize.naturaltime(exc_ts)})"
                    )
    return 0
def format_timestamp(tstamp):
    """Return a humanized, relative rendering of ``tstamp``.

    The value is parsed with ``parse_timestamp``, converted with
    ``datetime_from_utc_to_local`` and then passed to ``naturaltime``.
    """
    parsed = parse_timestamp(tstamp)
    local_time = datetime_from_utc_to_local(parsed)
    return naturaltime(local_time)
def format_timestamp(tstamp):
    """Return a humanized, relative rendering of ``tstamp``.

    The value is parsed with ``parse_timestamp``, converted with
    ``datetime_from_utc_to_local`` and then passed to ``naturaltime``.
    """
    parsed = parse_timestamp(tstamp)
    local_time = datetime_from_utc_to_local(parsed)
    return naturaltime(local_time)