def test_status_smartstack_backends_multiple_locations(): service = 'my_service' instance = 'my_instance' service_instance = compose_job_id(service, instance) cluster = 'fake_cluster' good_task = mock.Mock() other_task = mock.Mock() fake_backend = {'status': 'UP', 'lastchg': '1', 'last_chk': 'OK', 'check_code': '200', 'svname': 'ipaddress1:1001_hostname1', 'check_status': 'L7OK', 'check_duration': 1} with contextlib.nested( mock.patch('paasta_tools.marathon_tools.load_service_namespace_config', autospec=True), mock.patch('paasta_tools.marathon_tools.read_namespace_for_service_instance'), mock.patch('paasta_tools.marathon_serviceinit.get_mesos_slaves_grouped_by_attribute'), mock.patch('paasta_tools.marathon_serviceinit.get_backends', autospec=True), mock.patch('paasta_tools.marathon_serviceinit.match_backends_and_tasks', autospec=True), ) as ( mock_load_service_namespace_config, mock_read_ns, mock_get_mesos_slaves_grouped_by_attribute, mock_get_backends, mock_match_backends_and_tasks, ): mock_load_service_namespace_config.return_value.get_discover.return_value = 'fake_discover' mock_read_ns.return_value = instance mock_get_backends.return_value = [fake_backend] mock_match_backends_and_tasks.return_value = [ (fake_backend, good_task), ] tasks = [good_task, other_task] mock_get_mesos_slaves_grouped_by_attribute.return_value = { 'fake_location1': ['fakehost1'], 'fake_location2': ['fakehost2'], } actual = marathon_serviceinit.status_smartstack_backends( service=service, instance=instance, cluster=cluster, job_config=fake_marathon_job_config, tasks=tasks, expected_count=len(mock_get_backends.return_value), soa_dir=None, verbose=False, synapse_port=123456, synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT, ) mock_get_backends.assert_any_call( service_instance, synapse_host='fakehost1', synapse_port=123456, synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT, ) mock_get_backends.assert_any_call( service_instance, synapse_host='fakehost2', synapse_port=123456, synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT, ) assert "fake_location1 - %s" % PaastaColors.green('Healthy') in actual assert "fake_location2 - %s" % PaastaColors.green('Healthy') in actual
def test_get_mesos_status( mock_get_mesos_stats, mock_get_num_masters, mock_get_configured_quorum_size, mock_getfqdn, ): mock_getfqdn.return_value = 'fakename' mock_get_mesos_stats.return_value = { 'master/cpus_total': 10, 'master/cpus_used': 8, 'master/mem_total': 10240, 'master/mem_used': 2048, 'master/disk_total': 10240, 'master/disk_used': 3072, 'master/tasks_running': 3, 'master/tasks_staging': 4, 'master/tasks_starting': 0, 'master/slaves_active': 4, 'master/slaves_inactive': 0, } mesos_state = { 'flags': { 'zk': 'zk://1.1.1.1:2222/fake_cluster', 'quorum': 2, }, 'frameworks': [ { 'name': 'test_framework1', }, { 'name': 'test_framework1', }, ] } mock_get_num_masters.return_value = 5 mock_get_configured_quorum_size.return_value = 3 expected_cpus_output = "CPUs: 8.00 / 10 in use (%s)" % PaastaColors.green("80.00%") expected_mem_output = \ "Memory: 2.00 / 10.00GB in use (%s)" % PaastaColors.green("20.00%") expected_disk_output = "Disk: 3.00 / 10.00GB in use (%s)" % PaastaColors.green("30.00%") expected_tasks_output = \ "tasks: running: 3 staging: 4 starting: 0" expected_duplicate_frameworks_output = \ "frameworks:\n%s" % \ PaastaColors.red(" CRITICAL: Framework test_framework1 has 2 instances running--expected no more than 1.") expected_slaves_output = \ "slaves: active: 4 inactive: 0" expected_masters_quorum_output = \ "quorum: masters: 5 configured quorum: 3 " results = paasta_metastatus.get_mesos_status(mesos_state, verbosity=0) assert mock_get_mesos_stats.called_once() assert (expected_masters_quorum_output, True) in results assert (expected_cpus_output, True) in results assert (expected_mem_output, True) in results assert (expected_disk_output, True) in results assert (expected_tasks_output, True) in results assert (expected_duplicate_frameworks_output, False) in results assert (expected_slaves_output, True) in results
def test_status_smartstack_backends_multiple_locations(): service = "my_service" instance = "my_instance" service_instance = compose_job_id(service, instance) cluster = "fake_cluster" good_task = mock.Mock() other_task = mock.Mock() fake_backend = { "status": "UP", "lastchg": "1", "last_chk": "OK", "check_code": "200", "svname": "ipaddress1:1001_hostname1", "check_status": "L7OK", "check_duration": 1, } with contextlib.nested( mock.patch("paasta_tools.marathon_tools.load_service_namespace_config", autospec=True), mock.patch("paasta_tools.marathon_tools.read_namespace_for_service_instance"), mock.patch("paasta_tools.marathon_serviceinit.get_mesos_slaves_grouped_by_attribute"), mock.patch("paasta_tools.marathon_serviceinit.get_backends", autospec=True), mock.patch("paasta_tools.marathon_serviceinit.match_backends_and_tasks", autospec=True), ) as ( mock_load_service_namespace_config, mock_read_ns, mock_get_mesos_slaves_grouped_by_attribute, mock_get_backends, mock_match_backends_and_tasks, ): mock_load_service_namespace_config.return_value.get_discover.return_value = "fake_discover" mock_read_ns.return_value = instance mock_get_backends.return_value = [fake_backend] mock_match_backends_and_tasks.return_value = [(fake_backend, good_task)] tasks = [good_task, other_task] mock_get_mesos_slaves_grouped_by_attribute.return_value = { "fake_location1": ["fakehost1"], "fake_location2": ["fakehost2"], } actual = marathon_serviceinit.status_smartstack_backends( service=service, instance=instance, cluster=cluster, job_config=fake_marathon_job_config, tasks=tasks, expected_count=len(mock_get_backends.return_value), soa_dir=None, verbose=False, ) mock_get_backends.assert_any_call(service_instance, synapse_host="fakehost1", synapse_port=DEFAULT_SYNAPSE_PORT) mock_get_backends.assert_any_call(service_instance, synapse_host="fakehost2", synapse_port=DEFAULT_SYNAPSE_PORT) assert "fake_location1 - %s" % PaastaColors.green("Healthy") in actual assert "fake_location2 - %s" % PaastaColors.green("Healthy") in actual
def status_mesos_tasks(service, instance, normal_instance_count):
    job_id = marathon_tools.format_job_id(service, instance)
    running_and_active_tasks = get_running_tasks_from_active_frameworks(job_id)
    count = len(running_and_active_tasks)
    if count >= normal_instance_count:
        status = PaastaColors.green("Healthy")
        count = PaastaColors.green("(%d/%d)" % (count, normal_instance_count))
    elif count == 0:
        status = PaastaColors.red("Critical")
        count = PaastaColors.red("(%d/%d)" % (count, normal_instance_count))
    else:
        status = PaastaColors.yellow("Warning")
        count = PaastaColors.yellow("(%d/%d)" % (count, normal_instance_count))
    running_string = PaastaColors.bold('TASK_RUNNING')
    return "Mesos: %s - %s tasks in the %s state." % (status, count, running_string)
def haproxy_backend_report(normal_instance_count, up_backends):
    """Given that a service is in smartstack, this returns a human readable
    report of the up backends"""
    # TODO: Take into account a configurable threshold, PAASTA-1102
    crit_threshold = 50
    under_replicated, ratio = is_under_replicated(
        num_available=up_backends,
        expected_count=normal_instance_count,
        crit_threshold=crit_threshold,
    )
    if under_replicated:
        status = PaastaColors.red("Critical")
        count = PaastaColors.red("(%d/%d, %d%%)" % (up_backends, normal_instance_count, ratio))
    else:
        status = PaastaColors.green("Healthy")
        count = PaastaColors.green("(%d/%d)" % (up_backends, normal_instance_count))
    up_string = PaastaColors.bold('UP')
    return "%s - in haproxy with %s total backends %s in this namespace." % (status, count, up_string)
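# --- Hedged usage sketch (not part of the original source) ---
# Exercises both branches of haproxy_backend_report above: a fully replicated
# namespace and one below the hard-coded 50% critical threshold. Assumes the
# function and its helpers (is_under_replicated, PaastaColors) are in scope.
print(haproxy_backend_report(normal_instance_count=10, up_backends=10))  # green "Healthy ... (10/10)"
print(haproxy_backend_report(normal_instance_count=10, up_backends=3))   # red "Critical ... (3/10, 30%)"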
def _format_disabled_status(job):
    status = PaastaColors.red("UNKNOWN")
    if job.get("disabled", False):
        status = PaastaColors.grey("Not scheduled")
    else:
        status = PaastaColors.green("Scheduled")
    return status
def generate_summary_for_check(name, ok):
    """Given a check name and a boolean indicating if the service is OK, return
    a formatted message.
    """
    status = PaastaColors.green("OK") if ok is True else PaastaColors.red("CRITICAL")
    summary = "%s Status: %s" % (name, status)
    return summary
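# Hedged usage sketch (not part of the original source); assumes
# generate_summary_for_check above and PaastaColors are in scope.
print(generate_summary_for_check("Marathon", True))   # "Marathon Status: OK" with OK in green
print(generate_summary_for_check("Chronos", False))   # "Chronos Status: CRITICAL" with CRITICAL in red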
def test_format_parents_verbose():
    example_job = {
        'name': 'myexamplejob',
        'parents': ['testservice testinstance'],
    }
    fake_last_datetime = '2007-04-01T17:52:58.908Z'
    example_status = (fake_last_datetime, chronos_tools.LastRunState.Success)
    with contextlib.nested(
        mock.patch(
            'paasta_tools.chronos_tools.get_job_for_service_instance',
            autospec=True,
            return_value={'name': 'testservice testinstance'},
        ),
        mock.patch(
            'paasta_tools.chronos_tools.get_status_last_run',
            autospec=True,
            return_value=example_status,
        ),
    ):
        expected_years = dateutil.relativedelta.relativedelta(
            datetime.datetime.now(dateutil.tz.tzutc()),
            dateutil.parser.parse(fake_last_datetime),
        ).years
        actual = chronos_serviceinit._format_parents_verbose(example_job)
        assert "testservice testinstance" in actual
        assert " Last Run: %s (2007-04-01T17:52, %s years ago)" % (PaastaColors.green("OK"), expected_years) in actual
def assert_disk_health(metrics, mesos_state, threshold=10):
    total = metrics['master/disk_total'] / float(1024)
    used = metrics['master/disk_used']
    for slave in mesos_state['slaves']:
        for role in slave['reserved_resources']:
            used += slave['reserved_resources'][role]['disk']
    used /= float(1024)
    try:
        perc_used = percent_used(total, used)
    except ZeroDivisionError:
        return HealthCheckResult(
            message="Error reading total available disk from mesos!",
            healthy=False,
        )
    if check_threshold(perc_used, threshold):
        return HealthCheckResult(
            message="Disk: %0.2f / %0.2fGB in use (%s)" % (used, total, PaastaColors.green("%.2f%%" % perc_used)),
            healthy=True,
        )
    else:
        return HealthCheckResult(
            message="CRITICAL: Less than %d%% disk available. (Currently using %.2f%%)" % (threshold, perc_used),
            healthy=False,
        )
def test_format_chronos_job_status_enabled():
    example_job = {
        'disabled': False,
    }
    running_tasks = []
    verbose = False
    actual = chronos_serviceinit.format_chronos_job_status(example_job, running_tasks, verbose)
    assert PaastaColors.green('Scheduled') in actual
def test_assert_memory_health():
    ok_metrics = {
        'master/mem_total': 1024,
        'master/mem_used': 512,
    }
    ok_output, ok_health = paasta_metastatus.assert_memory_health(ok_metrics)
    assert ok_health
    assert "Memory: 0.50 / 1.00GB in use (%s)" % PaastaColors.green("50.00%") in ok_output
def test_ok_cpu_health():
    ok_metrics = {
        'master/cpus_total': 10,
        'master/cpus_used': 1,
    }
    ok_output, ok_health = paasta_metastatus.assert_cpu_health(ok_metrics)
    assert ok_health
    assert "CPUs: 1.00 / 10 in use (%s)" % PaastaColors.green("10.00%") in ok_output
def test_format_chronos_job_status_success_no_failure(): example_job = {"lastError": "", "lastSuccess": "2015-04-20T23:20:00.420Z", "schedule": "foo"} running_tasks = [] verbose = False actual = chronos_serviceinit.format_chronos_job_status(example_job, running_tasks, verbose) assert PaastaColors.green("OK") in actual assert "(2015-04-20" in actual assert "ago)" in actual
def status_marathon_job_human(service, instance, deploy_status, app_id, running_instances, normal_instance_count):
    name = PaastaColors.cyan(compose_job_id(service, instance))
    if deploy_status != 'NotRunning':
        if running_instances >= normal_instance_count:
            status = PaastaColors.green("Healthy")
            instance_count = PaastaColors.green("(%d/%d)" % (running_instances, normal_instance_count))
        elif running_instances == 0:
            status = PaastaColors.yellow("Critical")
            instance_count = PaastaColors.red("(%d/%d)" % (running_instances, normal_instance_count))
        else:
            status = PaastaColors.yellow("Warning")
            instance_count = PaastaColors.yellow("(%d/%d)" % (running_instances, normal_instance_count))
        return "Marathon: %s - up with %s instances. Status: %s" % (status, instance_count, deploy_status)
    else:
        red_not = PaastaColors.red("NOT")
        status = PaastaColors.red("Critical")
        return "Marathon: %s - %s (app %s) is %s running in Marathon." % (status, name, app_id, red_not)
def status_mesos_tasks(service, instance, normal_instance_count):
    job_id = marathon_tools.format_job_id(service, instance)
    # We have to add a spacer at the end to make sure we only return
    # things for service.main and not service.main_foo
    filter_string = "%s%s" % (job_id, marathon_tools.MESOS_TASK_SPACER)
    running_and_active_tasks = get_running_tasks_from_active_frameworks(filter_string)
    count = len(running_and_active_tasks)
    if count >= normal_instance_count:
        status = PaastaColors.green("Healthy")
        count = PaastaColors.green("(%d/%d)" % (count, normal_instance_count))
    elif count == 0:
        status = PaastaColors.red("Critical")
        count = PaastaColors.red("(%d/%d)" % (count, normal_instance_count))
    else:
        status = PaastaColors.yellow("Warning")
        count = PaastaColors.yellow("(%d/%d)" % (count, normal_instance_count))
    running_string = PaastaColors.bold('TASK_RUNNING')
    return "Mesos: %s - %s tasks in the %s state." % (status, count, running_string)
def bouncing_status_human(app_count, bounce_method):
    if app_count == 0:
        return PaastaColors.red("Disabled")
    elif app_count == 1:
        return PaastaColors.green("Configured")
    elif app_count > 1:
        return PaastaColors.yellow("Bouncing (%s)" % bounce_method)
    else:
        return PaastaColors.red("Unknown (count: %s)" % app_count)
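# Hedged usage sketch (not part of the original source); assumes
# bouncing_status_human above and PaastaColors are in scope. The bounce_method
# values are illustrative only.
print(bouncing_status_human(app_count=1, bounce_method='brutal'))     # green "Configured"
print(bouncing_status_human(app_count=2, bounce_method='crossover'))  # yellow "Bouncing (crossover)"
print(bouncing_status_human(app_count=0, bounce_method='brutal'))     # red "Disabled"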
def test_get_mesos_status( mock_get_mesos_state_from_leader, mock_get_mesos_stats, mock_get_num_masters, mock_get_configured_quorum_size, mock_getfqdn, ): mock_getfqdn.return_value = "fakename" mock_get_mesos_stats.return_value = { "master/cpus_total": 10, "master/cpus_used": 8, "master/mem_total": 10240, "master/mem_used": 2048, "master/tasks_running": 3, "master/tasks_staging": 4, "master/tasks_starting": 0, "master/slaves_active": 4, "master/slaves_inactive": 0, } mock_get_mesos_state_from_leader.return_value = { "flags": {"zk": "zk://1.1.1.1:2222/fake_cluster", "quorum": 2}, "frameworks": [{"name": "test_framework1"}, {"name": "test_framework1"}], } mock_get_num_masters.return_value = 5 mock_get_configured_quorum_size.return_value = 3 expected_cpus_output = "CPUs: 8.00 / 10 in use (%s)" % PaastaColors.green("80.00%") expected_mem_output = "Memory: 2.00 / 10.00GB in use (%s)" % PaastaColors.green("20.00%") expected_tasks_output = "tasks: running: 3 staging: 4 starting: 0" expected_duplicate_frameworks_output = "frameworks:\n%s" % PaastaColors.red( " CRITICAL: Framework test_framework1 has 2 instances running--expected no more than 1." ) expected_slaves_output = "slaves: active: 4 inactive: 0" expected_masters_quorum_output = "quorum: masters: 5 configured quorum: 3 " results = paasta_metastatus.get_mesos_status() assert mock_get_mesos_stats.called_once() assert mock_get_mesos_state_from_leader.called_once() assert (expected_masters_quorum_output, True) in results assert (expected_cpus_output, True) in results assert (expected_mem_output, True) in results assert (expected_tasks_output, True) in results assert (expected_duplicate_frameworks_output, False) in results assert (expected_slaves_output, True) in results
def test_format_chronos_job_status_enabled(mock_status):
    example_job = {
        'name': 'my_service my_instance',
        'disabled': False,
        'schedule': 'foo',
    }
    running_tasks = []
    verbose = False
    mock_client = mock.Mock()
    actual = chronos_serviceinit.format_chronos_job_status(mock_client, example_job, running_tasks, verbose)
    assert PaastaColors.green('Scheduled') in actual
def test_format_chronos_job_status_failure_and_then_success():
    example_job = {
        'lastError': '2015-04-20T23:20:00.420Z',
        'lastSuccess': '2015-04-21T23:20:00.420Z',
    }
    running_tasks = []
    verbose = False
    actual = chronos_serviceinit.format_chronos_job_status(example_job, running_tasks, verbose)
    assert PaastaColors.green('OK') in actual
    assert '(2015-04-21' in actual
    assert 'ago)' in actual
def test_format_table_column_for_healthcheck_resource_utilization_pair_healthy():
    fake_healthcheckresult = Mock()
    fake_healthcheckresult.healthy = True
    fake_resource_utilization = Mock()
    fake_resource_utilization.free = 10
    fake_resource_utilization.total = 20
    expected = PaastaColors.green("10/20 (50.00%)")
    assert metastatus_lib.format_table_column_for_healthcheck_resource_utilization_pair(
        (fake_healthcheckresult, fake_resource_utilization),
        False,
    ) == expected
def paasta_fsm(args):
    validate_args(args)
    (srvname, service_stanza, smartstack_stanza, monitoring_stanza,
     deploy_stanza, marathon_stanza, cluster_stanza, team) = (
        get_paasta_config(
            args.yelpsoa_config_root,
            args.srvname,
            args.auto,
            args.port,
            args.team,
            args.description,
            args.external_link,
        )
    )
    srv = Service(srvname, args.yelpsoa_config_root)
    write_paasta_config(
        srv,
        service_stanza,
        smartstack_stanza,
        monitoring_stanza,
        deploy_stanza,
        marathon_stanza,
        cluster_stanza,
    )
    print PaastaColors.yellow(" _ _(o)_(o)_ _")
    print PaastaColors.red(" ._\`:_ F S M _:' \_,")
    print PaastaColors.green(" / (`---'\ `-.")
    print PaastaColors.cyan(" ,-` _) (_,")
    print "With My Noodly Appendage I Have Written Configs For"
    print
    print PaastaColors.bold(" %s" % srvname)
    print
    print "Customize Them If It Makes You Happy -- http://y/paasta For Details"
    print "Remember To Add, Commit, And Push When You're Done:"
    print
    print "cd %s" % join(args.yelpsoa_config_root, srvname)
    print "# Review And/Or Customize Files"
    print "git add ."
    print "git commit -m'Initial Commit For %s'" % srvname
    print "git push origin HEAD # Pushmaster Or Ops Deputy Privs Required"
    print
def assert_cpu_health(metrics, threshold=10):
    total, used, available = get_mesos_cpu_status(metrics)
    perc_used = percent_used(total, used)
    if check_threshold(perc_used, threshold):
        return ("CPUs: %.2f / %d in use (%s)" % (used, total, PaastaColors.green("%.2f%%" % perc_used)),
                True)
    else:
        return (PaastaColors.red(
            "CRITICAL: Less than %d%% CPUs available. (Currently using %.2f%%)" % (threshold, perc_used)),
            False)
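# Hedged usage sketch (not part of the original source), mirroring the metrics
# dict used in test_ok_cpu_health; assumes assert_cpu_health above and its
# helpers (get_mesos_cpu_status, percent_used, check_threshold) are in scope.
ok_metrics = {'master/cpus_total': 10, 'master/cpus_used': 1}
output, healthy = assert_cpu_health(ok_metrics)    # healthy is True at 10% usage
bad_metrics = {'master/cpus_total': 10, 'master/cpus_used': 9.5}
output, healthy = assert_cpu_health(bad_metrics)   # healthy is False: less than 10% of CPUs available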
def test_format_table_column_for_healthcheck_resource_utilization_pair_healthy_human():
    fake_healthcheckresult = Mock()
    fake_healthcheckresult.healthy = True
    fake_healthcheckresult.metric = 'mem'
    fake_resource_utilization = Mock()
    fake_resource_utilization.free = 10
    fake_resource_utilization.total = 20
    expected = PaastaColors.green("10.0M/20.0M")
    assert paasta_metastatus.format_table_column_for_healthcheck_resource_utilization_pair(
        (fake_healthcheckresult, fake_resource_utilization),
        True,
    ) == expected
def test_format_chronos_job_status_success_no_failure():
    example_job = {
        'lastError': '',
        'lastSuccess': '2015-04-20T23:20:00.420Z',
    }
    desired_state = ''
    running_tasks = []
    verbose = False
    actual = chronos_serviceinit.format_chronos_job_status(example_job, desired_state, running_tasks, verbose)
    assert PaastaColors.green('OK') in actual
    assert '(2015-04-20' in actual
    assert 'ago)' in actual
def get_bouncing_status(service, instance, client, job_config):
    apps = marathon_tools.get_matching_appids(service, instance, client)
    bounce_method = job_config.get_bounce_method()
    app_count = len(apps)
    if app_count == 0:
        return PaastaColors.red("Stopped")
    elif app_count == 1:
        return PaastaColors.green("Running")
    elif app_count > 1:
        return PaastaColors.yellow("Bouncing (%s)" % bounce_method)
    else:
        return PaastaColors.red("Unknown (count: %s)" % app_count)
def status_marathon_job(service, instance, app_id, normal_instance_count, client):
    name = PaastaColors.cyan(compose_job_id(service, instance))
    if marathon_tools.is_app_id_running(app_id, client):
        app = client.get_app(app_id)
        running_instances = app.tasks_running
        deploy_status = marathon_tools.get_marathon_app_deploy_status_human(app, app_id, client)
        if running_instances >= normal_instance_count:
            status = PaastaColors.green("Healthy")
            instance_count = PaastaColors.green("(%d/%d)" % (running_instances, normal_instance_count))
        elif running_instances == 0:
            status = PaastaColors.yellow("Critical")
            instance_count = PaastaColors.red("(%d/%d)" % (running_instances, normal_instance_count))
        else:
            status = PaastaColors.yellow("Warning")
            instance_count = PaastaColors.yellow("(%d/%d)" % (running_instances, normal_instance_count))
        return "Marathon: %s - up with %s instances. Status: %s" % (status, instance_count, deploy_status)
    else:
        red_not = PaastaColors.red("NOT")
        status = PaastaColors.red("Critical")
        return "Marathon: %s - %s (app %s) is %s running in Marathon." % (status, name, app_id, red_not)
def paasta_fsm(args):
    variables = get_paasta_config(yelpsoa_config_root=args.yelpsoa_config_root)
    destination = args.yelpsoa_config_root
    paasta_config = load_system_paasta_config()
    template = paasta_config.get_fsm_template()
    write_paasta_config(
        variables=variables,
        template=template,
        destination=destination,
    )
    print PaastaColors.yellow(" _ _(o)_(o)_ _")
    print PaastaColors.red(" ._\`:_ F S M _:' \_,")
    print PaastaColors.green(" / (`---'\ `-.")
    print PaastaColors.cyan(" ,-` _) (_,")
    print "With My Noodly Appendage I Have Written Configs!"
    print
    print "Customize Them If It Makes You Happy -- http://y/paasta For Details"
    print "Remember To Add, Commit, And Push When You're Done:"
    print
def _prettify_status(status):
    if status not in (
        chronos_tools.LastRunState.Fail,
        chronos_tools.LastRunState.Success,
        chronos_tools.LastRunState.NotRun,
    ):
        raise ValueError("Expected valid state, got %s" % status)
    if status == chronos_tools.LastRunState.Fail:
        return PaastaColors.red("Failed")
    elif status == chronos_tools.LastRunState.Success:
        return PaastaColors.green("OK")
    elif status == chronos_tools.LastRunState.NotRun:
        return PaastaColors.yellow("New")
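# Hedged usage sketch (not part of the original source); assumes chronos_tools
# and the _prettify_status helper above are in scope.
print(_prettify_status(chronos_tools.LastRunState.Success))  # green "OK"
print(_prettify_status(chronos_tools.LastRunState.Fail))     # red "Failed"
print(_prettify_status(chronos_tools.LastRunState.NotRun))   # yellow "New"
# Any other value raises ValueError("Expected valid state, got ...").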
def test_format_chronos_job_status_failure_and_then_success(mock_status):
    example_job = {
        'name': 'my_service my_instance',
        'lastError': '2015-04-20T23:20:00.420Z',
        'lastSuccess': '2015-04-21T23:20:00.420Z',
        'schedule': 'foo',
    }
    running_tasks = []
    verbose = False
    mock_client = mock.Mock()
    actual = chronos_serviceinit.format_chronos_job_status(mock_client, example_job, running_tasks, verbose)
    assert PaastaColors.green('OK') in actual
    assert '(2015-04-21' in actual
    assert 'ago)' in actual
def assert_memory_health(metrics, threshold=10):
    total = metrics['master/mem_total'] / float(1024)
    used = metrics['master/mem_used'] / float(1024)
    perc_used = percent_used(total, used)
    if check_threshold(perc_used, threshold):
        return ("Memory: %0.2f / %0.2fGB in use (%s)" % (used, total, PaastaColors.green("%.2f%%" % perc_used)),
                True)
    else:
        return (PaastaColors.red(
            "CRITICAL: Less than %d%% memory available. (Currently using %.2f%%)" % (threshold, perc_used)),
            False)
def paasta_rerun(args): """Reruns a Chronos job. :param args: argparse.Namespace obj created from sys.args by cli""" system_paasta_config = load_system_paasta_config() soa_dir = args.soa_dir service = figure_out_service_name( args, soa_dir) # exit with an error if the service doesn't exist if args.execution_date: execution_date = args.execution_date else: execution_date = None all_clusters = list_clusters(soa_dir=soa_dir) actual_deployments = get_actual_deployments( service, soa_dir) # cluster.instance: sha if actual_deployments: deploy_pipeline = list(get_planned_deployments( service, soa_dir)) # cluster.instance deployed_clusters = list_deployed_clusters(deploy_pipeline, actual_deployments) deployed_cluster_instance = _get_cluster_instance( actual_deployments.keys()) if args.clusters is not None: clusters = args.clusters.split(",") else: clusters = deployed_clusters for cluster in clusters: paasta_print("cluster: %s" % cluster) if cluster not in all_clusters: paasta_print( " Warning: \"%s\" does not look like a valid cluster." % cluster) continue if cluster not in deployed_clusters: paasta_print( f" Warning: service \"{service}\" has not been deployed to \"{cluster}\" yet." ) continue if not deployed_cluster_instance[cluster].get(args.instance, False): paasta_print((" Warning: instance \"%s\" is either invalid " "or has not been deployed to \"%s\" yet." % (args.instance, cluster))) continue try: chronos_job_config = chronos_tools.load_chronos_job_config( service, args.instance, cluster, load_deployments=False, soa_dir=soa_dir, ) if chronos_tools.uses_time_variables( chronos_job_config) and execution_date is None: paasta_print( (" Warning: \"%s\" uses time variables interpolation, " "please supply a `--execution_date` argument." % args.instance)) continue except NoConfigurationForServiceError as e: paasta_print(" Warning: %s" % e) continue if execution_date is None: execution_date = _get_default_execution_date() related_job_configs = get_related_jobs_configs(cluster, service, args.instance) if not args.rerun_type and len(related_job_configs) > 1: instance_names = sorted([ f'- {srv}{chronos_tools.INTERNAL_SPACER}{inst}' for srv, inst in related_job_configs if srv != service or inst != args.instance ]) paasta_print(PaastaColors.red(' error')) paasta_print( 'Instance {instance} has dependency relations with the following jobs:\n' '{relations}\n' '\n' 'Please specify the rerun policy via --rerun-type argument'. format( instance=args.instance, relations='\n'.join(instance_names), ), ) return rc, output = execute_chronos_rerun_on_remote_master( service=service, instancename=args.instance, cluster=cluster, verbose=args.verbose, execution_date=execution_date.strftime( chronos_tools.EXECUTION_DATE_FORMAT), system_paasta_config=system_paasta_config, run_all_related_jobs=args.rerun_type == 'graph', force_disabled=args.force_disabled, ) if rc == 0: paasta_print(PaastaColors.green(' successfully created job')) else: paasta_print(PaastaColors.red(' error')) paasta_print(output)
def test_status_smartstack_backends_verbose_multiple_locations(): service = 'my_service' instance = 'my_instance' service_instance = compose_job_id(service, instance) cluster = 'fake_cluster' good_task = mock.Mock() other_task = mock.Mock() fake_backend = { 'status': 'UP', 'lastchg': '1', 'last_chk': 'OK', 'check_code': '200', 'svname': 'ipaddress1:1001_hostname1', 'check_status': 'L7OK', 'check_duration': 1 } fake_other_backend = { 'status': 'UP', 'lastchg': '1', 'last_chk': 'OK', 'check_code': '200', 'svname': 'ipaddress1:1002_hostname2', 'check_status': 'L7OK', 'check_duration': 1 } with contextlib.nested( mock.patch( 'paasta_tools.marathon_tools.load_service_namespace_config', autospec=True), mock.patch( 'paasta_tools.marathon_tools.read_namespace_for_service_instance' ), mock.patch( 'paasta_tools.marathon_serviceinit.get_mesos_slaves_grouped_by_attribute' ), mock.patch('paasta_tools.marathon_serviceinit.get_backends', autospec=True, side_effect=[[fake_backend], [fake_other_backend]]), mock.patch( 'paasta_tools.marathon_serviceinit.match_backends_and_tasks', autospec=True, side_effect=[[(fake_backend, good_task)], [(fake_other_backend, good_task)]]), ) as ( mock_load_service_namespace_config, mock_read_ns, mock_get_mesos_slaves_grouped_by_attribute, mock_get_backends, mock_match_backends_and_tasks, ): mock_load_service_namespace_config.return_value.get_discover.return_value = 'fake_discover' mock_read_ns.return_value = instance tasks = [good_task, other_task] mock_get_mesos_slaves_grouped_by_attribute.return_value = { 'fake_location1': ['fakehost1'], 'fake_location2': ['fakehost2'], } actual = marathon_serviceinit.status_smartstack_backends( service=service, instance=instance, cluster=cluster, job_config=fake_marathon_job_config, tasks=tasks, expected_count=1, soa_dir=None, verbose=True, synapse_port=123456, synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT, ) mock_get_backends.assert_any_call( service_instance, synapse_host='fakehost1', synapse_port=123456, synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT, ) mock_get_backends.assert_any_call( service_instance, synapse_host='fakehost2', synapse_port=123456, synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT, ) mock_get_mesos_slaves_grouped_by_attribute.assert_called_once_with( attribute='fake_discover', blacklist=[], ) assert "fake_location1 - %s" % PaastaColors.green('Healthy') in actual assert "hostname1:1001" in actual assert "fake_location2 - %s" % PaastaColors.green('Healthy') in actual assert "hostname2:1002" in actual
def test_assert_memory_health():
    ok_status = (1024, 512, 512)
    ok_output, ok_health = metastatus_lib.assert_memory_health(ok_status)
    assert ok_health
    assert ("Memory: 0.50 / 1.00GB in use (%s)" % PaastaColors.green("50.00%") in ok_output)
def remote_run_start(args): """ Start a task in Mesos Steps: 1. Accumulate overrides 2. Create task configuration 3. Build executor stack 4. Run the task on the executor stack """ # accumulate all configuration needed to build what we need to run a task system_paasta_config, service, cluster, \ soa_dir, instance, instance_type = extract_args(args) # TODO: move run_id into task identifier? run_id = args.run_id or generate_run_id(length=10) framework_name = create_framework_name(service, instance, run_id) overrides = accumulate_config_overrides(args, service, instance) # TODO: implement DryRunExecutor? taskproc_config = system_paasta_config.get_taskproc() native_job_config = load_paasta_native_job_config( service, instance, cluster, soa_dir=soa_dir, instance_type=instance_type, config_overrides=overrides, load_deployments=not args.docker_image, ) region = args.aws_region or taskproc_config.get('aws_region') default_role = system_paasta_config.get_remote_run_config().get( 'default_role') assert default_role role = native_job_config.get_role() or default_role pool = native_job_config.get_pool() processor = TaskProcessor() processor.load_plugin(provider_module='task_processing.plugins.stateful') processor.load_plugin(provider_module='task_processing.plugins.mesos') if args.detach: paasta_print("Running in background") if os.fork() > 0: return os.setsid() if os.fork() > 0: return sys.stdout = open('/dev/null', 'w') sys.stderr = open('/dev/null', 'w') # create factory functions for task_config and executors, which makes it # easier to recreate them for retry purposes def task_config_factory(): return create_mesos_task_config( processor=processor, service=service, instance=instance, system_paasta_config=system_paasta_config, native_job_config=native_job_config, offer_timeout=args.staging_timeout, docker_image=args.docker_image, ) framework_config = dict( cluster=cluster, framework_name=framework_name, framework_staging_timeout=args.staging_timeout, role=role, pool=pool, ) executor_kwargs = dict( # used to create mesos executor processor=processor, system_paasta_config=system_paasta_config, taskproc_config=taskproc_config, **framework_config, ) def executor_factory(): mesos_executor = create_mesos_executor(**executor_kwargs) return build_executor_stack( processor, mesos_executor, taskproc_config, cluster, region, ) if args.dry_run: task_config_dict = task_config_to_dict(task_config_factory()) pp = pprint.PrettyPrinter(indent=2) paasta_print( PaastaColors.green("Would have run task with:"), PaastaColors.green("Framework config:"), pp.pformat(framework_config), PaastaColors.green("Task config:"), pp.pformat(task_config_dict), sep='\n', ) return terminals = run_tasks_with_retries( executor_factory, task_config_factory, retries=args.retries, ) final_event, final_task_config = terminals[-1] exit_code = handle_terminal_event( event=final_event, service=service, instance=instance, run_id=run_id, email_address=args.notification_email, framework_config=framework_config, task_config=final_task_config, ) sys.exit(exit_code)
def simulate_healthcheck_on_service( instance_config, docker_client, container_id, healthcheck_mode, healthcheck_data, healthcheck_enabled, ): """Simulates Marathon-style healthcheck on given service if healthcheck is enabled :param instance_config: service manifest :param docker_client: Docker client object :param container_id: Docker container id :param healthcheck_data: tuple url to healthcheck :param healthcheck_enabled: boolean :returns: healthcheck_passed: boolean """ healthcheck_link = PaastaColors.cyan(healthcheck_data) if healthcheck_enabled: grace_period = instance_config.get_healthcheck_grace_period_seconds() timeout = instance_config.get_healthcheck_timeout_seconds() interval = instance_config.get_healthcheck_interval_seconds() max_failures = instance_config.get_healthcheck_max_consecutive_failures( ) print("\nStarting health check via %s (waiting %s seconds before " "considering failures due to grace period):" % (healthcheck_link, grace_period)) # silently start performing health checks until grace period ends or first check succeeds graceperiod_end_time = time.time() + grace_period after_grace_period_attempts = 0 healthchecking = True def _stream_docker_logs(container_id, generator): while healthchecking: try: # the generator will block until another log line is available log_line = next(generator).decode("utf-8").rstrip("\n") if healthchecking: print(f"container [{container_id[:12]}]: {log_line}") else: # stop streaming at first opportunity, since generator.close() # cant be used until the container is dead break except StopIteration: # natural end of logs break docker_logs_generator = docker_client.logs(container_id, stderr=True, stream=True) threading.Thread( target=_stream_docker_logs, daemon=True, args=(container_id, docker_logs_generator), ).start() while True: # First inspect the container for early exits container_state = docker_client.inspect_container(container_id) if not container_state["State"]["Running"]: print( PaastaColors.red("Container exited with code {}".format( container_state["State"]["ExitCode"]))) healthcheck_passed = False break healthcheck_passed, healthcheck_output = run_healthcheck_on_container( docker_client, container_id, healthcheck_mode, healthcheck_data, timeout) # Yay, we passed the healthcheck if healthcheck_passed: print("{}'{}' (via {})".format( PaastaColors.green("Healthcheck succeeded!: "), healthcheck_output, healthcheck_link, )) break # Otherwise, print why we failed if time.time() < graceperiod_end_time: color = PaastaColors.grey msg = "(disregarded due to grace period)" extra_msg = f" (via: {healthcheck_link}. Output: {healthcheck_output})" else: # If we've exceeded the grace period, we start incrementing attempts after_grace_period_attempts += 1 color = PaastaColors.red msg = "(Attempt {} of {})".format(after_grace_period_attempts, max_failures) extra_msg = f" (via: {healthcheck_link}. Output: {healthcheck_output})" print("{}{}".format(color(f"Healthcheck failed! {msg}"), extra_msg)) if after_grace_period_attempts == max_failures: break time.sleep(interval) healthchecking = False # end docker logs stream else: print("\nPaaSTA would have healthchecked your service via\n%s" % healthcheck_link) healthcheck_passed = True return healthcheck_passed
def test_status_smartstack_backends_multiple_locations(): service = 'my_service' instance = 'my_instance' service_instance = compose_job_id(service, instance) cluster = 'fake_cluster' good_task = mock.Mock() other_task = mock.Mock() fake_backend = { 'status': 'UP', 'lastchg': '1', 'last_chk': 'OK', 'check_code': '200', 'svname': 'ipaddress1:1001_hostname1', 'check_status': 'L7OK', 'check_duration': 1 } with contextlib.nested( mock.patch( 'paasta_tools.marathon_tools.load_service_namespace_config', autospec=True), mock.patch( 'paasta_tools.marathon_tools.read_namespace_for_service_instance', autospec=True), mock.patch( 'paasta_tools.marathon_serviceinit.get_all_slaves_for_blacklist_whitelist', autospec=True), mock.patch('paasta_tools.marathon_serviceinit.get_backends', autospec=True), mock.patch( 'paasta_tools.marathon_serviceinit.match_backends_and_tasks', autospec=True), ) as ( mock_load_service_namespace_config, mock_read_ns, mock_get_all_slaves_for_blacklist_whitelist, mock_get_backends, mock_match_backends_and_tasks, ): mock_load_service_namespace_config.return_value.get_discover.return_value = 'fake_discover' mock_read_ns.return_value = instance mock_get_backends.return_value = [fake_backend] mock_match_backends_and_tasks.return_value = [ (fake_backend, good_task), ] tasks = [good_task, other_task] mock_get_all_slaves_for_blacklist_whitelist.return_value = [{ 'hostname': 'fakehost', 'attributes': { 'fake_discover': 'fakelocation' } }, { 'hostname': 'fakeotherhost', 'attributes': { 'fake_discover': 'fakeotherlocation' } }] actual = marathon_serviceinit.status_smartstack_backends( service=service, instance=instance, cluster=cluster, job_config=fake_marathon_job_config, tasks=tasks, expected_count=len(mock_get_backends.return_value), soa_dir=None, verbose=False, synapse_port=123456, synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT, ) mock_get_backends.assert_any_call( service_instance, synapse_host='fakehost', synapse_port=123456, synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT, ) mock_get_backends.assert_any_call( service_instance, synapse_host='fakeotherhost', synapse_port=123456, synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT, ) assert "fakelocation - %s" % PaastaColors.green('Healthy') in actual assert "fakeotherlocation - %s" % PaastaColors.green( 'Healthy') in actual
def check_mark(): """ :return: string that can print a checkmark """ return PaastaColors.green(u'\u2713'.encode('utf-8'))
def test_assert_gpu_health():
    ok_status = (3, 1, 2)
    ok_output, ok_health = metastatus_lib.assert_gpu_health(ok_status)
    assert ok_health
    assert "GPUs: 1 / 3 in use (%s)" % PaastaColors.green("33.33%") in ok_output
def test_status_smartstack_backends_verbose_multiple_locations(): service = "servicename" instance = "instancename" cluster = "fake_cluster" good_task = mock.Mock() other_task = mock.Mock() fake_backend = { "status": "UP", "lastchg": "1", "last_chk": "OK", "check_code": "200", "svname": "ipaddress1:1001_hostname1", "check_status": "L7OK", "check_duration": 1, } fake_other_backend = { "status": "UP", "lastchg": "1", "last_chk": "OK", "check_code": "200", "svname": "ipaddress1:1002_hostname2", "check_status": "L7OK", "check_duration": 1, } with mock.patch( "paasta_tools.marathon_serviceinit.get_all_slaves_for_blacklist_whitelist", autospec=True, ) as mock_get_all_slaves_for_blacklist_whitelist, mock.patch( "paasta_tools.marathon_serviceinit.get_backends", autospec=True, side_effect=[[fake_backend], [fake_other_backend]], ) as mock_get_backends, mock.patch( "paasta_tools.marathon_serviceinit.match_backends_and_tasks", autospec=True, side_effect=[[(fake_backend, good_task)], [(fake_other_backend, good_task)]], ): fake_service_namespace_config = mock.Mock() fake_service_namespace_config.get_discover.return_value = "fake_discover" mock_get_all_slaves_for_blacklist_whitelist.return_value = [ { "hostname": "hostname1", "attributes": { "fake_discover": "fakelocation" } }, { "hostname": "hostname2", "attributes": { "fake_discover": "fakeotherlocation" }, }, ] tasks = [good_task, other_task] actual = marathon_serviceinit.status_smartstack_backends( service=service, instance=instance, cluster=cluster, job_config=fake_marathon_job_config, service_namespace_config=fake_service_namespace_config, tasks=tasks, expected_count=1, soa_dir=None, verbose=True, synapse_port=123456, synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT, system_deploy_blacklist=[], system_deploy_whitelist=[], ) mock_get_backends.assert_any_call( "servicename.fake_nerve_ns", synapse_host="hostname1", synapse_port=123456, synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT, ) mock_get_backends.assert_any_call( "servicename.fake_nerve_ns", synapse_host="hostname2", synapse_port=123456, synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT, ) mock_get_all_slaves_for_blacklist_whitelist.assert_called_once_with( blacklist=[], whitelist=[]) assert "fakelocation - %s" % PaastaColors.green("Healthy") in actual assert "hostname1:1001" in actual assert "fakeotherlocation - %s" % PaastaColors.green( "Healthy") in actual assert "hostname2:1002" in actual
def _format_disabled_status(job):
    if job.get("disabled", False):
        status = PaastaColors.grey("Not scheduled")
    else:
        status = PaastaColors.green("Scheduled")
    return status
def test_ok_cpu_health():
    ok_status = (10, 1, 9)
    ok_output, ok_health = metastatus_lib.assert_cpu_health(ok_status)
    assert ok_health
    assert "CPUs: 1.00 / 10 in use (%s)" % PaastaColors.green("10.00%") in ok_output
def test_get_mesos_status( mock_get_mesos_stats, mock_get_num_masters, mock_get_configured_quorum_size, mock_getfqdn, ): mock_getfqdn.return_value = 'fakename' mock_get_mesos_stats.return_value = { 'master/cpus_total': 10, 'master/cpus_used': 8, 'master/mem_total': 10240, 'master/mem_used': 2048, 'master/disk_total': 10240, 'master/disk_used': 3072, 'master/tasks_running': 3, 'master/tasks_staging': 4, 'master/tasks_starting': 0, 'master/slaves_active': 4, 'master/slaves_inactive': 0, } mesos_state = { 'flags': { 'zk': 'zk://1.1.1.1:2222/fake_cluster', 'quorum': 2, }, 'frameworks': [ { 'name': 'test_framework1', }, { 'name': 'test_framework1', }, ] } mock_get_num_masters.return_value = 5 mock_get_configured_quorum_size.return_value = 3 expected_cpus_output = "CPUs: 8.00 / 10 in use (%s)" % PaastaColors.green( "80.00%") expected_mem_output = \ "Memory: 2.00 / 10.00GB in use (%s)" % PaastaColors.green("20.00%") expected_disk_output = "Disk: 3.00 / 10.00GB in use (%s)" % PaastaColors.green( "30.00%") expected_tasks_output = \ "tasks: running: 3 staging: 4 starting: 0" expected_duplicate_frameworks_output = \ "frameworks:\n%s" % \ PaastaColors.red(" CRITICAL: Framework test_framework1 has 2 instances running--expected no more than 1.") expected_slaves_output = \ "slaves: active: 4 inactive: 0" expected_masters_quorum_output = \ "quorum: masters: 5 configured quorum: 3 " results = paasta_metastatus.get_mesos_status(mesos_state, verbosity=0) assert mock_get_mesos_stats.called_once() assert (expected_masters_quorum_output, True) in results assert (expected_cpus_output, True) in results assert (expected_mem_output, True) in results assert (expected_disk_output, True) in results assert (expected_tasks_output, True) in results assert (expected_duplicate_frameworks_output, False) in results assert (expected_slaves_output, True) in results
def test_status_smartstack_backends_verbose_multiple_locations(): service = 'my_service' instance = 'my_instance' service_instance = compose_job_id(service, instance) cluster = 'fake_cluster' good_task = mock.Mock() other_task = mock.Mock() fake_backend = { 'status': 'UP', 'lastchg': '1', 'last_chk': 'OK', 'check_code': '200', 'svname': 'ipaddress1:1001_hostname1', 'check_status': 'L7OK', 'check_duration': 1, } fake_other_backend = { 'status': 'UP', 'lastchg': '1', 'last_chk': 'OK', 'check_code': '200', 'svname': 'ipaddress1:1002_hostname2', 'check_status': 'L7OK', 'check_duration': 1, } with mock.patch( 'paasta_tools.marathon_tools.load_service_namespace_config', autospec=True, ) as mock_load_service_namespace_config, mock.patch( 'paasta_tools.marathon_tools.read_registration_for_service_instance', autospec=True, ) as mock_read_reg, mock.patch( 'paasta_tools.marathon_serviceinit.get_all_slaves_for_blacklist_whitelist', autospec=True, ) as mock_get_all_slaves_for_blacklist_whitelist, mock.patch( 'paasta_tools.marathon_serviceinit.get_backends', autospec=True, side_effect=[[fake_backend], [fake_other_backend]], ) as mock_get_backends, mock.patch( 'paasta_tools.marathon_serviceinit.match_backends_and_tasks', autospec=True, side_effect=[[(fake_backend, good_task)], [(fake_other_backend, good_task)]], ): mock_load_service_namespace_config.return_value.get_discover.return_value = 'fake_discover' mock_read_reg.return_value = service_instance mock_get_all_slaves_for_blacklist_whitelist.return_value = [ { 'hostname': 'hostname1', 'attributes': { 'fake_discover': 'fakelocation', }, }, { 'hostname': 'hostname2', 'attributes': { 'fake_discover': 'fakeotherlocation', }, }, ] tasks = [good_task, other_task] actual = marathon_serviceinit.status_smartstack_backends( service=service, instance=instance, cluster=cluster, job_config=fake_marathon_job_config, tasks=tasks, expected_count=1, soa_dir=None, verbose=True, synapse_port=123456, synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT, system_deploy_blacklist=[], system_deploy_whitelist=[], ) mock_get_backends.assert_any_call( service_instance, synapse_host='hostname1', synapse_port=123456, synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT, ) mock_get_backends.assert_any_call( service_instance, synapse_host='hostname2', synapse_port=123456, synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT, ) mock_get_all_slaves_for_blacklist_whitelist.assert_called_once_with( blacklist=[], whitelist=[], ) assert "fakelocation - %s" % PaastaColors.green('Healthy') in actual assert "hostname1:1001" in actual assert "fakeotherlocation - %s" % PaastaColors.green( 'Healthy') in actual assert "hostname2:1002" in actual
def check_mark(): """ :return: string that can print a checkmark """ return PaastaColors.green('\u2713')
def status_marathon_app( marathon_client: marathon_tools.MarathonClient, app: marathon_tools.MarathonApp, service: str, instance: str, cluster: str, soa_dir: str, dashboards: Dict[marathon_tools.MarathonClient, str], verbose: int, ) -> Tuple[int, int, str]: """Takes a given marathon app object and returns the details about start, times, hosts, etc""" output = [] create_datetime = datetime_from_utc_to_local(isodate.parse_datetime(app.version)) output.append(get_marathon_dashboard(marathon_client, dashboards, app.id)) output.append( " " + " ".join( [ f"{app.tasks_running} running,", f"{app.tasks_healthy} healthy,", f"{app.tasks_staged} staged", f"out of {app.instances}", ] ) ) output.append( " App created: {} ({})".format( str(create_datetime), humanize.naturaltime(create_datetime) ) ) deploy_status = marathon_tools.get_marathon_app_deploy_status(marathon_client, app) app_queue = marathon_tools.get_app_queue(marathon_client, app.id) unused_offers_summary = marathon_tools.summarize_unused_offers(app_queue) if deploy_status == marathon_tools.MarathonDeployStatus.Delayed: _, backoff_seconds = marathon_tools.get_app_queue_status_from_queue(app_queue) deploy_status_human = marathon_app_deploy_status_human( deploy_status, backoff_seconds ) else: deploy_status_human = marathon_app_deploy_status_human(deploy_status) output.append(f" Status: {deploy_status_human}") if unused_offers_summary is not None and len(unused_offers_summary) > 0: output.append(" Possibly stalled for:") output.append( " ".join([f"{k}: {n} times" for k, n in unused_offers_summary.items()]) ) if verbose > 0: output.append(" Tasks:") rows = [ ( "Mesos Task ID", "Host deployed to", "Deployed at what localtime", "Health", ) ] for task in app.tasks: local_deployed_datetime = datetime_from_utc_to_local(task.staged_at) if task.host is not None: hostname = "{}:{}".format(task.host.split(".")[0], task.ports[0]) else: hostname = "Unknown" if not task.health_check_results: health_check_status = PaastaColors.grey("N/A") elif marathon_tools.is_task_healthy(task): health_check_status = PaastaColors.green("Healthy") else: health_check_status = PaastaColors.red("Unhealthy") rows.append( ( get_short_task_id(task.id), hostname, "{} ({})".format( local_deployed_datetime.strftime("%Y-%m-%dT%H:%M"), humanize.naturaltime(local_deployed_datetime), ), health_check_status, ) ) output.append("\n".join([" %s" % line for line in format_table(rows)])) if len(app.tasks) == 0: output.append(" No tasks associated with this marathon app") return deploy_status, app.tasks_running, "\n".join(output)
def simulate_healthcheck_on_service( instance_config, docker_client, container_id, healthcheck_mode, healthcheck_data, healthcheck_enabled, ): """Simulates Marathon-style healthcheck on given service if healthcheck is enabled :param instance_config: service manifest :param docker_client: Docker client object :param container_id: Docker container id :param healthcheck_data: tuple url to healthcheck :param healthcheck_enabled: boolean :returns: healthcheck_passed: boolean """ healthcheck_link = PaastaColors.cyan(healthcheck_data) if healthcheck_enabled: grace_period = instance_config.get_healthcheck_grace_period_seconds() timeout = instance_config.get_healthcheck_timeout_seconds() interval = instance_config.get_healthcheck_interval_seconds() max_failures = instance_config.get_healthcheck_max_consecutive_failures() paasta_print('\nStarting health check via %s (waiting %s seconds before ' 'considering failures due to grace period):' % (healthcheck_link, grace_period)) # silenty start performing health checks until grace period ends or first check succeeds graceperiod_end_time = time.time() + grace_period after_grace_period_attempts = 0 while True: # First inspect the container for early exits container_state = docker_client.inspect_container(container_id) if not container_state['State']['Running']: paasta_print( PaastaColors.red('Container exited with code {}'.format( container_state['State']['ExitCode'], )), ) healthcheck_passed = False break healthcheck_passed, healthcheck_output = run_healthcheck_on_container( docker_client, container_id, healthcheck_mode, healthcheck_data, timeout, ) # Yay, we passed the healthcheck if healthcheck_passed: paasta_print("{}'{}' (via {})".format( PaastaColors.green("Healthcheck succeeded!: "), healthcheck_output, healthcheck_link, )) break # Otherwise, print why we failed if time.time() < graceperiod_end_time: color = PaastaColors.grey msg = '(disregarded due to grace period)' extra_msg = f' (via: {healthcheck_link}. Output: {healthcheck_output})' else: # If we've exceeded the grace period, we start incrementing attempts after_grace_period_attempts += 1 color = PaastaColors.red msg = '(Attempt {} of {})'.format( after_grace_period_attempts, max_failures, ) extra_msg = f' (via: {healthcheck_link}. Output: {healthcheck_output})' paasta_print('{}{}'.format( color(f'Healthcheck failed! {msg}'), extra_msg, )) if after_grace_period_attempts == max_failures: break time.sleep(interval) else: paasta_print('\nPaaSTA would have healthchecked your service via\n%s' % healthcheck_link) healthcheck_passed = True return healthcheck_passed
def simulate_healthcheck_on_service(instance_config, docker_client, container_id, healthcheck_mode, healthcheck_data, healthcheck_enabled): """Simulates Marathon-style healthcheck on given service if healthcheck is enabled :param instance_config: service manifest :param docker_client: Docker client object :param container_id: Docker container id :param healthcheck_data: tuple url to healthcheck :param healthcheck_enabled: boolean :returns: if healthcheck_enabled is true, then returns output of healthcheck, otherwise simply returns true """ healthcheck_link = PaastaColors.cyan(healthcheck_data) if healthcheck_enabled: grace_period = instance_config.get_healthcheck_grace_period_seconds() timeout = instance_config.get_healthcheck_timeout_seconds() interval = instance_config.get_healthcheck_interval_seconds() max_failures = instance_config.get_healthcheck_max_consecutive_failures( ) sys.stdout.write( '\nStarting health check via %s (waiting %s seconds before ' 'considering failures due to grace period):\n' % (healthcheck_link, grace_period)) # silenty start performing health checks until grace period ends or first check succeeds graceperiod_end_time = time.time() + grace_period while True: healthcheck_succeeded = run_healthcheck_on_container( docker_client, container_id, healthcheck_mode, healthcheck_data, timeout) if healthcheck_succeeded or time.time() > graceperiod_end_time: break else: sys.stdout.write("%s\n" % PaastaColors.grey( "Healthcheck failed (disregarded due to grace period)")) time.sleep(interval) failure = False for attempt in range(1, max_failures + 1): healthcheck_succeeded = run_healthcheck_on_container( docker_client, container_id, healthcheck_mode, healthcheck_data, timeout) if healthcheck_succeeded: sys.stdout.write("%s (via: %s)\n" % (PaastaColors.green("Healthcheck succeeded!"), healthcheck_link)) failure = False break else: sys.stdout.write("%s (via: %s)\n" % (PaastaColors.red( "Healthcheck failed! (Attempt %d of %d)" % (attempt, max_failures)), healthcheck_link)) failure = True time.sleep(interval) if failure: healthcheck_status = False else: healthcheck_status = True else: sys.stdout.write( '\nMesos would have healthchecked your service via\n%s\n' % healthcheck_link) healthcheck_status = True return healthcheck_status
def paasta_rerun(args): """Reruns a Chronos job. :param args: argparse.Namespace obj created from sys.args by cli""" soa_dir = args.soa_dir service = figure_out_service_name(args, soa_dir) # exit with an error if the service doesn't exist if args.execution_date: execution_date = args.execution_date else: execution_date = None all_clusters = list_clusters(soa_dir=soa_dir) actual_deployments = get_actual_deployments(service, soa_dir) # cluster.instance: sha if actual_deployments: deploy_pipeline = list(get_planned_deployments(service, soa_dir)) # cluster.instance deployed_clusters = list_deployed_clusters(deploy_pipeline, actual_deployments) deployed_cluster_instance = _get_cluster_instance(actual_deployments.keys()) if args.clusters is not None: clusters = args.clusters.split(",") else: clusters = deployed_clusters for cluster in clusters: print "cluster: %s" % cluster if cluster not in all_clusters: print " Warning: \"%s\" does not look like a valid cluster." % cluster continue if cluster not in deployed_clusters: print " Warning: service \"%s\" has not been deployed to \"%s\" yet." % (service, cluster) continue if not deployed_cluster_instance[cluster].get(args.instance, False): print (" Warning: instance \"%s\" is either invalid " "or has not been deployed to \"%s\" yet." % (args.instance, cluster)) continue try: chronos_job_config = chronos_tools.load_chronos_job_config( service, args.instance, cluster, load_deployments=False, soa_dir=soa_dir) if chronos_tools.uses_time_variables(chronos_job_config) and execution_date is None: print (" Warning: \"%s\" uses time variables interpolation, " "please supply a `--execution_date` argument." % args.instance) continue except chronos_tools.UnknownChronosJobError as e: print " Warning: %s" % e.message continue if execution_date is None: execution_date = _get_default_execution_date() rc, output = execute_chronos_rerun_on_remote_master( service=service, instancename=args.instance, cluster=cluster, verbose=args.verbose, execution_date=execution_date.strftime(chronos_tools.EXECUTION_DATE_FORMAT) ) if rc == 0: print PaastaColors.green(' successfully created job') else: print PaastaColors.red(' error') print output
def test_generate_summary_for_results_ok():
    assert (metastatus_lib.generate_summary_for_check("Myservice", True) ==
            "Myservice Status: %s" % PaastaColors.green("OK"))