def test_status_smartstack_backends_multiple_locations():
    """Check the non-verbose smartstack report when slaves span two locations.

    ``get_backends`` is patched to return a single UP backend for every host
    and that backend is matched to one of the two tasks. The test expects one
    ``get_backends`` query per location host, made with the explicitly
    supplied synapse port and haproxy URL format, and a green "Healthy" entry
    for each location in the returned output.
    """
    service = 'my_service'
    instance = 'my_instance'
    cluster = 'fake_cluster'
    synapse_port = 123456
    job_id = compose_job_id(service, instance)
    matched_task = mock.Mock()
    unmatched_task = mock.Mock()
    up_backend = {
        'status': 'UP',
        'lastchg': '1',
        'last_chk': 'OK',
        'check_code': '200',
        'svname': 'ipaddress1:1001_hostname1',
        'check_status': 'L7OK',
        'check_duration': 1,
    }
    backends = [up_backend]
    patchers = (
        mock.patch('paasta_tools.marathon_tools.load_service_namespace_config', autospec=True),
        mock.patch('paasta_tools.marathon_tools.read_namespace_for_service_instance'),
        mock.patch('paasta_tools.marathon_serviceinit.get_mesos_slaves_grouped_by_attribute'),
        mock.patch('paasta_tools.marathon_serviceinit.get_backends', autospec=True),
        mock.patch('paasta_tools.marathon_serviceinit.match_backends_and_tasks', autospec=True),
    )
    with contextlib.nested(*patchers) as (
        namespace_config_loader,
        namespace_reader,
        slaves_grouper,
        backends_getter,
        backend_task_matcher,
    ):
        namespace_config_loader.return_value.get_discover.return_value = 'fake_discover'
        namespace_reader.return_value = instance
        backends_getter.return_value = backends
        backend_task_matcher.return_value = [(up_backend, matched_task)]
        slaves_grouper.return_value = {
            'fake_location1': ['fakehost1'],
            'fake_location2': ['fakehost2'],
        }

        actual = marathon_serviceinit.status_smartstack_backends(
            service=service,
            instance=instance,
            cluster=cluster,
            job_config=fake_marathon_job_config,
            tasks=[matched_task, unmatched_task],
            expected_count=len(backends),
            soa_dir=None,
            verbose=False,
            synapse_port=synapse_port,
            synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT,
        )

        # One haproxy query is expected per host, i.e. per location here.
        for host in ('fakehost1', 'fakehost2'):
            backends_getter.assert_any_call(
                job_id,
                synapse_host=host,
                synapse_port=synapse_port,
                synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT,
            )
        healthy = PaastaColors.green('Healthy')
        for location in ('fake_location1', 'fake_location2'):
            assert "%s - %s" % (location, healthy) in actual
def test_get_mesos_status(
    mock_get_mesos_stats,
    mock_get_num_masters,
    mock_get_configured_quorum_size,
    mock_getfqdn,
):
    """Check every (message, healthy) pair produced by get_mesos_status.

    The mocked metrics describe a cluster with spare CPU, memory and disk, and
    the state contains the same framework name twice, so every check is
    expected to be healthy except the duplicate-framework check.

    The mock arguments are expected to be injected by patch decorators that
    are not part of this block.
    """
    mock_getfqdn.return_value = 'fakename'
    mock_get_mesos_stats.return_value = {
        'master/cpus_total': 10,
        'master/cpus_used': 8,
        'master/mem_total': 10240,
        'master/mem_used': 2048,
        'master/disk_total': 10240,
        'master/disk_used': 3072,
        'master/tasks_running': 3,
        'master/tasks_staging': 4,
        'master/tasks_starting': 0,
        'master/slaves_active': 4,
        'master/slaves_inactive': 0,
    }
    mesos_state = {
        'flags': {
            'zk': 'zk://1.1.1.1:2222/fake_cluster',
            'quorum': 2,
        },
        # The same framework name appears twice on purpose.
        'frameworks': [
            {
                'name': 'test_framework1',
            },
            {
                'name': 'test_framework1',
            },
        ]
    }
    mock_get_num_masters.return_value = 5
    mock_get_configured_quorum_size.return_value = 3
    expected_cpus_output = "CPUs: 8.00 / 10 in use (%s)" % PaastaColors.green("80.00%")
    expected_mem_output = \
        "Memory: 2.00 / 10.00GB in use (%s)" % PaastaColors.green("20.00%")
    expected_disk_output = "Disk: 3.00 / 10.00GB in use (%s)" % PaastaColors.green("30.00%")
    expected_tasks_output = \
        "tasks: running: 3 staging: 4 starting: 0"
    expected_duplicate_frameworks_output = \
        "frameworks:\n%s" % \
        PaastaColors.red("    CRITICAL: Framework test_framework1 has 2 instances running--expected no more than 1.")
    expected_slaves_output = \
        "slaves: active: 4 inactive: 0"
    expected_masters_quorum_output = \
        "quorum: masters: 5 configured quorum: 3 "

    results = paasta_metastatus.get_mesos_status(mesos_state, verbosity=0)

    # ``mock.called_once()`` is not an assertion: it returns a truthy child
    # mock, so asserting on it can never fail. Check the call count instead.
    assert mock_get_mesos_stats.call_count == 1
    assert (expected_masters_quorum_output, True) in results
    assert (expected_cpus_output, True) in results
    assert (expected_mem_output, True) in results
    assert (expected_disk_output, True) in results
    assert (expected_tasks_output, True) in results
    assert (expected_duplicate_frameworks_output, False) in results
    assert (expected_slaves_output, True) in results
def test_status_smartstack_backends_multiple_locations():
    """Check the non-verbose smartstack report when slaves span two locations.

    ``get_backends`` is patched to return a single UP backend for every host
    and that backend is matched to one of the two tasks. The test expects one
    ``get_backends`` query per location host on the default synapse port, and
    a green "Healthy" entry for each location in the returned output.
    """
    service = "my_service"
    instance = "my_instance"
    cluster = "fake_cluster"
    job_id = compose_job_id(service, instance)
    matched_task = mock.Mock()
    unmatched_task = mock.Mock()
    up_backend = {
        "status": "UP",
        "lastchg": "1",
        "last_chk": "OK",
        "check_code": "200",
        "svname": "ipaddress1:1001_hostname1",
        "check_status": "L7OK",
        "check_duration": 1,
    }
    backends = [up_backend]
    patchers = (
        mock.patch("paasta_tools.marathon_tools.load_service_namespace_config", autospec=True),
        mock.patch("paasta_tools.marathon_tools.read_namespace_for_service_instance"),
        mock.patch("paasta_tools.marathon_serviceinit.get_mesos_slaves_grouped_by_attribute"),
        mock.patch("paasta_tools.marathon_serviceinit.get_backends", autospec=True),
        mock.patch("paasta_tools.marathon_serviceinit.match_backends_and_tasks", autospec=True),
    )
    with contextlib.nested(*patchers) as (
        namespace_config_loader,
        namespace_reader,
        slaves_grouper,
        backends_getter,
        backend_task_matcher,
    ):
        namespace_config_loader.return_value.get_discover.return_value = "fake_discover"
        namespace_reader.return_value = instance
        backends_getter.return_value = backends
        backend_task_matcher.return_value = [(up_backend, matched_task)]
        slaves_grouper.return_value = {
            "fake_location1": ["fakehost1"],
            "fake_location2": ["fakehost2"],
        }

        actual = marathon_serviceinit.status_smartstack_backends(
            service=service,
            instance=instance,
            cluster=cluster,
            job_config=fake_marathon_job_config,
            tasks=[matched_task, unmatched_task],
            expected_count=len(backends),
            soa_dir=None,
            verbose=False,
        )

        # One haproxy query is expected per host, i.e. per location here.
        for host in ("fakehost1", "fakehost2"):
            backends_getter.assert_any_call(job_id, synapse_host=host, synapse_port=DEFAULT_SYNAPSE_PORT)
        healthy = PaastaColors.green("Healthy")
        for location in ("fake_location1", "fake_location2"):
            assert "%s - %s" % (location, healthy) in actual
def status_mesos_tasks(service, instance, normal_instance_count):
    """Return a one-line, colourised summary of the running Mesos tasks.

    :param service: service name
    :param instance: instance name
    :param normal_instance_count: number of tasks expected to be running
    :returns: a string reading "Healthy" (green) when at least the expected
        number of tasks is running, "Critical" (red) when none are, and
        "Warning" (yellow) otherwise, followed by the running/expected counts
    """
    job_id = marathon_tools.format_job_id(service, instance)
    # Append the task spacer so that only tasks of this exact job are counted,
    # e.g. service.main but not service.main_foo.
    filter_string = "%s%s" % (job_id, marathon_tools.MESOS_TASK_SPACER)
    running_and_active_tasks = get_running_tasks_from_active_frameworks(filter_string)
    running_count = len(running_and_active_tasks)
    if running_count >= normal_instance_count:
        colorize, label = PaastaColors.green, "Healthy"
    elif running_count == 0:
        colorize, label = PaastaColors.red, "Critical"
    else:
        colorize, label = PaastaColors.yellow, "Warning"
    status = colorize(label)
    count = colorize("(%d/%d)" % (running_count, normal_instance_count))
    running_string = PaastaColors.bold('TASK_RUNNING')
    return "Mesos:      %s - %s tasks in the %s state." % (status, count, running_string)
def haproxy_backend_report(normal_instance_count, up_backends):
    """Return a human readable report of the UP haproxy backends of a service
    that is in smartstack.

    :param normal_instance_count: number of backends expected
    :param up_backends: number of backends currently UP
    :returns: "Critical" in red with the ratio when is_under_replicated says
        the service is under-replicated, otherwise "Healthy" in green
    """
    # TODO: Take into account a configurable threshold, PAASTA-1102
    crit_threshold = 50
    replication = is_under_replicated(
        num_available=up_backends,
        expected_count=normal_instance_count,
        crit_threshold=crit_threshold,
    )
    under_replicated, ratio = replication
    if not under_replicated:
        status = PaastaColors.green("Healthy")
        count = PaastaColors.green("(%d/%d)" % (up_backends, normal_instance_count))
    else:
        status = PaastaColors.red("Critical")
        count = PaastaColors.red("(%d/%d, %d%%)" % (up_backends, normal_instance_count, ratio))
    return "%s - in haproxy with %s total backends %s in this namespace." % (
        status,
        count,
        PaastaColors.bold('UP'),
    )
def _format_disabled_status(job):
    """Return the colourised scheduling state of a job.

    :param job: mapping describing the job; only its "disabled" key is read
        and a missing key is treated as not disabled
    :returns: grey "Not scheduled" when the job is disabled, otherwise green
        "Scheduled"
    """
    if job.get("disabled", False):
        return PaastaColors.grey("Not scheduled")
    return PaastaColors.green("Scheduled")
def generate_summary_for_check(name, ok):
    """Format the one-line summary of a named check.

    The status is a green "OK" only when ``ok`` is exactly ``True``; any other
    value yields a red "CRITICAL".
    """
    if ok is True:
        status = PaastaColors.green("OK")
    else:
        status = PaastaColors.red("CRITICAL")
    return "%s Status: %s" % (name, status)
def test_format_parents_verbose():
    """The verbose parent listing names the parent job and its last run.

    The parent lookup and its last-run status are patched; the expected age
    in years is derived from the current time so the assertion stays valid as
    time passes.
    """
    last_run_timestamp = '2007-04-01T17:52:58.908Z'
    child_job = {
        'name': 'myexamplejob',
        'parents': ['testservice testinstance']
    }
    patchers = (
        mock.patch(
            'paasta_tools.chronos_tools.get_job_for_service_instance',
            autospec=True,
            return_value={
                'name': 'testservice testinstance'
            }
        ),
        mock.patch(
            'paasta_tools.chronos_tools.get_status_last_run',
            autospec=True,
            return_value=(last_run_timestamp, chronos_tools.LastRunState.Success)
        ),
    )
    with contextlib.nested(*patchers):
        now = datetime.datetime.now(dateutil.tz.tzutc())
        last_run = dateutil.parser.parse(last_run_timestamp)
        expected_years = dateutil.relativedelta.relativedelta(now, last_run).years
        actual = chronos_serviceinit._format_parents_verbose(child_job)
        assert "testservice testinstance" in actual
        expected_last_run = "  Last Run: %s (2007-04-01T17:52, %s years ago)" % (
            PaastaColors.green("OK"),
            expected_years,
        )
        assert expected_last_run in actual
Exemple #9
0
def assert_disk_health(metrics, mesos_state, threshold=10):
    """Report whether enough disk is left in the cluster.

    Used disk is the master's ``master/disk_used`` metric plus the ``disk``
    of every reserved-resources role on every slave. Both totals are divided
    by 1024 before being reported (presumably MB to GB, given the "GB" in the
    message -- confirm against the metrics source).

    :param metrics: mapping with ``master/disk_total`` and ``master/disk_used``
    :param mesos_state: mapping whose ``slaves`` entries each carry a
        ``reserved_resources`` mapping
    :param threshold: passed to check_threshold together with the percentage used
    :returns: a HealthCheckResult; unhealthy when the percentage cannot be
        computed (ZeroDivisionError) or when check_threshold fails
    """
    divisor = float(1024)
    total = metrics['master/disk_total'] / divisor

    used = metrics['master/disk_used']
    for slave in mesos_state['slaves']:
        reserved = slave['reserved_resources']
        for role in reserved:
            used += reserved[role]['disk']
    used = used / divisor

    try:
        perc_used = percent_used(total, used)
    except ZeroDivisionError:
        return HealthCheckResult(
            message="Error reading total available disk from mesos!",
            healthy=False,
        )

    if not check_threshold(perc_used, threshold):
        return HealthCheckResult(
            message="CRITICAL: Less than %d%% disk available. (Currently using %.2f%%)" % (threshold, perc_used),
            healthy=False,
        )

    coloured_percentage = PaastaColors.green("%.2f%%" % perc_used)
    return HealthCheckResult(
        message="Disk: %0.2f / %0.2fGB in use (%s)" % (used, total, coloured_percentage),
        healthy=True,
    )
def test_format_chronos_job_status_enabled():
    """A job that is not disabled is shown as a green "Scheduled"."""
    job = {'disabled': False}
    no_running_tasks = []
    output = chronos_serviceinit.format_chronos_job_status(job, no_running_tasks, False)
    assert PaastaColors.green('Scheduled') in output
def test_assert_memory_health():
    """Half of the memory in use is reported as healthy, with a green 50.00%."""
    metrics = {'master/mem_total': 1024, 'master/mem_used': 512}
    output, healthy = paasta_metastatus.assert_memory_health(metrics)
    assert healthy
    expected = "Memory: 0.50 / 1.00GB in use (%s)" % PaastaColors.green("50.00%")
    assert expected in output
def test_ok_cpu_health():
    """One CPU out of ten in use is reported as healthy, with a green 10.00%."""
    metrics = {'master/cpus_total': 10, 'master/cpus_used': 1}
    output, healthy = paasta_metastatus.assert_cpu_health(metrics)
    assert healthy
    expected = "CPUs: 1.00 / 10 in use (%s)" % PaastaColors.green("10.00%")
    assert expected in output
def test_format_chronos_job_status_success_no_failure():
    """A job with a last success and no last error shows a green "OK" and the
    date of that success."""
    job = {
        "lastError": "",
        "lastSuccess": "2015-04-20T23:20:00.420Z",
        "schedule": "foo",
    }
    no_running_tasks = []
    output = chronos_serviceinit.format_chronos_job_status(job, no_running_tasks, False)
    for fragment in (PaastaColors.green("OK"), "(2015-04-20", "ago)"):
        assert fragment in output
def status_marathon_job_human(service, instance, deploy_status, app_id,
                              running_instances, normal_instance_count):
    """Build the one-line, colourised Marathon status of a service instance.

    :param service: service name
    :param instance: instance name
    :param deploy_status: human readable deploy status; the literal
        'NotRunning' selects the "is NOT running" message
    :param app_id: Marathon app id, only shown in the "NOT running" message
    :param running_instances: number of instances currently running
    :param normal_instance_count: number of instances expected
    :returns: a string starting with "Marathon:" whose status is "Healthy"
        (green), "Warning" (yellow) or "Critical" (red)
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))
    if deploy_status == 'NotRunning':
        red_not = PaastaColors.red("NOT")
        status = PaastaColors.red("Critical")
        return "Marathon:   %s - %s (app %s) is %s running in Marathon." % (status, name, app_id, red_not)

    if running_instances >= normal_instance_count:
        colorize, label = PaastaColors.green, "Healthy"
    elif running_instances == 0:
        # "Critical" is red, matching its instance count and every other
        # critical status; it used to be rendered in yellow.
        colorize, label = PaastaColors.red, "Critical"
    else:
        colorize, label = PaastaColors.yellow, "Warning"
    status = colorize(label)
    instance_count = colorize("(%d/%d)" % (running_instances, normal_instance_count))
    return "Marathon:   %s - up with %s instances. Status: %s" % (status, instance_count, deploy_status)
def status_mesos_tasks(service, instance, normal_instance_count):
    """Return a one-line, colourised summary of the running Mesos tasks.

    The status is "Healthy" (green) when at least ``normal_instance_count``
    tasks are running, "Critical" (red) when none are, and "Warning" (yellow)
    otherwise.
    """
    job_id = marathon_tools.format_job_id(service, instance)
    # The trailing spacer makes sure we only return things for service.main
    # and not service.main_foo.
    filter_string = "%s%s" % (job_id, marathon_tools.MESOS_TASK_SPACER)
    running_count = len(get_running_tasks_from_active_frameworks(filter_string))

    if running_count >= normal_instance_count:
        colorize, label = PaastaColors.green, "Healthy"
    elif running_count == 0:
        colorize, label = PaastaColors.red, "Critical"
    else:
        colorize, label = PaastaColors.yellow, "Warning"

    status = colorize(label)
    count = colorize("(%d/%d)" % (running_count, normal_instance_count))
    return "Mesos:      %s - %s tasks in the %s state." % (
        status,
        count,
        PaastaColors.bold('TASK_RUNNING'),
    )
def bouncing_status_human(app_count, bounce_method):
    """Translate the number of matching apps into a colourised bounce status.

    Zero apps means "Disabled", exactly one "Configured", more than one
    "Bouncing" (with the bounce method); anything else is reported as
    "Unknown" together with the count.
    """
    if app_count == 0:
        return PaastaColors.red("Disabled")
    if app_count == 1:
        return PaastaColors.green("Configured")
    if app_count > 1:
        return PaastaColors.yellow("Bouncing (%s)" % bounce_method)
    return PaastaColors.red("Unknown (count: %s)" % app_count)
def test_get_mesos_status(
    mock_get_mesos_state_from_leader,
    mock_get_mesos_stats,
    mock_get_num_masters,
    mock_get_configured_quorum_size,
    mock_getfqdn,
):
    """Check every (message, healthy) pair produced by get_mesos_status.

    The mocked metrics describe a cluster with spare CPU and memory, and the
    mocked leader state contains the same framework name twice, so every
    check is expected to be healthy except the duplicate-framework check.

    The mock arguments are expected to be injected by patch decorators that
    are not part of this block.
    """
    mock_getfqdn.return_value = "fakename"
    mock_get_mesos_stats.return_value = {
        "master/cpus_total": 10,
        "master/cpus_used": 8,
        "master/mem_total": 10240,
        "master/mem_used": 2048,
        "master/tasks_running": 3,
        "master/tasks_staging": 4,
        "master/tasks_starting": 0,
        "master/slaves_active": 4,
        "master/slaves_inactive": 0,
    }
    mock_get_mesos_state_from_leader.return_value = {
        "flags": {"zk": "zk://1.1.1.1:2222/fake_cluster", "quorum": 2},
        # The same framework name appears twice on purpose.
        "frameworks": [{"name": "test_framework1"}, {"name": "test_framework1"}],
    }
    mock_get_num_masters.return_value = 5
    mock_get_configured_quorum_size.return_value = 3
    expected_cpus_output = "CPUs: 8.00 / 10 in use (%s)" % PaastaColors.green("80.00%")
    expected_mem_output = "Memory: 2.00 / 10.00GB in use (%s)" % PaastaColors.green("20.00%")
    expected_tasks_output = "tasks: running: 3 staging: 4 starting: 0"
    expected_duplicate_frameworks_output = "frameworks:\n%s" % PaastaColors.red(
        "    CRITICAL: Framework test_framework1 has 2 instances running--expected no more than 1."
    )
    expected_slaves_output = "slaves: active: 4 inactive: 0"
    expected_masters_quorum_output = "quorum: masters: 5 configured quorum: 3 "

    results = paasta_metastatus.get_mesos_status()

    # ``mock.called_once()`` is not an assertion: it returns a truthy child
    # mock, so asserting on it can never fail. Check the call counts instead.
    assert mock_get_mesos_stats.call_count == 1
    assert mock_get_mesos_state_from_leader.call_count == 1
    assert (expected_masters_quorum_output, True) in results
    assert (expected_cpus_output, True) in results
    assert (expected_mem_output, True) in results
    assert (expected_tasks_output, True) in results
    assert (expected_duplicate_frameworks_output, False) in results
    assert (expected_slaves_output, True) in results
def test_format_chronos_job_status_enabled(mock_status):
    """A job that is not disabled is shown as a green "Scheduled".

    ``mock_status`` is expected to be injected by a patch decorator that is
    not part of this block; it is not used directly.
    """
    job = {
        'name': 'my_service my_instance',
        'disabled': False,
        'schedule': 'foo'
    }
    no_running_tasks = []
    client = mock.Mock()
    output = chronos_serviceinit.format_chronos_job_status(client, job, no_running_tasks, False)
    assert PaastaColors.green('Scheduled') in output
def test_format_chronos_job_status_failure_and_then_success():
    """When the last success is more recent than the last error, the status is
    a green "OK" carrying the date of that success."""
    job = {
        'lastError': '2015-04-20T23:20:00.420Z',
        'lastSuccess': '2015-04-21T23:20:00.420Z',
    }
    no_running_tasks = []
    output = chronos_serviceinit.format_chronos_job_status(job, no_running_tasks, False)
    for fragment in (PaastaColors.green('OK'), '(2015-04-21', 'ago)'):
        assert fragment in output
Exemple #20
0
def test_format_table_column_for_healthcheck_resource_utilization_pair_healthy():
    """A healthy pair with 10 free out of 20 is rendered in green as
    "10/20 (50.00%)" when the second argument is False."""
    healthcheck = Mock(healthy=True)
    utilization = Mock(free=10, total=20)
    column = metastatus_lib.format_table_column_for_healthcheck_resource_utilization_pair(
        (healthcheck, utilization),
        False
    )
    assert column == PaastaColors.green("10/20 (50.00%)")
Exemple #21
0
def paasta_fsm(args):
    """Generate and write the initial config files for a service, then print
    follow-up instructions.

    :param args: parsed command line arguments; the attributes read here are
        yelpsoa_config_root, srvname, auto, port, team, description and
        external_link
    """
    validate_args(args)
    # ``_team`` is part of the returned tuple but is not needed for writing.
    (srvname, service_stanza, smartstack_stanza, monitoring_stanza,
     deploy_stanza, marathon_stanza, cluster_stanza, _team) = (
        get_paasta_config(
            args.yelpsoa_config_root,
            args.srvname,
            args.auto,
            args.port,
            args.team,
            args.description,
            args.external_link,
        )
    )
    srv = Service(srvname, args.yelpsoa_config_root)
    write_paasta_config(
        srv,
        service_stanza,
        smartstack_stanza,
        monitoring_stanza,
        deploy_stanza,
        marathon_stanza,
        cluster_stanza,
    )
    # Every print takes exactly one parenthesised argument so the output is
    # the same whether ``print`` is the Python 2 statement or the Python 3
    # function. The banner uses raw strings because it contains backslashes
    # that are not escape sequences.
    print(PaastaColors.yellow("               _  _(o)_(o)_  _"))
    print(PaastaColors.red(r"             ._\`:_ F S M _:' \_,"))
    print(PaastaColors.green(r"                 / (`---'\ `-."))
    print(PaastaColors.cyan("              ,-`  _)    (_,"))
    print("With My Noodly Appendage I Have Written Configs For")
    print("")
    print(PaastaColors.bold("    %s" % srvname))
    print("")
    print("Customize Them If It Makes You Happy -- http://y/paasta For Details")
    print("Remember To Add, Commit, And Push When You're Done:")
    print("")
    print("cd %s" % join(args.yelpsoa_config_root, srvname))
    print("# Review And/Or Customize Files")
    print("git add .")
    print("git commit -m'Initial Commit For %s'" % srvname)
    print("git push origin HEAD  # Pushmaster Or Ops Deputy Privs Required")
    print("")
Exemple #22
0
def assert_cpu_health(metrics, threshold=10):
    """Report whether enough CPUs are left in the cluster.

    :param metrics: metrics mapping handed to get_mesos_cpu_status
    :param threshold: passed to check_threshold together with the percentage used
    :returns: a (message, healthy) tuple; the message is red and ``healthy``
        is False when check_threshold fails or when the percentage cannot be
        computed
    """
    total, used, _available = get_mesos_cpu_status(metrics)
    try:
        perc_used = percent_used(total, used)
    except ZeroDivisionError:
        # A total of zero means the metrics could not be read properly;
        # report that instead of crashing the whole status run.
        return (PaastaColors.red("Error reading total available cpu from mesos!"),
                False)
    if check_threshold(perc_used, threshold):
        return ("CPUs: %.2f / %d in use (%s)"
                % (used, total, PaastaColors.green("%.2f%%" % perc_used)),
                True)
    else:
        return (PaastaColors.red(
                "CRITICAL: Less than %d%% CPUs available. (Currently using %.2f%%)"
                % (threshold, perc_used)),
                False)
def test_format_table_column_for_healthcheck_resource_utilization_pair_healthy_human():
    """A healthy 'mem' pair with 10 free out of 20 is rendered in green as
    "10.0M/20.0M" when the second argument is True."""
    healthcheck = Mock(healthy=True, metric='mem')
    utilization = Mock(free=10, total=20)
    column = paasta_metastatus.format_table_column_for_healthcheck_resource_utilization_pair(
        (healthcheck, utilization),
        True
    )
    assert column == PaastaColors.green("10.0M/20.0M")
def test_format_chronos_job_status_success_no_failure():
    """A job with a last success and no last error shows a green "OK" and the
    date of that success (empty desired state, no running tasks)."""
    job = {
        'lastError': '',
        'lastSuccess': '2015-04-20T23:20:00.420Z',
    }
    no_desired_state = ''
    no_running_tasks = []
    output = chronos_serviceinit.format_chronos_job_status(job, no_desired_state, no_running_tasks, False)
    for fragment in (PaastaColors.green('OK'), '(2015-04-20', 'ago)'):
        assert fragment in output
def get_bouncing_status(service, instance, client, job_config):
    """Describe the bounce state of a service instance from its matching apps.

    No matching app id means "Stopped", exactly one "Running", more than one
    "Bouncing" (with the configured bounce method); anything else is reported
    as "Unknown" together with the count.
    """
    matching_apps = marathon_tools.get_matching_appids(service, instance, client)
    bounce_method = job_config.get_bounce_method()
    app_count = len(matching_apps)
    if app_count == 0:
        return PaastaColors.red("Stopped")
    if app_count == 1:
        return PaastaColors.green("Running")
    if app_count > 1:
        return PaastaColors.yellow("Bouncing (%s)" % bounce_method)
    return PaastaColors.red("Unknown (count: %s)" % app_count)
def status_marathon_job(service, instance, app_id, normal_instance_count, client):
    """Build the one-line, colourised Marathon status of a service instance.

    :param service: service name
    :param instance: instance name
    :param app_id: Marathon app id to look up
    :param normal_instance_count: number of instances expected
    :param client: Marathon client used to check and fetch the app
    :returns: a string starting with "Marathon:" whose status is "Healthy"
        (green), "Warning" (yellow) or "Critical" (red)
    """
    name = PaastaColors.cyan(compose_job_id(service, instance))
    if not marathon_tools.is_app_id_running(app_id, client):
        red_not = PaastaColors.red("NOT")
        status = PaastaColors.red("Critical")
        return "Marathon:   %s - %s (app %s) is %s running in Marathon." % (status, name, app_id, red_not)

    app = client.get_app(app_id)
    running_instances = app.tasks_running
    deploy_status = marathon_tools.get_marathon_app_deploy_status_human(app, app_id, client)

    if running_instances >= normal_instance_count:
        colorize, label = PaastaColors.green, "Healthy"
    elif running_instances == 0:
        # "Critical" is red, matching its instance count and every other
        # critical status; it used to be rendered in yellow.
        colorize, label = PaastaColors.red, "Critical"
    else:
        colorize, label = PaastaColors.yellow, "Warning"
    status = colorize(label)
    instance_count = colorize("(%d/%d)" % (running_instances, normal_instance_count))
    return "Marathon:   %s - up with %s instances. Status: %s" % (status, instance_count, deploy_status)
Exemple #27
0
def paasta_fsm(args):
    """Write service configs from the system-configured fsm template, then
    print follow-up instructions.

    :param args: parsed command line arguments; only yelpsoa_config_root is
        read, both as the source of the template variables and as the
        destination directory
    """
    variables = get_paasta_config(yelpsoa_config_root=args.yelpsoa_config_root)
    destination = args.yelpsoa_config_root

    paasta_config = load_system_paasta_config()
    template = paasta_config.get_fsm_template()

    write_paasta_config(
        variables=variables,
        template=template,
        destination=destination,
    )

    # Every print takes exactly one parenthesised argument so the output is
    # the same whether ``print`` is the Python 2 statement or the Python 3
    # function. The banner uses raw strings because it contains backslashes
    # that are not escape sequences.
    print(PaastaColors.yellow("               _  _(o)_(o)_  _"))
    print(PaastaColors.red(r"             ._\`:_ F S M _:' \_,"))
    print(PaastaColors.green(r"                 / (`---'\ `-."))
    print(PaastaColors.cyan("              ,-`  _)    (_,"))
    print("With My Noodly Appendage I Have Written Configs!")
    print("")
    print("Customize Them If It Makes You Happy -- http://y/paasta For Details")
    print("Remember To Add, Commit, And Push When You're Done:")
    print("")
def _prettify_status(status):
    """Return the colourised label of a last-run state.

    Fail maps to a red "Failed", Success to a green "OK" and NotRun to a
    yellow "New".

    :raises ValueError: if ``status`` is not one of those three states
    """
    renderings = (
        (chronos_tools.LastRunState.Fail, PaastaColors.red, "Failed"),
        (chronos_tools.LastRunState.Success, PaastaColors.green, "OK"),
        (chronos_tools.LastRunState.NotRun, PaastaColors.yellow, "New"),
    )
    known_states = tuple(state for state, _, _ in renderings)
    if status not in known_states:
        raise ValueError("Expected valid state, got %s" % status)
    for state, colorize, label in renderings:
        if status == state:
            return colorize(label)
def test_format_chronos_job_status_failure_and_then_success(mock_status):
    """When the last success is more recent than the last error, the status is
    a green "OK" carrying the date of that success.

    ``mock_status`` is expected to be injected by a patch decorator that is
    not part of this block; it is not used directly.
    """
    job = {
        'name': 'my_service my_instance',
        'lastError': '2015-04-20T23:20:00.420Z',
        'lastSuccess': '2015-04-21T23:20:00.420Z',
        'schedule': 'foo'
    }
    no_running_tasks = []
    client = mock.Mock()
    output = chronos_serviceinit.format_chronos_job_status(client, job, no_running_tasks, False)
    for fragment in (PaastaColors.green('OK'), '(2015-04-21', 'ago)'):
        assert fragment in output
Exemple #30
0
def assert_memory_health(metrics, threshold=10):
    """Report whether enough memory is left in the cluster.

    Both metrics are divided by 1024 before being reported (presumably MB to
    GB, given the "GB" in the message -- confirm against the metrics source).

    :param metrics: mapping with ``master/mem_total`` and ``master/mem_used``
    :param threshold: passed to check_threshold together with the percentage used
    :returns: a (message, healthy) tuple; the message is red and ``healthy``
        is False when check_threshold fails or when the percentage cannot be
        computed
    """
    total = metrics['master/mem_total'] / float(1024)
    used = metrics['master/mem_used'] / float(1024)
    try:
        perc_used = percent_used(total, used)
    except ZeroDivisionError:
        # A total of zero means the metrics could not be read properly;
        # report that instead of crashing the whole status run.
        return (PaastaColors.red("Error reading total available memory from mesos!"),
                False)

    if check_threshold(perc_used, threshold):
        return ("Memory: %0.2f / %0.2fGB in use (%s)"
                % (used, total, PaastaColors.green("%.2f%%" % perc_used)),
                True)
    else:
        return (PaastaColors.red(
                "CRITICAL: Less than %d%% memory available. (Currently using %.2f%%)"
                % (threshold, perc_used)),
                False)
Exemple #31
0
def paasta_rerun(args):
    """Reruns a Chronos job.

    For every requested cluster (or every deployed cluster when none is
    requested) the cluster, deployment and instance are validated, then the
    rerun is executed on the remote master. Problems with one cluster are
    printed as warnings and that cluster is skipped. An instance with related
    jobs but no ``--rerun-type`` aborts the whole command.

    :param args: argparse.Namespace obj created from sys.args by cli; the
        attributes read here are soa_dir, execution_date, clusters, instance,
        rerun_type, verbose and force_disabled
    """
    system_paasta_config = load_system_paasta_config()
    soa_dir = args.soa_dir
    service = figure_out_service_name(
        args, soa_dir)  # exit with an error if the service doesn't exist
    # Date explicitly supplied by the user; a falsy value means "not supplied".
    requested_execution_date = args.execution_date or None
    # Fallback date, computed at most once and shared by all clusters. It is
    # kept separate from the requested date so that the time-variables check
    # below always sees what the user really supplied.
    default_execution_date = None

    all_clusters = list_clusters(soa_dir=soa_dir)
    actual_deployments = get_actual_deployments(
        service, soa_dir)  # cluster.instance: sha
    # Start from "nothing deployed" so both names are bound even when the
    # service has no deployments; every cluster then gets the
    # "has not been deployed" warning instead of an UnboundLocalError.
    deployed_clusters = []
    deployed_cluster_instance = {}
    if actual_deployments:
        deploy_pipeline = list(get_planned_deployments(
            service, soa_dir))  # cluster.instance
        deployed_clusters = list_deployed_clusters(deploy_pipeline,
                                                   actual_deployments)
        deployed_cluster_instance = _get_cluster_instance(
            actual_deployments.keys())

    if args.clusters is not None:
        clusters = args.clusters.split(",")
    else:
        clusters = deployed_clusters

    for cluster in clusters:
        paasta_print("cluster: %s" % cluster)

        if cluster not in all_clusters:
            paasta_print(
                "  Warning: \"%s\" does not look like a valid cluster." %
                cluster)
            continue
        if cluster not in deployed_clusters:
            paasta_print(
                f"  Warning: service \"{service}\" has not been deployed to \"{cluster}\" yet."
            )
            continue
        if not deployed_cluster_instance[cluster].get(args.instance, False):
            paasta_print(("  Warning: instance \"%s\" is either invalid "
                          "or has not been deployed to \"%s\" yet." %
                          (args.instance, cluster)))
            continue

        try:
            chronos_job_config = chronos_tools.load_chronos_job_config(
                service,
                args.instance,
                cluster,
                load_deployments=False,
                soa_dir=soa_dir,
            )
            if chronos_tools.uses_time_variables(
                    chronos_job_config) and requested_execution_date is None:
                paasta_print(
                    ("  Warning: \"%s\" uses time variables interpolation, "
                     "please supply a `--execution_date` argument." %
                     args.instance))
                continue
        except NoConfigurationForServiceError as e:
            paasta_print("  Warning: %s" % e)
            continue

        if requested_execution_date is not None:
            execution_date = requested_execution_date
        else:
            if default_execution_date is None:
                default_execution_date = _get_default_execution_date()
            execution_date = default_execution_date

        related_job_configs = get_related_jobs_configs(cluster, service,
                                                       args.instance)

        if not args.rerun_type and len(related_job_configs) > 1:
            instance_names = sorted(
                f'- {srv}{chronos_tools.INTERNAL_SPACER}{inst}'
                for srv, inst in related_job_configs
                if srv != service or inst != args.instance
            )
            paasta_print(PaastaColors.red('  error'))
            paasta_print(
                'Instance {instance} has dependency relations with the following jobs:\n'
                '{relations}\n'
                '\n'
                'Please specify the rerun policy via --rerun-type argument'.
                format(
                    instance=args.instance,
                    relations='\n'.join(instance_names),
                ), )
            # Without a rerun policy nothing can be decided for any cluster.
            return

        rc, output = execute_chronos_rerun_on_remote_master(
            service=service,
            instancename=args.instance,
            cluster=cluster,
            verbose=args.verbose,
            execution_date=execution_date.strftime(
                chronos_tools.EXECUTION_DATE_FORMAT),
            system_paasta_config=system_paasta_config,
            run_all_related_jobs=args.rerun_type == 'graph',
            force_disabled=args.force_disabled,
        )
        if rc == 0:
            paasta_print(PaastaColors.green('  successfully created job'))
        else:
            paasta_print(PaastaColors.red('  error'))
            paasta_print(output)
Exemple #32
0
def test_status_smartstack_backends_verbose_multiple_locations():
    """Verbose smartstack status covers every discovery location.

    Two locations (``fake_location1`` / ``fake_location2``) are returned by the
    patched ``get_mesos_slaves_grouped_by_attribute``; the patched
    ``get_backends`` hands back a different backend on each successive call.
    The test asserts that ``get_backends`` is queried for the host of each
    location and that the verbose output lists both locations as Healthy
    together with their ``hostname:port`` backend entries.
    """
    service = 'my_service'
    instance = 'my_instance'
    service_instance = compose_job_id(service, instance)
    cluster = 'fake_cluster'
    good_task = mock.Mock()
    other_task = mock.Mock()
    fake_backend = {
        'status': 'UP',
        'lastchg': '1',
        'last_chk': 'OK',
        'check_code': '200',
        'svname': 'ipaddress1:1001_hostname1',
        'check_status': 'L7OK',
        'check_duration': 1,
    }
    fake_other_backend = {
        'status': 'UP',
        'lastchg': '1',
        'last_chk': 'OK',
        'check_code': '200',
        'svname': 'ipaddress1:1002_hostname2',
        'check_status': 'L7OK',
        'check_duration': 1,
    }
    # contextlib.nested() no longer exists on Python 3; a single `with`
    # statement with several managers gives the same patching behaviour.
    with mock.patch(
        'paasta_tools.marathon_tools.load_service_namespace_config',
        autospec=True,
    ) as mock_load_service_namespace_config, mock.patch(
        'paasta_tools.marathon_tools.read_namespace_for_service_instance',
    ) as mock_read_ns, mock.patch(
        'paasta_tools.marathon_serviceinit.get_mesos_slaves_grouped_by_attribute',
    ) as mock_get_mesos_slaves_grouped_by_attribute, mock.patch(
        'paasta_tools.marathon_serviceinit.get_backends',
        autospec=True,
        # one backend list per call, i.e. one per location
        side_effect=[[fake_backend], [fake_other_backend]],
    ) as mock_get_backends, mock.patch(
        'paasta_tools.marathon_serviceinit.match_backends_and_tasks',
        autospec=True,
        side_effect=[
            [(fake_backend, good_task)],
            [(fake_other_backend, good_task)],
        ],
    ):
        mock_load_service_namespace_config.return_value.get_discover.return_value = 'fake_discover'
        mock_read_ns.return_value = instance
        tasks = [good_task, other_task]
        mock_get_mesos_slaves_grouped_by_attribute.return_value = {
            'fake_location1': ['fakehost1'],
            'fake_location2': ['fakehost2'],
        }
        actual = marathon_serviceinit.status_smartstack_backends(
            service=service,
            instance=instance,
            cluster=cluster,
            job_config=fake_marathon_job_config,
            tasks=tasks,
            expected_count=1,
            soa_dir=None,
            verbose=True,
            synapse_port=123456,
            synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT,
        )
        mock_get_backends.assert_any_call(
            service_instance,
            synapse_host='fakehost1',
            synapse_port=123456,
            synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT,
        )
        mock_get_backends.assert_any_call(
            service_instance,
            synapse_host='fakehost2',
            synapse_port=123456,
            synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT,
        )
        mock_get_mesos_slaves_grouped_by_attribute.assert_called_once_with(
            attribute='fake_discover',
            blacklist=[],
        )
        assert "fake_location1 - %s" % PaastaColors.green('Healthy') in actual
        assert "hostname1:1001" in actual
        assert "fake_location2 - %s" % PaastaColors.green('Healthy') in actual
        assert "hostname2:1002" in actual
def test_assert_memory_health():
    """assert_memory_health reports healthy, 50% usage for the status (1024, 512, 512)."""
    message, healthy = metastatus_lib.assert_memory_health((1024, 512, 512))
    assert healthy
    expected_fragment = (
        "Memory: 0.50 / 1.00GB in use (%s)" % PaastaColors.green("50.00%")
    )
    assert expected_fragment in message
Exemple #34
0
def remote_run_start(args):
    """ Start a task in Mesos
    Steps:
    1. Accumulate overrides
    2. Create task configuration
    3. Build executor stack
    4. Run the task on the executor stack

    :param args: parsed command-line arguments; this function reads
        ``run_id``, ``docker_image``, ``aws_region``, ``detach``,
        ``staging_timeout``, ``dry_run``, ``retries`` and
        ``notification_email`` from it (and passes it on to ``extract_args``
        and ``accumulate_config_overrides``).
    :returns: None. With ``args.dry_run`` the function prints the would-be
        configuration and returns; with ``args.detach`` the parent processes
        return right after forking. Otherwise the function does not return:
        it ends in ``sys.exit`` with the code from ``handle_terminal_event``.
    :raises AssertionError: if the remote-run config has no ``default_role``.
    """
    # accumulate all configuration needed to build what we need to run a task
    system_paasta_config, service, cluster, \
        soa_dir, instance, instance_type = extract_args(args)
    # TODO: move run_id into task identifier?
    run_id = args.run_id or generate_run_id(length=10)
    framework_name = create_framework_name(service, instance, run_id)
    overrides = accumulate_config_overrides(args, service, instance)
    # TODO: implement DryRunExecutor?
    taskproc_config = system_paasta_config.get_taskproc()
    native_job_config = load_paasta_native_job_config(
        service,
        instance,
        cluster,
        soa_dir=soa_dir,
        instance_type=instance_type,
        config_overrides=overrides,
        # deployments are only loaded when no explicit docker image was given
        load_deployments=not args.docker_image,
    )
    # explicit CLI region wins over the one from the taskproc config
    region = args.aws_region or taskproc_config.get('aws_region')
    default_role = system_paasta_config.get_remote_run_config().get(
        'default_role')
    assert default_role
    # the job's own role, if set, takes precedence over the system default
    role = native_job_config.get_role() or default_role
    pool = native_job_config.get_pool()
    processor = TaskProcessor()
    processor.load_plugin(provider_module='task_processing.plugins.stateful')
    processor.load_plugin(provider_module='task_processing.plugins.mesos')

    if args.detach:
        paasta_print("Running in background")
        # Double fork with a setsid() in between: both parent processes
        # return, only the grandchild carries on past this block.
        if os.fork() > 0:
            return
        os.setsid()
        if os.fork() > 0:
            return
        # the detached process discards everything it writes
        sys.stdout = open('/dev/null', 'w')
        sys.stderr = open('/dev/null', 'w')

    # create factory functions for task_config and executors, which makes it
    # easier to recreate them for retry purposes
    def task_config_factory():
        return create_mesos_task_config(
            processor=processor,
            service=service,
            instance=instance,
            system_paasta_config=system_paasta_config,
            native_job_config=native_job_config,
            offer_timeout=args.staging_timeout,
            docker_image=args.docker_image,
        )

    # kept as its own dict because it is also printed in dry-run mode and
    # passed to handle_terminal_event below
    framework_config = dict(
        cluster=cluster,
        framework_name=framework_name,
        framework_staging_timeout=args.staging_timeout,
        role=role,
        pool=pool,
    )
    executor_kwargs = dict(  # used to create mesos executor
        processor=processor,
        system_paasta_config=system_paasta_config,
        taskproc_config=taskproc_config,
        **framework_config,
    )

    def executor_factory():
        mesos_executor = create_mesos_executor(**executor_kwargs)
        return build_executor_stack(
            processor,
            mesos_executor,
            taskproc_config,
            cluster,
            region,
        )

    if args.dry_run:
        # build the task config once just to show it; nothing is executed
        task_config_dict = task_config_to_dict(task_config_factory())
        pp = pprint.PrettyPrinter(indent=2)
        paasta_print(
            PaastaColors.green("Would have run task with:"),
            PaastaColors.green("Framework config:"),
            pp.pformat(framework_config),
            PaastaColors.green("Task config:"),
            pp.pformat(task_config_dict),
            sep='\n',
        )
        return

    terminals = run_tasks_with_retries(
        executor_factory,
        task_config_factory,
        retries=args.retries,
    )
    # only the last (event, task_config) pair decides the exit code
    final_event, final_task_config = terminals[-1]
    exit_code = handle_terminal_event(
        event=final_event,
        service=service,
        instance=instance,
        run_id=run_id,
        email_address=args.notification_email,
        framework_config=framework_config,
        task_config=final_task_config,
    )
    sys.exit(exit_code)
Exemple #35
0
def simulate_healthcheck_on_service(
    instance_config,
    docker_client,
    container_id,
    healthcheck_mode,
    healthcheck_data,
    healthcheck_enabled,
):
    """Simulates Marathon-style healthcheck on given service if healthcheck is enabled

    While the healthcheck loop runs, the container's log output is printed
    from a background daemon thread.

    :param instance_config: service manifest
    :param docker_client: Docker client object
    :param container_id: Docker container id
    :param healthcheck_mode: passed through unchanged to
        run_healthcheck_on_container
    :param healthcheck_data: tuple url to healthcheck
    :param healthcheck_enabled: boolean
    :returns: healthcheck_passed: boolean
    """
    healthcheck_link = PaastaColors.cyan(healthcheck_data)
    if healthcheck_enabled:
        grace_period = instance_config.get_healthcheck_grace_period_seconds()
        timeout = instance_config.get_healthcheck_timeout_seconds()
        interval = instance_config.get_healthcheck_interval_seconds()
        max_failures = instance_config.get_healthcheck_max_consecutive_failures(
        )

        print("\nStarting health check via %s (waiting %s seconds before "
              "considering failures due to grace period):" %
              (healthcheck_link, grace_period))

        # silently start performing health checks until grace period ends or first check succeeds
        graceperiod_end_time = time.time() + grace_period
        after_grace_period_attempts = 0
        # Flag read by the log-streaming thread through the closure; it is
        # set to False once the healthcheck loop below has finished.
        healthchecking = True

        def _stream_docker_logs(container_id, generator):
            # Prints container log lines, prefixed with the first 12
            # characters of the container id, for as long as healthchecking
            # is true.
            while healthchecking:
                try:
                    # the generator will block until another log line is available
                    log_line = next(generator).decode("utf-8").rstrip("\n")
                    if healthchecking:
                        print(f"container [{container_id[:12]}]: {log_line}")
                    else:
                        # stop streaming at first opportunity, since generator.close()
                        # cant be used until the container is dead
                        break
                except StopIteration:  # natural end of logs
                    break

        docker_logs_generator = docker_client.logs(container_id,
                                                   stderr=True,
                                                   stream=True)
        # daemon thread: it is never joined here
        threading.Thread(
            target=_stream_docker_logs,
            daemon=True,
            args=(container_id, docker_logs_generator),
        ).start()

        while True:
            # First inspect the container for early exits
            container_state = docker_client.inspect_container(container_id)
            if not container_state["State"]["Running"]:
                print(
                    PaastaColors.red("Container exited with code {}".format(
                        container_state["State"]["ExitCode"])))
                healthcheck_passed = False
                break

            healthcheck_passed, healthcheck_output = run_healthcheck_on_container(
                docker_client, container_id, healthcheck_mode,
                healthcheck_data, timeout)

            # Yay, we passed the healthcheck
            if healthcheck_passed:
                print("{}'{}' (via {})".format(
                    PaastaColors.green("Healthcheck succeeded!: "),
                    healthcheck_output,
                    healthcheck_link,
                ))
                break

            # Otherwise, print why we failed
            if time.time() < graceperiod_end_time:
                # failures inside the grace period are reported but not counted
                color = PaastaColors.grey
                msg = "(disregarded due to grace period)"
                extra_msg = f" (via: {healthcheck_link}. Output: {healthcheck_output})"
            else:
                # If we've exceeded the grace period, we start incrementing attempts
                after_grace_period_attempts += 1
                color = PaastaColors.red
                msg = "(Attempt {} of {})".format(after_grace_period_attempts,
                                                  max_failures)
                extra_msg = f" (via: {healthcheck_link}. Output: {healthcheck_output})"

            print("{}{}".format(color(f"Healthcheck failed! {msg}"),
                                extra_msg))

            # give up once the counted failures reach the configured maximum;
            # healthcheck_passed still holds the last (failed) result
            if after_grace_period_attempts == max_failures:
                break

            time.sleep(interval)
        healthchecking = False  # end docker logs stream
    else:
        print("\nPaaSTA would have healthchecked your service via\n%s" %
              healthcheck_link)
        healthcheck_passed = True
    return healthcheck_passed
def test_status_smartstack_backends_multiple_locations():
    """Non-verbose smartstack status covers every discovery location.

    The patched ``get_all_slaves_for_blacklist_whitelist`` returns two slaves
    whose ``fake_discover`` attribute places them in different locations. The
    test asserts that ``get_backends`` is queried for each slave's hostname
    and that both locations appear as Healthy in the returned output.
    """
    service = 'my_service'
    instance = 'my_instance'
    service_instance = compose_job_id(service, instance)
    cluster = 'fake_cluster'
    good_task = mock.Mock()
    other_task = mock.Mock()
    fake_backend = {
        'status': 'UP',
        'lastchg': '1',
        'last_chk': 'OK',
        'check_code': '200',
        'svname': 'ipaddress1:1001_hostname1',
        'check_status': 'L7OK',
        'check_duration': 1,
    }
    # contextlib.nested() no longer exists on Python 3; a single `with`
    # statement with several managers gives the same patching behaviour.
    with mock.patch(
        'paasta_tools.marathon_tools.load_service_namespace_config',
        autospec=True,
    ) as mock_load_service_namespace_config, mock.patch(
        'paasta_tools.marathon_tools.read_namespace_for_service_instance',
        autospec=True,
    ) as mock_read_ns, mock.patch(
        'paasta_tools.marathon_serviceinit.get_all_slaves_for_blacklist_whitelist',
        autospec=True,
    ) as mock_get_all_slaves_for_blacklist_whitelist, mock.patch(
        'paasta_tools.marathon_serviceinit.get_backends',
        autospec=True,
    ) as mock_get_backends, mock.patch(
        'paasta_tools.marathon_serviceinit.match_backends_and_tasks',
        autospec=True,
    ) as mock_match_backends_and_tasks:
        mock_load_service_namespace_config.return_value.get_discover.return_value = 'fake_discover'
        mock_read_ns.return_value = instance
        mock_get_backends.return_value = [fake_backend]
        mock_match_backends_and_tasks.return_value = [
            (fake_backend, good_task),
        ]
        tasks = [good_task, other_task]
        mock_get_all_slaves_for_blacklist_whitelist.return_value = [
            {
                'hostname': 'fakehost',
                'attributes': {
                    'fake_discover': 'fakelocation',
                },
            },
            {
                'hostname': 'fakeotherhost',
                'attributes': {
                    'fake_discover': 'fakeotherlocation',
                },
            },
        ]
        actual = marathon_serviceinit.status_smartstack_backends(
            service=service,
            instance=instance,
            cluster=cluster,
            job_config=fake_marathon_job_config,
            tasks=tasks,
            expected_count=len(mock_get_backends.return_value),
            soa_dir=None,
            verbose=False,
            synapse_port=123456,
            synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT,
        )
        mock_get_backends.assert_any_call(
            service_instance,
            synapse_host='fakehost',
            synapse_port=123456,
            synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT,
        )
        mock_get_backends.assert_any_call(
            service_instance,
            synapse_host='fakeotherhost',
            synapse_port=123456,
            synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT,
        )
        assert "fakelocation - %s" % PaastaColors.green('Healthy') in actual
        assert "fakeotherlocation - %s" % PaastaColors.green(
            'Healthy') in actual
Exemple #37
0
def check_mark():
    """
    :return: string that can print a checkmark

    The check mark (U+2713) is passed to ``PaastaColors.green`` as text.
    Encoding it to UTF-8 first would hand over ``bytes`` on Python 3, so the
    character is kept as a string, matching the other ``check_mark``
    definition in this file.
    """
    return PaastaColors.green('\u2713')
def test_assert_gpu_health():
    """assert_gpu_health reports healthy, 33.33% usage for the status (3, 1, 2)."""
    message, healthy = metastatus_lib.assert_gpu_health((3, 1, 2))
    assert healthy
    expected_fragment = "GPUs: 1 / 3 in use (%s)" % PaastaColors.green("33.33%")
    assert expected_fragment in message
def test_status_smartstack_backends_verbose_multiple_locations():
    """Verbose smartstack status lists both locations and their backends.

    Two slaves in different ``fake_discover`` locations are returned by the
    patched slave lookup, and the patched ``get_backends`` yields a different
    backend on each call. The test checks that ``get_backends`` is called for
    each slave hostname, that the slave lookup is called once with empty
    blacklist/whitelist, and that the output shows both locations as Healthy
    with their ``hostname:port`` entries.
    """
    service, instance, cluster = "servicename", "instancename", "fake_cluster"
    good_task, other_task = mock.Mock(), mock.Mock()
    first_backend = {
        "status": "UP",
        "lastchg": "1",
        "last_chk": "OK",
        "check_code": "200",
        "svname": "ipaddress1:1001_hostname1",
        "check_status": "L7OK",
        "check_duration": 1,
    }
    # identical to the first backend except for the server name
    second_backend = dict(first_backend, svname="ipaddress1:1002_hostname2")

    namespace_config = mock.Mock()
    namespace_config.get_discover.return_value = "fake_discover"

    slaves_patch = mock.patch(
        "paasta_tools.marathon_serviceinit.get_all_slaves_for_blacklist_whitelist",
        autospec=True,
    )
    backends_patch = mock.patch(
        "paasta_tools.marathon_serviceinit.get_backends",
        autospec=True,
        side_effect=[[first_backend], [second_backend]],
    )
    matcher_patch = mock.patch(
        "paasta_tools.marathon_serviceinit.match_backends_and_tasks",
        autospec=True,
        side_effect=[[(first_backend, good_task)], [(second_backend, good_task)]],
    )
    with slaves_patch as mock_slaves, backends_patch as mock_get_backends, matcher_patch:
        mock_slaves.return_value = [
            {"hostname": host, "attributes": {"fake_discover": location}}
            for host, location in (
                ("hostname1", "fakelocation"),
                ("hostname2", "fakeotherlocation"),
            )
        ]
        actual = marathon_serviceinit.status_smartstack_backends(
            service=service,
            instance=instance,
            cluster=cluster,
            job_config=fake_marathon_job_config,
            service_namespace_config=namespace_config,
            tasks=[good_task, other_task],
            expected_count=1,
            soa_dir=None,
            verbose=True,
            synapse_port=123456,
            synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT,
            system_deploy_blacklist=[],
            system_deploy_whitelist=[],
        )
        for host in ("hostname1", "hostname2"):
            mock_get_backends.assert_any_call(
                "servicename.fake_nerve_ns",
                synapse_host=host,
                synapse_port=123456,
                synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT,
            )
        mock_slaves.assert_called_once_with(blacklist=[], whitelist=[])

        healthy = PaastaColors.green("Healthy")
        assert "fakelocation - %s" % healthy in actual
        assert "hostname1:1001" in actual
        assert "fakeotherlocation - %s" % healthy in actual
        assert "hostname2:1002" in actual
def _format_disabled_status(job):
    """Return the coloured schedule label for a job mapping.

    A truthy ``"disabled"`` entry yields a grey "Not scheduled"; a missing or
    falsy one yields a green "Scheduled".
    """
    if not job.get("disabled", False):
        return PaastaColors.green("Scheduled")
    return PaastaColors.grey("Not scheduled")
def test_ok_cpu_health():
    """assert_cpu_health reports healthy, 10% usage for the status (10, 1, 9)."""
    message, healthy = metastatus_lib.assert_cpu_health((10, 1, 9))
    assert healthy
    expected_fragment = "CPUs: 1.00 / 10 in use (%s)" % PaastaColors.green("10.00%")
    assert expected_fragment in message
Exemple #42
0
def test_get_mesos_status(
    mock_get_mesos_stats,
    mock_get_num_masters,
    mock_get_configured_quorum_size,
    mock_getfqdn,
):
    """get_mesos_status turns mesos stats and state into (message, healthy) pairs.

    The mock parameters are supplied from outside this function (their patch
    targets are not visible here). The mesos state deliberately lists the
    framework ``test_framework1`` twice so that the duplicate-framework check
    is the one result expected to be unhealthy; every other check is expected
    to pass.
    """
    mock_getfqdn.return_value = 'fakename'
    mock_get_mesos_stats.return_value = {
        'master/cpus_total': 10,
        'master/cpus_used': 8,
        'master/mem_total': 10240,
        'master/mem_used': 2048,
        'master/disk_total': 10240,
        'master/disk_used': 3072,
        'master/tasks_running': 3,
        'master/tasks_staging': 4,
        'master/tasks_starting': 0,
        'master/slaves_active': 4,
        'master/slaves_inactive': 0,
    }
    mesos_state = {
        'flags': {
            'zk': 'zk://1.1.1.1:2222/fake_cluster',
            'quorum': 2,
        },
        'frameworks': [
            {
                'name': 'test_framework1',
            },
            {
                'name': 'test_framework1',
            },
        ],
    }
    mock_get_num_masters.return_value = 5
    mock_get_configured_quorum_size.return_value = 3
    expected_cpus_output = "CPUs: 8.00 / 10 in use (%s)" % PaastaColors.green(
        "80.00%")
    expected_mem_output = \
        "Memory: 2.00 / 10.00GB in use (%s)" % PaastaColors.green("20.00%")
    expected_disk_output = "Disk: 3.00 / 10.00GB in use (%s)" % PaastaColors.green(
        "30.00%")
    expected_tasks_output = \
        "tasks: running: 3 staging: 4 starting: 0"
    expected_duplicate_frameworks_output = \
        "frameworks:\n%s" % \
        PaastaColors.red("    CRITICAL: Framework test_framework1 has 2 instances running--expected no more than 1.")
    expected_slaves_output = \
        "slaves: active: 4 inactive: 0"
    expected_masters_quorum_output = \
        "quorum: masters: 5 configured quorum: 3 "

    results = paasta_metastatus.get_mesos_status(mesos_state, verbosity=0)

    # `mock.called_once()` is not an assertion helper: it calls an
    # auto-created child mock whose result is always truthy, so the old
    # `assert mock_get_mesos_stats.called_once()` could never fail.
    # Check the real call count instead.
    assert mock_get_mesos_stats.call_count == 1
    assert (expected_masters_quorum_output, True) in results
    assert (expected_cpus_output, True) in results
    assert (expected_mem_output, True) in results
    assert (expected_disk_output, True) in results
    assert (expected_tasks_output, True) in results
    assert (expected_duplicate_frameworks_output, False) in results
    assert (expected_slaves_output, True) in results
def test_status_smartstack_backends_verbose_multiple_locations():
    """Verbose smartstack status lists both locations and their backends.

    The namespace config, the registration lookup, the slave lookup,
    ``get_backends`` and ``match_backends_and_tasks`` are all patched. The
    test checks that ``get_backends`` is called for each slave hostname with
    the composed job id, that the slave lookup is called once with empty
    blacklist/whitelist, and that the output shows both locations as Healthy
    with their ``hostname:port`` entries.
    """
    service, instance = 'my_service', 'my_instance'
    service_instance = compose_job_id(service, instance)
    cluster = 'fake_cluster'
    good_task, other_task = mock.Mock(), mock.Mock()
    first_backend = {
        'status': 'UP',
        'lastchg': '1',
        'last_chk': 'OK',
        'check_code': '200',
        'svname': 'ipaddress1:1001_hostname1',
        'check_status': 'L7OK',
        'check_duration': 1,
    }
    # identical to the first backend except for the server name
    second_backend = dict(first_backend, svname='ipaddress1:1002_hostname2')

    namespace_config_patch = mock.patch(
        'paasta_tools.marathon_tools.load_service_namespace_config',
        autospec=True,
    )
    registration_patch = mock.patch(
        'paasta_tools.marathon_tools.read_registration_for_service_instance',
        autospec=True,
    )
    slaves_patch = mock.patch(
        'paasta_tools.marathon_serviceinit.get_all_slaves_for_blacklist_whitelist',
        autospec=True,
    )
    backends_patch = mock.patch(
        'paasta_tools.marathon_serviceinit.get_backends',
        autospec=True,
        side_effect=[[first_backend], [second_backend]],
    )
    matcher_patch = mock.patch(
        'paasta_tools.marathon_serviceinit.match_backends_and_tasks',
        autospec=True,
        side_effect=[[(first_backend, good_task)], [(second_backend, good_task)]],
    )
    with namespace_config_patch as mock_load_config, \
            registration_patch as mock_read_reg, \
            slaves_patch as mock_slaves, \
            backends_patch as mock_get_backends, \
            matcher_patch:
        mock_load_config.return_value.get_discover.return_value = 'fake_discover'
        mock_read_reg.return_value = service_instance
        mock_slaves.return_value = [
            {'hostname': host, 'attributes': {'fake_discover': location}}
            for host, location in (
                ('hostname1', 'fakelocation'),
                ('hostname2', 'fakeotherlocation'),
            )
        ]
        actual = marathon_serviceinit.status_smartstack_backends(
            service=service,
            instance=instance,
            cluster=cluster,
            job_config=fake_marathon_job_config,
            tasks=[good_task, other_task],
            expected_count=1,
            soa_dir=None,
            verbose=True,
            synapse_port=123456,
            synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT,
            system_deploy_blacklist=[],
            system_deploy_whitelist=[],
        )
        for host in ('hostname1', 'hostname2'):
            mock_get_backends.assert_any_call(
                service_instance,
                synapse_host=host,
                synapse_port=123456,
                synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT,
            )
        mock_slaves.assert_called_once_with(
            blacklist=[],
            whitelist=[],
        )

        healthy = PaastaColors.green('Healthy')
        assert "fakelocation - %s" % healthy in actual
        assert "hostname1:1001" in actual
        assert "fakeotherlocation - %s" % healthy in actual
        assert "hostname2:1002" in actual
Exemple #44
0
def check_mark():
    """
    :return: the check mark character (U+2713) wrapped by PaastaColors.green
    """
    tick = '\u2713'
    return PaastaColors.green(tick)
def status_marathon_app(
    marathon_client: marathon_tools.MarathonClient,
    app: marathon_tools.MarathonApp,
    service: str,
    instance: str,
    cluster: str,
    soa_dir: str,
    dashboards: Dict[marathon_tools.MarathonClient, str],
    verbose: int,
) -> Tuple[int, int, str]:
    """Build a human-readable status report for one marathon app.

    The report contains the dashboard line, task counts, creation time,
    deploy status, a summary of unused offers (when there is one) and, for
    ``verbose > 0``, a table with one row per task.

    :param marathon_client: client used for the dashboard, deploy-status and
        queue lookups
    :param app: the marathon app to describe
    :param service: not read by this function
    :param instance: not read by this function
    :param cluster: not read by this function
    :param soa_dir: not read by this function
    :param dashboards: passed through to ``get_marathon_dashboard``
    :param verbose: the per-task table is added when this is greater than 0
    :returns: tuple of (deploy status, ``app.tasks_running``, report text)
    """

    def _task_row(task):
        # One table row: short task id, host:port, deploy time, health.
        deployed_at = datetime_from_utc_to_local(task.staged_at)
        if task.host is None:
            host_label = "Unknown"
        else:
            # short hostname (up to the first dot) plus the first port
            host_label = "{}:{}".format(task.host.split(".")[0], task.ports[0])
        if not task.health_check_results:
            health_label = PaastaColors.grey("N/A")
        elif marathon_tools.is_task_healthy(task):
            health_label = PaastaColors.green("Healthy")
        else:
            health_label = PaastaColors.red("Unhealthy")
        return (
            get_short_task_id(task.id),
            host_label,
            "{} ({})".format(
                deployed_at.strftime("%Y-%m-%dT%H:%M"),
                humanize.naturaltime(deployed_at),
            ),
            health_label,
        )

    created_at = datetime_from_utc_to_local(isodate.parse_datetime(app.version))
    report = [get_marathon_dashboard(marathon_client, dashboards, app.id)]

    counts = " ".join(
        (
            f"{app.tasks_running} running,",
            f"{app.tasks_healthy} healthy,",
            f"{app.tasks_staged} staged",
            f"out of {app.instances}",
        )
    )
    report.append("    " + counts)
    report.append(
        "    App created: {} ({})".format(
            str(created_at), humanize.naturaltime(created_at)
        )
    )

    deploy_status = marathon_tools.get_marathon_app_deploy_status(marathon_client, app)
    app_queue = marathon_tools.get_app_queue(marathon_client, app.id)
    unused_offers_summary = marathon_tools.summarize_unused_offers(app_queue)

    # A delayed deploy additionally reports its backoff time.
    humanize_args = [deploy_status]
    if deploy_status == marathon_tools.MarathonDeployStatus.Delayed:
        _, backoff_seconds = marathon_tools.get_app_queue_status_from_queue(app_queue)
        humanize_args.append(backoff_seconds)
    deploy_status_human = marathon_app_deploy_status_human(*humanize_args)
    report.append(f"    Status: {deploy_status_human}")

    if unused_offers_summary is not None and len(unused_offers_summary) > 0:
        report.append("    Possibly stalled for:")
        report.append(
            "      ".join(f"{k}: {n} times" for k, n in unused_offers_summary.items())
        )

    if verbose > 0:
        report.append("    Tasks:")
        header = (
            "Mesos Task ID",
            "Host deployed to",
            "Deployed at what localtime",
            "Health",
        )
        rows = [header]
        rows.extend(_task_row(task) for task in app.tasks)
        report.append("\n".join("      %s" % line for line in format_table(rows)))
        if len(app.tasks) == 0:
            report.append("      No tasks associated with this marathon app")

    return deploy_status, app.tasks_running, "\n".join(report)
Exemple #46
0
def simulate_healthcheck_on_service(
    instance_config,
    docker_client,
    container_id,
    healthcheck_mode,
    healthcheck_data,
    healthcheck_enabled,
):
    """Simulates Marathon-style healthcheck on given service if healthcheck is enabled

    Failures inside the grace period are printed but not counted; after it,
    each failure counts towards the configured maximum of consecutive
    failures. The loop also stops as soon as the container is no longer
    running.

    :param instance_config: service manifest
    :param docker_client: Docker client object
    :param container_id: Docker container id
    :param healthcheck_mode: passed through unchanged to
        run_healthcheck_on_container
    :param healthcheck_data: tuple url to healthcheck
    :param healthcheck_enabled: boolean
    :returns: healthcheck_passed: boolean
    """
    healthcheck_link = PaastaColors.cyan(healthcheck_data)

    if not healthcheck_enabled:
        paasta_print('\nPaaSTA would have healthchecked your service via\n%s' % healthcheck_link)
        return True

    grace_period = instance_config.get_healthcheck_grace_period_seconds()
    timeout = instance_config.get_healthcheck_timeout_seconds()
    interval = instance_config.get_healthcheck_interval_seconds()
    max_failures = instance_config.get_healthcheck_max_consecutive_failures()

    paasta_print('\nStarting health check via %s (waiting %s seconds before '
                 'considering failures due to grace period):' % (healthcheck_link, grace_period))

    # silently keep checking until the grace period ends or a check succeeds
    grace_deadline = time.time() + grace_period
    counted_failures = 0
    while True:
        # An exited container can never become healthy: report and stop.
        state = docker_client.inspect_container(container_id)['State']
        if not state['Running']:
            exit_notice = 'Container exited with code {}'.format(state['ExitCode'])
            paasta_print(PaastaColors.red(exit_notice))
            return False

        healthcheck_passed, healthcheck_output = run_healthcheck_on_container(
            docker_client, container_id, healthcheck_mode, healthcheck_data, timeout,
        )

        if healthcheck_passed:
            paasta_print("{}'{}' (via {})".format(
                PaastaColors.green("Healthcheck succeeded!: "),
                healthcheck_output,
                healthcheck_link,
            ))
            return healthcheck_passed

        # The check failed: decide whether it counts, then say why.
        if time.time() < grace_deadline:
            color = PaastaColors.grey
            msg = '(disregarded due to grace period)'
        else:
            # past the grace period every failure is counted
            counted_failures += 1
            color = PaastaColors.red
            msg = '(Attempt {} of {})'.format(counted_failures, max_failures)
        extra_msg = f' (via: {healthcheck_link}. Output: {healthcheck_output})'

        paasta_print('{}{}'.format(color(f'Healthcheck failed! {msg}'), extra_msg))

        if counted_failures == max_failures:
            return healthcheck_passed

        time.sleep(interval)
Exemple #47
0
def simulate_healthcheck_on_service(instance_config, docker_client,
                                    container_id, healthcheck_mode,
                                    healthcheck_data, healthcheck_enabled):
    """Simulates Marathon-style healthcheck on given service if healthcheck is enabled

    While the grace period is running, failed checks are reported but not
    counted. Afterwards up to ``max_failures`` counted checks are made, and
    the first successful one ends the simulation.

    :param instance_config: service manifest; supplies the grace period,
        timeout, interval and max consecutive failures for the healthcheck
    :param docker_client: Docker client object
    :param container_id: Docker container id
    :param healthcheck_mode: passed through to run_healthcheck_on_container
    :param healthcheck_data: passed through to run_healthcheck_on_container
        and shown to the user as the healthcheck link
    :param healthcheck_enabled: boolean
    :returns: True if healthcheck_enabled is false. Otherwise False when the
        counted attempts all failed, and True in every other case
    """
    healthcheck_link = PaastaColors.cyan(healthcheck_data)
    if not healthcheck_enabled:
        sys.stdout.write(
            '\nMesos would have healthchecked your service via\n%s\n' %
            healthcheck_link)
        return True

    grace_period = instance_config.get_healthcheck_grace_period_seconds()
    timeout = instance_config.get_healthcheck_timeout_seconds()
    interval = instance_config.get_healthcheck_interval_seconds()
    max_failures = instance_config.get_healthcheck_max_consecutive_failures()

    sys.stdout.write(
        '\nStarting health check via %s (waiting %s seconds before '
        'considering failures due to grace period):\n' %
        (healthcheck_link, grace_period))

    # Silently perform health checks until the grace period ends or the
    # first check succeeds; failures in this phase are not counted.
    graceperiod_end_time = time.time() + grace_period
    while True:
        healthcheck_succeeded = run_healthcheck_on_container(
            docker_client, container_id, healthcheck_mode,
            healthcheck_data, timeout)
        if healthcheck_succeeded or time.time() > graceperiod_end_time:
            break
        sys.stdout.write("%s\n" % PaastaColors.grey(
            "Healthcheck failed (disregarded due to grace period)"))
        time.sleep(interval)

    # Counted attempts. The check is run again here even if the grace period
    # loop ended on a success, so that the success message is printed.
    failure = False
    for attempt in range(1, max_failures + 1):
        healthcheck_succeeded = run_healthcheck_on_container(
            docker_client, container_id, healthcheck_mode,
            healthcheck_data, timeout)
        if healthcheck_succeeded:
            sys.stdout.write("%s (via: %s)\n" %
                             (PaastaColors.green("Healthcheck succeeded!"),
                              healthcheck_link))
            return True
        sys.stdout.write("%s (via: %s)\n" % (PaastaColors.red(
            "Healthcheck failed! (Attempt %d of %d)" %
            (attempt, max_failures)), healthcheck_link))
        failure = True
        # Only wait when another attempt follows; sleeping after the final
        # failure would just delay returning the result.
        if attempt < max_failures:
            time.sleep(interval)

    return not failure
Exemple #48
0
def paasta_rerun(args):
    """Reruns a Chronos job.

    For every requested cluster (or every deployed cluster when none are
    requested) the cluster, the deployment and the instance are validated;
    a warning is printed and the cluster skipped when a check fails.
    Otherwise the rerun is executed on the remote master.

    :param args: argparse.Namespace obj created from sys.args by cli"""
    soa_dir = args.soa_dir
    service = figure_out_service_name(args, soa_dir)  # exit with an error if the service doesn't exist
    # Any falsy value means "no execution date supplied by the user".
    execution_date = args.execution_date or None

    all_clusters = list_clusters(soa_dir=soa_dir)
    actual_deployments = get_actual_deployments(service, soa_dir)  # cluster.instance: sha

    # Start from empty values so that a service without any deployments hits
    # the "has not been deployed" warning below instead of a NameError.
    deployed_clusters = []
    deployed_cluster_instance = {}
    if actual_deployments:
        deploy_pipeline = list(get_planned_deployments(service, soa_dir))  # cluster.instance
        deployed_clusters = list_deployed_clusters(deploy_pipeline, actual_deployments)
        deployed_cluster_instance = _get_cluster_instance(actual_deployments.keys())

    if args.clusters is not None:
        clusters = args.clusters.split(",")
    else:
        clusters = deployed_clusters

    # Computed at most once and shared by all clusters. It is kept separate
    # from the user-supplied execution_date so the time-variables check
    # below still fires for every cluster.
    default_execution_date = None

    for cluster in clusters:
        print("cluster: %s" % cluster)

        if cluster not in all_clusters:
            print("  Warning: \"%s\" does not look like a valid cluster." % cluster)
            continue
        if cluster not in deployed_clusters:
            print("  Warning: service \"%s\" has not been deployed to \"%s\" yet." % (service, cluster))
            continue
        if not deployed_cluster_instance[cluster].get(args.instance, False):
            print("  Warning: instance \"%s\" is either invalid "
                  "or has not been deployed to \"%s\" yet." % (args.instance, cluster))
            continue

        try:
            chronos_job_config = chronos_tools.load_chronos_job_config(
                service, args.instance, cluster, load_deployments=False, soa_dir=soa_dir)
            if chronos_tools.uses_time_variables(chronos_job_config) and execution_date is None:
                print("  Warning: \"%s\" uses time variables interpolation, "
                      "please supply a `--execution_date` argument." % args.instance)
                continue
        except chronos_tools.UnknownChronosJobError as e:
            print("  Warning: %s" % e.message)
            continue

        if execution_date is not None:
            rerun_date = execution_date
        else:
            if default_execution_date is None:
                default_execution_date = _get_default_execution_date()
            rerun_date = default_execution_date

        rc, output = execute_chronos_rerun_on_remote_master(
            service=service,
            instancename=args.instance,
            cluster=cluster,
            verbose=args.verbose,
            execution_date=rerun_date.strftime(chronos_tools.EXECUTION_DATE_FORMAT)
        )
        if rc == 0:
            print(PaastaColors.green('  successfully created job'))
        else:
            print(PaastaColors.red('  error'))
            print(output)
Exemple #49
0
def test_generate_summary_for_results_ok():
    """A passing check is summarized as "<name> Status: " plus a green "OK"."""
    actual = metastatus_lib.generate_summary_for_check("Myservice", True)
    expected = "Myservice Status: %s" % PaastaColors.green("OK")
    assert actual == expected