def test_get_mesos_status(
    mock_get_mesos_stats,
    mock_get_num_masters,
    mock_get_configured_quorum_size,
    mock_getfqdn,
):
    """Verify the (message, healthy) pairs produced by get_mesos_status.

    The four mock arguments are presumably injected by patch decorators that
    are not visible in this section. The fake state lists the same framework
    name twice so that the duplicate-framework result is reported unhealthy.
    """
    mock_getfqdn.return_value = 'fakename'
    mock_get_mesos_stats.return_value = {
        'master/cpus_total': 10,
        'master/cpus_used': 8,
        'master/mem_total': 10240,
        'master/mem_used': 2048,
        'master/disk_total': 10240,
        'master/disk_used': 3072,
        'master/tasks_running': 3,
        'master/tasks_staging': 4,
        'master/tasks_starting': 0,
        'master/slaves_active': 4,
        'master/slaves_inactive': 0,
    }
    mesos_state = {
        'flags': {
            'zk': 'zk://1.1.1.1:2222/fake_cluster',
            'quorum': 2,
        },
        'frameworks': [
            # Deliberately duplicated name: triggers the CRITICAL framework line.
            {
                'name': 'test_framework1',
            },
            {
                'name': 'test_framework1',
            },
        ]
    }
    mock_get_num_masters.return_value = 5
    mock_get_configured_quorum_size.return_value = 3
    expected_cpus_output = "CPUs: 8.00 / 10 in use (%s)" % PaastaColors.green("80.00%")
    expected_mem_output = \
        "Memory: 2.00 / 10.00GB in use (%s)" % PaastaColors.green("20.00%")
    expected_disk_output = "Disk: 3.00 / 10.00GB in use (%s)" % PaastaColors.green("30.00%")
    expected_tasks_output = \
        "tasks: running: 3 staging: 4 starting: 0"
    expected_duplicate_frameworks_output = \
        "frameworks:\n%s" % \
        PaastaColors.red("    CRITICAL: Framework test_framework1 has 2 instances running--expected no more than 1.")
    expected_slaves_output = \
        "slaves: active: 4 inactive: 0"
    expected_masters_quorum_output = \
        "quorum: masters: 5 configured quorum: 3 "

    results = paasta_metastatus.get_mesos_status(mesos_state, verbosity=0)

    # `mock.called_once()` is not an assertion: on a Mock it just creates a
    # truthy child mock (or raises AttributeError on newer mock releases), so
    # check the recorded call count explicitly instead.
    assert mock_get_mesos_stats.call_count == 1
    assert (expected_masters_quorum_output, True) in results
    assert (expected_cpus_output, True) in results
    assert (expected_mem_output, True) in results
    assert (expected_disk_output, True) in results
    assert (expected_tasks_output, True) in results
    assert (expected_duplicate_frameworks_output, False) in results
    assert (expected_slaves_output, True) in results
Example #2
0
def marathon_deployments_check(service):
    """Checks for consistency between deploy.yaml and the marathon yamls"""
    the_return = True
    pipeline_deployments = get_pipeline_config(service)
    pipeline_steps = [step['instancename'] for step in pipeline_deployments]
    pipeline_steps = [step for step in pipeline_steps if step not in DEPLOY_PIPELINE_NON_DEPLOY_STEPS]
    marathon_steps = get_marathon_steps(service)
    in_marathon_not_deploy = set(marathon_steps) - set(pipeline_steps)
    if len(in_marathon_not_deploy) > 0:
        print "%s There are some instance(s) you have asked to run in marathon that" % x_mark()
        print "  do not have a corresponding entry in deploy.yaml:"
        print "  %s" % PaastaColors.bold(", ".join(in_marathon_not_deploy))
        print "  You should probably add entries to deploy.yaml for them so they"
        print "  are deployed to those clusters."
        the_return = False
    in_deploy_not_marathon = set(pipeline_steps) - set(marathon_steps)
    if len(in_deploy_not_marathon) > 0:
        print "%s There are some instance(s) in deploy.yaml that are not referenced" % x_mark()
        print "  by any marathon instance:"
        print "  %s" % PaastaColors.bold((", ".join(in_deploy_not_marathon)))
        print "  You should probably delete these deploy.yaml entries if they are unused."
        the_return = False
    if the_return is True:
        print success("All entries in deploy.yaml correspond to a marathon entry")
        print success("All marathon instances have a corresponding deploy.yaml entry")
    return the_return
Example #3
0
def paasta_status(args):
    """Print the status of a Yelp service running on PaaSTA.
    :param args: argparse.Namespace obj created from sys.args by cli"""
    soa_dir = args.soa_dir
    service = figure_out_service_name(args, soa_dir)
    actual_deployments = get_actual_deployments(service, soa_dir)
    system_paasta_config = load_system_paasta_config()

    if args.clusters is not None:
        cluster_whitelist = args.clusters.split(",")
    else:
        cluster_whitelist = []
    if args.instances is not None:
        instance_whitelist = args.instances.split(",")
    else:
        instance_whitelist = []

    if actual_deployments:
        deploy_pipeline = list(get_planned_deployments(service, soa_dir))
        try:
            report_status(
                service=service,
                deploy_pipeline=deploy_pipeline,
                actual_deployments=actual_deployments,
                cluster_whitelist=cluster_whitelist,
                instance_whitelist=instance_whitelist,
                system_paasta_config=system_paasta_config,
                verbose=args.verbose,
            )
        except CalledProcessError as e:
            print PaastaColors.grey(PaastaColors.bold(e.cmd + " exited with non-zero return code."))
            print PaastaColors.grey(e.output)
            return e.returncode
    else:
        print missing_deployments_message(service)
Example #4
0
def report_status_for_cluster(service, cluster, deploy_pipeline, actual_deployments, verbose=False):
    """With a given service and cluster, prints the status of the instances
    in that cluster"""
    # Get cluster.instance in the order in which they appear in deploy.yaml
    print
    print "cluster: %s" % cluster
    for namespace in deploy_pipeline:
        cluster_in_pipeline, instance = namespace.split('.')

        if cluster_in_pipeline != cluster:
            # This function only prints things that are relevant to cluster
            # We skip anything not in this cluster
            continue

        # Case: service deployed to cluster.instance
        if namespace in actual_deployments:
            unformatted_instance = instance
            instance = PaastaColors.blue(instance)
            version = actual_deployments[namespace][:8]
            # TODO: Perform sanity checks once per cluster instead of for each namespace
            status = execute_paasta_serviceinit_on_remote_master('status', cluster, service, unformatted_instance,
                                                                 verbose=verbose)
        # Case: service NOT deployed to cluster.instance
        else:
            instance = PaastaColors.red(instance)
            version = 'None'
            status = None

        print '  instance: %s' % instance
        print '    Git sha:    %s' % version
        if status is not None:
            for line in status.rstrip().split('\n'):
                print '    %s' % line
def _format_disabled_status(job):
    """Return a colorized label saying whether a job is scheduled.

    :param job: mapping describing the job; only its "disabled" key is read,
                and a missing key counts as not disabled
    :returns: grey "Not scheduled" when "disabled" is truthy, otherwise green
              "Scheduled"
    """
    # The two branches are exhaustive, so no "UNKNOWN" placeholder is needed.
    if job.get("disabled", False):
        return PaastaColors.grey("Not scheduled")
    return PaastaColors.green("Scheduled")
Example #6
0
def test_status_smartstack_backends_multiple_locations():
    """status_smartstack_backends should query and report every location.

    Two locations with one host each are faked. The test checks that
    get_backends is called for both hosts and that both locations appear as
    Healthy in the returned output.
    """
    service, instance, cluster = 'my_service', 'my_instance', 'fake_cluster'
    service_instance = compose_job_id(service, instance)
    matched_task = mock.Mock()
    unmatched_task = mock.Mock()
    backend = {
        'status': 'UP',
        'lastchg': '1',
        'last_chk': 'OK',
        'check_code': '200',
        'svname': 'ipaddress1:1001_hostname1',
        'check_status': 'L7OK',
        'check_duration': 1,
    }
    patches = contextlib.nested(
        mock.patch('paasta_tools.marathon_tools.load_service_namespace_config', autospec=True),
        mock.patch('paasta_tools.marathon_tools.read_namespace_for_service_instance'),
        mock.patch('paasta_tools.marathon_serviceinit.get_mesos_slaves_grouped_by_attribute'),
        mock.patch('paasta_tools.marathon_serviceinit.get_backends', autospec=True),
        mock.patch('paasta_tools.marathon_serviceinit.match_backends_and_tasks', autospec=True),
    )
    with patches as (
        mock_load_service_namespace_config,
        mock_read_ns,
        mock_get_mesos_slaves_grouped_by_attribute,
        mock_get_backends,
        mock_match_backends_and_tasks,
    ):
        mock_load_service_namespace_config.return_value.get_discover.return_value = 'fake_discover'
        mock_read_ns.return_value = instance
        mock_get_backends.return_value = [backend]
        # Only one of the two tasks is matched to the backend.
        mock_match_backends_and_tasks.return_value = [(backend, matched_task)]
        tasks = [matched_task, unmatched_task]
        mock_get_mesos_slaves_grouped_by_attribute.return_value = {
            'fake_location1': ['fakehost1'],
            'fake_location2': ['fakehost2'],
        }

        actual = marathon_serviceinit.status_smartstack_backends(
            service=service,
            instance=instance,
            cluster=cluster,
            job_config=fake_marathon_job_config,
            tasks=tasks,
            expected_count=len(mock_get_backends.return_value),
            soa_dir=None,
            verbose=False,
            synapse_port=123456,
            synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT,
        )

        for synapse_host in ('fakehost1', 'fakehost2'):
            mock_get_backends.assert_any_call(
                service_instance,
                synapse_host=synapse_host,
                synapse_port=123456,
                synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT,
            )
        for location in ('fake_location1', 'fake_location2'):
            assert "%s - %s" % (location, PaastaColors.green('Healthy')) in actual
Example #7
0
def guess_instance(service, cluster, args):
    """Pick the instance to use for a service in a cluster.

    Returns the instance from args if one was given. Otherwise uses 'main' if
    it is a valid instance, and failing that the first instance available. A
    guessed instance is announced on stderr.

    :param service: service name
    :param cluster: cluster whose instances are considered
    :param args: parsed arguments; reads args.instance and
                 args.yelpsoa_config_root
    :returns: the chosen instance name
    :raises SystemExit: with code 2 when no instance can be detected, either
                        because the service has no configuration or because
                        no instances were found for the cluster
    """
    if args.instance:
        return args.instance

    try:
        candidates = list(list_all_instances_for_service(
            service=service, clusters=[cluster], instance_type=None, soa_dir=args.yelpsoa_config_root
        ))
    except NoConfigurationForServiceError:
        candidates = []

    # An empty result used to crash with IndexError on `[0]`; treat it the
    # same way as a missing configuration.
    if not candidates:
        sys.stderr.write(
            PaastaColors.red(
                "Could not automatically detect instance to emulate. Please specify one with the --instance option.\n"
            )
        )
        sys.exit(2)

    instance = "main" if "main" in candidates else candidates[0]
    sys.stderr.write(
        PaastaColors.yellow(
            "Guessing instance configuration for %s. To override, use the --instance option.\n" % instance
        )
    )
    return instance
def start_chronos_job(service, instance, job_id, client, cluster, job_config, complete_job_config, emergency=False):
    """
    Calls the 'manual start' Chronos endpoint (https://mesos.github.io/chronos/docs/api.html#manually-starting-a-job),
    running the job now regardless of its 'schedule'. The job's "schedule" is unmodified. If a job is disabled,
    this function does not do anything.
    """
    name = PaastaColors.cyan(job_id)

    # The job should be run immediately as long as the job is not disabled via the 'disabled' key in soa-configs or has
    # been previously stopped.
    if complete_job_config['disabled']:
        print PaastaColors.red("You cannot emergency start a disabled job. Run `paasta start` first.")
    else:
        log_reason = PaastaColors.red("EmergencyStart") if emergency else "Brutal bounce"
        _log(
            service=service,
            line="%s: Starting manual run of %s in Chronos" % (log_reason, name),
            component="deploy",
            level="event",
            cluster=cluster,
            instance=instance
        )

        client.update(complete_job_config)
        client.run(job_id)
Example #9
0
def generate_summary_for_check(name, ok):
    """Format a one-line "<name> Status: <status>" summary for a check.

    The status is a green "OK" only when ok is exactly True; any other value
    yields a red "CRITICAL".
    """
    if ok is True:
        status = PaastaColors.green("OK")
    else:
        status = PaastaColors.red("CRITICAL")
    return "%s Status: %s" % (name, status)
Example #10
0
def get_service_info(service):
    """Build a multi-line, human-readable summary of a service.

    :param service: service name
    :returns: newline-joined string with description, links, team, runbook,
              repo, pipeline, deployments, smartstack endpoints (when there
              are any) and dashboards
    """
    config = read_service_configuration(service)
    description = config.get('description', NO_DESCRIPTION_MESSAGE)
    external_link = config.get('external_link', NO_EXTERNAL_LINK_MESSAGE)
    pipeline_url = get_pipeline_url(service)
    smartstack_endpoints = get_smartstack_endpoints(service)
    git_url = get_git_url(service)

    lines = [
        'Service Name: %s' % service,
        'Description: %s' % description,
        'External Link: %s' % PaastaColors.cyan(external_link),
        'Monitored By: team %s' % get_team(service=service, overrides={}),
        'Runbook: %s' % PaastaColors.cyan(get_runbook(service=service, overrides={})),
        'Git Repo: %s' % git_url,
        'Jenkins Pipeline: %s' % pipeline_url,
        'Deployed to the following clusters:',
    ]
    lines.extend(get_deployments_strings(service))
    if smartstack_endpoints:
        lines.append('Smartstack endpoint(s):')
        lines.extend(' - %s' % endpoint for endpoint in smartstack_endpoints)
    lines.append('Dashboard(s):')
    lines.extend(get_dashboard_urls(service))

    return '\n'.join(lines)
Example #11
0
def format_haproxy_backend_row(backend, is_correct_instance):
    """Format one haproxy backend as a row of display columns.

    Takes the fields described in the CSV format of haproxy:
    http://www.haproxy.org/download/1.5/doc/configuration.txt

    :param backend: mapping with the keys svname, status, check_status,
                    check_code, check_duration and lastchg
    :param is_correct_instance: when false, every column is stripped of ANSI
                                escapes and greyed out
    :returns: tuple of (name, last check, last change, status)
    """
    # svname is split on "_": the last piece is used as the hostname and the
    # port is whatever follows the last ":" in the first piece.
    name_parts = backend['svname'].split("_")
    hostname = name_parts[-1]
    port = name_parts[0].split(":")[-1]
    pretty_backend_name = "%s:%s" % (hostname, port)

    state = backend['status']
    if state == "UP":
        colorize = PaastaColors.default
    elif state in ('DOWN', 'MAINT'):
        colorize = PaastaColors.red
    else:
        colorize = PaastaColors.yellow
    status = colorize(state)

    lastcheck = "%s/%s in %sms" % (backend['check_status'], backend['check_code'], backend['check_duration'])
    seconds_since_change = int(backend['lastchg'])
    lastchange = humanize.naturaltime(datetime.timedelta(seconds=seconds_since_change))

    row = ('      %s' % pretty_backend_name, lastcheck, lastchange, status)
    if not is_correct_instance:
        return tuple(PaastaColors.grey(remove_ansi_escape_sequences(col)) for col in row)
    return row
Example #12
0
def paasta_rollback(args):
    """Call mark_for_deployment with rollback parameters
    :param args: contains all the arguments passed onto the script: service,
    deploy groups and sha. These arguments will be verified and passed onto
    mark_for_deployment.
    """
    service = figure_out_service_name(args)
    git_url = get_git_url(service)
    commit = args.commit
    given_deploy_groups = [deploy_group for deploy_group in args.deploy_groups.split(",") if deploy_group]

    service_deploy_groups = set(config.get_deploy_group() for config in get_instance_config_for_service(
        soa_dir=DEFAULT_SOA_DIR,
        service=service,
    ))
    deploy_groups, invalid = validate_given_deploy_groups(service_deploy_groups, given_deploy_groups)
    if len(invalid) > 0:
        print PaastaColors.yellow("These deploy groups are not valid and will be skipped: %s.\n" % (",").join(invalid))

    if len(deploy_groups) == 0:
        print PaastaColors.red("ERROR: No valid deploy groups specified for %s.\n" % (service))
        returncode = 1

    for deploy_group in deploy_groups:
        returncode = mark_for_deployment(
            git_url=git_url,
            service=service,
            deploy_group=deploy_group,
            commit=commit,
        )

    sys.exit(returncode)
Example #13
0
def report_status_for_cluster(service, cluster, deploy_pipeline, actual_deployments, instance_whitelist, verbose=0):
    """With a given service and cluster, prints the status of the instances
    in that cluster"""
    print
    print "cluster: %s" % cluster
    seen_instances = []
    for namespace in deploy_pipeline:
        cluster_in_pipeline, instance = namespace.split('.')
        seen_instances.append(instance)

        if cluster_in_pipeline != cluster:
            continue
        if instance_whitelist and instance not in instance_whitelist:
            continue

        # Case: service deployed to cluster.instance
        if namespace in actual_deployments:
            formatted_instance = PaastaColors.blue(instance)
            version = actual_deployments[namespace][:8]
            # TODO: Perform sanity checks once per cluster instead of for each namespace
            status = execute_paasta_serviceinit_on_remote_master('status', cluster, service, instance, verbose=verbose)
        # Case: service NOT deployed to cluster.instance
        else:
            formatted_instance = PaastaColors.red(instance)
            version = 'None'
            status = None

        print '  instance: %s' % formatted_instance
        print '    Git sha:    %s' % version
        if status is not None:
            for line in status.rstrip().split('\n'):
                print '    %s' % line

    print report_invalid_whitelist_values(instance_whitelist, seen_instances, 'instance')
Example #14
0
def status_mesos_tasks_verbose(job_id, get_short_task_id):
    """Return a detailed, table-formatted report of a job's mesos tasks.

    The report has a "Running Tasks" table followed by a greyed-out
    "Non-Running Tasks" table. The latter holds the last ten entries returned
    by get_non_running_tasks_from_active_frameworks, in reverse order.

    :param job_id: An id used for looking up Mesos tasks
    :param get_short_task_id: A function which given a
                              task_id returns a short task_id suitable for
                              printing.
    :returns: the report as a single newline-joined string
    """
    running_tasks = get_running_tasks_from_active_frameworks(job_id)
    lines = ["  Running Tasks:"]
    running_rows = [[
        "Mesos Task ID",
        "Host deployed to",
        "Ram",
        "CPU",
        "Deployed at what localtime"
    ]]
    running_rows.extend(format_running_mesos_task_row(task, get_short_task_id) for task in running_tasks)
    lines.extend("    %s" % row for row in format_table(running_rows))

    recent_non_running = get_non_running_tasks_from_active_frameworks(job_id)[-10:]
    lines.append(PaastaColors.grey("  Non-Running Tasks"))
    header_titles = ("Mesos Task ID", "Host deployed to", "Deployed at what localtime", "Status")
    non_running_rows = [[PaastaColors.grey(title) for title in header_titles]]
    for task in reversed(recent_non_running):
        non_running_rows.append(format_non_running_mesos_task_row(task, get_short_task_id))
    lines.extend("    %s" % row for row in format_table(non_running_rows))

    return "\n".join(lines)
Example #15
0
def main():
    """Run the Mesos, Marathon and Chronos health checks and exit.

    Exits with 2 when the Mesos master, Marathon or Chronos cannot be
    contacted, or when any check is unhealthy; exits with 0 otherwise.
    A framework without configuration is reported as healthy with a
    "not configured" message.
    """
    args = parse_args()
    marathon_config = None
    chronos_config = None

    try:
        mesos_state = get_mesos_state_from_leader()
    except MasterNotAvailableException as err:
        # Without a reachable master nothing else can be checked.
        print(PaastaColors.red("CRITICAL:  %s" % err.message))
        sys.exit(2)
    mesos_results = get_mesos_status(mesos_state, verbosity=args.verbose,
                                     humanize_output=args.humanize)

    # A missing Marathon config means Marathon is not expected here.
    try:
        marathon_config = marathon_tools.load_marathon_config()
    except MarathonNotConfigured:
        marathon_results = [('marathon is not configured to run here', True)]

    # A missing Chronos config means Chronos is not expected here.
    try:
        chronos_config = load_chronos_config()
    except ChronosNotConfigured:
        chronos_results = [('chronos is not configured to run here', True)]

    if marathon_config:
        marathon_client = get_marathon_client(marathon_config)
        try:
            marathon_results = get_marathon_status(marathon_client)
        except MarathonError as err:
            print(PaastaColors.red("CRITICAL: Unable to contact Marathon! Error: %s" % err))
            sys.exit(2)

    if chronos_config:
        chronos_client = get_chronos_client(chronos_config)
        try:
            chronos_results = get_chronos_status(chronos_client)
        except ServerNotFoundError as err:
            print(PaastaColors.red("CRITICAL: Unable to contact Chronos! Error: %s" % err))
            sys.exit(2)

    mesos_ok = all(status_for_results(mesos_results))
    marathon_ok = all(status_for_results(marathon_results))
    chronos_ok = all(status_for_results(chronos_results))

    mesos_summary = generate_summary_for_check("Mesos", mesos_ok)
    marathon_summary = generate_summary_for_check("Marathon", marathon_ok)
    chronos_summary = generate_summary_for_check("Chronos", chronos_ok)

    print_results_for_healthchecks(mesos_summary, mesos_ok, mesos_results, args.verbose)
    print_results_for_healthchecks(marathon_summary, marathon_ok, marathon_results, args.verbose)
    print_results_for_healthchecks(chronos_summary, chronos_ok, chronos_results, args.verbose)

    everything_ok = mesos_ok and marathon_ok and chronos_ok
    sys.exit(0 if everything_ok else 2)
Example #16
0
 def get_desired_state_human(self):
     """Return a colorized, human-readable label for the desired state.

     "start" maps to a bold "Scheduled", "stop" to a bold "Disabled", and
     anything else to a red "Unknown" message that includes the raw value.
     """
     state = self.get_desired_state()
     if state == "start":
         label = "Scheduled"
     elif state == "stop":
         label = "Disabled"
     else:
         return PaastaColors.red("Unknown (desired_state: %s)" % state)
     return PaastaColors.bold(label)
Example #17
0
def format_non_running_mesos_task_row(task, get_short_task_id):
    """Return a greyed-out row describing a non-running mesos task.

    :param task: task mapping; its 'id' and 'state' keys are read here
    :param get_short_task_id: function that shortens a task id for display
    :returns: tuple of (short task id, short hostname, first status
              timestamp, state), each wrapped in grey
    """
    short_id = PaastaColors.grey(get_short_task_id(task['id']))
    hostname = PaastaColors.grey(get_short_hostname_from_task(task))
    first_seen = PaastaColors.grey(get_first_status_timestamp(task))
    state = PaastaColors.grey(task['state'])
    return (short_id, hostname, first_seen, state)
def _format_schedule(job):
    """Describe a job's schedule and epsilon as "<schedule> Epsilon: <epsilon>".

    A job with a non-None 'parents' entry is shown as a dependent job instead
    of a schedule. A missing "schedule" or "epsilon" is shown as a red
    "UNKNOWN".
    """
    if job.get('parents') is None:
        schedule = job.get("schedule", PaastaColors.red("UNKNOWN"))
    else:
        schedule = PaastaColors.yellow("None (Dependent Job).")
    epsilon = job.get("epsilon", PaastaColors.red("UNKNOWN"))
    return "%s Epsilon: %s" % (schedule, epsilon)
Example #19
0
 def get_desired_state_human(self):
     """Return a colorized, human-readable label for the desired state.

     'start' maps to a bold 'Scheduled', 'stop' to a bold 'Disabled', and
     anything else to a red 'Unknown' message that includes the raw value.
     """
     state = self.get_desired_state()
     if state == 'start':
         label = 'Scheduled'
     elif state == 'stop':
         label = 'Disabled'
     else:
         return PaastaColors.red('Unknown (desired_state: %s)' % state)
     return PaastaColors.bold(label)
Example #20
0
def desired_state_human(desired_state, instances):
    """Return a colorized, human-readable label for a desired state.

    :param desired_state: 'start' or 'stop'; other values give a red
                          'Unknown' message that includes the raw value
    :param instances: instance count; 'start' with zero instances is shown as
                      a bold 'Stopped' rather than 'Started'
    """
    if desired_state == 'start':
        if instances != 0:
            return PaastaColors.bold('Started')
        if instances == 0:
            return PaastaColors.bold('Stopped')
    if desired_state == 'stop':
        return PaastaColors.red('Stopped')
    return PaastaColors.red('Unknown (desired_state: %s)' % desired_state)
Example #21
0
def bouncing_status_human(app_count, bounce_method):
    """Return a colorized label for the bounce state implied by app_count.

    Zero apps is "Disabled", exactly one is "Configured", more than one is
    "Bouncing" with the bounce method; any other count is reported as
    "Unknown".
    """
    if app_count == 0:
        return PaastaColors.red("Disabled")
    if app_count == 1:
        return PaastaColors.green("Configured")
    if app_count > 1:
        return PaastaColors.yellow("Bouncing (%s)" % bounce_method)
    return PaastaColors.red("Unknown (count: %s)" % app_count)
Example #22
0
 def get_desired_state_human(self):
     """Return a colorized, human-readable label for the desired state.

     'start' is shown as a bold 'Started', or a bold 'Stopped' when
     get_instances() is 0; 'stop' is a red 'Stopped'; anything else is a red
     'Unknown' message that includes the raw value.
     """
     state = self.get_desired_state()
     if state == 'start':
         if self.get_instances() != 0:
             return PaastaColors.bold('Started')
         if self.get_instances() == 0:
             return PaastaColors.bold('Stopped')
     if state == 'stop':
         return PaastaColors.red('Stopped')
     return PaastaColors.red('Unknown (desired_state: %s)' % state)
def _format_mesos_status(job, running_tasks):
    """Return a colorized label for how many tasks of a job are running.

    :param job: unused here; kept so existing callers keep working
    :param running_tasks: sized collection of the job's running tasks
    :returns: grey "Not running" for no tasks, yellow "Running" for exactly
              one, and a red "Critical" message with the count otherwise
    """
    # The three branches are exhaustive, so no "UNKNOWN" placeholder is needed.
    num_tasks = len(running_tasks)
    if num_tasks == 0:
        return PaastaColors.grey("Not running")
    if num_tasks == 1:
        return PaastaColors.yellow("Running")
    return PaastaColors.red("Critical - %d tasks running (expected 1)" % num_tasks)
Example #24
0
 def get_desired_state_human(self):
     """Return a colorized, human-readable label for the desired state.

     "start" is shown as a bold "Started", or a bold "Stopped" when
     get_instances() is 0; "stop" is a red "Stopped"; anything else is a red
     "Unknown" message that includes the raw value.
     """
     state = self.get_desired_state()
     if state == "start":
         if self.get_instances() != 0:
             return PaastaColors.bold("Started")
         if self.get_instances() == 0:
             return PaastaColors.bold("Stopped")
     if state == "stop":
         return PaastaColors.red("Stopped")
     return PaastaColors.red("Unknown (desired_state: %s)" % state)
Example #25
0
def print_results_for_healthchecks(ok, results, verbose, indent=2):
    """Print health check results, indented by `indent`.

    With verbose >= 1 every result is printed, unhealthy ones in red.
    Otherwise nothing is printed when ok is truthy, and only the results
    selected by critical_events_in_outputs are printed (in red) when it is
    not.
    """
    if verbose >= 1:
        for result in results:
            message = result.message if result.healthy else PaastaColors.red(result.message)
            print_with_indent(message, indent)
        return

    if ok:
        return
    for result in critical_events_in_outputs(results):
        print_with_indent(PaastaColors.red(result.message), indent)
def _format_schedule(job):
    """Describe a job's schedule as "<schedule> (<time zone>) Epsilon: <epsilon>".

    A job with a non-None 'parents' entry is shown as a dependent job instead
    of a schedule. A missing "schedule" or "epsilon" is shown as a red
    "UNKNOWN". A missing or "null" "scheduleTimeZone" is shown as "UTC".
    """
    if job.get('parents') is None:
        schedule = job.get("schedule", PaastaColors.red("UNKNOWN"))
    else:
        schedule = PaastaColors.yellow("None (Dependent Job).")
    epsilon = job.get("epsilon", PaastaColors.red("UNKNOWN"))

    time_zone = job.get("scheduleTimeZone", "null")
    if time_zone == "null":  # This is what Chronos returns.
        time_zone = "UTC"
    return "%s (%s) Epsilon: %s" % (schedule, time_zone, epsilon)
Example #27
0
def assert_cpu_health(metrics, threshold=10):
    """Check CPU usage from mesos metrics against a threshold.

    :param metrics: metrics passed on to get_mesos_cpu_status
    :param threshold: value handed to check_threshold along with the
                      percentage of CPUs in use
    :returns: (message, healthy) tuple; a usage summary with True when
              check_threshold passes, a red CRITICAL message with False
              otherwise
    """
    total, used, _available = get_mesos_cpu_status(metrics)
    perc_used = percent_used(total, used)

    if not check_threshold(perc_used, threshold):
        message = "CRITICAL: Less than %d%% CPUs available. (Currently using %.2f%%)" % (threshold, perc_used)
        return (PaastaColors.red(message), False)

    usage = PaastaColors.green("%.2f%%" % perc_used)
    return ("CPUs: %.2f / %d in use (%s)" % (used, total, usage), True)
Example #28
0
def get_bouncing_status(service, instance, client, job_config):
    """Return a colorized label derived from the number of matching apps.

    No matching app ids is "Stopped", exactly one is "Running", and more than
    one is "Bouncing" with the job's bounce method.
    """
    matching_apps = marathon_tools.get_matching_appids(service, instance, client)
    bounce_method = job_config.get_bounce_method()
    app_count = len(matching_apps)

    if app_count == 0:
        return PaastaColors.red("Stopped")
    if app_count == 1:
        return PaastaColors.green("Running")
    if app_count > 1:
        return PaastaColors.yellow("Bouncing (%s)" % bounce_method)
    return PaastaColors.red("Unknown (count: %s)" % app_count)
def test_status_smartstack_backends_multiple_locations():
    """status_smartstack_backends should query and report every location.

    Two locations with one host each are faked. The test checks that
    get_backends is called for both hosts with DEFAULT_SYNAPSE_PORT and that
    both locations appear as Healthy in the returned output.
    """
    service, instance, cluster = "my_service", "my_instance", "fake_cluster"
    service_instance = compose_job_id(service, instance)
    matched_task = mock.Mock()
    unmatched_task = mock.Mock()
    backend = {
        "status": "UP",
        "lastchg": "1",
        "last_chk": "OK",
        "check_code": "200",
        "svname": "ipaddress1:1001_hostname1",
        "check_status": "L7OK",
        "check_duration": 1,
    }
    patches = contextlib.nested(
        mock.patch("paasta_tools.marathon_tools.load_service_namespace_config", autospec=True),
        mock.patch("paasta_tools.marathon_tools.read_namespace_for_service_instance"),
        mock.patch("paasta_tools.marathon_serviceinit.get_mesos_slaves_grouped_by_attribute"),
        mock.patch("paasta_tools.marathon_serviceinit.get_backends", autospec=True),
        mock.patch("paasta_tools.marathon_serviceinit.match_backends_and_tasks", autospec=True),
    )
    with patches as (
        mock_load_service_namespace_config,
        mock_read_ns,
        mock_get_mesos_slaves_grouped_by_attribute,
        mock_get_backends,
        mock_match_backends_and_tasks,
    ):
        mock_load_service_namespace_config.return_value.get_discover.return_value = "fake_discover"
        mock_read_ns.return_value = instance
        mock_get_backends.return_value = [backend]
        # Only one of the two tasks is matched to the backend.
        mock_match_backends_and_tasks.return_value = [(backend, matched_task)]
        tasks = [matched_task, unmatched_task]
        mock_get_mesos_slaves_grouped_by_attribute.return_value = {
            "fake_location1": ["fakehost1"],
            "fake_location2": ["fakehost2"],
        }

        actual = marathon_serviceinit.status_smartstack_backends(
            service=service,
            instance=instance,
            cluster=cluster,
            job_config=fake_marathon_job_config,
            tasks=tasks,
            expected_count=len(mock_get_backends.return_value),
            soa_dir=None,
            verbose=False,
        )

        for synapse_host in ("fakehost1", "fakehost2"):
            mock_get_backends.assert_any_call(
                service_instance,
                synapse_host=synapse_host,
                synapse_port=DEFAULT_SYNAPSE_PORT,
            )
        for location in ("fake_location1", "fake_location2"):
            assert "%s - %s" % (location, PaastaColors.green("Healthy")) in actual
Example #30
0
def get_cluster_dashboards(cluster):
    """Returns the direct dashboards for humans to use for a given cluster.

    :param cluster: cluster name used as the key into the configured
                    dashboard links
    :returns: a "Dashboards:" header followed by one aligned "label: url"
              line per dashboard, or a red "No dashboards configured"
              message when the cluster has no entry or an empty one
    """
    SPACER = ' '
    try:
        dashboards = load_system_paasta_config().get_dashboard_links()[cluster]
    except KeyError:
        dashboards = None

    # An empty mapping would make max() below raise ValueError, so it is
    # reported the same way as a missing cluster entry.
    if not dashboards:
        return PaastaColors.red('No dashboards configured for %s!' % cluster)

    output = ['Dashboards:']
    # Pad every label to the longest one plus a single separating space.
    spacing = max(len(label) for label in dashboards) + 1
    for label, url in dashboards.items():
        output.append('  %s:%s%s' % (label, SPACER * (spacing - len(label)), PaastaColors.cyan(url)))
    return '\n'.join(output)
Example #31
0
def paasta_local_run(args):
    """Build or pull a service's docker image and run it locally.

    With the ``pull`` action, when not running as root and no docker config is
    available, the current process is replaced by the same command line re-run
    under ``sudo``. With the ``build`` action the image is first cooked via
    ``paasta_cook_image``, which requires a local Makefile with a
    ``cook-image`` target.

    :param args: parsed command line arguments
    :returns: 1 for the errors handled here, the non-zero result of
        ``paasta_cook_image`` when the build fails, otherwise whatever
        ``configure_and_run_docker_container`` returns
    """
    if args.action == 'pull' and os.geteuid() != 0 and not docker_config_available():
        paasta_print("Re-executing paasta local-run --pull with sudo..")
        os.execvp("sudo", ["sudo", "-H"] + sys.argv)
    if args.action == 'build' and not makefile_responds_to('cook-image'):
        paasta_print("A local Makefile with a 'cook-image' target is required for --build", file=sys.stderr)
        paasta_print("If you meant to pull the docker image from the registry, explicitly pass --pull", file=sys.stderr)
        return 1

    try:
        system_paasta_config = load_system_paasta_config()
    except PaastaNotConfiguredError:
        # Adjacent string literals are concatenated verbatim, so each fragment
        # carries its own trailing space.
        paasta_print(
            PaastaColors.yellow(
                "Warning: Couldn't load config files from '/etc/paasta'. This indicates "
                "PaaSTA is not configured locally on this host, and local-run may not behave "
                "the same way it would behave on a server configured for PaaSTA.",
            ),
            sep='\n',
        )
        # Fall back to a minimal config so the rest of the run can proceed.
        system_paasta_config = SystemPaastaConfig({"volumes": []}, '/etc/paasta')

    local_run_config = system_paasta_config.get_local_run_config()

    service = figure_out_service_name(args, soa_dir=args.yelpsoa_config_root)
    if args.cluster:
        cluster = args.cluster
    else:
        try:
            cluster = local_run_config['default_cluster']
        except KeyError:
            paasta_print(
                PaastaColors.red(
                    "PaaSTA on this machine has not been configured with a default cluster. "
                    "Please pass one to local-run using '-c'.",
                ),
                sep='\n',
                file=sys.stderr,
            )
            return 1
    instance = args.instance
    docker_client = get_docker_client()

    if args.action == 'build':
        default_tag = 'paasta-local-run-{}-{}'.format(service, get_username())
        tag = os.environ.get('DOCKER_TAG', default_tag)
        # Exported so the cook-image step sees the same tag that is run below.
        os.environ['DOCKER_TAG'] = tag
        pull_image = False
        cook_return = paasta_cook_image(args=None, service=service, soa_dir=args.yelpsoa_config_root)
        if cook_return != 0:
            return cook_return
    elif args.action == 'dry_run':
        pull_image = False
        tag = None
    else:
        pull_image = True
        tag = None

    try:
        return configure_and_run_docker_container(
            docker_client=docker_client,
            docker_hash=tag,
            service=service,
            instance=instance,
            cluster=cluster,
            args=args,
            pull_image=pull_image,
            system_paasta_config=system_paasta_config,
            dry_run=args.action == 'dry_run',
        )
    except errors.APIError as e:
        paasta_print(
            'Can\'t run Docker container. Error: %s' % str(e),
            file=sys.stderr,
        )
        return 1
Example #32
0
 def service_dir_found(service, soa_dir):
     """Return the ``success`` message saying where the service's yelpsoa-config directory was found."""
     highlighted_service = PaastaColors.cyan(service)
     return success("yelpsoa-config directory for %s found in %s" % (highlighted_service, soa_dir))
Example #33
0
def test_get_mem_usage_bad():
    """A task whose rss equals its memory limit is rendered in red."""
    hundred_mb = 100 * 1024 * 1024
    task_at_limit = mock.create_autospec(mesos.task.Task)
    task_at_limit.rss = hundred_mb
    task_at_limit.mem_limit = hundred_mb
    assert mesos_tools.get_mem_usage(task_at_limit) == PaastaColors.red('100/100MB')
def test_status_smartstack_backends_verbose_multiple_locations():
    """Verbose smartstack status reports every location together with its backend."""
    service = 'my_service'
    instance = 'my_instance'
    service_instance = compose_job_id(service, instance)
    cluster = 'fake_cluster'
    good_task = mock.Mock()
    other_task = mock.Mock()
    # Both backends share every field except ``svname``.
    shared_backend_fields = {
        'status': 'UP',
        'lastchg': '1',
        'last_chk': 'OK',
        'check_code': '200',
        'check_status': 'L7OK',
        'check_duration': 1,
    }
    fake_backend = dict(shared_backend_fields, svname='ipaddress1:1001_hostname1')
    fake_other_backend = dict(shared_backend_fields, svname='ipaddress1:1002_hostname2')

    patch_namespace_config = mock.patch(
        'paasta_tools.marathon_tools.load_service_namespace_config',
        autospec=True,
    )
    patch_read_registration = mock.patch(
        'paasta_tools.marathon_tools.read_registration_for_service_instance',
        autospec=True,
    )
    patch_get_slaves = mock.patch(
        'paasta_tools.marathon_serviceinit.get_all_slaves_for_blacklist_whitelist',
        autospec=True,
    )
    # One backend list per queried host, in call order.
    patch_get_backends = mock.patch(
        'paasta_tools.marathon_serviceinit.get_backends',
        autospec=True,
        side_effect=[[fake_backend], [fake_other_backend]],
    )
    patch_match_backends = mock.patch(
        'paasta_tools.marathon_serviceinit.match_backends_and_tasks',
        autospec=True,
        side_effect=[
            [(fake_backend, good_task)],
            [(fake_other_backend, good_task)],
        ],
    )

    with patch_namespace_config as mock_load_service_namespace_config, \
            patch_read_registration as mock_read_reg, \
            patch_get_slaves as mock_get_all_slaves_for_blacklist_whitelist, \
            patch_get_backends as mock_get_backends, \
            patch_match_backends:
        mock_load_service_namespace_config.return_value.get_discover.return_value = 'fake_discover'
        mock_read_reg.return_value = service_instance
        mock_get_all_slaves_for_blacklist_whitelist.return_value = [
            {'hostname': hostname, 'attributes': {'fake_discover': location}}
            for hostname, location in (
                ('hostname1', 'fakelocation'),
                ('hostname2', 'fakeotherlocation'),
            )
        ]

        actual = marathon_serviceinit.status_smartstack_backends(
            service=service,
            instance=instance,
            cluster=cluster,
            job_config=fake_marathon_job_config,
            tasks=[good_task, other_task],
            expected_count=1,
            soa_dir=None,
            verbose=True,
            synapse_port=123456,
            synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT,
            system_deploy_blacklist=[],
            system_deploy_whitelist=[],
        )

        # Backends must have been fetched from each slave's synapse.
        for synapse_host in ('hostname1', 'hostname2'):
            mock_get_backends.assert_any_call(
                service_instance,
                synapse_host=synapse_host,
                synapse_port=123456,
                synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT,
            )
        mock_get_all_slaves_for_blacklist_whitelist.assert_called_once_with(
            blacklist=[],
            whitelist=[],
        )

        healthy = PaastaColors.green('Healthy')
        expected_fragments = (
            "fakelocation - %s" % healthy,
            "hostname1:1001",
            "fakeotherlocation - %s" % healthy,
            "hostname2:1002",
        )
        for fragment in expected_fragments:
            assert fragment in actual
Example #35
0
def _healthcheck_utilization_pairs(resource_info, threshold):
    """Build the healthcheck/utilization pairs for one resource info dict.

    ``resource_info`` must carry ``total`` and ``free`` entries; ``threshold``
    is forwarded unchanged to the metastatus_lib pair builder.
    """
    resource_utilizations = metastatus_lib.resource_utillizations_from_resource_info(
        total=resource_info['total'],
        free=resource_info['free'],
    )
    return [
        metastatus_lib.healthcheck_result_resource_utilization_pair_for_resource_utilization(
            utilization, threshold,
        )
        for utilization in resource_utilizations
    ]


def main(argv=None):
    """Print the health of Mesos, Marathon and Chronos, then exit.

    The process always terminates through ``sys.exit``: status 0 when every
    check (including the per-grouping utilization checks shown at
    ``verbose > 1``) is healthy, status 2 otherwise or when the Mesos master,
    Marathon or Chronos cannot be contacted.

    :param argv: command line arguments handed to ``parse_args``
    """
    args = parse_args(argv)

    master = get_mesos_master()
    try:
        mesos_state = master.state
    except MasterNotAvailableException as e:
        # if we can't connect to master at all,
        # then bomb out early
        # (the exception itself is formatted: ``e.message`` is not available
        # on Python 3 exceptions unless the class defines it)
        paasta_print(PaastaColors.red("CRITICAL:  %s" % e))
        sys.exit(2)

    mesos_state_status = metastatus_lib.get_mesos_state_status(
        mesos_state=mesos_state,
    )

    metrics = master.metrics_snapshot()
    mesos_metrics_status = metastatus_lib.get_mesos_resource_utilization_health(
        mesos_metrics=metrics, mesos_state=mesos_state,
    )
    framework_metrics_healthchecks = metastatus_lib.get_framework_metrics_status(
        metrics=metrics,
    )

    all_mesos_results = mesos_state_status + mesos_metrics_status + framework_metrics_healthchecks

    # Check to see if Marathon should be running here by checking for config
    marathon_config = marathon_tools.load_marathon_config()

    # Check to see if Chronos should be running here by checking for config
    chronos_config = load_chronos_config()

    if marathon_config:
        marathon_client = metastatus_lib.get_marathon_client(marathon_config)
        try:
            marathon_results = metastatus_lib.get_marathon_status(marathon_client)
        except (MarathonError, InternalServerError, ValueError):
            # catch ValueError until marathon-python/pull/167 is merged and this is handled upstream
            paasta_print(
                PaastaColors.red(
                    "CRITICAL: Unable to contact Marathon cluster at {}! "
                    "Is the cluster healthy?".format(marathon_config["url"]),
                ),
            )
            sys.exit(2)
    else:
        marathon_results = [
            metastatus_lib.HealthCheckResult(
                message='Marathon is not configured to run here', healthy=True,
            ),
        ]

    if chronos_config:
        chronos_client = get_chronos_client(chronos_config)
        try:
            chronos_results = metastatus_lib.get_chronos_status(chronos_client)
        except chronos.ChronosAPIError as e:
            paasta_print(
                PaastaColors.red(
                    "CRITICAL: Unable to contact Chronos! Error: %s" % e,
                ),
            )
            sys.exit(2)
    else:
        chronos_results = [
            metastatus_lib.HealthCheckResult(
                message='Chronos is not configured to run here', healthy=True,
            ),
        ]

    mesos_ok = all(metastatus_lib.status_for_results(all_mesos_results))
    marathon_ok = all(metastatus_lib.status_for_results(marathon_results))
    chronos_ok = all(metastatus_lib.status_for_results(chronos_results))

    mesos_summary = metastatus_lib.generate_summary_for_check("Mesos", mesos_ok)
    marathon_summary = metastatus_lib.generate_summary_for_check("Marathon", marathon_ok)
    chronos_summary = metastatus_lib.generate_summary_for_check("Chronos", chronos_ok)

    healthy_exit = all([mesos_ok, marathon_ok, chronos_ok])

    paasta_print("Master paasta_tools version: {}".format(__version__))
    metastatus_lib.print_results_for_healthchecks(
        mesos_summary, mesos_ok, all_mesos_results, args.verbose,
    )
    if args.verbose > 1:
        for grouping in args.groupings:
            print_with_indent('Resources Grouped by %s' % grouping, 2)
            grouping_function = metastatus_lib.key_func_for_attribute(grouping)
            resource_info_by_value = metastatus_lib.get_resource_utilization_by_grouping(
                grouping_function, mesos_state,
            )
            all_rows = [[
                grouping.capitalize(), 'CPU (used/total)', 'RAM (used/total)',
                'Disk (used/total)',
            ]]
            table_rows = []
            for attribute_value, resource_info in resource_info_by_value.items():
                healthcheck_utilization_pairs = _healthcheck_utilization_pairs(
                    resource_info, args.threshold,
                )
                # Accumulate instead of overwriting, so a healthy group cannot
                # mask an earlier unhealthy group or a failed service check.
                healthy_exit = healthy_exit and all(
                    pair[0].healthy for pair in healthcheck_utilization_pairs
                )
                table_rows.append(
                    metastatus_lib.get_table_rows_for_resource_info_dict(
                        attribute_value, healthcheck_utilization_pairs, args.humanize,
                    ),
                )
            all_rows.extend(sorted(table_rows, key=lambda x: x[0]))
            for line in format_table(all_rows):
                print_with_indent(line, 4)

        if args.autoscaling_info:
            print_with_indent("Autoscaling resources:", 2)
            headers = [
                field.replace("_", " ").capitalize()
                for field in AutoscalingInfo._fields
            ]
            table = [headers] + list(get_autoscaling_info_for_all_resources())

            for line in format_table(table):
                print_with_indent(line, 4)

        if args.verbose == 3:
            print_with_indent('Per Slave Utilization', 2)
            slave_resource_dict = metastatus_lib.get_resource_utilization_by_grouping(
                lambda slave: slave['hostname'], mesos_state,
            )
            all_rows = [[
                'Hostname', 'CPU (used/total)', 'RAM (used//total)',
                'Disk (used//total)',
            ]]

            # print info about slaves here. Note that we don't make modifications to
            # the healthy_exit variable here, because we don't care about a single slave
            # having high usage.
            slave_rows = []
            for hostname, resource_info in slave_resource_dict.items():
                healthcheck_utilization_pairs = _healthcheck_utilization_pairs(
                    resource_info, args.threshold,
                )
                slave_rows.append(
                    metastatus_lib.get_table_rows_for_resource_info_dict(
                        hostname, healthcheck_utilization_pairs, args.humanize,
                    ),
                )
            # Sort once over all slaves; sorting each single-row list
            # separately left the table in dict order.
            all_rows.extend(sorted(slave_rows, key=lambda x: x[0]))
            for line in format_table(all_rows):
                print_with_indent(line, 4)
    metastatus_lib.print_results_for_healthchecks(
        marathon_summary, marathon_ok, marathon_results, args.verbose,
    )
    metastatus_lib.print_results_for_healthchecks(
        chronos_summary, chronos_ok, chronos_results, args.verbose,
    )

    sys.exit(0 if healthy_exit else 2)
Example #36
0
def _format_disabled_status(job):
    """Return a colored label saying whether ``job`` is scheduled.

    A job counts as scheduled unless its ``disabled`` entry is truthy.
    """
    if not job.get("disabled", False):
        return PaastaColors.green("Scheduled")
    return PaastaColors.grey("Not scheduled")
Example #37
0
def configure_and_run_docker_container(
        docker_client,
        docker_hash,
        service,
        instance,
        cluster,
        system_paasta_config,
        args,
        pull_image=False,
        dry_run=False,
):
    """
    Run Docker container by image hash with args set in command line.
    Function prints the output of run command in stdout.

    When ``instance`` is None an interactive ``adhoc`` instance is used
    instead. When ``docker_hash`` is None the image is taken from the instance
    config's docker url.

    :param docker_client: client forwarded to ``run_docker_container``
    :param docker_hash: image to run, or None to use the deployed image
    :param service: name of the service
    :param instance: name of the instance, or None for an interactive session
    :param cluster: name of the cluster the instance config is loaded for
    :param system_paasta_config: system config supplying volumes and secret provider settings
    :param args: parsed command line arguments
    :param pull_image: whether to pull the image before running it
    :param dry_run: forwarded to ``run_docker_container``
    :returns: 1 for the errors handled here, otherwise the result of ``run_docker_container``
    """

    if instance is None and args.healthcheck_only:
        paasta_print(
            "With --healthcheck-only, --instance MUST be provided!",
            file=sys.stderr,
        )
        return 1
    if instance is None and not sys.stdin.isatty():
        paasta_print(
            "--instance and --cluster must be specified when using paasta local-run without a tty!",
            file=sys.stderr,
        )
        return 1

    soa_dir = args.yelpsoa_config_root
    volumes = []
    # Deployment info is only needed when the image comes from the deployed sha.
    load_deployments = docker_hash is None or pull_image
    interactive = args.interactive

    try:
        if instance is None:
            instance_type = 'adhoc'
            instance = 'interactive'
            instance_config = get_default_interactive_config(
                service=service,
                cluster=cluster,
                soa_dir=soa_dir,
                load_deployments=load_deployments,
            )
            interactive = True
        else:
            instance_type = validate_service_instance(service, instance, cluster, soa_dir)
            instance_config = get_instance_config(
                service=service,
                instance=instance,
                cluster=cluster,
                load_deployments=load_deployments,
                soa_dir=soa_dir,
            )
    except NoConfigurationForServiceError as e:
        paasta_print(str(e), file=sys.stderr)
        return 1
    except NoDeploymentsAvailable:
        # Adjacent string literals are concatenated verbatim, so each fragment
        # carries its own trailing space.
        paasta_print(
            PaastaColors.red(
                "Error: No deployments.json found in %(soa_dir)s/%(service)s. "
                "You can generate this by running: "
                "generate_deployments_for_service -d %(soa_dir)s -s %(service)s" % {
                    'soa_dir': soa_dir,
                    'service': service,
                },
            ),
            sep='\n',
            file=sys.stderr,
        )
        return 1

    if docker_hash is None:
        try:
            docker_hash = instance_config.get_docker_url()
        except NoDockerImageError:
            paasta_print(
                PaastaColors.red(
                    "Error: No sha has been marked for deployment for the %s deploy group.\n"
                    "Please ensure this service has either run through a jenkins pipeline "
                    "or paasta mark-for-deployment has been run for %s\n" % (
                        instance_config.get_deploy_group(), service,
                    ),
                ),
                sep='',
                file=sys.stderr,
            )
            return 1

    if pull_image:
        # Pull whatever image is about to be run; a separate ``docker_url``
        # local was unbound whenever the caller supplied ``docker_hash``.
        docker_pull_image(docker_hash)

    for volume in instance_config.get_volumes(system_paasta_config.get_volumes()):
        if os.path.exists(volume['hostPath']):
            volumes.append('{}:{}:{}'.format(volume['hostPath'], volume['containerPath'], volume['mode'].lower()))
        else:
            paasta_print(
                PaastaColors.yellow(
                    "Warning: Path %s does not exist on this host. Skipping this binding." % volume['hostPath'],
                ),
            )

    # Command precedence: interactive default, then --cmd, then the instance config.
    if interactive is True and args.cmd is None:
        command = 'bash'
    elif args.cmd:
        command = args.cmd
    else:
        command_from_config = instance_config.get_cmd()
        if command_from_config:
            command_modifier = command_function_for_framework(instance_type)
            command = command_modifier(command_from_config)
        else:
            command = instance_config.get_args()

    secret_provider_kwargs = {
        'vault_cluster_config': system_paasta_config.get_vault_cluster_config(),
        'vault_auth_method': args.vault_auth_method,
        'vault_token_file': args.vault_token_file,
    }

    return run_docker_container(
        docker_client=docker_client,
        service=service,
        instance=instance,
        docker_hash=docker_hash,
        volumes=volumes,
        interactive=interactive,
        command=command,
        healthcheck=args.healthcheck,
        healthcheck_only=args.healthcheck_only,
        user_port=args.user_port,
        instance_config=instance_config,
        soa_dir=args.yelpsoa_config_root,
        dry_run=dry_run,
        json_dict=args.dry_run_json_dict,
        framework=instance_type,
        secret_provider_name=system_paasta_config.get_secret_provider_name(),
        secret_provider_kwargs=secret_provider_kwargs,
        skip_secrets=args.skip_secrets,
    )
Example #38
0
def test_assert_memory_health():
    """assert_memory_health reports a healthy 50% usage in green for (1024, 512, 512)."""
    output, healthy = metastatus_lib.assert_memory_health((1024, 512, 512))
    assert healthy
    expected = "Memory: 0.50 / 1.00GB in use (%s)" % PaastaColors.green("50.00%")
    assert expected in output
Example #39
0
def print_flinkcluster_status(
    cluster: str,
    service: str,
    instance: str,
    output: List[str],
    status,
    verbose: int,
) -> int:
    """Append a human-readable status report for a Flink cluster to ``output``.

    :param cluster: cluster name, used to build the dashboard URL
    :param service: service name, used to build the dashboard URL
    :param instance: instance name, used to build the dashboard URL
    :param output: list of report lines; it is appended to in place
    :param status: status object exposing ``state``, ``config``, ``overview``,
        ``jobs`` and ``exceptions``, or None when no status is available
    :param verbose: when truthy, also show the Flink revision, the job ids and
        a dashboard link per job
    :returns: 1 when ``status`` is None, otherwise 0
    """
    if status is None:
        output.append(
            PaastaColors.red("    Flink cluster is not available yet"))
        return 1

    if status.state != "running":
        output.append("    State: {state}".format(
            state=PaastaColors.yellow(status.state),
        ))
        output.append(
            "    No other information available in non-running state")
        return 0

    dashboard_url = get_dashboard_url(
        cluster=cluster,
        service=service,
        instance=instance,
    )
    if verbose:
        output.append(
            f"    Flink version: {status.config['flink-version']} {status.config['flink-revision']}"
        )
    else:
        output.append(f"    Flink version: {status.config['flink-version']}")
    output.append(f"    URL: {dashboard_url}/")
    output.append(f"    State: {status.state}")
    output.append(
        "    Jobs:"
        f" {status.overview['jobs-running']} running,"
        f" {status.overview['jobs-finished']} finished,"
        f" {status.overview['jobs-failed']} failed,"
        f" {status.overview['jobs-cancelled']} cancelled",
    )
    output.append(
        "   "
        f" {status.overview['taskmanagers']} taskmanagers,"
        f" {status.overview['slots-available']}/{status.overview['slots-total']} slots available",
    )

    output.append("    Jobs:")
    if verbose:
        output.append(
            "      Job Name                         State       Job ID                           Started"
        )
    else:
        output.append(
            "      Job Name                         State       Started")
    # Use only the most recent jobs: groupby needs its input sorted by the
    # grouping key, then the latest start time wins within each name.
    jobs_by_name = groupby(
        sorted(status.jobs, key=lambda j: j['name']),
        lambda j: j['name'],
    )
    unique_jobs = (
        max(jobs, key=lambda j: j['start-time']) for _, jobs in jobs_by_name
    )
    for job in unique_jobs:
        job_id = job['jid']
        if verbose:
            fmt = (
                "      {job_name: <32.32} {state: <11} {job_id} {start_time}\n"
                "        {dashboard_url}"
            )
        else:
            fmt = "      {job_name: <32.32} {state: <11} {start_time}"
        # Timestamps are in milliseconds.
        start_time = datetime_from_utc_to_local(
            datetime.utcfromtimestamp(int(job['start-time']) // 1000))
        output.append(
            fmt.format(
                job_id=job_id,
                # Drop up to two dotted prefixes; [-1] keeps names with fewer
                # than two dots from raising IndexError.
                job_name=job['name'].split('.', 2)[-1],
                state=job['state'],
                start_time=f'{str(start_time)} ({humanize.naturaltime(start_time)})',
                dashboard_url=PaastaColors.grey(
                    f'{dashboard_url}/#/jobs/{job_id}',
                ),
            ))
        if job_id in status.exceptions:
            exceptions = status.exceptions[job_id]
            root_exception = exceptions['root-exception']
            if root_exception is not None:
                output.append(f"        Exception: {root_exception}")
                ts = exceptions['timestamp']
                if ts is not None:
                    exc_ts = datetime_from_utc_to_local(
                        datetime.utcfromtimestamp(int(ts) // 1000))
                    output.append(
                        f"            {str(exc_ts)} ({humanize.naturaltime(exc_ts)})"
                    )
    return 0
Example #40
0
def test_ok_cpu_health():
    """assert_cpu_health reports a healthy 10% usage in green for (10, 1, 9)."""
    output, healthy = metastatus_lib.assert_cpu_health((10, 1, 9))
    assert healthy
    expected = "CPUs: 1.00 / 10 in use (%s)" % PaastaColors.green("10.00%")
    assert expected in output
Example #41
0
def test_generate_summary_for_results_critical():
    """A failed check is summarized with a red CRITICAL marker."""
    summary = metastatus_lib.generate_summary_for_check("Myservice", False)
    expected = "Myservice Status: %s" % PaastaColors.red("CRITICAL")
    assert summary == expected
Example #42
0
def test_generate_summary_for_results_ok():
    """A passing check is summarized with a green OK marker."""
    summary = metastatus_lib.generate_summary_for_check("Myservice", True)
    expected = "Myservice Status: %s" % PaastaColors.green("OK")
    assert summary == expected
Example #43
0
def _format_command(job):
    """Return the job's ``command`` entry, or a red "UNKNOWN" when the key is missing."""
    fallback = PaastaColors.red("UNKNOWN")
    return job.get("command", fallback)
Example #44
0
def configure_and_run_docker_container(
        docker_client,
        docker_hash,
        service,
        instance,
        cluster,
        system_paasta_config,
        args,
        pull_image=False,
        dry_run=False
):
    """
    Run Docker container by image hash with args set in command line.
    Function prints the output of run command in stdout.

    When ``instance`` is None an interactive ``adhoc`` instance is used
    instead. When ``docker_hash`` is None the image URL is built from the
    system docker registry and the instance config's docker image.

    :param docker_client: client forwarded to ``run_docker_container``
    :param docker_hash: image to run, or None to use the deployed image
    :param service: name of the service
    :param instance: name of the instance, or None for an interactive session
    :param cluster: name of the cluster the instance config is loaded for
    :param system_paasta_config: system config supplying the registry and volumes
    :param args: parsed command line arguments
    :param pull_image: whether to pull the image before running it
    :param dry_run: forwarded to ``run_docker_container``
    :returns: 1 for the errors handled here, otherwise the result of ``run_docker_container``
    """

    if instance is None and args.healthcheck_only:
        paasta_print(
            "With --healthcheck-only, --instance MUST be provided!",
            file=sys.stderr,
        )
        return 1
    if instance is None and not sys.stdin.isatty():
        paasta_print(
            "--instance and --cluster must be specified when using paasta local-run without a tty!",
            file=sys.stderr,
        )
        return 1

    soa_dir = args.yelpsoa_config_root
    volumes = []
    # Deployment info is only needed when the image comes from the deployed sha.
    load_deployments = docker_hash is None or pull_image
    interactive = args.interactive

    try:
        if instance is None:
            instance_type = 'adhoc'
            instance = 'interactive'
            instance_config = get_default_interactive_config(
                service=service,
                cluster=cluster,
                soa_dir=soa_dir,
                load_deployments=load_deployments,
            )
            interactive = True
        else:
            instance_type = validate_service_instance(service, instance, cluster, soa_dir)
            instance_config = get_instance_config(
                service=service,
                instance=instance,
                cluster=cluster,
                load_deployments=load_deployments,
                soa_dir=soa_dir,
            )
    except NoConfigurationForServiceError as e:
        paasta_print(str(e), file=sys.stderr)
        return 1
    except NoDeploymentsAvailable:
        # Adjacent string literals are concatenated verbatim, so each fragment
        # carries its own trailing space.
        paasta_print(
            PaastaColors.red(
                "Error: No deployments.json found in %(soa_dir)s/%(service)s. "
                "You can generate this by running: "
                "generate_deployments_for_service -d %(soa_dir)s -s %(service)s" % {
                    'soa_dir': soa_dir,
                    'service': service,
                }
            ),
            sep='\n',
            file=sys.stderr,
        )
        return 1

    if docker_hash is None:
        try:
            docker_hash = get_docker_url(
                system_paasta_config.get_docker_registry(), instance_config.get_docker_image())
        except NoDockerImageError:
            paasta_print(PaastaColors.red(
                "Error: No sha has been marked for deployment for the %s deploy group.\n"
                "Please ensure this service has either run through a jenkins pipeline "
                "or paasta mark-for-deployment has been run for %s\n" % (instance_config.get_deploy_group(), service)),
                sep='',
                file=sys.stderr,
            )
            return 1

    if pull_image:
        # Pull whatever image is about to be run; a separate ``docker_url``
        # local was unbound whenever the caller supplied ``docker_hash``.
        docker_pull_image(docker_hash)

    # if only one volume specified, extra_volumes should be converted to a list
    extra_volumes = instance_config.get_extra_volumes()
    if isinstance(extra_volumes, dict):
        extra_volumes = [extra_volumes]

    for volume in system_paasta_config.get_volumes() + extra_volumes:
        volumes.append('%s:%s:%s' % (volume['hostPath'], volume['containerPath'], volume['mode'].lower()))

    # Command precedence: interactive default, then --cmd, then the instance config.
    if interactive is True and args.cmd is None:
        command = 'bash'
    elif args.cmd:
        command = args.cmd
    else:
        command_from_config = instance_config.get_cmd()
        if command_from_config:
            command_modifier = command_function_for_framework(instance_type)
            command = command_modifier(command_from_config)
        else:
            command = instance_config.get_args()

    return run_docker_container(
        docker_client=docker_client,
        service=service,
        instance=instance,
        docker_hash=docker_hash,
        volumes=volumes,
        interactive=interactive,
        command=command,
        healthcheck=args.healthcheck,
        healthcheck_only=args.healthcheck_only,
        instance_config=instance_config,
        soa_dir=args.yelpsoa_config_root,
        dry_run=dry_run,
        json_dict=args.dry_run_json_dict,
        framework=instance_type,
    )
Example #45
0
def report_status_for_cluster(
    service: str,
    cluster: str,
    deploy_pipeline: Sequence[str],
    actual_deployments: Mapping[str, str],
    instance_whitelist: Mapping[str, Type[InstanceConfig]],
    system_paasta_config: SystemPaastaConfig,
    verbose: int = 0,
    use_api_endpoint: bool = False,
) -> Tuple[int, Sequence[str]]:
    """Collect the status of a service's instances in one cluster.

    Walks the ``cluster.instance`` entries of ``deploy_pipeline``, then asks
    the API endpoint and/or the remote master for the status of every
    deployed instance. Returns the return code together with the collected
    output lines; a non-zero remote-master code takes precedence over a
    failed API query.
    """
    output = ['', 'service: %s' % service, 'cluster: %s' % cluster]
    seen_instances = []
    deployed_instances = []
    instances = instance_whitelist.keys()

    # Split the whitelisted instances by the transport their config class requires.
    http_only_instances = []
    ssh_only_instances = []
    for whitelisted_instance, config_class in instance_whitelist.items():
        if config_class in HTTP_ONLY_INSTANCE_CONFIG:
            http_only_instances.append(whitelisted_instance)
        if config_class in SSH_ONLY_INSTANCE_CONFIG:
            ssh_only_instances.append(whitelisted_instance)

    for namespace in deploy_pipeline:
        cluster_in_pipeline, instance = namespace.split('.')
        seen_instances.append(instance)

        if cluster_in_pipeline != cluster or (instances and instance not in instances):
            continue

        # Deployed to cluster.instance, or a flinkcluster instance
        # (those don't use `deployments.json`).
        if namespace in actual_deployments or instance_whitelist.get(instance) == FlinkClusterConfig:
            deployed_instances.append(instance)
            continue

        # Service NOT deployed to cluster.instance
        output.append('  instance: %s' % PaastaColors.red(instance))
        output.append('    Git sha:    None (not deployed yet)')

    api_return_code = 0
    ssh_return_code = 0
    if deployed_instances:
        api_targets = [
            name for name in deployed_instances
            if name in http_only_instances
            or (name not in ssh_only_instances and use_api_endpoint)
        ]
        if api_targets:
            api_results = [
                paasta_status_on_api_endpoint(
                    cluster=cluster,
                    service=service,
                    instance=name,
                    output=output,
                    system_paasta_config=system_paasta_config,
                    verbose=verbose,
                )
                for name in api_targets
            ]
            if any(api_results):
                api_return_code = 1

        ssh_targets = [
            name for name in deployed_instances
            if name in ssh_only_instances
            or (name not in http_only_instances and not use_api_endpoint)
        ]
        if ssh_targets:
            ssh_return_code, status = execute_paasta_serviceinit_on_remote_master(
                'status',
                cluster,
                service,
                ','.join(ssh_targets),
                system_paasta_config,
                stream=False,
                verbose=verbose,
                ignore_ssh_output=True,
            )
            # Status results are streamed. This print is for possible error messages.
            if status is not None:
                output.extend('    %s' % line for line in status.rstrip().split('\n'))

    output.append(
        report_invalid_whitelist_values(instances, seen_instances, 'instance'))

    if ssh_return_code:
        return ssh_return_code, output
    if api_return_code:
        return api_return_code, output
    return 0, output
def paasta_start_or_stop(args, desired_state):
    """Request a change of state to start or stop given branches of a service.

    The instances to act on come from ``apply_args_filters(args)``, which is
    consumed here as a ``cluster -> service -> instances`` mapping. Flink
    instances are collected and have their state set through the paasta API
    client; every other instance gets ``issue_state_change_for_service``
    called, provided the latest deployment tag of its deploy group is present
    among the service's remote git refs.

    :param args: parsed command-line arguments; ``soa_dir``, ``clusters`` and
        ``instances`` are read directly in this function
    :param desired_state: the state to request for every selected instance
    :returns: 0 on success; 1 if nothing was selected, the user did not
        confirm, the git server could not be queried, or a deploy group had
        no deployment tag; the HTTP status code if the API rejected a Flink
        state change
    :raises SystemExit: with status 1 if no paasta API client can be obtained
        for a cluster that has Flink instances
    """
    soa_dir = args.soa_dir

    pargs = apply_args_filters(args)
    if len(pargs) == 0:
        return 1

    affected_services = {
        s
        for service_list in pargs.values() for s in service_list.keys()
    }
    if len(affected_services) > 1:
        print(
            PaastaColors.red(
                "Warning: trying to start/stop/restart multiple services:"))

        for cluster, services_instances in pargs.items():
            print("Cluster %s:" % cluster)
            for service, instances in services_instances.items():
                print("    Service %s:" % service)
                print("        Instances %s" % ",".join(instances.keys()))

        # Without a terminal nobody can answer the prompt, so treat it as "no".
        if sys.stdin.isatty():
            confirm = choice.Binary("Are you sure you want to continue?",
                                    False).ask()
        else:
            confirm = False
        if not confirm:
            print()
            print("exiting")
            return 1

    invalid_deploy_groups = []
    # Services owning the invalid deploy groups, in first-seen order. Tracked
    # explicitly because the loop variable ``service`` is rebound by the Flink
    # loop further down and could name an unrelated service by then.
    services_missing_tags = []
    marathon_message_printed = False
    affected_flinks = []

    if args.clusters is None or args.instances is None:
        if confirm_to_continue(pargs.items(), desired_state) is False:
            print()
            print("exiting")
            return 1

    for cluster, services_instances in pargs.items():
        for service, instances in services_instances.items():
            for instance in instances.keys():
                service_config = get_instance_config(
                    service=service,
                    cluster=cluster,
                    instance=instance,
                    soa_dir=soa_dir,
                    load_deployments=False,
                )
                # Flink instances are handled through the API after this loop.
                if isinstance(service_config, FlinkDeploymentConfig):
                    affected_flinks.append(service_config)
                    continue

                try:
                    remote_refs = get_remote_refs(service, soa_dir)
                except remote_git.LSRemoteException as e:
                    msg = (
                        "Error talking to the git server: %s\n"
                        "This PaaSTA command requires access to the git server to operate.\n"
                        "The git server may be down or not reachable from here.\n"
                        "Try again from somewhere where the git server can be reached, "
                        "like your developer environment.") % str(e)
                    print(msg)
                    return 1

                deploy_group = service_config.get_deploy_group()
                (deploy_tag,
                 _) = get_latest_deployment_tag(remote_refs, deploy_group)

                if deploy_tag not in remote_refs:
                    invalid_deploy_groups.append(deploy_group)
                    if service not in services_missing_tags:
                        services_missing_tags.append(service)
                else:
                    force_bounce = utils.format_timestamp(
                        datetime.datetime.utcnow())
                    # Print the Marathon notice at most once per invocation.
                    if (isinstance(service_config, MarathonServiceConfig)
                            and not marathon_message_printed):
                        print_marathon_message(desired_state)
                        marathon_message_printed = True

                    issue_state_change_for_service(
                        service_config=service_config,
                        force_bounce=force_bounce,
                        desired_state=desired_state,
                    )

    return_val = 0

    # TODO: Refactor to discover if set_state is available for given
    #       instance_type in API
    if affected_flinks:
        print_flink_message(desired_state)
        # Regroup the Flink configs as cluster -> service -> [instances] so
        # that one API client is created per cluster.
        csi = defaultdict(lambda: defaultdict(list))
        for service_config in affected_flinks:
            csi[service_config.cluster][service_config.service].append(
                service_config.instance)

        system_paasta_config = load_system_paasta_config()
        for cluster, services_instances in csi.items():
            client = get_paasta_api_client(cluster, system_paasta_config)
            if not client:
                print("Cannot get a paasta-api client")
                # sys.exit rather than the site-provided builtin ``exit``,
                # which is not guaranteed to exist in every interpreter setup.
                sys.exit(1)

            for service, instances in services_instances.items():
                for instance in instances:
                    try:
                        client.service.instance_set_state(
                            service=service,
                            instance=instance,
                            desired_state=desired_state,
                        ).result()
                    except HTTPError as exc:
                        print(exc.response.text)
                        return exc.status_code

    if invalid_deploy_groups:
        print(f"No deploy tags found for {', '.join(invalid_deploy_groups)}.")
        print(f"Has {', '.join(services_missing_tags)} been deployed there yet?")
        return_val = 1

    return return_val
Example #47
0
def _format_job_name(job):
    """Return the job's ``"name"`` entry, or a red ``UNKNOWN`` marker if absent.

    :param job: mapping describing a job; only its ``"name"`` key is read
    """
    # The fallback is built up front, exactly as a ``dict.get`` default would be.
    unknown_marker = PaastaColors.red("UNKNOWN")
    return job.get("name", unknown_marker)
Example #48
0
 def handle_interrupt(_signum, _frame):
     """Announce shutdown, stop the runner if there is one, then exit.

     Takes a signal number and a frame. The process exits with status 143
     when the signal is SIGTERM and with status 1 for anything else.
     """
     shutdown_notice = PaastaColors.red("Signal received, shutting down scheduler.")
     paasta_print(shutdown_notice)
     if runner is not None:
         runner.stop()
     if _signum == signal.SIGTERM:
         exit_status = 143
     else:
         exit_status = 1
     sys.exit(exit_status)
Example #49
0
def run_docker_container(
    docker_client,
    service,
    instance,
    docker_hash,
    volumes,
    interactive,
    command,
    healthcheck,
    healthcheck_only,
    instance_config,
    soa_dir=DEFAULT_SOA_DIR,
    dry_run=False,
    json_dict=False,
    framework=None,
):
    """docker-py has issues running a container with a TTY attached, so for
    consistency we execute 'docker run' directly in both interactive and
    non-interactive modes.

    In non-interactive mode when the run is complete, stop the container and
    remove it (with docker-py).

    :param docker_client: client used to inspect, attach to and clean up the
        container
    :param service: service name, used to look up the healthcheck
    :param instance: instance name, used to look up the healthcheck
    :param docker_hash: passed through to ``get_docker_run_cmd``
    :param volumes: passed through to ``get_docker_run_cmd``
    :param interactive: if true, this process is replaced by the docker run
        command (via ``execlp``)
    :param command: passed through to ``get_docker_run_cmd``
    :param healthcheck: passed as ``healthcheck_enabled`` to
        ``simulate_healthcheck_on_service``
    :param healthcheck_only: if true, exit right after the healthcheck with
        status 0 when it passed and 1 otherwise
    :param instance_config: supplies environment, network, memory and docker
        parameters for the run
    :param soa_dir: passed through to ``get_healthcheck_for_instance``
    :param dry_run: if true, only print the command (or, with ``json_dict``,
        the argument dict) as JSON and return 0
    :param json_dict: selects the dry-run output format, see ``dry_run``
    :param framework: passed through to ``get_local_run_environment_vars``
    :returns: 0 for dry-run and interactive mode; otherwise the container's
        exit code, or 3 if interrupted before the container was started
    """
    random_port = pick_random_port()
    environment = instance_config.get_env_dictionary()
    local_run_environment = get_local_run_environment_vars(
        instance_config=instance_config,
        port0=random_port,
        framework=framework,
    )
    environment.update(local_run_environment)
    net = instance_config.get_net()
    memory = instance_config.get_mem()
    container_name = get_container_name()
    docker_params = instance_config.format_docker_parameters()
    docker_run_args = dict(
        memory=memory,
        random_port=random_port,
        container_name=container_name,
        volumes=volumes,
        env=environment,
        interactive=interactive,
        docker_hash=docker_hash,
        command=command,
        net=net,
        docker_params=docker_params,
    )
    docker_run_cmd = get_docker_run_cmd(**docker_run_args)
    joined_docker_run_cmd = ' '.join(docker_run_cmd)
    healthcheck_mode, healthcheck_data = get_healthcheck_for_instance(
        service, instance, instance_config, random_port, soa_dir=soa_dir)

    if dry_run:
        if json_dict:
            paasta_print(json.dumps(docker_run_args))
        else:
            paasta_print(json.dumps(docker_run_cmd))
        return 0
    else:
        paasta_print('Running docker command:\n%s' % PaastaColors.grey(joined_docker_run_cmd))

    if interactive:
        # NOTE: This immediately replaces us with the docker run cmd. Docker
        # run knows how to clean up the running container in this situation.
        execlp('paasta_docker_wrapper', *docker_run_cmd)
        # For testing, when execlp is patched out and doesn't replace us, we
        # still want to bail out.
        return 0

    container_started = False
    container_id = None
    try:
        (returncode, output) = _run(docker_run_cmd)
        if returncode != 0:
            # Each line is its own argument so that sep='\n' really separates
            # them; adjacent string literals would be fused into one long
            # line and the separator would never be used.
            paasta_print(
                'Failure trying to start your container!',
                'Returncode: %d' % returncode,
                'Output:',
                '%s' % (output,),
                '',
                'Fix that problem and try again.',
                'http://y/paasta-troubleshooting',
                sep='\n',
            )
            # Container failed to start so no need to cleanup; just bail.
            sys.exit(1)
        container_started = True
        container_id = get_container_id(docker_client, container_name)
        paasta_print('Found our container running with CID %s' % container_id)

        # If the service has a healthcheck, simulate it
        if healthcheck_mode is not None:
            healthcheck_result = simulate_healthcheck_on_service(
                instance_config=instance_config,
                docker_client=docker_client,
                container_id=container_id,
                healthcheck_mode=healthcheck_mode,
                healthcheck_data=healthcheck_data,
                healthcheck_enabled=healthcheck,
            )

        def _output_stdout_and_exit_code():
            """Print the container's exit code followed by its captured output."""
            returncode = docker_client.inspect_container(container_id)['State']['ExitCode']
            paasta_print('Container exited: %d)' % returncode)
            paasta_print('Here is the stdout and stderr:\n\n')
            paasta_print(
                docker_client.attach(container_id, stderr=True, stream=False, logs=True)
            )

        if healthcheck_only:
            if container_started:
                _output_stdout_and_exit_code()
                _cleanup_container(docker_client, container_id)
            # healthcheck_result is only bound when a healthcheck mode exists,
            # so the None case must be checked first.
            if healthcheck_mode is None:
                paasta_print('--healthcheck-only, but no healthcheck is defined for this instance!')
                sys.exit(1)
            elif healthcheck_result is True:
                sys.exit(0)
            else:
                sys.exit(1)

        running = docker_client.inspect_container(container_id)['State']['Running']
        if running:
            paasta_print('Your service is now running! Tailing stdout and stderr:')
            for line in docker_client.attach(container_id, stderr=True, stream=True, logs=True):
                paasta_print(line)
        else:
            _output_stdout_and_exit_code()
            returncode = 3

    except KeyboardInterrupt:
        returncode = 3

    # Cleanup if the container exits on its own or interrupted.
    if container_started:
        returncode = docker_client.inspect_container(container_id)['State']['ExitCode']
        _cleanup_container(docker_client, container_id)
    return returncode
Example #50
0
def remote_run_start(args):
    """Start a task in Mesos.

    Outline:
    1. Collect configuration and overrides from ``args``
    2. Prepare a factory for the task configuration
    3. Prepare a factory for the executor stack
    4. Run the task (with retries) and exit with the code produced by
       handling the final event

    With ``args.detach`` the process forks twice and continues in the
    grandchild with stdout/stderr pointed at /dev/null; the parent and the
    intermediate child simply return. With ``args.dry_run`` the framework and
    task configuration are printed and nothing is run.
    """
    # Everything needed to describe the task we are about to run.
    (
        system_paasta_config,
        service,
        cluster,
        soa_dir,
        instance,
        instance_type,
    ) = extract_args(args)
    # TODO: move run_id into task identifier?
    run_id = args.run_id or generate_run_id(length=10)
    framework_name = create_framework_name(service, instance, run_id)
    overrides = accumulate_config_overrides(args, service, instance)
    # TODO: implement DryRunExecutor?
    taskproc_config = system_paasta_config.get_taskproc()
    native_job_config = load_paasta_native_job_config(
        service,
        instance,
        cluster,
        soa_dir=soa_dir,
        instance_type=instance_type,
        config_overrides=overrides,
        load_deployments=not args.docker_image,
    )
    region = args.aws_region or taskproc_config.get("aws_region")
    remote_run_config = system_paasta_config.get_remote_run_config()
    default_role = remote_run_config.get("default_role")
    assert default_role
    role = native_job_config.get_role() or default_role
    pool = native_job_config.get_pool()
    processor = TaskProcessor()
    for plugin_module in (
        "task_processing.plugins.stateful",
        "task_processing.plugins.mesos",
    ):
        processor.load_plugin(provider_module=plugin_module)

    if args.detach:
        paasta_print("Running in background")
        # First fork: the original process returns to its caller.
        if os.fork() > 0:
            return
        os.setsid()
        # Second fork: the intermediate child returns as well.
        if os.fork() > 0:
            return
        sys.stdout = open("/dev/null", "w")
        sys.stderr = open("/dev/null", "w")

    # Factories (rather than ready-made objects) make it easy to rebuild the
    # task config and the executor stack for each retry.
    def task_config_factory():
        return create_mesos_task_config(
            processor=processor,
            service=service,
            instance=instance,
            system_paasta_config=system_paasta_config,
            native_job_config=native_job_config,
            offer_timeout=args.staging_timeout,
            docker_image=args.docker_image,
        )

    framework_config = {
        "cluster": cluster,
        "framework_name": framework_name,
        "framework_staging_timeout": args.staging_timeout,
        "role": role,
        "pool": pool,
    }
    # Keyword arguments for creating the mesos executor.
    executor_kwargs = {
        "processor": processor,
        "system_paasta_config": system_paasta_config,
        "taskproc_config": taskproc_config,
        **framework_config,
    }

    def executor_factory():
        return build_executor_stack(
            processor,
            create_mesos_executor(**executor_kwargs),
            taskproc_config,
            cluster,
            region,
        )

    if args.dry_run:
        task_config_dict = task_config_to_dict(task_config_factory())
        printer = pprint.PrettyPrinter(indent=2)
        report_lines = [
            PaastaColors.green("Would have run task with:"),
            PaastaColors.green("Framework config:"),
            printer.pformat(framework_config),
            PaastaColors.green("Task config:"),
            printer.pformat(task_config_dict),
        ]
        paasta_print(*report_lines, sep="\n")
        return

    # Only the last (event, task_config) pair decides the exit code.
    last_event, last_task_config = run_tasks_with_retries(
        executor_factory,
        task_config_factory,
        retries=args.retries,
    )[-1]
    sys.exit(
        handle_terminal_event(
            event=last_event,
            service=service,
            instance=instance,
            run_id=run_id,
            email_address=args.notification_email,
            framework_config=framework_config,
            task_config=last_task_config,
        )
    )
def test_status_smartstack_backends_verbose_demphasizes_maint_instances_for_unrelated_tasks():
    """Verbose smartstack status for a MAINT backend matched to no task.

    The backend is paired with ``None`` instead of one of our tasks. The
    output must not contain a red ``MAINT`` and must contain the backend's
    host:port preceded, on the same line, by the grey colour code.
    """
    service_name = 'my_service'
    instance_name = 'my_instance'
    maint_backend = {
        'status': 'MAINT',
        'lastchg': '1',
        'last_chk': 'OK',
        'check_code': '200',
        'svname': 'ipaddress1:1001_hostname1',
        'check_status': 'L7OK',
        'check_duration': 1,
    }
    our_tasks = [mock.Mock(), mock.Mock()]
    known_slaves = [
        {
            'hostname': 'fake',
            'attributes': {
                'fake_discover': 'fake_location_1',
            },
        },
    ]

    with mock.patch(
        'paasta_tools.marathon_tools.load_service_namespace_config',
        autospec=True,
    ) as namespace_config_patch, mock.patch(
        'paasta_tools.marathon_tools.read_registration_for_service_instance',
        autospec=True,
    ) as registration_patch, mock.patch(
        'paasta_tools.marathon_serviceinit.get_all_slaves_for_blacklist_whitelist',
        autospec=True,
    ) as slaves_patch, mock.patch(
        'paasta_tools.marathon_serviceinit.get_backends',
        autospec=True,
    ) as backends_patch, mock.patch(
        'paasta_tools.marathon_serviceinit.match_backends_and_tasks',
        autospec=True,
    ) as matcher_patch:
        slaves_patch.return_value = known_slaves
        namespace_config_patch.return_value.get_discover.return_value = 'fake_discover'
        registration_patch.return_value = compose_job_id(service_name, instance_name)
        backends_patch.return_value = [maint_backend]
        # The only backend belongs to none of our tasks.
        matcher_patch.return_value = [(maint_backend, None)]

        actual = marathon_serviceinit.status_smartstack_backends(
            service=service_name,
            instance=instance_name,
            cluster='fake_cluster',
            job_config=fake_marathon_job_config,
            tasks=our_tasks,
            expected_count=10,
            soa_dir=None,
            verbose=True,
            synapse_port=123456,
            synapse_haproxy_url_format=DEFAULT_SYNAPSE_HAPROXY_URL_FORMAT,
            system_deploy_blacklist=[],
            system_deploy_whitelist=[],
        )

        assert PaastaColors.red('MAINT') not in actual
        greyed_host_pattern = r"%s[^\n]*hostname1:1001" % re.escape(PaastaColors.GREY)
        assert re.search(greyed_host_pattern, actual)
Example #52
0
def paasta_start_or_stop(args, desired_state):
    """Ask for the given service branches to be started or stopped.

    The selection produced by ``apply_args_filters(args)`` is walked as a
    ``cluster -> service -> instances`` mapping. For each instance whose
    latest deployment tag is present in the service's remote git refs, a
    state change is issued; deploy groups without such a tag are collected
    and reported at the end.

    :param args: parsed command-line arguments; ``soa_dir`` is read here
    :param desired_state: the state to request for every selected instance
    :returns: 0 on success; 1 if nothing was selected, the user did not
        confirm, the git server could not be queried, or any deploy group
        had no deployment tag
    """
    soa_dir = args.soa_dir

    pargs = apply_args_filters(args)
    if len(pargs) == 0:
        return 1

    affected_services = set()
    for services_in_cluster in pargs.values():
        affected_services.update(services_in_cluster.keys())

    if len(affected_services) > 1:
        warning = PaastaColors.red(
            "Warning: trying to start/stop/restart multiple services:")
        paasta_print(warning)

        for cluster_name, cluster_services in pargs.items():
            paasta_print("Cluster %s:" % cluster_name)
            for service_name, service_instances in cluster_services.items():
                paasta_print("    Service %s:" % service_name)
                instance_names = ",".join(service_instances.keys())
                paasta_print("        Instances %s" % instance_names)

        # No terminal means nobody can answer, which counts as a refusal.
        confirm = False
        if sys.stdin.isatty():
            prompt = choice.Binary('Are you sure you want to continue?', False)
            confirm = prompt.ask()
        if not confirm:
            paasta_print()
            paasta_print("exiting")
            return 1

    invalid_deploy_groups = []
    marathon_message_printed = False
    chronos_message_printed = False
    # NOTE: ``service`` and ``remote_refs`` keep these names on purpose; the
    # summary after the loops reports whatever they were last bound to.
    for cluster, services_instances in pargs.items():
        for service, instances in services_instances.items():
            try:
                git_url = utils.get_git_url(service, soa_dir)
                remote_refs = remote_git.list_remote_refs(git_url)
            except remote_git.LSRemoteException as e:
                msg = (
                    "Error talking to the git server: %s\n"
                    "This PaaSTA command requires access to the git server to operate.\n"
                    "The git server may be down or not reachable from here.\n"
                    "Try again from somewhere where the git server can be reached, "
                    "like your developer environment.") % str(e)
                paasta_print(msg)
                return 1

            for instance in instances.keys():
                service_config = get_instance_config(
                    service=service,
                    cluster=cluster,
                    instance=instance,
                    soa_dir=soa_dir,
                    load_deployments=False,
                )
                deploy_group = service_config.get_deploy_group()
                deploy_tag, _ = get_latest_deployment_tag(remote_refs, deploy_group)

                if deploy_tag not in remote_refs:
                    invalid_deploy_groups.append(deploy_group)
                    continue

                force_bounce = utils.format_timestamp(datetime.datetime.utcnow())
                # Each scheduler-specific notice is shown at most once.
                is_marathon = isinstance(service_config, MarathonServiceConfig)
                if is_marathon and not marathon_message_printed:
                    print_marathon_message(desired_state)
                    marathon_message_printed = True
                elif isinstance(service_config, ChronosJobConfig) and not chronos_message_printed:
                    print_chronos_message(desired_state)
                    chronos_message_printed = True

                issue_state_change_for_service(
                    service_config=service_config,
                    force_bounce=force_bounce,
                    desired_state=desired_state,
                )

    if not invalid_deploy_groups:
        return 0

    missing = ", ".join(invalid_deploy_groups)
    paasta_print("No branches found for %s in %s." % (missing, remote_refs))
    paasta_print("Has %s been deployed there yet?" % service)
    return 1
Example #53
0
def paasta_logs(args):
    """Print the logs for a PaaSTA service.

    Exactly one retrieval mode is used, chosen from ``args``: a default mode
    when no filtering was requested, tailing, the last N lines, N lines from
    an offset, or a time range.

    :param args: argparse.Namespace obj created from sys.args by cli
    :returns: 0 on success; 1 if the filtering arguments fail validation or
        the requested time range cannot be parsed. When no filtering mode was
        requested, the result of ``pick_default_log_mode`` is returned as is.
    """
    soa_dir = args.soa_dir
    service = figure_out_service_name(args, soa_dir)

    if args.clusters is None:
        clusters = list_clusters(service, soa_dir=soa_dir)
    else:
        clusters = args.clusters.split(",")

    if args.instances is None:
        instances = None
    else:
        instances = args.instances.split(',')

    if args.components is not None:
        components = args.components.split(",")
    else:
        components = DEFAULT_COMPONENTS
    components = set(components)

    # 'app_output' is shorthand for both output streams.
    if 'app_output' in components:
        components.remove('app_output')
        components.add('stdout')
        components.add('stderr')

    if args.verbose:
        log.setLevel(logging.DEBUG)
    else:
        log.setLevel(logging.INFO)

    levels = [DEFAULT_LOGLEVEL, 'debug']

    log.debug(f"Going to get logs for {service} on clusters {clusters}")

    log_reader = get_log_reader()

    if not validate_filtering_args(args, log_reader):
        return 1

    # They haven't specified what kind of filtering they want, decide for them
    if args.line_count is None and args.time_from is None and not args.tail:
        return pick_default_log_mode(args, log_reader, service, levels, components, clusters, instances)

    if args.tail:
        paasta_print(PaastaColors.cyan("Tailing logs and applying filters..."), file=sys.stderr)
        log_reader.tail_logs(
            service=service,
            levels=levels,
            components=components,
            clusters=clusters,
            instances=instances,
            raw_mode=args.raw_mode,
        )
        return 0

    # If the logger doesn't support offsetting the number of lines by a particular line number
    # there is no point in distinguishing between a positive/negative number of lines since it
    # can only get the last N lines
    if not log_reader.SUPPORTS_LINE_OFFSET and args.line_count is not None:
        args.line_count = abs(args.line_count)

    # Handle line based filtering
    if args.line_count is not None and args.line_offset is None:
        log_reader.print_last_n_logs(
            service=service,
            line_count=args.line_count,
            levels=levels,
            components=components,
            clusters=clusters,
            instances=instances,
            raw_mode=args.raw_mode,
        )
        return 0
    elif args.line_count is not None and args.line_offset is not None:
        log_reader.print_logs_by_offset(
            service=service,
            line_count=args.line_count,
            line_offset=args.line_offset,
            levels=levels,
            components=components,
            clusters=clusters,
            instances=instances,
            raw_mode=args.raw_mode,
        )
        return 0

    # Handle time based filtering
    try:
        start_time, end_time = generate_start_end_time(args.time_from, args.time_to)
    except ValueError as e:
        paasta_print(PaastaColors.red(str(e)), file=sys.stderr)
        return 1

    log_reader.print_logs_by_time(
        service=service,
        start_time=start_time,
        end_time=end_time,
        levels=levels,
        components=components,
        clusters=clusters,
        instances=instances,
        raw_mode=args.raw_mode,
    )
    # Report success explicitly, like every other retrieval mode above,
    # instead of falling off the end and returning None.
    return 0
Example #54
0
def run_docker_container(
    docker_client,
    service,
    instance,
    docker_hash,
    volumes,
    interactive,
    command,
    healthcheck,
    healthcheck_only,
    user_port,
    instance_config,
    secret_provider_name,
    soa_dir=DEFAULT_SOA_DIR,
    dry_run=False,
    json_dict=False,
    framework=None,
    secret_provider_kwargs=None,
    skip_secrets=False,
):
    """docker-py has issues running a container with a TTY attached, so for
    consistency we execute 'docker run' directly in both interactive and
    non-interactive modes.

    In non-interactive mode when the run is complete, stop the container and
    remove it (with docker-py).

    :param docker_client: client used to inspect, attach to and clean up the
        container
    :param service: service name
    :param instance: instance name, used to look up the healthcheck
    :param docker_hash: passed through to ``get_docker_run_cmd``
    :param volumes: passed through to ``get_docker_run_cmd``
    :param interactive: if true, this process is replaced by the docker run
        command; forced to true when the instance has no healthcheck
    :param command: passed through to ``get_docker_run_cmd``
    :param healthcheck: request a simulated healthcheck; also passed as
        ``healthcheck_enabled`` to ``simulate_healthcheck_on_service``
    :param healthcheck_only: if true, exit right after the healthcheck with
        status 0 when it passed and 1 otherwise
    :param user_port: port requested by the user; if falsy a port is picked
        automatically, if already in use the process exits with status 1
    :param instance_config: supplies environment, network, memory, docker
        parameters and the container port for the run
    :param secret_provider_name: passed to
        ``decrypt_secret_environment_variables``
    :param soa_dir: passed through to secret decryption and healthcheck lookup
    :param dry_run: if true, only print the command (or, with ``json_dict``,
        the argument dict) as JSON and return 0
    :param json_dict: selects the dry-run output format, see ``dry_run``
    :param framework: passed through to ``get_local_run_environment_vars``
    :param secret_provider_kwargs: extra keyword arguments for the secret
        provider; ``None`` (the default) means an empty dict
    :param skip_secrets: if true, secret environment variables are not
        decrypted
    :returns: 0 for dry-run and for runs handed over to docker directly;
        otherwise the container's exit code, or 3 if interrupted before the
        container was started
    """
    # A fresh dict per call: a literal ``{}`` default would be one object
    # shared between all calls.
    if secret_provider_kwargs is None:
        secret_provider_kwargs = {}

    if user_port:
        if check_if_port_free(user_port):
            chosen_port = user_port
        else:
            paasta_print(
                PaastaColors.red(
                    "The chosen port is already in use!\n"
                    "Try specifying another one, or omit (--port|-o) and paasta will find a free one for you",
                ),
                file=sys.stderr,
            )
            sys.exit(1)
    else:
        chosen_port = pick_random_port(service)
    environment = instance_config.get_env_dictionary()
    if not skip_secrets:
        secret_environment = decrypt_secret_environment_variables(
            secret_provider_name=secret_provider_name,
            environment=environment,
            soa_dir=soa_dir,
            service_name=service,
            cluster_name=instance_config.cluster,
            secret_provider_kwargs=secret_provider_kwargs,
        )
        environment.update(secret_environment)
    local_run_environment = get_local_run_environment_vars(
        instance_config=instance_config,
        port0=chosen_port,
        framework=framework,
    )
    environment.update(local_run_environment)
    net = instance_config.get_net()
    memory = instance_config.get_mem()
    container_name = get_container_name()
    docker_params = instance_config.format_docker_parameters()

    healthcheck_mode, healthcheck_data = get_healthcheck_for_instance(
        service, instance, instance_config, chosen_port, soa_dir=soa_dir,
    )
    if healthcheck_mode is None:
        container_port = None
        interactive = True
    elif not user_port and not healthcheck and not healthcheck_only:
        container_port = None
    else:
        # Not every instance config type provides get_container_port.
        try:
            container_port = instance_config.get_container_port()
        except AttributeError:
            container_port = None

    simulate_healthcheck = (healthcheck_only or healthcheck) and healthcheck_mode is not None

    docker_run_args = dict(
        memory=memory,
        chosen_port=chosen_port,
        container_port=container_port,
        container_name=container_name,
        volumes=volumes,
        env=environment,
        interactive=interactive,
        detach=simulate_healthcheck,
        docker_hash=docker_hash,
        command=command,
        net=net,
        docker_params=docker_params,
    )
    docker_run_cmd = get_docker_run_cmd(**docker_run_args)
    joined_docker_run_cmd = ' '.join(docker_run_cmd)

    if dry_run:
        if json_dict:
            paasta_print(json.dumps(docker_run_args))
        else:
            paasta_print(json.dumps(docker_run_cmd))
        return 0
    else:
        paasta_print('Running docker command:\n%s' % PaastaColors.grey(joined_docker_run_cmd))

    merged_env = {**os.environ, **environment}

    if interactive or not simulate_healthcheck:
        # NOTE: This immediately replaces us with the docker run cmd. Docker
        # run knows how to clean up the running container in this situation.
        execlpe('paasta_docker_wrapper', *docker_run_cmd, merged_env)
        # For testing, when execlpe is patched out and doesn't replace us, we
        # still want to bail out.
        return 0

    container_started = False
    container_id = None
    try:
        (returncode, output) = _run(docker_run_cmd, env=merged_env)
        if returncode != 0:
            # Each line is its own argument so that sep='\n' really separates
            # them; adjacent string literals would be fused into one long
            # line and the separator would never be used.
            paasta_print(
                'Failure trying to start your container!',
                'Returncode: %d' % returncode,
                'Output:',
                '%s' % (output,),
                '',
                'Fix that problem and try again.',
                'http://y/paasta-troubleshooting',
                sep='\n',
            )
            # Container failed to start so no need to cleanup; just bail.
            sys.exit(1)
        container_started = True
        container_id = get_container_id(docker_client, container_name)
        paasta_print('Found our container running with CID %s' % container_id)

        if simulate_healthcheck:
            healthcheck_result = simulate_healthcheck_on_service(
                instance_config=instance_config,
                docker_client=docker_client,
                container_id=container_id,
                healthcheck_mode=healthcheck_mode,
                healthcheck_data=healthcheck_data,
                healthcheck_enabled=healthcheck,
            )

        def _output_stdout_and_exit_code():
            """Print the container's exit code followed by its captured output."""
            returncode = docker_client.inspect_container(container_id)['State']['ExitCode']
            paasta_print('Container exited: %d)' % returncode)
            paasta_print('Here is the stdout and stderr:\n\n')
            paasta_print(
                docker_client.attach(container_id, stderr=True, stream=False, logs=True),
            )

        if healthcheck_only:
            if container_started:
                _output_stdout_and_exit_code()
                _cleanup_container(docker_client, container_id)
            if healthcheck_mode is None:
                paasta_print('--healthcheck-only, but no healthcheck is defined for this instance!')
                sys.exit(1)
            elif healthcheck_result is True:
                sys.exit(0)
            else:
                sys.exit(1)

        running = docker_client.inspect_container(container_id)['State']['Running']
        if running:
            paasta_print('Your service is now running! Tailing stdout and stderr:')
            for line in docker_client.attach(container_id, stderr=True, stream=True, logs=True):
                paasta_print(line)
        else:
            _output_stdout_and_exit_code()
            returncode = 3

    except KeyboardInterrupt:
        returncode = 3

    # Cleanup if the container exits on its own or interrupted.
    if container_started:
        returncode = docker_client.inspect_container(container_id)['State']['ExitCode']
        _cleanup_container(docker_client, container_id)
    return returncode
Example #55
0
def check_mark():
    """
    :return: the check mark character (U+2713) wrapped by PaastaColors.green
    """
    tick = '\u2713'
    return PaastaColors.green(tick)
Example #56
0
def simulate_healthcheck_on_service(
    instance_config,
    docker_client,
    container_id,
    healthcheck_mode,
    healthcheck_data,
    healthcheck_enabled,
):
    """Run a Marathon-style healthcheck loop against a container, if enabled.

    When healthchecks are disabled, only prints what would have been checked
    and reports success.

    :param instance_config: service manifest; supplies the grace period,
        timeout, interval and max consecutive failures for the healthcheck
    :param docker_client: Docker client object
    :param container_id: Docker container id
    :param healthcheck_mode: forwarded unchanged to run_healthcheck_on_container
    :param healthcheck_data: healthcheck target; printed as the "via" link and
        forwarded to run_healthcheck_on_container
    :param healthcheck_enabled: boolean
    :returns: healthcheck_passed: True when disabled or when a check succeeds;
        False when the container exits early; otherwise the result of the last
        failed check once the allowed number of failures is reached
    """
    healthcheck_link = PaastaColors.cyan(healthcheck_data)

    if not healthcheck_enabled:
        paasta_print('\nPaaSTA would have healthchecked your service via\n%s' % healthcheck_link)
        return True

    grace_period = instance_config.get_healthcheck_grace_period_seconds()
    timeout = instance_config.get_healthcheck_timeout_seconds()
    interval = instance_config.get_healthcheck_interval_seconds()
    max_failures = instance_config.get_healthcheck_max_consecutive_failures()

    paasta_print('\nStarting health check via %s (waiting %s seconds before '
                 'considering failures due to grace period):' % (healthcheck_link, grace_period))

    # Failures are only counted once this deadline has passed; a success ends
    # the loop at any time.
    grace_deadline = time.time() + grace_period
    counted_failures = 0
    while True:
        # Bail out early if the container is no longer running.
        state = docker_client.inspect_container(container_id)['State']
        if not state['Running']:
            exit_notice = 'Container exited with code {}'.format(state['ExitCode'])
            paasta_print(PaastaColors.red(exit_notice))
            return False

        healthcheck_passed, healthcheck_output = run_healthcheck_on_container(
            docker_client, container_id, healthcheck_mode, healthcheck_data, timeout,
        )

        if healthcheck_passed:
            paasta_print("{}'{}' (via {})".format(
                PaastaColors.green("Healthcheck succeeded!: "),
                healthcheck_output,
                healthcheck_link,
            ))
            return healthcheck_passed

        # The check failed: decide whether it counts against the failure budget.
        if time.time() < grace_deadline:
            color = PaastaColors.grey
            msg = '(disregarded due to grace period)'
        else:
            counted_failures += 1
            color = PaastaColors.red
            msg = '(Attempt {} of {})'.format(counted_failures, max_failures)
        extra_msg = f' (via: {healthcheck_link}. Output: {healthcheck_output})'

        failure_banner = color(f'Healthcheck failed! {msg}')
        paasta_print('{}{}'.format(failure_banner, extra_msg))

        if counted_failures == max_failures:
            return healthcheck_passed

        time.sleep(interval)
Example #57
0
 def __str__(self):
     """Return the service-specific error text, or the guess error text when no service is set."""
     if not self.service:
         return self.GUESS_ERROR_MSG
     details = (PaastaColors.cyan(self.service), self.CHECK_ERROR_MSG)
     return "SERVICE: %s %s" % details
Example #58
0
 def colorize(x):
     """Return ``x`` passed through PaastaColors.grey."""
     greyed = PaastaColors.grey(x)
     return greyed
Example #59
0
def test_assert_gpu_health():
    """assert_gpu_health((3, 1, 2)) is healthy and reports 1 / 3 GPUs in use at a green 33.33%."""
    output, healthy = metastatus_lib.assert_gpu_health((3, 1, 2))
    assert healthy
    green_percentage = PaastaColors.green("33.33%")
    expected_fragment = "GPUs: 1 / 3 in use (%s)" % green_percentage
    assert expected_fragment in output
Example #60
0
def paasta_mark_for_deployment(args):
    """Wrapping mark_for_deployment

    Validates the service, warns about unused deploy groups and redundant
    shas, marks the commit for deployment and, when ``args.block`` is set,
    waits for the deployment to finish (optionally rolling back on
    interruption or timeout).

    :param args: parsed command-line namespace; reads ``verbose``, ``service``,
        ``soa_dir``, ``deploy_group``, ``git_url``, ``commit``, ``block``,
        ``timeout`` and ``auto_rollback``. ``args.git_url`` is filled in from
        the service configuration when it is None.
    :returns: the return value of mark_for_deployment, or 1 when waiting for
        the deployment was interrupted, timed out, or found no instances
    """
    if args.verbose:
        log.setLevel(level=logging.DEBUG)
    else:
        log.setLevel(level=logging.INFO)

    service = args.service
    # Accept the "services-<name>" form and strip the prefix.
    if service and service.startswith('services-'):
        service = service.split('services-', 1)[1]
    validate_service_name(service, soa_dir=args.soa_dir)

    in_use_deploy_groups = list_deploy_groups(
        service=service,
        soa_dir=args.soa_dir,
    )
    _, invalid_deploy_groups = validate_given_deploy_groups(
        in_use_deploy_groups, [args.deploy_group])

    # Only one deploy group is checked, so at most one can be invalid.
    # This is a warning only: marking for deployment still proceeds.
    if len(invalid_deploy_groups) == 1:
        paasta_print(
            PaastaColors.red(
                "ERROR: These deploy groups are not currently used anywhere: %s.\n"
                % (",").join(invalid_deploy_groups)))
        paasta_print(
            PaastaColors.red(
                "This isn't technically wrong because you can mark-for-deployment before deploying there"
            ))
        paasta_print(
            PaastaColors.red(
                "but this is probably a typo. Did you mean one of these in-use deploy groups?:"
            ))
        paasta_print(
            PaastaColors.red("   %s" % (",").join(in_use_deploy_groups)))
        paasta_print()
        paasta_print(PaastaColors.red("Continuing regardless..."))

    if args.git_url is None:
        args.git_url = get_git_url(service=service, soa_dir=args.soa_dir)

    # Remember what is currently deployed so a rollback target is available.
    old_git_sha = get_currently_deployed_sha(service=service,
                                             deploy_group=args.deploy_group)
    if old_git_sha == args.commit:
        paasta_print(
            "Warning: The sha asked to be deployed already matches what is set to be deployed:"
        )
        paasta_print(old_git_sha)
        paasta_print("Continuing anyway.")

    ret = mark_for_deployment(
        git_url=args.git_url,
        deploy_group=args.deploy_group,
        service=service,
        commit=args.commit,
    )
    if args.block:
        try:
            wait_for_deployment(service=service,
                                deploy_group=args.deploy_group,
                                git_sha=args.commit,
                                soa_dir=args.soa_dir,
                                timeout=args.timeout)
            line = "Deployment of {} for {} complete".format(
                args.commit, args.deploy_group)
            _log(service=service, component='deploy', line=line, level='event')
        except (KeyboardInterrupt, TimeoutError):
            if args.auto_rollback is True:
                if old_git_sha == args.commit:
                    paasta_print(
                        "Error: --auto-rollback was requested, but the previous sha"
                    )
                    paasta_print(
                        "is the same that was requested with --commit. Can't rollback"
                    )
                    paasta_print("automatically.")
                else:
                    paasta_print(
                        "Auto-Rollback requested. Marking the previous sha")
                    # The sha goes in the parentheses and the deploy group after
                    # "for"; the original passed these two in the wrong order.
                    paasta_print("(%s) for %s as desired." %
                                 (old_git_sha, args.deploy_group))
                    mark_for_deployment(
                        git_url=args.git_url,
                        deploy_group=args.deploy_group,
                        service=service,
                        commit=old_git_sha,
                    )
            else:
                paasta_print(
                    "Waiting for deployment aborted. PaaSTA will continue to try to deploy this code."
                )
                paasta_print("If you wish to see the status, run:")
                paasta_print()
                paasta_print("    paasta status -s %s -v" % service)
                paasta_print()
            # Interrupted or timed out: report failure even if marking succeeded.
            ret = 1
        except NoInstancesFound:
            return 1
    # Offer a manual rollback hint only when there is a different previous sha
    # and no automatic rollback was requested.
    if old_git_sha is not None and old_git_sha != args.commit and not args.auto_rollback:
        paasta_print()
        paasta_print("If you wish to roll back, you can run:")
        paasta_print()
        paasta_print(
            PaastaColors.bold(
                "    paasta rollback --service %s --deploy-group %s --commit %s "
                % (service, args.deploy_group, old_git_sha)))
    return ret