Ejemplo n.º 1
0
def status_marathon_job_verbose(service, instance, clients, cluster, soa_dir,
                                job_config):
    """Returns detailed information about a marathon apps for a service
    and instance. Does not make assumptions about what the *exact*
    appid is, but instead does a fuzzy match on any marathon apps
    that match the given service.instance"""
    all_tasks = []
    all_output = []
    # For verbose mode, we want to see *any* matching app. As it may
    # not be the one that we think should be deployed. For example
    # during a bounce we want to see the old and new ones.

    relevant_clients = clients.get_all_clients_for_service(job_config)
    marathon_apps_with_clients = marathon_tools.get_marathon_apps_with_clients(
        relevant_clients, embed_tasks=True)
    for app, client in marathon_tools.get_matching_apps_with_clients(
            service, instance, marathon_apps_with_clients):
        tasks, output = get_verbose_status_of_marathon_app(
            marathon_client=client,
            app=app,
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=soa_dir,
        )
        all_tasks.extend(tasks)
        all_output.append(output)
    return all_tasks, "\n".join(all_output)
def marathon_instance_status(
    instance_status: Mapping[str, Any],
    service: str,
    instance: str,
    verbose: int,
    include_smartstack: bool,
    include_mesos: bool,
) -> Mapping[str, Any]:
    mstatus: Dict[str, Any] = {}

    job_config = marathon_tools.load_marathon_service_config(
        service, instance, settings.cluster, soa_dir=settings.soa_dir
    )
    marathon_apps_with_clients = marathon_tools.get_marathon_apps_with_clients(
        clients=settings.marathon_clients.get_all_clients_for_service(job_config),
        embed_tasks=True,
        service_name=service,
    )
    matching_apps_with_clients = marathon_tools.get_matching_apps_with_clients(
        service, instance, marathon_apps_with_clients
    )

    mstatus.update(
        marathon_job_status(
            service, instance, job_config, matching_apps_with_clients, verbose
        )
    )

    if include_smartstack:
        service_namespace_config = marathon_tools.load_service_namespace_config(
            service=service,
            namespace=job_config.get_nerve_namespace(),
            soa_dir=settings.soa_dir,
        )
        if "proxy_port" in service_namespace_config:
            tasks = [
                task for app, _ in matching_apps_with_clients for task in app.tasks
            ]

            mstatus["smartstack"] = marathon_smartstack_status(
                service,
                instance,
                job_config,
                service_namespace_config,
                tasks,
                should_return_individual_backends=verbose > 0,
            )

    if include_mesos:
        mstatus["mesos"] = marathon_mesos_status(service, instance, verbose)

    return mstatus
Ejemplo n.º 3
0
def status_marathon_job_verbose(
    service: str,
    instance: str,
    clients: marathon_tools.MarathonClients,
    cluster: str,
    soa_dir: str,
    job_config: marathon_tools.MarathonServiceConfig,
    dashboards: Dict[marathon_tools.MarathonClient, str],
) -> Tuple[List[MarathonTask], str]:
    """Returns detailed information about a marathon apps for a service
    and instance. Does not make assumptions about what the *exact*
    appid is, but instead does a fuzzy match on any marathon apps
    that match the given service.instance"""
    all_tasks: List[MarathonTask] = []
    all_output: List[str] = []
    # For verbose mode, we want to see *any* matching app. As it may
    # not be the one that we think should be deployed. For example
    # during a bounce we want to see the old and new ones.
    marathon_apps_with_clients = marathon_tools.get_marathon_apps_with_clients(
        clients=clients.get_all_clients_for_service(job_config),
        embed_tasks=True,
    )

    autoscaling_info = get_autoscaling_info(clients, job_config)
    if autoscaling_info:
        all_output.append("  Autoscaling Info:")
        headers = [
            field.replace("_", " ").capitalize()
            for field in ServiceAutoscalingInfo._fields
        ]
        table = [headers, autoscaling_info]
        all_output.append('\n'.join(
            ["    %s" % line for line in format_table(table)]))

    for app, client in marathon_tools.get_matching_apps_with_clients(
            service, instance, marathon_apps_with_clients):
        tasks, output = get_verbose_status_of_marathon_app(
            marathon_client=client,
            app=app,
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=soa_dir,
            dashboards=dashboards,
        )
        all_tasks.extend(tasks)
        all_output.append(output)
    return all_tasks, "\n".join(all_output)
Ejemplo n.º 4
0
def deploy_service(
    service: str,
    instance: str,
    marathon_jobid: str,
    config: marathon_tools.FormattedMarathonAppDict,
    clients: marathon_tools.MarathonClients,
    marathon_apps_with_clients: Collection[Tuple[MarathonApp, MarathonClient]],
    bounce_method: str,
    drain_method_name: str,
    drain_method_params: Dict[str, Any],
    nerve_ns: str,
    bounce_health_params: Dict[str, Any],
    soa_dir: str,
    job_config: marathon_tools.MarathonServiceConfig,
    bounce_margin_factor: float = 1.0,
) -> Tuple[int, str, Optional[float]]:
    """Deploy the service to marathon, either directly or via a bounce if needed.
    Called by setup_service when it's time to actually deploy.

    :param service: The name of the service to deploy
    :param instance: The instance of the service to deploy
    :param marathon_jobid: Full id of the marathon job
    :param config: The complete configuration dict to send to marathon
    :param clients: A MarathonClients object
    :param bounce_method: The bounce method to use, if needed
    :param drain_method_name: The name of the traffic draining method to use.
    :param nerve_ns: The nerve namespace to look in.
    :param bounce_health_params: A dictionary of options for bounce_lib.get_happy_tasks.
    :param bounce_margin_factor: the multiplication factor used to calculate the number of instances to be drained
    :returns: A tuple of (status, output, bounce_in_seconds) to be used with send_sensu_event"""
    def log_deploy_error(errormsg: str, level: str = 'event') -> None:
        return _log(
            service=service,
            line=errormsg,
            component='deploy',
            level='event',
            cluster=cluster,
            instance=instance,
        )

    system_paasta_config = load_system_paasta_config()
    cluster = system_paasta_config.get_cluster()
    existing_apps_with_clients = marathon_tools.get_matching_apps_with_clients(
        service=service,
        instance=instance,
        marathon_apps_with_clients=marathon_apps_with_clients,
    )

    new_client = clients.get_current_client_for_service(job_config)

    new_apps_with_clients_list: List[Tuple[MarathonApp, MarathonClient]] = []
    other_apps_with_clients: List[Tuple[MarathonApp, MarathonClient]] = []

    for a, c in existing_apps_with_clients:
        if a.id == '/%s' % config['id'] and c == new_client:
            new_apps_with_clients_list.append((a, c))
        else:
            other_apps_with_clients.append((a, c))

    serviceinstance = "%s.%s" % (service, instance)

    if new_apps_with_clients_list:
        new_app, new_client = new_apps_with_clients_list[0]
        if len(new_apps_with_clients_list) != 1:
            raise ValueError(
                "Only expected one app per ID per shard; found %d" %
                len(new_apps_with_clients_list))
        new_app_running = True
        happy_new_tasks = bounce_lib.get_happy_tasks(
            new_app,
            service,
            nerve_ns,
            system_paasta_config,
            **bounce_health_params,
        )
    else:
        new_app_running = False
        happy_new_tasks = []

    try:
        drain_method = drain_lib.get_drain_method(
            drain_method_name,
            service=service,
            instance=instance,
            nerve_ns=nerve_ns,
            drain_method_params=drain_method_params,
        )
    except KeyError:
        errormsg = 'ERROR: drain_method not recognized: %s. Must be one of (%s)' % \
            (drain_method_name, ', '.join(drain_lib.list_drain_methods()))
        log_deploy_error(errormsg)
        return (1, errormsg, None)

    try:
        draining_hosts = get_draining_hosts()
    except ReadTimeout as e:
        errormsg = "ReadTimeout encountered trying to get draining hosts: %s" % e
        return (1, errormsg, 60)

    (
        old_app_live_happy_tasks,
        old_app_live_unhappy_tasks,
        old_app_draining_tasks,
        old_app_at_risk_tasks,
    ) = get_tasks_by_state(
        other_apps_with_clients=other_apps_with_clients,
        drain_method=drain_method,
        service=service,
        nerve_ns=nerve_ns,
        bounce_health_params=bounce_health_params,
        system_paasta_config=system_paasta_config,
        log_deploy_error=log_deploy_error,
        draining_hosts=draining_hosts,
    )

    # The first thing we need to do is take up the "slack" of old apps, to stop
    # them from launching new things that we are going to have to end up draining
    # and killing anyway.
    for a, c in other_apps_with_clients:
        marathon_tools.take_up_slack(app=a, client=c)

    num_at_risk_tasks = 0
    if new_app_running:
        num_at_risk_tasks = get_num_at_risk_tasks(
            new_app, draining_hosts=draining_hosts)
        if new_app.instances < config['instances'] + num_at_risk_tasks:
            log.info("Scaling %s up from %d to %d instances." %
                     (new_app.id, new_app.instances,
                      config['instances'] + num_at_risk_tasks))
            new_client.scale_app(app_id=new_app.id,
                                 instances=config['instances'] +
                                 num_at_risk_tasks,
                                 force=True)
        # If we have more than the specified number of instances running, we will want to drain some of them.
        # We will start by draining any tasks running on at-risk hosts.
        elif new_app.instances > config['instances']:
            num_tasks_to_scale = max(
                min(len(new_app.tasks), new_app.instances) -
                config['instances'], 0)
            task_dict = get_tasks_by_state_for_app(
                app=new_app,
                drain_method=drain_method,
                service=service,
                nerve_ns=nerve_ns,
                bounce_health_params=bounce_health_params,
                system_paasta_config=system_paasta_config,
                log_deploy_error=log_deploy_error,
                draining_hosts=draining_hosts,
            )
            scaling_app_happy_tasks = list(task_dict['happy'])
            scaling_app_unhappy_tasks = list(task_dict['unhappy'])
            scaling_app_draining_tasks = list(task_dict['draining'])
            scaling_app_at_risk_tasks = list(task_dict['at_risk'])

            tasks_to_move_draining = min(len(scaling_app_draining_tasks),
                                         num_tasks_to_scale)
            old_app_draining_tasks[(new_app.id, new_client)] = set(
                scaling_app_draining_tasks[:tasks_to_move_draining])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_draining

            tasks_to_move_unhappy = min(len(scaling_app_unhappy_tasks),
                                        num_tasks_to_scale)
            old_app_live_unhappy_tasks[(new_app.id, new_client)] = set(
                scaling_app_unhappy_tasks[:tasks_to_move_unhappy], )
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_unhappy

            tasks_to_move_at_risk = min(len(scaling_app_at_risk_tasks),
                                        num_tasks_to_scale)
            old_app_at_risk_tasks[(new_app.id, new_client)] = set(
                scaling_app_at_risk_tasks[:tasks_to_move_at_risk])
            num_tasks_to_scale = num_tasks_to_scale - tasks_to_move_at_risk

            tasks_to_move_happy = min(len(scaling_app_happy_tasks),
                                      num_tasks_to_scale)
            old_app_live_happy_tasks[(new_app.id, new_client)] = set(
                scaling_app_happy_tasks[:tasks_to_move_happy])
            happy_new_tasks = scaling_app_happy_tasks[tasks_to_move_happy:]

            # slack represents remaining the extra remaining instances that are configured
            # in marathon that don't have a launched task yet. When scaling down we want to
            # reduce this slack so marathon doesn't get a chance to launch a new task in
            # that space that we will then have to drain and kill again.
            marathon_tools.take_up_slack(client=new_client, app=new_app)

        # TODO: don't take actions in deploy_service.
        undrain_tasks(
            to_undrain=new_app.tasks,
            leave_draining=old_app_draining_tasks.get((new_app.id, new_client),
                                                      []),
            drain_method=drain_method,
            log_deploy_error=log_deploy_error,
        )

    # log all uncaught exceptions and raise them again
    try:
        try:
            bounce_func = bounce_lib.get_bounce_method_func(bounce_method)
        except KeyError:
            errormsg = 'ERROR: bounce_method not recognized: %s. Must be one of (%s)' % \
                (bounce_method, ', '.join(bounce_lib.list_bounce_methods()))
            log_deploy_error(errormsg)
            return (1, errormsg, None)

        bounce_again_in_seconds = do_bounce(
            bounce_func=bounce_func,
            drain_method=drain_method,
            config=config,
            new_app_running=new_app_running,
            happy_new_tasks=happy_new_tasks,
            old_app_live_happy_tasks=old_app_live_happy_tasks,
            old_app_live_unhappy_tasks=old_app_live_unhappy_tasks,
            old_app_draining_tasks=old_app_draining_tasks,
            old_app_at_risk_tasks=old_app_at_risk_tasks,
            service=service,
            bounce_method=bounce_method,
            serviceinstance=serviceinstance,
            cluster=cluster,
            instance=instance,
            marathon_jobid=marathon_jobid,
            clients=clients,
            soa_dir=soa_dir,
            job_config=job_config,
            bounce_margin_factor=bounce_margin_factor,
        )
    except bounce_lib.LockHeldException:
        logline = 'Failed to get lock to create marathon app for %s.%s' % (
            service, instance)
        log_deploy_error(logline, level='debug')
        return (0, "Couldn't get marathon lock, skipping until next time",
                None)
    except Exception:
        logline = 'Exception raised during deploy of service %s:\n%s' % (
            service, traceback.format_exc())
        log_deploy_error(logline, level='debug')
        raise
    if num_at_risk_tasks:
        bounce_again_in_seconds = 60
    elif new_app_running:
        if new_app.instances > config['instances']:
            bounce_again_in_seconds = 60
    return (0, 'Service deployed.', bounce_again_in_seconds)
Ejemplo n.º 5
0
def status_marathon_job(
    service: str,
    instance: str,
    cluster: str,
    soa_dir: str,
    dashboards: Dict[marathon_tools.MarathonClient, str],
    normal_instance_count: int,
    clients: marathon_tools.MarathonClients,
    job_config: marathon_tools.MarathonServiceConfig,
    desired_app_id: str,
    verbose: int,
) -> Tuple[List[MarathonTask], str]:
    marathon_apps_with_clients = marathon_tools.get_marathon_apps_with_clients(
        clients=clients.get_all_clients_for_service(job_config),
        embed_tasks=True,
        service_name=service,
    )
    all_tasks = []
    all_output = [
        ""
    ]  # One entry that will be replaced with status_marathon_job_human output later.

    running_instances = 0

    if verbose > 0:
        autoscaling_info = get_autoscaling_info(marathon_apps_with_clients, job_config)
        if autoscaling_info:
            all_output.append("  Autoscaling Info:")
            headers = [
                field.replace("_", " ").capitalize()
                for field in ServiceAutoscalingInfo._fields
            ]
            table = [headers, humanize_autoscaling_info(autoscaling_info)]
            all_output.append(
                "\n".join(["    %s" % line for line in format_table(table)])
            )

    deploy_status_for_desired_app = "Waiting for bounce"
    matching_apps_with_clients = marathon_tools.get_matching_apps_with_clients(
        service, instance, marathon_apps_with_clients
    )
    for app, client in matching_apps_with_clients:
        all_tasks.extend(app.tasks)
        (
            deploy_status_for_current_app,
            running_instances_for_current_app,
            out,
        ) = status_marathon_app(
            marathon_client=client,
            app=app,
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=soa_dir,
            dashboards=dashboards,
            verbose=verbose,
        )
        if app.id.lstrip("/") == desired_app_id.lstrip("/"):
            deploy_status_for_desired_app = marathon_tools.MarathonDeployStatus.tostring(
                deploy_status_for_current_app
            )

        running_instances += running_instances_for_current_app
        all_output.append(out)

    all_output[0] = status_marathon_job_human(
        service=service,
        instance=instance,
        deploy_status=deploy_status_for_desired_app,
        desired_app_id=desired_app_id,
        app_count=len(matching_apps_with_clients),
        running_instances=running_instances,
        normal_instance_count=normal_instance_count,
    )

    return all_tasks, "\n".join(all_output)