# Example #1
def autoscale_marathon_instance(marathon_service_config, marathon_tasks,
                                mesos_tasks):
    """Run one autoscaling pass for a marathon service instance.

    Compares measured utilization against the configured setpoint and, if the
    decision policy asks for a change, writes a new (capped) instance count.

    :param marathon_service_config: service config providing instance counts,
        autoscaling params, and service/instance names
    :param marathon_tasks: healthy marathon tasks for this instance
    :param mesos_tasks: corresponding mesos tasks (passed to the provider)
    """
    current_instances = marathon_service_config.get_instances()
    if len(marathon_tasks) != current_instances:
        write_to_log(
            config=marathon_service_config,
            line=
            'Delaying scaling as marathon is either waiting for resources or is delayed'
        )
        # BUG FIX: bail out here. Without this return the function logged the
        # delay but then made a scaling decision anyway, based on incomplete
        # task data (the other copy of this function in this file returns).
        return
    autoscaling_params = marathon_service_config.get_autoscaling_params()
    autoscaling_metrics_provider = get_autoscaling_metrics_provider(
        autoscaling_params.pop(METRICS_PROVIDER_KEY))
    autoscaling_decision_policy = get_autoscaling_decision_policy(
        autoscaling_params.pop(DECISION_POLICY_KEY))

    # NOTE: the provider is intentionally called with 'setpoint' still in the
    # kwargs (the pop happens after the call), matching the original
    # evaluation order of `provider(...) - params.pop('setpoint')`.
    utilization = autoscaling_metrics_provider(
        marathon_tasks, mesos_tasks, **autoscaling_params)
    error = utilization - autoscaling_params.pop('setpoint')
    autoscaling_direction = autoscaling_decision_policy(
        marathon_service_config, error, **autoscaling_params)
    if autoscaling_direction:
        autoscaling_amount = get_new_instance_count(current_instances,
                                                    autoscaling_direction)
        # Clamp the requested count to the service's allowed range.
        instances = marathon_service_config.limit_instance_count(
            autoscaling_amount)
        if instances != current_instances:
            write_to_log(config=marathon_service_config,
                         line='Scaling from %d to %d' %
                         (current_instances, instances))
            set_instances_for_marathon_service(
                service=marathon_service_config.service,
                instance=marathon_service_config.instance,
                instance_count=instances,
            )
def zookeeper_scale_job(context, service, instance, number):
    """Set the instance count for a service in zookeeper (test step helper).

    Patches SystemPaastaConfig.get_zk_hosts so the write targets the test
    zookeeper from *context* rather than the real cluster config.
    """
    # mock.patch.object is itself a context manager, so contextlib.nested
    # (Python-2-only, removed in Python 3) is unnecessary for a single patch.
    with mock.patch.object(
        SystemPaastaConfig, 'get_zk_hosts', autospec=True,
        return_value=context.zk_hosts,
    ):
        marathon_tools.set_instances_for_marathon_service(
            service, instance, number, soa_dir=context.soa_dir)
# Example #3
def update_autoscaler_count(request):
    """Persist a client-requested instance count for a service instance.

    Reads service, instance, and desired_instances from the swagger request
    and writes the count, echoing it back with a 202 response.
    """
    svc = request.swagger_data.get('service')
    inst = request.swagger_data.get('instance')
    desired = request.swagger_data.get('json_body')['desired_instances']
    set_instances_for_marathon_service(
        service=svc, instance=inst, instance_count=desired)
    return Response(
        json_body={'desired_instances': desired},
        status_code=202,
    )
def autoscale_marathon_instance(marathon_service_config, marathon_tasks,
                                mesos_tasks):
    """Run one autoscaling pass for a marathon service instance.

    Measures utilization, turns it into an error relative to the setpoint,
    asks the decision policy for a delta, and applies the (clamped) new
    instance count if it differs from the current one. Skips the pass while
    marathon's healthy-task count does not match the expected instance count.
    """
    current_instances = marathon_service_config.get_instances()
    # Guard: don't scale while marathon hasn't converged on the task count.
    if len(marathon_tasks) != current_instances:
        write_to_log(
            config=marathon_service_config,
            line='Delaying scaling as marathon is either waiting for resources or is delayed',
        )
        return

    params = marathon_service_config.get_autoscaling_params()
    metrics_provider = get_service_metrics_provider(
        params.pop(SERVICE_METRICS_PROVIDER_KEY))
    decision_policy = get_decision_policy(params.pop(DECISION_POLICY_KEY))

    utilization = metrics_provider(
        marathon_service_config, marathon_tasks, mesos_tasks, **params)
    error = get_error_from_utilization(
        utilization=utilization,
        setpoint=params.pop('setpoint'),
        current_instances=current_instances,
    )

    zk_path = compose_autoscaling_zookeeper_root(
        service=marathon_service_config.service,
        instance=marathon_service_config.instance,
    )
    delta = decision_policy(
        error=error,
        min_instances=marathon_service_config.get_min_instances(),
        max_instances=marathon_service_config.get_max_instances(),
        current_instances=current_instances,
        zookeeper_path=zk_path,
        **params)

    target = marathon_service_config.limit_instance_count(
        current_instances + delta)
    if target == current_instances:
        write_to_log(
            config=marathon_service_config,
            line='Staying at %d instances (%s)' % (
                current_instances, humanize_error(error)),
            level='debug',
        )
        return
    write_to_log(
        config=marathon_service_config,
        line='Scaling from %d to %d instances (%s)' % (
            current_instances, target, humanize_error(error)),
    )
    set_instances_for_marathon_service(
        service=marathon_service_config.service,
        instance=marathon_service_config.instance,
        instance_count=target,
    )
# Example #5
def autoscale_marathon_instance(marathon_service_config, marathon_tasks, mesos_tasks):
    """Perform a single autoscaling evaluation for one service instance.

    Derives a utilization error against the configured setpoint, consults the
    decision policy for a scaling amount, and writes the clamped new count
    when it changes; does nothing while marathon's task count is out of sync.
    """
    current_instances = marathon_service_config.get_instances()
    if len(marathon_tasks) != current_instances:
        write_to_log(
            config=marathon_service_config,
            line='Delaying scaling as marathon is either waiting for resources or is delayed',
        )
        return

    autoscaling_params = marathon_service_config.get_autoscaling_params()
    provider_name = autoscaling_params.pop(SERVICE_METRICS_PROVIDER_KEY)
    policy_name = autoscaling_params.pop(DECISION_POLICY_KEY)
    metrics_provider = get_service_metrics_provider(provider_name)
    decision_policy = get_decision_policy(policy_name)

    current_utilization = metrics_provider(
        marathon_service_config, marathon_tasks, mesos_tasks,
        **autoscaling_params)
    error = get_error_from_utilization(
        utilization=current_utilization,
        setpoint=autoscaling_params.pop('setpoint'),
        current_instances=current_instances,
    )

    zk_root = compose_autoscaling_zookeeper_root(
        service=marathon_service_config.service,
        instance=marathon_service_config.instance,
    )
    scaling_amount = decision_policy(
        error=error,
        min_instances=marathon_service_config.get_min_instances(),
        max_instances=marathon_service_config.get_max_instances(),
        current_instances=current_instances,
        zookeeper_path=zk_root,
        **autoscaling_params
    )

    new_instance_count = marathon_service_config.limit_instance_count(
        current_instances + scaling_amount)
    if new_instance_count != current_instances:
        write_to_log(
            config=marathon_service_config,
            line='Scaling from %d to %d instances (%s)' % (
                current_instances, new_instance_count, humanize_error(error)),
        )
        set_instances_for_marathon_service(
            service=marathon_service_config.service,
            instance=marathon_service_config.instance,
            instance_count=new_instance_count,
        )
    else:
        write_to_log(
            config=marathon_service_config,
            line='Staying at %d instances (%s)' % (current_instances, humanize_error(error)),
            level='debug',
        )
# Example #6
def update_autoscaler_count(request):
    """Set a client-requested instance count for an autoscaled service.

    Validates the body, checks autoscaling is configured for the instance,
    persists the raw value to zookeeper, and reports (via a WARNING status)
    when the value lies outside [min_instances, max_instances].
    """
    service = request.swagger_data.get("service")
    instance = request.swagger_data.get("instance")
    body = request.swagger_data.get("json_body")
    desired_instances = body["desired_instances"]
    if not isinstance(desired_instances, int):
        raise ApiFailure(
            'The provided body does not have an integer value for "desired_instances": {}'.format(
                request.swagger_data.get("json_body")),
            500,
        )

    try:
        service_config = load_marathon_service_config(
            service=service,
            instance=instance,
            cluster=settings.cluster,
            soa_dir=settings.soa_dir,
            load_deployments=False,
        )
    except Exception:
        raise ApiFailure(
            f"Unable to load service config for {service}.{instance}", 404)

    max_instances = service_config.get_max_instances()
    # A missing max_instances means autoscaling is not configured at all.
    if max_instances is None:
        raise ApiFailure(
            f"Autoscaling is not enabled for {service}.{instance}", 404)

    min_instances = service_config.get_min_instances()

    # Persist the raw client value to zk; get_instances() clamps reads to
    # [min_instances, max_instances].
    set_instances_for_marathon_service(
        service=service, instance=instance, instance_count=desired_instances)

    status = "SUCCESS"
    if desired_instances > max_instances:
        desired_instances = max_instances
        status = (
            "WARNING desired_instances is greater than max_instances %d" %
            max_instances)
    elif desired_instances < min_instances:
        desired_instances = min_instances
        status = ("WARNING desired_instances is less than min_instances %d" %
                  min_instances)

    return Response(
        json_body={"desired_instances": desired_instances, "status": status},
        status_code=202,
    )
# Example #7
def autoscale_marathon_instance(marathon_service_config, marathon_tasks, mesos_tasks):
    """Ask the decision policy whether to scale and apply the result.

    In this variant the decision policy drives the metrics provider itself;
    the full (un-popped) autoscaling params dict is forwarded to it.
    """
    params = marathon_service_config.get_autoscaling_params()
    metrics_provider = get_autoscaling_metrics_provider(params[METRICS_PROVIDER_KEY])
    decision_policy = get_autoscaling_decision_policy(params[DECISION_POLICY_KEY])
    direction = decision_policy(
        marathon_service_config, metrics_provider,
        marathon_tasks, mesos_tasks, **params)
    if not direction:
        return
    current_instances = marathon_service_config.get_instances()
    requested = get_new_instance_count(current_instances, direction)
    capped = marathon_service_config.limit_instance_count(requested)
    if capped != current_instances:
        write_to_log(
            config=marathon_service_config,
            line='Scaling from %d to %d' % (current_instances, capped),
        )
        set_instances_for_marathon_service(
            service=marathon_service_config.service,
            instance=marathon_service_config.instance,
            instance_count=capped,
        )
# Example #8
def update_autoscaler_count(request):
    """Persist a desired instance count for an autoscaled service instance.

    Loads the service config to confirm autoscaling is enabled, writes the
    raw value to zookeeper, and warns when it falls outside the configured
    [min_instances, max_instances] range.
    """
    service = request.swagger_data.get('service')
    instance = request.swagger_data.get('instance')
    desired_instances = request.swagger_data.get('json_body')['desired_instances']

    try:
        service_config = load_marathon_service_config(
            service=service,
            instance=instance,
            cluster=settings.cluster,
            soa_dir=settings.soa_dir,
            load_deployments=False,
        )
    except Exception:
        raise ApiFailure(
            'Unable to load service config for %s.%s' % (service, instance),
            404,
        )

    max_instances = service_config.get_max_instances()
    # No max_instances configured means autoscaling is disabled.
    if max_instances is None:
        raise ApiFailure(
            'Autoscaling is not enabled for %s.%s' % (service, instance),
            404,
        )
    min_instances = service_config.get_min_instances()

    # Dump whatever number from the client to zk. get_instances() will limit
    # readings from zk to [min_instances, max_instances].
    set_instances_for_marathon_service(
        service=service, instance=instance, instance_count=desired_instances)

    if desired_instances > max_instances:
        desired_instances = max_instances
        status = 'WARNING desired_instances is greater than max_instances %d' % max_instances
    elif desired_instances < min_instances:
        desired_instances = min_instances
        status = 'WARNING desired_instances is less than min_instances %d' % min_instances
    else:
        status = 'SUCCESS'

    return Response(
        json_body={'desired_instances': desired_instances, 'status': status},
        status_code=202,
    )
# Example #9
def autoscale_marathon_instance(marathon_service_config, marathon_tasks, mesos_tasks):
    """Run one autoscaling pass for a marathon service instance.

    Computes the error between measured utilization and the configured
    setpoint, asks the decision policy for a direction, and writes the new
    (clamped) instance count if it changed.
    """
    current_instances = marathon_service_config.get_instances()
    if len(marathon_tasks) != current_instances:
        write_to_log(config=marathon_service_config,
                     line='Delaying scaling as marathon is either waiting for resources or is delayed')
        # BUG FIX: return here. Previously the function logged the delay but
        # fell through and scaled anyway on incomplete task data (the other
        # variant of this function in this file returns at this point).
        return
    autoscaling_params = marathon_service_config.get_autoscaling_params()
    autoscaling_metrics_provider = get_autoscaling_metrics_provider(
        autoscaling_params.pop(METRICS_PROVIDER_KEY))
    autoscaling_decision_policy = get_autoscaling_decision_policy(
        autoscaling_params.pop(DECISION_POLICY_KEY))

    # NOTE: the provider is called with 'setpoint' still present in the
    # kwargs; the pop runs afterwards, preserving the original evaluation
    # order of `provider(...) - params.pop('setpoint')`.
    utilization = autoscaling_metrics_provider(
        marathon_service_config, marathon_tasks, mesos_tasks,
        **autoscaling_params)
    error = utilization - autoscaling_params.pop('setpoint')
    # Typo fix in log message: 'Recieved' -> 'Received'.
    write_to_log(config=marathon_service_config,
                 line='Received error from metrics provider: %f' % error)
    autoscaling_direction = autoscaling_decision_policy(
        marathon_service_config, error, **autoscaling_params)
    if autoscaling_direction:
        autoscaling_amount = get_new_instance_count(current_instances, autoscaling_direction)
        instances = marathon_service_config.limit_instance_count(autoscaling_amount)
        if instances != current_instances:
            write_to_log(config=marathon_service_config,
                         line='Scaling from %d to %d' % (current_instances, instances))
            set_instances_for_marathon_service(
                service=marathon_service_config.service,
                instance=marathon_service_config.instance,
                instance_count=instances,
            )
# Example #10
def update_autoscaler_count(request):
    """Write a client-requested instance count for an autoscaled service.

    Confirms autoscaling is configured, stores the raw value in zookeeper,
    and returns a WARNING status when the value lies outside the configured
    min/max bounds.
    """
    service = request.swagger_data.get("service")
    instance = request.swagger_data.get("instance")
    desired_instances = request.swagger_data.get("json_body")["desired_instances"]

    try:
        service_config = load_marathon_service_config(
            service=service,
            instance=instance,
            cluster=settings.cluster,
            soa_dir=settings.soa_dir,
            load_deployments=False,
        )
    except Exception:
        raise ApiFailure(
            "Unable to load service config for %s.%s" % (service, instance),
            404,
        )

    max_instances = service_config.get_max_instances()
    # Autoscaling requires max_instances to be configured.
    if max_instances is None:
        raise ApiFailure(
            "Autoscaling is not enabled for %s.%s" % (service, instance),
            404,
        )
    min_instances = service_config.get_min_instances()

    # Dump whatever number from the client to zk. get_instances() will limit
    # readings from zk to [min_instances, max_instances].
    set_instances_for_marathon_service(
        service=service, instance=instance, instance_count=desired_instances)

    if desired_instances > max_instances:
        desired_instances = max_instances
        status = "WARNING desired_instances is greater than max_instances %d" % max_instances
    elif desired_instances < min_instances:
        desired_instances = min_instances
        status = "WARNING desired_instances is less than min_instances %d" % min_instances
    else:
        status = "SUCCESS"

    return Response(
        json_body={"desired_instances": desired_instances, "status": status},
        status_code=202,
    )
def autoscale_marathon_instance(
    marathon_service_config: MarathonServiceConfig,
    system_paasta_config: SystemPaastaConfig,
    marathon_tasks: Sequence[MarathonTask],
    mesos_tasks: Sequence[Task],
) -> None:
    """Make one autoscaling decision for a marathon service instance.

    Measures utilization, derives an error against the configured setpoint,
    computes a new instance count, records the decision, and applies the new
    count only when it differs from the current one. The whole pass runs
    under a per-service/instance lock; if the lock is already held by
    another run, this pass is skipped with a warning.

    :param marathon_service_config: config object for the service instance
    :param system_paasta_config: cluster-wide paasta configuration
    :param marathon_tasks: healthy marathon tasks for this instance
    :param mesos_tasks: corresponding mesos tasks
    """
    try:
        with create_autoscaling_lock(marathon_service_config.service,
                                     marathon_service_config.instance):
            current_instances = marathon_service_config.get_instances()
            # True when too few healthy tasks are visible to trust the
            # metrics; used below to veto scale-downs and to skip persisting
            # decision data.
            task_data_insufficient = is_task_data_insufficient(
                marathon_service_config=marathon_service_config,
                marathon_tasks=marathon_tasks,
                current_instances=current_instances,
            )
            autoscaling_params = marathon_service_config.get_autoscaling_params(
            )
            # Filled in by get_utilization as a side channel; forwarded to
            # _record_autoscaling_decision for logging.
            log_utilization_data: Mapping = {}
            utilization = get_utilization(
                marathon_service_config=marathon_service_config,
                system_paasta_config=system_paasta_config,
                autoscaling_params=autoscaling_params,
                log_utilization_data=log_utilization_data,
                marathon_tasks=marathon_tasks,
                mesos_tasks=mesos_tasks,
            )
            error = get_error_from_utilization(
                utilization=utilization,
                setpoint=autoscaling_params["setpoint"],
                current_instances=current_instances,
            )
            num_healthy_instances = len(marathon_tasks)
            # persist_data is disabled when task data is insufficient so a
            # degraded snapshot does not pollute stored decision state.
            new_instance_count = get_new_instance_count(
                utilization=utilization,
                error=error,
                autoscaling_params=autoscaling_params,
                current_instances=current_instances,
                marathon_service_config=marathon_service_config,
                num_healthy_instances=num_healthy_instances,
                persist_data=(not task_data_insufficient),
            )
            # 70% of current count; only forwarded to the decision record
            # here — presumably the floor considered safe for downscaling.
            safe_downscaling_threshold = int(current_instances * 0.7)
            _record_autoscaling_decision(
                marathon_service_config=marathon_service_config,
                autoscaling_params=autoscaling_params,
                utilization=utilization,
                log_utilization_data=log_utilization_data,
                error=error,
                current_instances=current_instances,
                num_healthy_instances=num_healthy_instances,
                new_instance_count=new_instance_count,
                safe_downscaling_threshold=safe_downscaling_threshold,
                task_data_insufficient=task_data_insufficient,
            )
            if new_instance_count != current_instances:
                # Never scale down on incomplete task data: wait until the
                # healthy-task picture is trustworthy.
                if new_instance_count < current_instances and task_data_insufficient:
                    write_to_log(
                        config=marathon_service_config,
                        line=
                        "Delaying scaling *down* as we found too few healthy tasks running in marathon. "
                        "This can happen because tasks are delayed/waiting/unhealthy or because we are "
                        "waiting for tasks to be killed. Will wait for sufficient healthy tasks before "
                        "we make a decision to scale down.",
                        level="debug",
                    )
                    return
                else:
                    # Apply the new count first, then log the change as an
                    # 'event' so it is visible in the service log.
                    set_instances_for_marathon_service(
                        service=marathon_service_config.service,
                        instance=marathon_service_config.instance,
                        instance_count=new_instance_count,
                    )
                    write_to_log(
                        config=marathon_service_config,
                        line="Scaling from %d to %d instances (%s)" % (
                            current_instances,
                            new_instance_count,
                            humanize_error(error),
                        ),
                        level="event",
                    )
            else:
                write_to_log(
                    config=marathon_service_config,
                    line="Staying at %d instances (%s)" %
                    (current_instances, humanize_error(error)),
                    level="debug",
                )
    except LockHeldException:
        # Another autoscaler run holds the lock; skip this pass entirely.
        log.warning(
            "Skipping autoscaling run for {service}.{instance} because the lock is held"
            .format(
                service=marathon_service_config.service,
                instance=marathon_service_config.instance,
            ))