def autoscale_marathon_instance(marathon_service_config, marathon_tasks, mesos_tasks):
    current_instances = marathon_service_config.get_instances()
    if len(marathon_tasks) != current_instances:
        write_to_log(
            config=marathon_service_config,
            line='Delaying scaling as marathon is either waiting for resources or is delayed',
        )
        return
    autoscaling_params = marathon_service_config.get_autoscaling_params()
    autoscaling_metrics_provider = get_autoscaling_metrics_provider(autoscaling_params.pop(METRICS_PROVIDER_KEY))
    autoscaling_decision_policy = get_autoscaling_decision_policy(autoscaling_params.pop(DECISION_POLICY_KEY))
    error = autoscaling_metrics_provider(marathon_tasks, mesos_tasks, **autoscaling_params) - autoscaling_params.pop('setpoint')
    autoscaling_direction = autoscaling_decision_policy(marathon_service_config, error, **autoscaling_params)
    if autoscaling_direction:
        autoscaling_amount = get_new_instance_count(current_instances, autoscaling_direction)
        instances = marathon_service_config.limit_instance_count(autoscaling_amount)
        if instances != current_instances:
            write_to_log(config=marathon_service_config, line='Scaling from %d to %d' % (current_instances, instances))
            set_instances_for_marathon_service(
                service=marathon_service_config.service,
                instance=marathon_service_config.instance,
                instance_count=instances,
            )
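# Hypothetical sketch: in this older flavour the decision policy returns a
# direction (-1, 0, or +1) and get_new_instance_count() turns it into a target
# instance count. A minimal threshold policy consistent with the calling
# convention above could look like this; the 0.1 dead band and the +/-1 step
# are assumptions, not the project's actual policy.
def threshold_decision_policy(marathon_service_config, error, threshold=0.1, **kwargs):
    # Scale up when utilization overshoots the setpoint, down when it undershoots.
    if error > threshold:
        return 1
    elif error < -threshold:
        return -1
    return 0

def get_new_instance_count(current_instances, autoscaling_direction):
    # Move one instance at a time in the direction the policy chose.
    return current_instances + autoscaling_direction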
def zookeeper_scale_job(context, service, instance, number):
    with contextlib.nested(
        mock.patch.object(SystemPaastaConfig, 'get_zk_hosts', autospec=True, return_value=context.zk_hosts),
    ) as (
        _,
    ):
        marathon_tools.set_instances_for_marathon_service(service, instance, number, soa_dir=context.soa_dir)
def update_autoscaler_count(request):
    service = request.swagger_data.get('service')
    instance = request.swagger_data.get('instance')
    desired_instances = request.swagger_data.get('json_body')['desired_instances']
    set_instances_for_marathon_service(service=service, instance=instance, instance_count=desired_instances)
    response_body = {'desired_instances': desired_instances}
    return Response(json_body=response_body, status_code=202)
def autoscale_marathon_instance(marathon_service_config, marathon_tasks, mesos_tasks):
    current_instances = marathon_service_config.get_instances()
    if len(marathon_tasks) != current_instances:
        write_to_log(
            config=marathon_service_config,
            line='Delaying scaling as marathon is either waiting for resources or is delayed',
        )
        return
    autoscaling_params = marathon_service_config.get_autoscaling_params()
    autoscaling_metrics_provider = get_service_metrics_provider(autoscaling_params.pop(SERVICE_METRICS_PROVIDER_KEY))
    autoscaling_decision_policy = get_decision_policy(autoscaling_params.pop(DECISION_POLICY_KEY))
    utilization = autoscaling_metrics_provider(marathon_service_config, marathon_tasks, mesos_tasks, **autoscaling_params)
    error = get_error_from_utilization(
        utilization=utilization,
        setpoint=autoscaling_params.pop('setpoint'),
        current_instances=current_instances,
    )
    zookeeper_path = compose_autoscaling_zookeeper_root(
        service=marathon_service_config.service,
        instance=marathon_service_config.instance,
    )
    autoscaling_amount = autoscaling_decision_policy(
        error=error,
        min_instances=marathon_service_config.get_min_instances(),
        max_instances=marathon_service_config.get_max_instances(),
        current_instances=current_instances,
        zookeeper_path=zookeeper_path,
        **autoscaling_params
    )
    new_instance_count = marathon_service_config.limit_instance_count(current_instances + autoscaling_amount)
    if new_instance_count != current_instances:
        write_to_log(
            config=marathon_service_config,
            line='Scaling from %d to %d instances (%s)' % (current_instances, new_instance_count, humanize_error(error)),
        )
        set_instances_for_marathon_service(
            service=marathon_service_config.service,
            instance=marathon_service_config.instance,
            instance_count=new_instance_count,
        )
    else:
        write_to_log(
            config=marathon_service_config,
            line='Staying at %d instances (%s)' % (current_instances, humanize_error(error)),
            level='debug',
        )
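# Hypothetical sketch: the versions above that compute a utilization value
# delegate the setpoint comparison to get_error_from_utilization(). A minimal
# implementation consistent with how it is called (utilization, setpoint, and
# current_instances in, a signed error out) might look like this; the
# "ignore errors smaller than one task's worth" dead band is an assumption.
def get_error_from_utilization(utilization, setpoint, current_instances):
    error = utilization - setpoint
    # Suppress errors too small to change the fleet by at least one instance.
    if abs(error) * current_instances < 1:
        error = 0.0
    return error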
def update_autoscaler_count(request): service = request.swagger_data.get("service") instance = request.swagger_data.get("instance") desired_instances = request.swagger_data.get( "json_body")["desired_instances"] if not isinstance(desired_instances, int): error_message = 'The provided body does not have an integer value for "desired_instances": {}'.format( request.swagger_data.get("json_body")) raise ApiFailure(error_message, 500) try: service_config = load_marathon_service_config( service=service, instance=instance, cluster=settings.cluster, soa_dir=settings.soa_dir, load_deployments=False, ) except Exception: error_message = f"Unable to load service config for {service}.{instance}" raise ApiFailure(error_message, 404) max_instances = service_config.get_max_instances() if max_instances is None: error_message = f"Autoscaling is not enabled for {service}.{instance}" raise ApiFailure(error_message, 404) min_instances = service_config.get_min_instances() # Dump whatever number from the client to zk. get_instances() will limit # readings from zk to [min_instances, max_instances]. set_instances_for_marathon_service(service=service, instance=instance, instance_count=desired_instances) status = "SUCCESS" if desired_instances > max_instances: desired_instances = max_instances status = ( "WARNING desired_instances is greater than max_instances %d" % max_instances) elif desired_instances < min_instances: desired_instances = min_instances status = ("WARNING desired_instances is less than min_instances %d" % min_instances) response_body = {"desired_instances": desired_instances, "status": status} return Response(json_body=response_body, status_code=202)
def autoscale_marathon_instance(marathon_service_config, marathon_tasks, mesos_tasks):
    autoscaling_params = marathon_service_config.get_autoscaling_params()
    autoscaling_metrics_provider = get_autoscaling_metrics_provider(autoscaling_params[METRICS_PROVIDER_KEY])
    autoscaling_decision_policy = get_autoscaling_decision_policy(autoscaling_params[DECISION_POLICY_KEY])
    autoscaling_direction = autoscaling_decision_policy(marathon_service_config, autoscaling_metrics_provider,
                                                        marathon_tasks, mesos_tasks, **autoscaling_params)
    if autoscaling_direction:
        current_instances = marathon_service_config.get_instances()
        autoscaling_amount = get_new_instance_count(current_instances, autoscaling_direction)
        instances = marathon_service_config.limit_instance_count(autoscaling_amount)
        if instances != current_instances:
            write_to_log(config=marathon_service_config, line='Scaling from %d to %d' % (current_instances, instances))
            set_instances_for_marathon_service(
                service=marathon_service_config.service,
                instance=marathon_service_config.instance,
                instance_count=instances,
            )
def update_autoscaler_count(request):
    service = request.swagger_data.get('service')
    instance = request.swagger_data.get('instance')
    desired_instances = request.swagger_data.get('json_body')['desired_instances']
    try:
        service_config = load_marathon_service_config(
            service=service,
            instance=instance,
            cluster=settings.cluster,
            soa_dir=settings.soa_dir,
            load_deployments=False,
        )
    except Exception:
        error_message = 'Unable to load service config for %s.%s' % (service, instance)
        raise ApiFailure(error_message, 404)
    max_instances = service_config.get_max_instances()
    if max_instances is None:
        error_message = 'Autoscaling is not enabled for %s.%s' % (service, instance)
        raise ApiFailure(error_message, 404)
    min_instances = service_config.get_min_instances()
    # Dump whatever number from the client to zk. get_instances() will limit
    # readings from zk to [min_instances, max_instances].
    set_instances_for_marathon_service(service=service, instance=instance, instance_count=desired_instances)
    status = 'SUCCESS'
    if desired_instances > max_instances:
        desired_instances = max_instances
        status = 'WARNING desired_instances is greater than max_instances %d' % max_instances
    elif desired_instances < min_instances:
        desired_instances = min_instances
        status = 'WARNING desired_instances is less than min_instances %d' % min_instances
    response_body = {'desired_instances': desired_instances, 'status': status}
    return Response(json_body=response_body, status_code=202)
def autoscale_marathon_instance(marathon_service_config, marathon_tasks, mesos_tasks):
    current_instances = marathon_service_config.get_instances()
    if len(marathon_tasks) != current_instances:
        write_to_log(config=marathon_service_config,
                     line='Delaying scaling as marathon is either waiting for resources or is delayed')
        return
    autoscaling_params = marathon_service_config.get_autoscaling_params()
    autoscaling_metrics_provider = get_autoscaling_metrics_provider(autoscaling_params.pop(METRICS_PROVIDER_KEY))
    autoscaling_decision_policy = get_autoscaling_decision_policy(autoscaling_params.pop(DECISION_POLICY_KEY))
    error = autoscaling_metrics_provider(marathon_service_config, marathon_tasks, mesos_tasks,
                                         **autoscaling_params) - autoscaling_params.pop('setpoint')
    write_to_log(config=marathon_service_config, line='Received error from metrics provider: %f' % error)
    autoscaling_direction = autoscaling_decision_policy(marathon_service_config, error, **autoscaling_params)
    if autoscaling_direction:
        autoscaling_amount = get_new_instance_count(current_instances, autoscaling_direction)
        instances = marathon_service_config.limit_instance_count(autoscaling_amount)
        if instances != current_instances:
            write_to_log(config=marathon_service_config, line='Scaling from %d to %d' % (current_instances, instances))
            set_instances_for_marathon_service(
                service=marathon_service_config.service,
                instance=marathon_service_config.instance,
                instance_count=instances,
            )
def update_autoscaler_count(request): service = request.swagger_data.get("service") instance = request.swagger_data.get("instance") desired_instances = request.swagger_data.get("json_body")["desired_instances"] try: service_config = load_marathon_service_config( service=service, instance=instance, cluster=settings.cluster, soa_dir=settings.soa_dir, load_deployments=False, ) except Exception: error_message = "Unable to load service config for %s.%s" % (service, instance) raise ApiFailure(error_message, 404) max_instances = service_config.get_max_instances() if max_instances is None: error_message = "Autoscaling is not enabled for %s.%s" % (service, instance) raise ApiFailure(error_message, 404) min_instances = service_config.get_min_instances() # Dump whatever number from the client to zk. get_instances() will limit # readings from zk to [min_instances, max_instances]. set_instances_for_marathon_service(service=service, instance=instance, instance_count=desired_instances) status = "SUCCESS" if desired_instances > max_instances: desired_instances = max_instances status = "WARNING desired_instances is greater than max_instances %d" % max_instances elif desired_instances < min_instances: desired_instances = min_instances status = "WARNING desired_instances is less than min_instances %d" % min_instances response_body = {"desired_instances": desired_instances, "status": status} return Response(json_body=response_body, status_code=202)
def autoscale_marathon_instance(
    marathon_service_config: MarathonServiceConfig,
    system_paasta_config: SystemPaastaConfig,
    marathon_tasks: Sequence[MarathonTask],
    mesos_tasks: Sequence[Task],
) -> None:
    try:
        with create_autoscaling_lock(marathon_service_config.service, marathon_service_config.instance):
            current_instances = marathon_service_config.get_instances()
            task_data_insufficient = is_task_data_insufficient(
                marathon_service_config=marathon_service_config,
                marathon_tasks=marathon_tasks,
                current_instances=current_instances,
            )
            autoscaling_params = marathon_service_config.get_autoscaling_params()
            log_utilization_data: Mapping = {}
            utilization = get_utilization(
                marathon_service_config=marathon_service_config,
                system_paasta_config=system_paasta_config,
                autoscaling_params=autoscaling_params,
                log_utilization_data=log_utilization_data,
                marathon_tasks=marathon_tasks,
                mesos_tasks=mesos_tasks,
            )
            error = get_error_from_utilization(
                utilization=utilization,
                setpoint=autoscaling_params["setpoint"],
                current_instances=current_instances,
            )
            num_healthy_instances = len(marathon_tasks)
            new_instance_count = get_new_instance_count(
                utilization=utilization,
                error=error,
                autoscaling_params=autoscaling_params,
                current_instances=current_instances,
                marathon_service_config=marathon_service_config,
                num_healthy_instances=num_healthy_instances,
                persist_data=(not task_data_insufficient),
            )
            safe_downscaling_threshold = int(current_instances * 0.7)
            _record_autoscaling_decision(
                marathon_service_config=marathon_service_config,
                autoscaling_params=autoscaling_params,
                utilization=utilization,
                log_utilization_data=log_utilization_data,
                error=error,
                current_instances=current_instances,
                num_healthy_instances=num_healthy_instances,
                new_instance_count=new_instance_count,
                safe_downscaling_threshold=safe_downscaling_threshold,
                task_data_insufficient=task_data_insufficient,
            )
            if new_instance_count != current_instances:
                if new_instance_count < current_instances and task_data_insufficient:
                    write_to_log(
                        config=marathon_service_config,
                        line="Delaying scaling *down* as we found too few healthy tasks running in marathon. "
                        "This can happen because tasks are delayed/waiting/unhealthy or because we are "
                        "waiting for tasks to be killed. Will wait for sufficient healthy tasks before "
                        "we make a decision to scale down.",
                        level="debug",
                    )
                    return
                else:
                    set_instances_for_marathon_service(
                        service=marathon_service_config.service,
                        instance=marathon_service_config.instance,
                        instance_count=new_instance_count,
                    )
                    write_to_log(
                        config=marathon_service_config,
                        line="Scaling from %d to %d instances (%s)" % (
                            current_instances,
                            new_instance_count,
                            humanize_error(error),
                        ),
                        level="event",
                    )
            else:
                write_to_log(
                    config=marathon_service_config,
                    line="Staying at %d instances (%s)" % (current_instances, humanize_error(error)),
                    level="debug",
                )
    except LockHeldException:
        log.warning(
            "Skipping autoscaling run for {service}.{instance} because the lock is held".format(
                service=marathon_service_config.service,
                instance=marathon_service_config.instance,
            )
        )
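# Hypothetical sketch: the final version serialises autoscaling runs per
# service instance with create_autoscaling_lock() and treats a held lock as a
# reason to skip. Assuming a Kazoo recipe lock, one way to express that
# contract is below; the lock path, the 1-second timeout, the zk_hosts default,
# and the local LockHeldException definition are all assumptions.
from contextlib import contextmanager

from kazoo.client import KazooClient
from kazoo.exceptions import LockTimeout

class LockHeldException(Exception):
    pass

@contextmanager
def create_autoscaling_lock(service, instance, zk_hosts='localhost:2181'):
    client = KazooClient(hosts=zk_hosts)
    client.start()
    lock = client.Lock(f'/autoscaling/{service}/{instance}/autoscaling.lock')
    try:
        # Raises LockTimeout if another autoscaler run already holds the lock.
        lock.acquire(timeout=1.0)
        try:
            yield
        finally:
            lock.release()
    except LockTimeout:
        raise LockHeldException(f'{service}.{instance} autoscaling lock is held')
    finally:
        client.stop()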