Example #1
def emit_replication_metrics(
    replication_infos: Mapping[str, Mapping[str, Mapping[str, int]]],
    instance_config: LongRunningServiceConfig,
    expected_count: int,
) -> None:
    for provider, replication_info in replication_infos.items():
        meteorite_dims = {
            "paasta_service": instance_config.service,
            "paasta_cluster": instance_config.cluster,
            "paasta_instance": instance_config.instance,
            "paasta_pool": instance_config.get_pool(),
            "service_discovery_provider": provider,
        }

        num_available_backends = 0
        for available_backends in replication_info.values():
            num_available_backends += available_backends.get(
                instance_config.job_id, 0)
        available_backends_gauge = yelp_meteorite.create_gauge(
            "paasta.service.available_backends", meteorite_dims)
        available_backends_gauge.set(num_available_backends)

        critical_percentage = instance_config.get_replication_crit_percentage()
        num_critical_backends = critical_percentage * expected_count / 100.0
        critical_backends_gauge = yelp_meteorite.create_gauge(
            "paasta.service.critical_backends", meteorite_dims)
        critical_backends_gauge.set(num_critical_backends)

        expected_backends_gauge = yelp_meteorite.create_gauge(
            "paasta.service.expected_backends", meteorite_dims)
        expected_backends_gauge.set(expected_count)
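For context, a minimal sketch of a call, assuming (based on the loops above) that replication_infos maps provider → location → {job_id: backend count}; the names and numbers below are made up:

# Hypothetical input; structure inferred from the function body, values are illustrative only.
replication_infos = {
    "smartstack": {
        "uswest1-prod": {"example_service.main": 3},
        "useast1-prod": {"example_service.main": 2},
    },
}
emit_replication_metrics(
    replication_infos=replication_infos,
    instance_config=instance_config,  # a LongRunningServiceConfig for example_service.main
    expected_count=6,
)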
Example #2
def emit_metrics_for_type(instance_type):
    cluster = load_system_paasta_config().get_cluster()
    instances = get_services_for_cluster(cluster=cluster, instance_type=instance_type)

    for service, instance in instances:
        service_instance_config = get_instance_config(
            service=service, instance=instance, cluster=cluster
        )
        dimensions = {
            "paasta_service": service_instance_config.service,
            "paasta_cluster": service_instance_config.cluster,
            "paasta_instance": service_instance_config.instance,
            "paasta_pool": service_instance_config.get_pool(),
        }

        log.info(f"Emitting paasta.service.* with dimensions {dimensions}")
        gauge = yelp_meteorite.create_gauge("paasta.service.cpus", dimensions)
        gauge.set(service_instance_config.get_cpus())
        gauge = yelp_meteorite.create_gauge("paasta.service.mem", dimensions)
        gauge.set(service_instance_config.get_mem())
        gauge = yelp_meteorite.create_gauge("paasta.service.disk", dimensions)
        gauge.set(service_instance_config.get_disk())
        if hasattr(service_instance_config, "get_instances"):
            if service_instance_config.get_max_instances() is None:
                gauge = yelp_meteorite.create_gauge(
                    "paasta.service.instances", dimensions
                )
                gauge.set(service_instance_config.get_instances())
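A sketch of how this might be driven, with hypothetical instance types (the real set depends on the PaaSTA installation):

# Hypothetical driver loop; instance type names are illustrative only.
for instance_type in ("marathon", "kubernetes"):
    emit_metrics_for_type(instance_type)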
Example #3
def emit_metrics_for_type(instance_type):
    cluster = load_system_paasta_config().get_cluster()
    instances = get_services_for_cluster(
        cluster=cluster,
        instance_type=instance_type,
    )

    for service, instance in instances:
        service_instance_config = get_instance_config(
            service=service,
            instance=instance,
            cluster=cluster,
        )
        dimensions = {
            'paasta_service': service_instance_config.service,
            'paasta_cluster': service_instance_config.cluster,
            'paasta_instance': service_instance_config.instance,
        }

        log.info(f"Emitting paasta.service.* with dimensions {dimensions}")
        gauge = yelp_meteorite.create_gauge('paasta.service.cpus', dimensions)
        gauge.set(service_instance_config.get_cpus())
        gauge = yelp_meteorite.create_gauge('paasta.service.mem', dimensions)
        gauge.set(service_instance_config.get_mem())
        gauge = yelp_meteorite.create_gauge('paasta.service.disk', dimensions)
        gauge.set(service_instance_config.get_disk())
        if hasattr(service_instance_config, 'get_instances'):
            if service_instance_config.get_max_instances() is None:
                gauge = yelp_meteorite.create_gauge('paasta.service.instances', dimensions)
                gauge.set(service_instance_config.get_instances())
Example #4
def _record_autoscaling_decision(
    marathon_service_config: MarathonServiceConfig,
    autoscaling_params: AutoscalingParamsDict,
    utilization: float,
    log_utilization_data: Mapping[str, str],
    error: float,
    current_instances: int,
    num_healthy_instances: int,
    new_instance_count: int,
    safe_downscaling_threshold: int,
    task_data_insufficient: bool,
) -> None:
    """
    Based on the calculations made, perform observability side effects.
    Log messages, generate time series, send any alerts, etc.
    """
    write_to_log(
        config=marathon_service_config,
        line=json.dumps(
            dict(
                timestamp=time.time(),
                paasta_cluster=marathon_service_config.get_cluster(),
                paasta_service=marathon_service_config.get_service(),
                paasta_instance=marathon_service_config.get_instance(),
                autoscaling_params=autoscaling_params,
                utilization=utilization,
                error=error,
                current_instances=current_instances,
                num_healthy_instances=num_healthy_instances,
                new_instance_count=new_instance_count,
                safe_downscaling_threshold=safe_downscaling_threshold,
                task_data_insufficient=task_data_insufficient,
            )
        ),
        level="debug",
    )
    meteorite_dims = {
        "paasta_service": marathon_service_config.service,
        "paasta_cluster": marathon_service_config.cluster,
        "paasta_instance": marathon_service_config.instance,
        "paasta_pool": marathon_service_config.get_pool(),
        "decision_policy": autoscaling_params[DECISION_POLICY_KEY],  # type: ignore
    }
    if yelp_meteorite:
        gauge = yelp_meteorite.create_gauge("paasta.service.instances", meteorite_dims)
        gauge.set(new_instance_count)
        gauge = yelp_meteorite.create_gauge(
            "paasta.service.max_instances", meteorite_dims
        )
        gauge.set(marathon_service_config.get_max_instances())
        gauge = yelp_meteorite.create_gauge(
            "paasta.service.min_instances", meteorite_dims
        )
        gauge.set(marathon_service_config.get_min_instances())
Example #5
    def __init__(self, stat_gauge_name, **kwargs):
        self.dimensions = kwargs

        self._meteorite_gauge = yelp_meteorite.create_gauge(
            stat_gauge_name,
            self.dimensions
        )
Example #6
def emit_cluster_replication_metrics(
    pct_under_replicated: float,
    cluster: str,
    scheduler: str,
) -> None:
    meteorite_dims = {"paasta_cluster": cluster, "scheduler": scheduler}
    gauge = yelp_meteorite.create_gauge("paasta.pct_services_under_replicated",
                                        meteorite_dims)
    gauge.set(pct_under_replicated)
Example #7
def emit_cluster_replication_metrics(
    pct_under_replicated: float,
    cluster: str,
    scheduler: str,
    dry_run: bool = False,
) -> None:
    metric_name = "paasta.pct_services_under_replicated"
    if dry_run:
        print(
            f"Would've sent value {pct_under_replicated} for metric '{metric_name}'"
        )
    else:
        meteorite_dims = {"paasta_cluster": cluster, "scheduler": scheduler}
        gauge = yelp_meteorite.create_gauge(metric_name, meteorite_dims)
        gauge.set(pct_under_replicated)
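A minimal usage sketch with made-up values, showing the dry-run path versus the real emit:

# Hypothetical values for illustration only.
emit_cluster_replication_metrics(2.5, cluster="example-cluster", scheduler="kubernetes", dry_run=True)
# prints: Would've sent value 2.5 for metric 'paasta.pct_services_under_replicated'

emit_cluster_replication_metrics(2.5, cluster="example-cluster", scheduler="kubernetes")
# creates the gauge with dimensions {"paasta_cluster": ..., "scheduler": ...} and calls gauge.set(2.5)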
Example #8
def report_metric_to_meteorite(backend, metric, value, paasta_cluster):
    try:
        paasta_service, paasta_instance = parse_haproxy_backend_name(backend)
    except IndexError:
        return

    meteorite_dims = {
        'paasta_service': paasta_service,
        'paasta_cluster': paasta_cluster,
        'paasta_instance': paasta_instance,
    }
    path = f'paasta.service.requests.{metric}'
    if metric in GUAGES:
        gauge = yelp_meteorite.create_gauge(path, meteorite_dims)
        gauge.set(value)
    elif metric in COUNTERS:
        counter = yelp_meteorite.create_counter(path, meteorite_dims)
        counter.count(value)
    else:
        raise ValueError(
            f"{metric} hasn't been configured as a gauge or counter")
    print(f"Sent {path}: {value} to meteorite")
Example #9
    def create_gauge(self, name: str, **kwargs: Any) -> GaugeProtocol:
        return yelp_meteorite.create_gauge(self.base_name + "." + name, kwargs)
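A sketch of how a prefixed gauge factory like this might be used, assuming a hypothetical wrapper class that stores base_name (the class and metric names below are not from the example above):

import yelp_meteorite


class PrefixedMetrics:
    # Hypothetical holder for a metric-name prefix.
    def __init__(self, base_name: str) -> None:
        self.base_name = base_name

    def create_gauge(self, name, **kwargs):
        # Prepend the prefix, then pass the remaining keyword args as gauge dimensions.
        return yelp_meteorite.create_gauge(self.base_name + "." + name, kwargs)


metrics = PrefixedMetrics("paasta.deployd")
gauge = metrics.create_gauge("workers_busy", paasta_cluster="example-cluster")
gauge.set(4)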
Example #10
def autoscale_marathon_instance(marathon_service_config, system_paasta_config,
                                marathon_tasks, mesos_tasks):
    current_instances = marathon_service_config.get_instances()
    task_data_insufficient = is_task_data_insufficient(marathon_service_config,
                                                       marathon_tasks,
                                                       current_instances)
    autoscaling_params = marathon_service_config.get_autoscaling_params()
    log_utilization_data = {}
    utilization = get_utilization(
        marathon_service_config=marathon_service_config,
        system_paasta_config=system_paasta_config,
        autoscaling_params=autoscaling_params,
        log_utilization_data=log_utilization_data,
        marathon_tasks=marathon_tasks,
        mesos_tasks=mesos_tasks,
    )
    error = get_error_from_utilization(
        utilization=utilization,
        setpoint=autoscaling_params['setpoint'],
        current_instances=current_instances,
    )
    new_instance_count = get_new_instance_count(
        utilization=utilization,
        error=error,
        autoscaling_params=autoscaling_params,
        current_instances=current_instances,
        marathon_service_config=marathon_service_config,
        num_healthy_instances=len(marathon_tasks),
    )

    safe_downscaling_threshold = int(current_instances * 0.7)
    if new_instance_count != current_instances:
        if new_instance_count < current_instances and task_data_insufficient:
            write_to_log(
                config=marathon_service_config,
                line='Delaying scaling *down* as we found too few healthy tasks running in marathon. '
                     'This can happen because tasks are delayed/waiting/unhealthy or because we are '
                     'waiting for tasks to be killed. Will wait for sufficient healthy tasks before '
                     'we make a decision to scale down.',
            )
            return
        if new_instance_count == safe_downscaling_threshold:
            write_to_log(
                config=marathon_service_config,
                line='Autoscaler clamped: %s' % str(log_utilization_data),
                level='debug',
            )

        write_to_log(
            config=marathon_service_config,
            line='Scaling from %d to %d instances (%s)' % (
                current_instances,
                new_instance_count,
                humanize_error(error),
            ),
        )
        set_instances_for_marathon_service(
            service=marathon_service_config.service,
            instance=marathon_service_config.instance,
            instance_count=new_instance_count,
        )
    else:
        write_to_log(
            config=marathon_service_config,
            line='Staying at %d instances (%s)' %
            (current_instances, humanize_error(error)),
            level='debug',
        )
    meteorite_dims = {
        'service_name': marathon_service_config.service,
        'decision_policy': autoscaling_params[DECISION_POLICY_KEY],
        'paasta_cluster': marathon_service_config.cluster,
        'instance_name': marathon_service_config.instance,
    }
    if yelp_meteorite:
        gauge = yelp_meteorite.create_gauge('paasta.service.instances',
                                            meteorite_dims)
        gauge.set(new_instance_count)
        gauge = yelp_meteorite.create_gauge('paasta.service.max_instances',
                                            meteorite_dims)
        gauge.set(marathon_service_config.get_max_instances())
        gauge = yelp_meteorite.create_gauge('paasta.service.min_instances',
                                            meteorite_dims)
        gauge.set(marathon_service_config.get_min_instances())
Example #11
def create_gauge(name: str, *args: Any, **kwargs: Any) -> GaugeProtocol:
    return yelp_meteorite.create_gauge(name, *args, **kwargs)
Example #12
    def create_gauge(self, name, **kwargs):
        return yelp_meteorite.create_gauge(
            'paasta.deployd.{}'.format(name), kwargs)
Example #13
    def __init__(self, stat_gauge_name, **kwargs):
        self.dimensions = kwargs

        self._meteorite_gauge = yelp_meteorite.create_gauge(
            stat_gauge_name, self.dimensions)