def format_custom_resource(
    instance_config: Mapping[str, Any],
    service: str,
    instance: str,
    cluster: str,
    kind: str,
    version: str,
    group: str,
    namespace: str,
) -> Mapping[str, Any]:
    sanitised_service = sanitise_kubernetes_name(service)
    sanitised_instance = sanitise_kubernetes_name(instance)
    resource: Mapping[str, Any] = {
        "apiVersion": f"{group}/{version}",
        "kind": kind,
        "metadata": {
            "name": f"{sanitised_service}-{sanitised_instance}",
            "namespace": namespace,
            "labels": {
                "yelp.com/paasta_service": service,
                "yelp.com/paasta_instance": instance,
                "yelp.com/paasta_cluster": cluster,
            },
            "annotations": {
                "yelp.com/desired_state": "running"
            },
        },
        "spec": instance_config,
    }
    config_hash = get_config_hash(instance_config)
    resource["metadata"]["labels"]["yelp.com/paasta_config_sha"] = config_hash
    return resource
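The helpers sanitise_kubernetes_name and get_config_hash come from paasta_tools and are not reproduced in this listing. The sketch below is illustrative only: it assumes sanitise_kubernetes_name lowercases the name and replaces "_" with "--" (consistent with the validation messages later in this listing) and that get_config_hash returns a short deterministic digest of its argument; the service, cluster, and kind values are made up.

import hashlib
import json
from typing import Any, Mapping


# Hypothetical stand-ins for the paasta_tools helpers used above; the real
# implementations may differ in detail.
def sanitise_kubernetes_name(name: str) -> str:
    # Kubernetes object names must be lowercase DNS labels; the validation
    # messages later in this listing say "_" becomes "--".
    return name.replace("_", "--").lower()


def get_config_hash(config: Mapping[str, Any]) -> str:
    # A short, deterministic digest of the config (illustrative only).
    return "config" + hashlib.sha256(
        json.dumps(config, sort_keys=True).encode("utf-8")
    ).hexdigest()[:8]


# Example call with made-up values.
resource = format_custom_resource(
    instance_config={"replicas": 3},
    service="example_service",
    instance="main",
    cluster="example-cluster",
    kind="ExampleKind",
    version="v1alpha1",
    group="yelp.com",
    namespace="paasta-examplekinds",
)
print(resource["metadata"]["name"])  # example--service-main under the assumed sanitisation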
Example #2
def get_secret_env(self) -> Mapping[str, dict]:
    base_env = self.config_dict.get("env", {})
    secret_env = {}
    for k, v in base_env.items():
        if is_secret_ref(v):
            secret = get_secret_name_from_ref(v)
            sanitised_secret = sanitise_kubernetes_name(secret)
            service = (
                self.service if not is_shared_secret(v) else SHARED_SECRET_SERVICE
            )
            sanitised_service = sanitise_kubernetes_name(service)
            secret_env[k] = {
                "secret_name": f"tron-secret-{sanitised_service}-{sanitised_secret}",
                "key": secret,
            }
    return secret_env
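is_secret_ref, get_secret_name_from_ref, is_shared_secret, and SHARED_SECRET_SERVICE are not shown here. The shapes below are illustrative only: they assume secret refs in the env block look like SECRET(name) (SHARED_SECRET(name) for shared secrets) and reuse the sanitisation assumption from the earlier sketch; the env keys and secret names are invented.

# Illustrative input and expected output for get_secret_env(); the env keys,
# secret names, and the SECRET(...) ref syntax are assumptions.
example_env = {
    "LOG_LEVEL": "info",                   # plain value: not a secret ref, ignored
    "DB_PASSWORD": "SECRET(db_password)",  # service-owned secret ref
}
# For a job in service "example_service", the method would return roughly:
expected_secret_env = {
    "DB_PASSWORD": {
        # tron-secret-<sanitised service>-<sanitised secret name>
        "secret_name": "tron-secret-example--service-db--password",
        "key": "db_password",
    },
}
# A shared secret would substitute SHARED_SECRET_SERVICE for the owning
# service when building secret_name.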
def format_custom_resource(
    instance_config: Mapping[str, Any],
    service: str,
    instance: str,
    cluster: str,
    kind: str,
    version: str,
    group: str,
    namespace: str,
    git_sha: str,
) -> Mapping[str, Any]:
    sanitised_service = sanitise_kubernetes_name(service)
    sanitised_instance = sanitise_kubernetes_name(instance)
    resource: Mapping[str, Any] = {
        "apiVersion": f"{group}/{version}",
        "kind": kind,
        "metadata": {
            "name": f"{sanitised_service}-{sanitised_instance}",
            "namespace": namespace,
            "labels": {
                "yelp.com/paasta_service": service,
                "yelp.com/paasta_instance": instance,
                "yelp.com/paasta_cluster": cluster,
                paasta_prefixed("service"): service,
                paasta_prefixed("instance"): instance,
                paasta_prefixed("cluster"): cluster,
            },
            "annotations": {},
        },
        "spec": instance_config,
    }

    url = get_dashboard_url(kind, service, instance, cluster)
    if url:
        resource["metadata"]["annotations"]["yelp.com/dashboard_url"] = url
        resource["metadata"]["annotations"][paasta_prefixed(
            "dashboard_url")] = url

    config_hash = get_config_hash(resource)

    resource["metadata"]["annotations"]["yelp.com/desired_state"] = "running"
    resource["metadata"]["annotations"][paasta_prefixed(
        "desired_state")] = "running"
    resource["metadata"]["labels"]["yelp.com/paasta_config_sha"] = config_hash
    resource["metadata"]["labels"][paasta_prefixed("config_sha")] = config_hash
    resource["metadata"]["labels"][paasta_prefixed("git_sha")] = git_sha
    return resource
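This newer variant writes every label and annotation twice: once under the legacy yelp.com/paasta_* names and once under paasta_prefixed(...) names, which (judging by the paasta.yelp.com/config_sha lookup in Example #11 below) expand to paasta.yelp.com/*. Note also that the config hash now covers the whole resource dict rather than just instance_config, and is computed before the desired_state annotations and the config_sha/git_sha labels are added, so those fields cannot perturb it. A minimal stand-in for the prefix helper, under that assumption:

# Hypothetical stand-in, consistent with the "paasta.yelp.com/config_sha"
# label key used in Example #11; the real helper lives in paasta_tools.
def paasta_prefixed(key: str) -> str:
    return f"paasta.yelp.com/{key}"

get_dashboard_url is assumed to return a falsy value when no dashboard exists for the given kind, which is why the dashboard_url annotations are only written when url is truthy.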
def get_container_type(container_name: str, instance_name: str) -> str:
    """
    To differentiate between main service containers and sidecars
    """
    if instance_name and container_name == kubernetes_tools.sanitise_kubernetes_name(
            instance_name):
        return MAIN_CONTAINER_TYPE
    else:
        return container_name
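MAIN_CONTAINER_TYPE is a module constant that is not shown in this listing. The calls below are illustrative only, assuming its value is "main" and reusing the earlier sanitisation assumption: the main container is the one whose name equals the sanitised instance name, and any other container (e.g. a sidecar) is reported under its own name.

MAIN_CONTAINER_TYPE = "main"  # assumed value of the constant used above
print(get_container_type("batch--daemon", "batch_daemon"))  # -> "main"
print(get_container_type("hacheck", "batch_daemon"))        # -> "hacheck" (sidecar)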
Example #5
def validate_service_name(service):
    if len(sanitise_kubernetes_name(service)) > 63:
        paasta_print(
            failure(
                f"Length of service name {service} should be no more than 63."
                + " Note _ is replaced with - due to Kubernetes restriction",
                "http://paasta.readthedocs.io/en/latest/yelpsoa_configs.html",
            ))
        return False
    return True
Example #6
def validate_instance_names(config_file_object, file_path):
    errors = []
    for instance_name in config_file_object:
        if (not instance_name.startswith("_")
                and len(sanitise_kubernetes_name(instance_name)) > 63):
            errors.append(instance_name)
    if errors:
        error_string = "\n".join(errors)
        paasta_print(
            failure(
                f"Length of instance name \n{error_string}\n should be no more than 63."
                + " Note _ is replaced with -- due to Kubernetes restriction",
                "http://paasta.readthedocs.io/en/latest/yelpsoa_configs.html",
            ))
    return len(errors) == 0
Example #7
def get_app_name(service: str, instance: str):
    return sanitise_kubernetes_name(f"{service}-{instance}")
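A quick note on get_app_name: sanitising the joined f"{service}-{instance}" string gives the same result as sanitising the parts separately (as sanitised_name in Example #12 does), since the assumed sanitisation only rewrites characters and leaves the literal "-" separator alone. Illustrative, with made-up names:

# Both forms yield the same app name under the character-rewriting assumption.
print(get_app_name("example_service", "canary"))
# -> example--service-canary
print(f"{sanitise_kubernetes_name('example_service')}-{sanitise_kubernetes_name('canary')}")
# -> example--service-canary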
def create_instance_cpu_scaling_rule(
    service: str,
    instance: str,
    autoscaling_config: AutoscalingParamsDict,
    paasta_cluster: str,
) -> PrometheusAdapterRule:
    """
    Creates a Prometheus adapter rule config for a given service instance.
    """
    deployment_name = get_kubernetes_app_name(service=service,
                                              instance=instance)
    sanitized_instance_name = sanitise_kubernetes_name(instance)
    metric_name = f"{deployment_name}-cpu-prom"
    moving_average_window = autoscaling_config.get(
        "moving_average_window_seconds",
        DEFAULT_CPU_AUTOSCALING_MOVING_AVERAGE_WINDOW)

    # this series query is a bit of a hack: we don't use the Prometheus adapter as expected (i.e., very generic rules)
    # but we still need to give it a query that returns something even though we're not going to use the series/label
    # templates that are auto-extracted for us. That said: we still need this query to return labels that can be tied
    # back to k8s objects WITHOUT using label_replace
    series_query = f"""
        kube_deployment_labels{{
            deployment='{deployment_name}',
            paasta_cluster='{paasta_cluster}',
            namespace='paasta'
        }}
    """

    cpu_usage = f"""
        avg(
            irate(
                container_cpu_usage_seconds_total{{
                    namespace='paasta',
                    container='{sanitized_instance_name}',
                    paasta_cluster='{paasta_cluster}'
                }}[1m]
            )
        ) by (pod, container)
    """

    cpus_available = f"""
        sum(
            container_spec_cpu_quota{{
                namespace='paasta',
                container='{sanitized_instance_name}',
                paasta_cluster='{paasta_cluster}'
            }}
            / container_spec_cpu_period{{
                namespace='paasta',
                paasta_cluster='{paasta_cluster}'
            }}
        ) by (pod, container)
    """

    # NOTE: we only have Pod names in our container_cpu* metrics, but we can't get a
    # Deployment from those consistently due to k8s limitations on certain field lengths
    # - thus we need to extract this information from the ReplicaSet name (which is made
    # possible by the fact that our ReplicaSets are named
    # {{deployment}}-{{10 character hex string}}) so that our query only considers the
    # service that we want to autoscale - without this we're only filtering by instance
    # name and these are very much not unique
    # k8s:pod:info is an internal recording rule that joins kube_pod_info with
    # kube_pod_status_phase
    pod_info_join = f"""
        on (pod) group_left(kube_deployment) label_replace(
            k8s:pod:info{{
                created_by_name=~'{deployment_name}.*',
                created_by_kind='ReplicaSet',
                namespace='paasta',
                paasta_cluster='{paasta_cluster}',
                phase='Running'
            }},
            'kube_deployment',
            '$1',
            'created_by_name',
            '(.+)-[a-f0-9]{{10}}'
        )
    """

    # get the total usage of all of our Pods divided by the number of CPUs available to
    # those Pods (i.e., the k8s CPU limit) in order to get the % of CPU used and then add
    # some labels to this vector
    load = f"""
        sum(
            (({cpu_usage}) / ({cpus_available})) * {pod_info_join}
        ) by (kube_deployment)
    """

    current_replicas = f"""
        (
            scalar(
                kube_deployment_spec_replicas{{paasta_cluster='{paasta_cluster}',deployment='{deployment_name}'}} >= 0
                or
                max_over_time(
                    kube_deployment_spec_replicas{{paasta_cluster='{paasta_cluster}',deployment='{deployment_name}'}}[{DEFAULT_EXTRAPOLATION_TIME}s]
                )
            )
        )
    """

    # we want to calculate:
    # * the desired replicas based on instantaneous load,
    # * smooth that over time,
    # * and then divide by the non-smoothed current number of replicas.
    # otherwise, if we do the naive thing and take the average of the load inside avg_over_time,
    # then we'll see the oscillations that we fixed in PR #2862
    moving_average_load = f"""
        avg_over_time(({load})[{moving_average_window}s:]) / {current_replicas}
    """

    # for some reason, during bounces we lose the labels from the previous timeseries (and thus end up with two
    # timeseries), so we avg these to merge them together
    # NOTE: we multiply by 100 to return a number between [0, 100] to the HPA
    moving_average_load_percent = f"avg({moving_average_load}) * 100"

    # we need to do some somewhat hacky label_replaces to inject labels that will then be used for association
    # without these, the adapter doesn't know what deployment to associate the query result with
    # NOTE: these labels MUST match the equivalent ones in the seriesQuery
    metrics_query = f"""
        label_replace(
            label_replace(
                {moving_average_load_percent},
                'deployment',
                '{deployment_name}',
                '',
                ''
            ),
            'namespace',
            'paasta',
            '',
            ''
        )
    """

    return {
        "name": {
            "as": metric_name
        },
        "seriesQuery": _minify_promql(series_query),
        "metricsQuery": _minify_promql(metrics_query),
        "resources": {
            "overrides": {
                "namespace": {
                    "resource": "namespace"
                },
                "deployment": {
                    "group": "apps",
                    "resource": "deployments"
                },
            },
        },
    }
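_minify_promql is not shown; since the queries above are built as indented multi-line f-strings, it presumably collapses the whitespace into a single-line PromQL expression before the rule is serialised. A sketch of that assumption plus an illustrative call (the service, instance, and cluster names are made up; the autoscaling_config key follows the .get() lookup above):

# Hypothetical stand-in: collapse the multi-line f-string queries into a
# single line; the real helper may normalise whitespace differently.
def _minify_promql(query: str) -> str:
    return " ".join(query.split())


rule = create_instance_cpu_scaling_rule(
    service="example_service",
    instance="main",
    autoscaling_config={"moving_average_window_seconds": 1800},
    paasta_cluster="example-cluster",
)
print(rule["name"]["as"])   # e.g. example--service-main-cpu-prom
print(rule["seriesQuery"])  # one-line kube_deployment_labels selector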
Example #9
def get_sanitised_instance_name(self) -> str:
    return sanitise_kubernetes_name(self.get_instance())
Example #10
def get_sanitised_service_name(self) -> str:
    return sanitise_kubernetes_name(self.get_service())
Example #11
def reconcile_kubernetes_resource(
    kube_client: KubeClient,
    service: str,
    instance_configs: Mapping[str, Any],
    custom_resources: Sequence[KubeCustomResource],
    kind: KubeKind,
    version: str,
    group: str,
    cluster: str,
    instance: Optional[str] = None,
) -> bool:

    results = []
    for inst, config in instance_configs.items():
        if instance is not None and instance != inst:
            continue
        formatted_resource = format_custom_resource(
            instance_config=config,
            service=service,
            instance=inst,
            cluster=cluster,
            kind=kind.singular,
            version=version,
            group=group,
            namespace=f"paasta-{kind.plural}",
        )
        desired_resource = KubeCustomResource(
            service=service,
            instance=inst,
            config_sha=formatted_resource["metadata"]["labels"][
                "paasta.yelp.com/config_sha"
            ],
            kind=kind.singular,
            name=formatted_resource["metadata"]["name"],
            namespace=f"paasta-{kind.plural}",
        )

        try:
            if (service, inst, kind.singular) not in [
                (c.service, c.instance, c.kind) for c in custom_resources
            ]:
                log.info(f"{desired_resource} does not exist so creating")
                create_custom_resource(
                    kube_client=kube_client,
                    version=version,
                    kind=kind,
                    formatted_resource=formatted_resource,
                    group=group,
                )
            elif desired_resource not in custom_resources:
                sanitised_service = sanitise_kubernetes_name(service)
                sanitised_instance = sanitise_kubernetes_name(inst)
                log.info(
                    f"{desired_resource} exists but config_sha doesn't match")
                update_custom_resource(
                    kube_client=kube_client,
                    name=f"{sanitised_service}-{sanitised_instance}",
                    version=version,
                    kind=kind,
                    formatted_resource=formatted_resource,
                    group=group,
                )
            else:
                log.info(f"{desired_resource} is up to date, no action taken")
        except Exception as e:
            log.error(str(e))
            results.append(False)
        results.append(True)
    return all(results) if results else True
Example #12
def sanitised_name(service: str, instance: str) -> str:
    sanitised_service = sanitise_kubernetes_name(service)
    sanitised_instance = sanitise_kubernetes_name(instance)
    return f"{sanitised_service}-{sanitised_instance}"
Example #13
def sanitise_kubernetes_service_name(name: str) -> str:
    return limit_size_with_hash(
        sanitise_kubernetes_name(name).replace(".", "---"))
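limit_size_with_hash is not shown either; Kubernetes Service names are capped at 63 characters, so a reasonable reading is that it passes short names through and truncates long ones, appending a short digest to keep them unique. A hedged stand-in under that assumption (the real paasta_tools helper may use a different length budget or hash scheme):

import hashlib


def limit_size_with_hash(name: str, limit: int = 63) -> str:
    # Hypothetical: keep names within the Kubernetes 63-character limit by
    # truncating and appending a short digest of the full name.
    if len(name) <= limit:
        return name
    digest = hashlib.md5(name.encode("utf-8")).hexdigest()[:6]
    return f"{name[:limit - len(digest) - 1]}-{digest}"

The .replace(".", "---") step removes dots, which are not allowed in Kubernetes Service names (they must be valid DNS labels).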
def reconcile_kubernetes_resource(
    kube_client: KubeClient,
    service: str,
    instance_configs: Mapping[str, Any],
    custom_resources: Sequence[KubeCustomResource],
    kind: KubeKind,
    version: str,
    group: str,
    crd: CustomResourceDefinition,
    cluster: str,
    instance: Optional[str] = None,
) -> bool:
    succeeded = True
    config_handler = LONG_RUNNING_INSTANCE_TYPE_HANDLERS[crd.file_prefix]
    for inst, config in instance_configs.items():
        if instance is not None and instance != inst:
            continue
        try:
            soa_config = config_handler.loader(
                service=service,
                instance=inst,
                cluster=cluster,
                load_deployments=True,
                soa_dir=DEFAULT_SOA_DIR,
            )
            git_sha = get_git_sha_from_dockerurl(soa_config.get_docker_url(),
                                                 long=True)
            formatted_resource = format_custom_resource(
                instance_config=config,
                service=service,
                instance=inst,
                cluster=cluster,
                kind=kind.singular,
                version=version,
                group=group,
                namespace=f"paasta-{kind.plural}",
                git_sha=git_sha,
            )
            desired_resource = KubeCustomResource(
                service=service,
                instance=inst,
                config_sha=formatted_resource["metadata"]["labels"][
                    paasta_prefixed("config_sha")],
                git_sha=formatted_resource["metadata"]["labels"].get(
                    paasta_prefixed("git_sha")),
                kind=kind.singular,
                name=formatted_resource["metadata"]["name"],
                namespace=f"paasta-{kind.plural}",
            )
            if (service, inst, kind.singular) not in [
                (c.service, c.instance, c.kind) for c in custom_resources
            ]:
                log.info(f"{desired_resource} does not exist so creating")
                create_custom_resource(
                    kube_client=kube_client,
                    version=version,
                    kind=kind,
                    formatted_resource=formatted_resource,
                    group=group,
                )
            elif desired_resource not in custom_resources:
                sanitised_service = sanitise_kubernetes_name(service)
                sanitised_instance = sanitise_kubernetes_name(inst)
                log.info(
                    f"{desired_resource} exists but config_sha doesn't match")
                update_custom_resource(
                    kube_client=kube_client,
                    name=f"{sanitised_service}-{sanitised_instance}",
                    version=version,
                    kind=kind,
                    formatted_resource=formatted_resource,
                    group=group,
                )
            else:
                log.info(f"{desired_resource} is up to date, no action taken")
        except Exception as e:
            log.error(str(e))
            succeeded = False
    return succeeded
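KubeCustomResource and KubeKind are not reproduced in this listing. The layouts below are hypothetical, inferred purely from how the fields are accessed above (kind.singular, kind.plural, and the value-equality test desired_resource not in custom_resources); the real definitions in paasta_tools may differ. Note also the design change from Example #11: instead of collecting per-instance booleans, this version folds any failure into a single succeeded flag.

from typing import NamedTuple, Optional


# Hypothetical field layouts, inferred from the attribute access above.
class KubeKind(NamedTuple):
    singular: str  # used as the resource kind and in the comparison tuple
    plural: str    # used to build the "paasta-<plural>" namespace


class KubeCustomResource(NamedTuple):
    service: str
    instance: str
    config_sha: str
    kind: str
    name: str
    namespace: str
    git_sha: Optional[str] = None  # only the newer variant sets this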