Esempio n. 1
0
    def get_replication_for_instance(
        self, instance_config: LongRunningServiceConfig
    ) -> Dict[str, Dict[str, Dict[str, int]]]:
        """Returns the number of registered instances in each discoverable
        location for each service discovery provider.

        For every location, the candidate hosts in the instance's pool are
        queried one after another until one of them answers. A failure on a
        host is logged and the next host is tried; only when the final
        candidate also fails is the error propagated.

        :param instance_config: The LongRunningServiceConfig of the instance
            whose replication is being checked.
        :returns: a dict {'service_discovery_provider': {'location_type': {'service.instance': int}}}
        :raises Exception: whatever the last candidate host of a location
            raised, when every host of that location failed.
        """
        # Neither of these depends on the provider, so compute them once
        # instead of once per provider.
        attribute_host_dict = self.get_allowed_locations_and_hosts(instance_config)
        instance_pool = instance_config.get_pool()

        replication_infos: Dict[str, Dict[str, Dict[str, int]]] = {}
        for provider in self._service_discovery_providers:
            replication_info: Dict[str, Dict[str, int]] = {}
            for location, hosts in attribute_host_dict.items():
                # Try to get information from all available hosts in the pool before giving up
                hostnames = self.get_hostnames_in_pool(hosts, instance_pool)
                last_index = len(hostnames) - 1
                for index, hostname in enumerate(hostnames):
                    try:
                        replication_info[location] = self._get_replication_info(
                            location, hostname, instance_config, provider
                        )
                        break
                    except Exception as e:
                        log.warning(
                            f"Error while getting replication info for {location} from {hostname}: {e}"
                        )
                        # Compare by position, not by value: a hostname that
                        # appears more than once must not end the retries early.
                        if index == last_index:
                            # Last hostname failed, giving up
                            raise
            replication_infos[provider.NAME] = replication_info
        return replication_infos
Esempio n. 2
0
def emit_replication_metrics(
    replication_infos: Mapping[str, Mapping[str, Mapping[str, int]]],
    instance_config: LongRunningServiceConfig,
    expected_count: int,
) -> None:
    """Publish replication gauges for every service discovery provider.

    For each provider in ``replication_infos`` three gauges are created via
    ``yelp_meteorite`` and set, all sharing the same dimensions (service,
    cluster, instance, pool and provider):

    * ``paasta.service.available_backends`` - the sum, over all locations,
      of the count recorded under ``instance_config.job_id`` (locations
      without that key contribute 0).
    * ``paasta.service.critical_backends`` - the critical replication
      percentage of ``instance_config`` applied to ``expected_count``.
    * ``paasta.service.expected_backends`` - ``expected_count`` itself.

    :param replication_infos: {'provider': {'location': {'job_id': count}}}
    :param instance_config: config supplying the dimensions, the job id and
        the critical percentage.
    :param expected_count: number of backends expected for the instance.
    """
    for provider_name, backends_by_location in replication_infos.items():
        dimensions = dict(
            paasta_service=instance_config.service,
            paasta_cluster=instance_config.cluster,
            paasta_instance=instance_config.instance,
            paasta_pool=instance_config.get_pool(),
            service_discovery_provider=provider_name,
        )

        # Add up this job's backends across every location of the provider.
        total_available = sum(
            location_counts.get(instance_config.job_id, 0)
            for location_counts in backends_by_location.values()
        )
        yelp_meteorite.create_gauge(
            "paasta.service.available_backends", dimensions
        ).set(total_available)

        crit_percentage = instance_config.get_replication_crit_percentage()
        yelp_meteorite.create_gauge(
            "paasta.service.critical_backends", dimensions
        ).set(crit_percentage * expected_count / 100.0)

        yelp_meteorite.create_gauge(
            "paasta.service.expected_backends", dimensions
        ).set(expected_count)
Esempio n. 3
0
def smartstack_status(
    service: str,
    instance: str,
    job_config: LongRunningServiceConfig,
    service_namespace_config: ServiceNamespaceConfig,
    pods: Sequence[V1Pod],
    settings: Any,
    should_return_individual_backends: bool = False,
) -> Mapping[str, Any]:
    """Build a per-location smartstack status report for an instance.

    The first registration of ``job_config`` is looked up, through the first
    pool host of each allowed location, and the backends found there are
    matched against ``pods``.

    :param service: service name, used to look up the expected instance count.
    :param instance: instance name, passed as the namespace for that lookup.
    :param job_config: config providing the registrations and the pool.
    :param service_namespace_config: not read by this function.
    :param pods: pods to match against the discovered backends.
    :param settings: object exposing ``kubernetes_client``,
        ``system_paasta_config`` and ``cluster``.
    :param should_return_individual_backends: forwarded to
        ``smartstack_tools.build_smartstack_location_dict``.
    :returns: a dict with the keys ``registration``,
        ``expected_backends_per_location`` and ``locations`` (one entry per
        allowed location).
    """
    first_registration = job_config.get_registrations()[0]
    pool = job_config.get_pool()

    checker = KubeSmartstackReplicationChecker(
        nodes=kubernetes_tools.get_all_nodes(settings.kubernetes_client),
        system_paasta_config=settings.system_paasta_config,
    )
    hosts_by_location = checker.get_allowed_locations_and_hosts(job_config)

    total_expected = marathon_tools.get_expected_instance_count_for_namespace(
        service=service,
        namespace=instance,
        cluster=settings.cluster,
        instance_type_class=KubernetesDeploymentConfig,
    )
    # The expected total is split evenly across locations, truncated to an int.
    per_location_expected = int(total_expected / len(hosts_by_location))

    location_dicts = []
    for location, hosts in hosts_by_location.items():
        synapse_host = checker.get_first_host_in_pool(hosts, pool)
        backends = list(
            smartstack_tools.get_backends(
                first_registration,
                synapse_host=synapse_host,
                synapse_port=settings.system_paasta_config.get_synapse_port(),
                synapse_haproxy_url_format=settings.system_paasta_config.get_synapse_haproxy_url_format(),
            )
        )
        # put 'UP' backends above 'MAINT' backends
        backends.sort(key=lambda backend: backend["status"], reverse=True)

        location_dicts.append(
            smartstack_tools.build_smartstack_location_dict(
                location,
                match_backends_and_pods(backends, pods),
                should_return_individual_backends,
            )
        )

    return {
        "registration": first_registration,
        "expected_backends_per_location": per_location_expected,
        "locations": location_dicts,
    }
Esempio n. 4
0
    def get_replication_for_instance(
        self, instance_config: LongRunningServiceConfig
    ) -> Dict[str, Dict[str, int]]:
        """Collect replication info for every allowed location of an instance.

        Each location is queried through the first host that belongs to the
        instance's pool.

        :param instance_config: config of the instance being checked.
        :returns: a dict {'location_type': {'service.instance': int}}
        """
        hosts_by_location = self.get_allowed_locations_and_hosts(instance_config)
        pool = instance_config.get_pool()
        return {
            location: self._get_replication_info(
                location,
                self.get_first_host_in_pool(hosts, pool),
                instance_config,
            )
            for location, hosts in hosts_by_location.items()
        }
Esempio n. 5
0
def mesh_status(
    service: str,
    service_mesh: ServiceMesh,
    instance: str,
    job_config: LongRunningServiceConfig,
    service_namespace_config: ServiceNamespaceConfig,
    pods: Sequence[V1Pod],
    settings: Any,
    should_return_individual_backends: bool = False,
) -> Mapping[str, Any]:
    """Build a per-location service mesh status report for an instance.

    For every allowed location the first host in the instance's pool is
    picked, and a location entry is built with the smartstack or the envoy
    helper depending on ``service_mesh``. Any other mesh value contributes no
    entry for the location.

    :param service: service name, used to look up the expected instance count.
    :param service_mesh: which mesh to query (SMARTSTACK or ENVOY).
    :param instance: not read by this function.
    :param job_config: config providing registrations, pool and nerve namespace.
    :param service_namespace_config: not read by this function.
    :param pods: pods forwarded to the location-dict builders.
    :param settings: object exposing ``kubernetes_client``,
        ``system_paasta_config`` and ``cluster``.
    :param should_return_individual_backends: forwarded to the builders.
    :returns: a dict with the keys ``registration``,
        ``expected_backends_per_location`` and ``locations``.
    """
    first_registration = job_config.get_registrations()[0]
    pool = job_config.get_pool()

    checker = KubeSmartstackEnvoyReplicationChecker(
        nodes=kubernetes_tools.get_all_nodes(settings.kubernetes_client),
        system_paasta_config=settings.system_paasta_config,
    )
    hosts_by_location = checker.get_allowed_locations_and_hosts(job_config)

    total_expected = marathon_tools.get_expected_instance_count_for_namespace(
        service=service,
        namespace=job_config.get_nerve_namespace(),
        cluster=settings.cluster,
        instance_type_class=KubernetesDeploymentConfig,
    )
    # The expected total is split evenly across locations, truncated to an int.
    per_location_expected = int(total_expected / len(hosts_by_location))

    location_dicts = []
    report: MutableMapping[str, Any] = {
        "registration": first_registration,
        "expected_backends_per_location": per_location_expected,
        "locations": location_dicts,
    }

    for location, hosts in hosts_by_location.items():
        mesh_host = checker.get_first_host_in_pool(hosts, pool)
        if service_mesh == ServiceMesh.SMARTSTACK:
            location_dict = _build_smartstack_location_dict(
                synapse_host=mesh_host,
                synapse_port=settings.system_paasta_config.get_synapse_port(),
                synapse_haproxy_url_format=settings.system_paasta_config.get_synapse_haproxy_url_format(),
                registration=first_registration,
                pods=pods,
                location=location,
                should_return_individual_backends=should_return_individual_backends,
            )
        elif service_mesh == ServiceMesh.ENVOY:
            location_dict = _build_envoy_location_dict(
                envoy_host=mesh_host,
                envoy_admin_port=settings.system_paasta_config.get_envoy_admin_port(),
                envoy_admin_endpoint_format=settings.system_paasta_config.get_envoy_admin_endpoint_format(),
                registration=first_registration,
                pods=pods,
                location=location,
                should_return_individual_backends=should_return_individual_backends,
            )
        else:
            # Neither mesh matched: nothing is recorded for this location.
            continue
        location_dicts.append(location_dict)

    return report