def get_replication_for_instance(
    self, instance_config: LongRunningServiceConfig
) -> Dict[str, Dict[str, Dict[str, int]]]:
    """Returns the number of registered instances in each discoverable
    location for each service discovery provider.

    :param instance_config: An instance of MarathonServiceConfig.
    :returns: a dict
        {'service_discovery_provider': {'location_type': {'service.instance': int}}}
    """
    replication_infos = {}
    # These depend only on the instance config, not on the provider, so
    # compute them once instead of once per provider iteration.
    attribute_host_dict = self.get_allowed_locations_and_hosts(instance_config)
    instance_pool = instance_config.get_pool()
    for provider in self._service_discovery_providers:
        replication_info = {}
        for location, hosts in attribute_host_dict.items():
            # Try to get information from all available hosts in the pool
            # before giving up.
            hostnames = self.get_hostnames_in_pool(hosts, instance_pool)
            last_index = len(hostnames) - 1
            for i, hostname in enumerate(hostnames):
                try:
                    replication_info[location] = self._get_replication_info(
                        location, hostname, instance_config, provider
                    )
                    break
                except Exception as e:
                    # log.warn is a deprecated alias of log.warning.
                    log.warning(
                        f"Error while getting replication info for {location} from {hostname}: {e}"
                    )
                    # Index comparison (not hostname equality) so duplicate
                    # hostnames in the list cannot trigger an early give-up.
                    if i == last_index:
                        # Last hostname failed, giving up
                        raise
        replication_infos[provider.NAME] = replication_info
    return replication_infos
def emit_replication_metrics(
    replication_infos: Mapping[str, Mapping[str, Mapping[str, int]]],
    instance_config: LongRunningServiceConfig,
    expected_count: int,
) -> None:
    """Emit available/critical/expected backend-count gauges, one set per
    service discovery provider, tagged with the instance's paasta dimensions.
    """
    for provider, replication_info in replication_infos.items():
        meteorite_dims = {
            "paasta_service": instance_config.service,
            "paasta_cluster": instance_config.cluster,
            "paasta_instance": instance_config.instance,
            "paasta_pool": instance_config.get_pool(),
            "service_discovery_provider": provider,
        }

        # Sum this instance's registered backends across every location.
        num_available_backends = sum(
            backends_by_job.get(instance_config.job_id, 0)
            for backends_by_job in replication_info.values()
        )
        yelp_meteorite.create_gauge(
            "paasta.service.available_backends", meteorite_dims
        ).set(num_available_backends)

        # The alerting threshold is a percentage of the expected count.
        critical_percentage = instance_config.get_replication_crit_percentage()
        num_critical_backends = critical_percentage * expected_count / 100.0
        yelp_meteorite.create_gauge(
            "paasta.service.critical_backends", meteorite_dims
        ).set(num_critical_backends)

        yelp_meteorite.create_gauge(
            "paasta.service.expected_backends", meteorite_dims
        ).set(expected_count)
def smartstack_status(
    service: str,
    instance: str,
    job_config: LongRunningServiceConfig,
    service_namespace_config: ServiceNamespaceConfig,
    pods: Sequence[V1Pod],
    settings: Any,
    should_return_individual_backends: bool = False,
) -> Mapping[str, Any]:
    """Build a per-location smartstack status dict for one service.instance.

    Queries the first eligible synapse host in each discoverable location for
    its haproxy backends, matches them against the given pods, and returns
    {'registration', 'expected_backends_per_location', 'locations': [...]}.
    """
    registration = job_config.get_registrations()[0]
    pool = job_config.get_pool()

    checker = KubeSmartstackReplicationChecker(
        nodes=kubernetes_tools.get_all_nodes(settings.kubernetes_client),
        system_paasta_config=settings.system_paasta_config,
    )
    hosts_by_location = checker.get_allowed_locations_and_hosts(job_config)

    # NOTE(review): mesh_status passes job_config.get_nerve_namespace() as the
    # namespace here instead of the raw instance name — confirm equivalence.
    expected_smartstack_count = marathon_tools.get_expected_instance_count_for_namespace(
        service=service,
        namespace=instance,
        cluster=settings.cluster,
        instance_type_class=KubernetesDeploymentConfig,
    )
    # NOTE(review): raises ZeroDivisionError if there are no discoverable
    # locations — presumably callers guarantee at least one.
    expected_count_per_location = int(
        expected_smartstack_count / len(hosts_by_location)
    )

    status: MutableMapping[str, Any] = {
        "registration": registration,
        "expected_backends_per_location": expected_count_per_location,
        "locations": [],
    }

    system_config = settings.system_paasta_config
    for location, hosts in hosts_by_location.items():
        synapse_host = checker.get_first_host_in_pool(hosts, pool)
        backends = smartstack_tools.get_backends(
            registration,
            synapse_host=synapse_host,
            synapse_port=system_config.get_synapse_port(),
            synapse_haproxy_url_format=system_config.get_synapse_haproxy_url_format(),
        )
        sorted_backends = sorted(
            backends,
            key=lambda backend: backend["status"],
            reverse=True,  # put 'UP' backends above 'MAINT' backends
        )
        matched_backends_and_pods = match_backends_and_pods(sorted_backends, pods)
        status["locations"].append(
            smartstack_tools.build_smartstack_location_dict(
                location, matched_backends_and_pods, should_return_individual_backends
            )
        )
    return status
def get_replication_for_instance(
    self, instance_config: LongRunningServiceConfig
) -> Dict[str, Dict[str, int]]:
    """Returns the number of registered instances in each discoverable location.

    :param instance_config: An instance of MarathonServiceConfig.
    :returns: a dict {'location_type': {'service.instance': int}}
    """
    hosts_by_location = self.get_allowed_locations_and_hosts(instance_config)
    pool = instance_config.get_pool()
    # One replication-info entry per location, queried from the first
    # eligible host in the instance's pool.
    return {
        location: self._get_replication_info(
            location,
            self.get_first_host_in_pool(hosts, pool),
            instance_config,
        )
        for location, hosts in hosts_by_location.items()
    }
def mesh_status(
    service: str,
    service_mesh: ServiceMesh,
    instance: str,
    job_config: LongRunningServiceConfig,
    service_namespace_config: ServiceNamespaceConfig,
    pods: Sequence[V1Pod],
    settings: Any,
    should_return_individual_backends: bool = False,
) -> Mapping[str, Any]:
    """Build a per-location backend status dict for one service mesh
    (smartstack/haproxy or envoy).

    Queries the first eligible host in each discoverable location via the
    mesh-appropriate admin endpoint and returns
    {'registration', 'expected_backends_per_location', 'locations': [...]}.
    """
    registration = job_config.get_registrations()[0]
    pool = job_config.get_pool()

    checker = KubeSmartstackEnvoyReplicationChecker(
        nodes=kubernetes_tools.get_all_nodes(settings.kubernetes_client),
        system_paasta_config=settings.system_paasta_config,
    )
    hosts_by_location = checker.get_allowed_locations_and_hosts(job_config)

    expected_total = marathon_tools.get_expected_instance_count_for_namespace(
        service=service,
        namespace=job_config.get_nerve_namespace(),
        cluster=settings.cluster,
        instance_type_class=KubernetesDeploymentConfig,
    )
    # NOTE(review): raises ZeroDivisionError if there are no discoverable
    # locations — presumably callers guarantee at least one.
    expected_per_location = int(expected_total / len(hosts_by_location))

    status: MutableMapping[str, Any] = {
        "registration": registration,
        "expected_backends_per_location": expected_per_location,
        "locations": [],
    }

    system_config = settings.system_paasta_config
    for location, hosts in hosts_by_location.items():
        host = checker.get_first_host_in_pool(hosts, pool)
        if service_mesh == ServiceMesh.SMARTSTACK:
            location_dict = _build_smartstack_location_dict(
                synapse_host=host,
                synapse_port=system_config.get_synapse_port(),
                synapse_haproxy_url_format=system_config.get_synapse_haproxy_url_format(),
                registration=registration,
                pods=pods,
                location=location,
                should_return_individual_backends=should_return_individual_backends,
            )
            status["locations"].append(location_dict)
        elif service_mesh == ServiceMesh.ENVOY:
            location_dict = _build_envoy_location_dict(
                envoy_host=host,
                envoy_admin_port=system_config.get_envoy_admin_port(),
                envoy_admin_endpoint_format=system_config.get_envoy_admin_endpoint_format(),
                registration=registration,
                pods=pods,
                location=location,
                should_return_individual_backends=should_return_individual_backends,
            )
            status["locations"].append(location_dict)
    return status