Ejemplo n.º 1
0
def smartstack_status(
    service: str,
    instance: str,
    job_config: LongRunningServiceConfig,
    service_namespace_config: ServiceNamespaceConfig,
    pods: Sequence[V1Pod],
    settings: Any,
    should_return_individual_backends: bool = False,
) -> Mapping[str, Any]:
    """Summarise the SmartStack backends of a service instance, per location.

    For every location returned by the replication checker, the backends of
    the instance's first registration are fetched from a synapse host in the
    instance's pool, matched against ``pods`` and turned into a location dict.

    Args:
        service: Service name, used to look up the expected instance count.
        instance: Instance name, passed as the namespace for that lookup.
        job_config: Provides the registrations and the pool of the instance.
        service_namespace_config: Accepted for interface compatibility; it is
            not read by this function.
        pods: Pods to match against the backends reported by synapse.
        settings: Object exposing ``kubernetes_client``,
            ``system_paasta_config`` and ``cluster``.
        should_return_individual_backends: Forwarded to
            ``smartstack_tools.build_smartstack_location_dict``.

    Returns:
        A mapping with the keys ``"registration"``,
        ``"expected_backends_per_location"`` and ``"locations"`` (one entry
        per location, in the iteration order of the checker's result).
    """
    # Only the first registration is reported; this indexing assumes that
    # get_registrations() returns at least one entry.
    registration = job_config.get_registrations()[0]
    instance_pool = job_config.get_pool()

    smartstack_replication_checker = KubeSmartstackReplicationChecker(
        nodes=kubernetes_tools.get_all_nodes(settings.kubernetes_client),
        system_paasta_config=settings.system_paasta_config,
    )
    node_hostname_by_location = smartstack_replication_checker.get_allowed_locations_and_hosts(
        job_config
    )

    expected_smartstack_count = marathon_tools.get_expected_instance_count_for_namespace(
        service=service,
        namespace=instance,
        cluster=settings.cluster,
        instance_type_class=KubernetesDeploymentConfig,
    )
    # With no allowed location the division would raise ZeroDivisionError;
    # report 0 expected backends and an empty location list instead.
    location_count = len(node_hostname_by_location)
    if location_count:
        expected_count_per_location = int(
            expected_smartstack_count / location_count
        )
    else:
        expected_count_per_location = 0

    # Named `status` so the local does not shadow this function's own name.
    status: MutableMapping[str, Any] = {
        "registration": registration,
        "expected_backends_per_location": expected_count_per_location,
        "locations": [],
    }

    for location, hosts in node_hostname_by_location.items():
        synapse_host = smartstack_replication_checker.get_first_host_in_pool(
            hosts, instance_pool
        )
        sorted_backends = sorted(
            smartstack_tools.get_backends(
                registration,
                synapse_host=synapse_host,
                synapse_port=settings.system_paasta_config.get_synapse_port(),
                synapse_haproxy_url_format=settings.system_paasta_config.get_synapse_haproxy_url_format(),
            ),
            key=lambda backend: backend["status"],
            reverse=True,  # put 'UP' backends above 'MAINT' backends
        )

        matched_backends_and_pods = match_backends_and_pods(sorted_backends, pods)
        location_dict = smartstack_tools.build_smartstack_location_dict(
            location, matched_backends_and_pods, should_return_individual_backends
        )
        status["locations"].append(location_dict)

    return status
Ejemplo n.º 2
0
def check_all_kubernetes_services_replication(soa_dir: str) -> None:
    """Check replication of every Kubernetes deployment found in ``soa_dir``.

    Pods, nodes and the system PaaSTA config are loaded once up front. Every
    ``KubernetesDeploymentConfig`` instance of every service in the configured
    cluster is then handed to ``check_service_replication``. Instances for
    which ``get_docker_image()`` is falsy are skipped with a debug log line.
    """
    client = KubeClient()
    pods = get_all_pods(client)
    nodes = get_all_nodes(client)
    paasta_config = load_system_paasta_config()
    cluster_name = paasta_config.get_cluster()
    checker = KubeSmartstackReplicationChecker(
        nodes=nodes,
        system_paasta_config=paasta_config,
    )

    for service_name in list_services(soa_dir=soa_dir):
        loader = PaastaServiceConfigLoader(service=service_name, soa_dir=soa_dir)
        deployment_configs = loader.instance_configs(
            cluster=cluster_name,
            instance_type_class=kubernetes_tools.KubernetesDeploymentConfig,
        )
        for instance_config in deployment_configs:
            if not instance_config.get_docker_image():
                # No image means nothing is deployed, so there is nothing to monitor.
                log.debug(
                    '%s is not deployed. Skipping replication monitoring.' %
                    instance_config.job_id)
                continue

            check_service_replication(
                instance_config=instance_config,
                all_pods=pods,
                smartstack_replication_checker=checker,
            )
Ejemplo n.º 3
0
def main(
    instance_type_class: Type[InstanceConfig_T],
    check_service_replication: CheckServiceReplication,
    namespace: str,
    mesos: bool = False,
) -> None:
    """Run the replication check and exit with a status reflecting the result.

    Args:
        instance_type_class: Config class of the instances to check.
        check_service_replication: Callable forwarded to
            ``check_services_replication`` to check a single instance.
        namespace: Passed to ``get_kubernetes_pods_and_nodes`` when ``mesos``
            is false.
        mesos: When true, tasks and slaves are read from Mesos; otherwise
            pods and nodes are read from Kubernetes.

    The function always ends in ``sys.exit``: 2 when the under-replicated
    percentage reaches the critical threshold, 1 when it reaches the warning
    threshold, 0 otherwise.
    """
    args = parse_args()
    log_level = logging.DEBUG if args.verbose else logging.WARNING
    logging.basicConfig(level=log_level)

    system_paasta_config = load_system_paasta_config()
    cluster = system_paasta_config.get_cluster()
    replication_checker: SmartstackReplicationChecker

    if not mesos:
        tasks_or_pods, nodes = get_kubernetes_pods_and_nodes(namespace)
        replication_checker = KubeSmartstackReplicationChecker(
            nodes=nodes,
            system_paasta_config=system_paasta_config,
        )
    else:
        tasks_or_pods, slaves = get_mesos_tasks_and_slaves(
            system_paasta_config)
        replication_checker = MesosSmartstackReplicationChecker(
            mesos_slaves=slaves,
            system_paasta_config=system_paasta_config,
        )

    pct_under_replicated = check_services_replication(
        soa_dir=args.soa_dir,
        cluster=cluster,
        service_instances=args.service_instance_list,
        instance_type_class=instance_type_class,
        check_service_replication=check_service_replication,
        replication_checker=replication_checker,
        all_tasks_or_pods=tasks_or_pods,
    )

    # Metrics are only emitted when the optional yelp_meteorite module is set.
    if yelp_meteorite is not None:
        scheduler_name = "mesos" if mesos else "kubernetes"
        emit_cluster_replication_metrics(
            pct_under_replicated,
            cluster,
            scheduler=scheduler_name)

    # Pick the exit status first, then leave through a single sys.exit call.
    if pct_under_replicated >= args.under_replicated_crit_pct:
        log.critical(
            f"{pct_under_replicated}% of instances are under replicated "
            f"(past {args.under_replicated_crit_pct} is critical)!")
        exit_code = 2
    elif pct_under_replicated >= args.under_replicated_warn_pct:
        log.warning(
            f"{pct_under_replicated}% of instances are under replicated "
            f"(past {args.under_replicated_warn_pct} is a warning)!")
        exit_code = 1
    else:
        exit_code = 0
    sys.exit(exit_code)
Ejemplo n.º 4
0
def check_all_kubernetes_based_services_replication(
    soa_dir: str,
    service_instances: Sequence[str],
    instance_type_class: Type[InstanceConfig_T],
    check_service_replication: CheckServiceReplication,
    namespace: str,
) -> None:
    """Check replication of Kubernetes-based instances found in ``soa_dir``.

    Args:
        soa_dir: Directory the service configurations are listed/loaded from.
        service_instances: Optional filter of ``service<SPACER>instance``
            names; when empty, every instance is checked.
        instance_type_class: Config class of the instances to iterate over.
        check_service_replication: Callable invoked once per deployed instance.
        namespace: Namespace the pods are fetched from.

    Instances for which ``get_docker_image()`` is falsy are skipped with a
    debug log line.
    """
    client = KubeClient()
    pods = get_all_pods(kube_client=client, namespace=namespace)
    nodes = get_all_nodes(client)
    paasta_config = load_system_paasta_config()
    cluster_name = paasta_config.get_cluster()
    checker = KubeSmartstackReplicationChecker(
        nodes=nodes, system_paasta_config=paasta_config)
    wanted = set(service_instances)

    for service in list_services(soa_dir=soa_dir):
        loader = PaastaServiceConfigLoader(service=service, soa_dir=soa_dir)
        candidates = loader.instance_configs(
            cluster=cluster_name, instance_type_class=instance_type_class)
        for instance_config in candidates:
            # The name is only built when a filter was actually supplied.
            if wanted:
                if f"{service}{SPACER}{instance_config.instance}" not in wanted:
                    continue

            if not instance_config.get_docker_image():
                log.debug(
                    "%s is not deployed. Skipping replication monitoring." %
                    instance_config.job_id)
                continue

            check_service_replication(
                instance_config=instance_config,
                all_pods=pods,
                smartstack_replication_checker=checker,
            )