def main() -> None:
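    # Scale every autoscaled kubernetes instance in this cluster up to its
    # configured max_instances.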
    system_paasta_config = load_system_paasta_config()

    kube_client = KubeClient()

    services = {
        service
        for service, _ in get_services_for_cluster(
            cluster=system_paasta_config.get_cluster(),
            instance_type="kubernetes")
    }

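    # One loader per unique service: PaastaServiceConfigLoader iterates that
    # service's instances for us below.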
    for service in services:
        pscl = PaastaServiceConfigLoader(service=service,
                                         load_deployments=False)
        for instance_config in pscl.instance_configs(
                cluster=system_paasta_config.get_cluster(),
                instance_type_class=KubernetesDeploymentConfig,
        ):
            max_instances = instance_config.get_max_instances()
            if max_instances is not None:
                formatted_application = instance_config.format_kubernetes_app()
                formatted_application.spec.replicas = max_instances
                wrapper = get_application_wrapper(formatted_application)
                wrapper.soa_config = instance_config
                print(f"Scaling up {service}.{instance_config.instance}")
                wrapper.update(kube_client)
Example #2
def check_all_kubernetes_services_replication(soa_dir: str) -> None:
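    # Check replication for every kubernetes instance in this cluster that is
    # actually deployed (i.e. has a docker image built for it).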
    kube_client = KubeClient()
    all_pods = get_all_pods(kube_client)
    all_nodes = get_all_nodes(kube_client)
    system_paasta_config = load_system_paasta_config()
    cluster = system_paasta_config.get_cluster()
    smartstack_replication_checker = KubeSmartstackReplicationChecker(
        nodes=all_nodes,
        system_paasta_config=system_paasta_config,
    )

    for service in list_services(soa_dir=soa_dir):
        service_config = PaastaServiceConfigLoader(service=service,
                                                   soa_dir=soa_dir)
        for instance_config in service_config.instance_configs(
                cluster=cluster,
                instance_type_class=kubernetes_tools.KubernetesDeploymentConfig,
        ):
            if instance_config.get_docker_image():
                check_service_replication(
                    instance_config=instance_config,
                    all_pods=all_pods,
                    smartstack_replication_checker=smartstack_replication_checker,
                )
            else:
                log.debug(
                    '%s is not deployed. Skipping replication monitoring.' %
                    instance_config.job_id)
def create_prometheus_adapter_config(paasta_cluster: str,
                                     soa_dir: Path) -> PrometheusAdapterConfig:
    """
    Given a paasta cluster and a soaconfigs directory, create the necessary Prometheus adapter
    config to autoscale services.
    Currently supports the following metrics providers:
        * uwsgi
    """
    rules: List[PrometheusAdapterRule] = []
    # get_services_for_cluster() returns a list of (service, instance) tuples, but this
    # is not great for us: if we were to iterate over that we'd end up getting duplicates
    # for every service as PaastaServiceConfigLoader does not expose a way to get configs
    # for a single instance by name. instead, we get the unique set of service names and then
    # let PaastaServiceConfigLoader iterate over instances for us later
    services = {
        service_name
        for service_name, _ in get_services_for_cluster(
            cluster=paasta_cluster,
            instance_type="kubernetes",
            soa_dir=str(soa_dir))
    }
    for service_name in services:
        config_loader = PaastaServiceConfigLoader(service=service_name,
                                                  soa_dir=str(soa_dir))
        for instance_config in config_loader.instance_configs(
                cluster=paasta_cluster,
                instance_type_class=KubernetesDeploymentConfig,
        ):
            rules.extend(
                get_rules_for_service_instance(
                    service_name=service_name,
                    instance_name=instance_config.instance,
                    autoscaling_config=instance_config.get_autoscaling_params(),
                    paasta_cluster=paasta_cluster,
                ))

    return {
        # we sort our rules so that we can easily compare between two different configmaps
        # as otherwise we'd need to do fancy order-independent comparisons between the two
        # sets of rules later due to the fact that we're not iterating in a deterministic
        # way and can add rules in any arbitrary order
        "rules": sorted(rules, key=lambda rule: rule["name"]["as"]),
    }
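A minimal sketch of how this function might be driven, assuming a hypothetical
write_adapter_config() wrapper; serializing the result as YAML for a configmap is
an assumption here, not something the function above does itself:

import yaml  # assumed available; the snippets above do not import it
from pathlib import Path

def write_adapter_config(paasta_cluster: str, soa_dir: Path, output: Path) -> None:
    # Hypothetical driver: build the adapter config and dump it as YAML.
    config = create_prometheus_adapter_config(paasta_cluster=paasta_cluster, soa_dir=soa_dir)
    # The sorted rules (see above) keep successive dumps easy to diff.
    output.write_text(yaml.safe_dump(config))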
def get_configs_of_services_to_scale(
    cluster: str,
    soa_dir: str = DEFAULT_SOA_DIR,
    services: Optional[Sequence[str]] = None,
) -> Sequence[MarathonServiceConfig]:
    if not services:
        services = list_services(soa_dir=soa_dir)
    configs = []
    for service in services:
        service_config = PaastaServiceConfigLoader(service=service,
                                                   soa_dir=soa_dir)
        for instance_config in service_config.instance_configs(
                cluster=cluster, instance_type_class=MarathonServiceConfig):
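            # Scale only instances that define max_instances, are set to "start",
            # and do not use the "bespoke" decision policy.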
            if (instance_config.get_max_instances()
                    and instance_config.get_desired_state() == "start"
                    and instance_config.get_autoscaling_params()["decision_policy"] != "bespoke"):
                configs.append(instance_config)

    return configs
def check_services_replication(
    soa_dir: str,
    cluster: str,
    service_instances: Sequence[str],
    instance_type_class: Type[InstanceConfig_T],
    check_service_replication: CheckServiceReplication,
    replication_checker: ReplicationChecker,
    all_tasks_or_pods: Sequence[Union[MarathonTask, V1Pod]],
    dry_run: bool = False,
) -> Tuple[int, int]:
    service_instances_set = set(service_instances)
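    # An empty service_instances sequence means "check every instance"; otherwise
    # only the listed service.instance pairs are checked.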
    replication_statuses: List[bool] = []

    for service in list_services(soa_dir=soa_dir):
        service_config = PaastaServiceConfigLoader(service=service,
                                                   soa_dir=soa_dir)
        for instance_config in service_config.instance_configs(
                cluster=cluster, instance_type_class=instance_type_class):
            if (service_instances_set
                    and f"{service}{SPACER}{instance_config.instance}"
                    not in service_instances_set):
                continue
            if instance_config.get_docker_image():
                is_well_replicated = check_service_replication(
                    instance_config=instance_config,
                    all_tasks_or_pods=all_tasks_or_pods,
                    replication_checker=replication_checker,
                    dry_run=dry_run,
                )
                if is_well_replicated is not None:
                    replication_statuses.append(is_well_replicated)

            else:
                log.debug(
                    "%s is not deployed. Skipping replication monitoring." %
                    instance_config.job_id)

    num_under_replicated = len(
        [status for status in replication_statuses if status is False])
    return num_under_replicated, len(replication_statuses)
Example #6
def main():
    args = parse_args()

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.WARNING)

    system_paasta_config = load_system_paasta_config()
    cluster = system_paasta_config.get_cluster()

    clients = marathon_tools.get_marathon_clients(
        marathon_tools.get_marathon_servers(system_paasta_config))
    all_clients = clients.get_all_clients()
    all_tasks = []
    for client in all_clients:
        all_tasks.extend(client.list_tasks())
    mesos_slaves = a_sync.block(get_slaves)
    smartstack_replication_checker = MesosSmartstackReplicationChecker(
        mesos_slaves, system_paasta_config)

    for service in list_services(soa_dir=args.soa_dir):
        service_config = PaastaServiceConfigLoader(service=service,
                                                   soa_dir=args.soa_dir)
        for instance_config in service_config.instance_configs(
                cluster=cluster,
                instance_type_class=marathon_tools.MarathonServiceConfig,
        ):
            if instance_config.get_docker_image():
                check_service_replication(
                    instance_config=instance_config,
                    all_tasks=all_tasks,
                    smartstack_replication_checker=smartstack_replication_checker,
                )
            else:
                log.debug(
                    '%s is not deployed. Skipping replication monitoring.' %
                    instance_config.job_id)
Example #7
def check_all_kubernetes_based_services_replication(
    soa_dir: str,
    service_instances: Sequence[str],
    instance_type_class: Type[InstanceConfig_T],
    check_service_replication: CheckServiceReplication,
    namespace: str,
) -> None:
    kube_client = KubeClient()
    all_pods = get_all_pods(kube_client=kube_client, namespace=namespace)
    all_nodes = get_all_nodes(kube_client)
    system_paasta_config = load_system_paasta_config()
    cluster = system_paasta_config.get_cluster()
    smartstack_replication_checker = KubeSmartstackReplicationChecker(
        nodes=all_nodes, system_paasta_config=system_paasta_config)
    service_instances_set = set(service_instances)

    for service in list_services(soa_dir=soa_dir):
        service_config = PaastaServiceConfigLoader(service=service,
                                                   soa_dir=soa_dir)
        for instance_config in service_config.instance_configs(
                cluster=cluster, instance_type_class=instance_type_class):
            if (service_instances_set
                    and f"{service}{SPACER}{instance_config.instance}"
                    not in service_instances_set):
                continue
            if instance_config.get_docker_image():
                check_service_replication(
                    instance_config=instance_config,
                    all_pods=all_pods,
                    smartstack_replication_checker=smartstack_replication_checker,
                )
            else:
                log.debug(
                    "%s is not deployed. Skipping replication monitoring." %
                    instance_config.job_id)
Example #8
def check_services_replication(
    soa_dir: str,
    cluster: str,
    service_instances: Sequence[str],
    instance_type_class: Type[InstanceConfig_T],
    check_service_replication: CheckServiceReplication,
    replication_checker: SmartstackReplicationChecker,
    all_tasks_or_pods: Sequence[Union[MarathonTask, V1Pod]],
) -> float:
    service_instances_set = set(service_instances)
    replication_statuses: List[bool] = []

    for service in list_services(soa_dir=soa_dir):
        service_config = PaastaServiceConfigLoader(service=service,
                                                   soa_dir=soa_dir)
        for instance_config in service_config.instance_configs(
                cluster=cluster, instance_type_class=instance_type_class):
            if (service_instances_set
                    and f"{service}{SPACER}{instance_config.instance}"
                    not in service_instances_set):
                continue
            if instance_config.get_docker_image():
                is_well_replicated = check_service_replication(
                    instance_config=instance_config,
                    all_tasks_or_pods=all_tasks_or_pods,
                    smartstack_replication_checker=replication_checker,
                )
                if is_well_replicated is not None:
                    replication_statuses.append(is_well_replicated)

            else:
                log.debug(
                    "%s is not deployed. Skipping replication monitoring." %
                    instance_config.job_id)

    return calculate_pct_under_replicated(replication_statuses)
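calculate_pct_under_replicated is not shown in these examples; a stand-in
consistent with how it is called here might look like the following (an
assumption, not the actual implementation):

from typing import Sequence

def calculate_pct_under_replicated(statuses: Sequence[bool]) -> float:
    # Hypothetical stand-in: percentage of False entries; 0.0 when there is no data.
    if not statuses:
        return 0.0
    return 100.0 * sum(1 for ok in statuses if not ok) / len(statuses)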
Example #9
def create_marathon_dashboard(
        cluster: str,
        soa_dir: str = DEFAULT_SOA_DIR,
        marathon_clients: Optional[MarathonClients] = None,
        system_paasta_config: Optional[SystemPaastaConfig] = None,
) -> Marathon_Dashboard:
    try:
        instances: List = get_services_for_cluster(
            cluster=cluster,
            instance_type='marathon',
            soa_dir=soa_dir,
        )
    except FileNotFoundError:
        instances = []
    dashboard: Marathon_Dashboard = {cluster: []}
    if system_paasta_config is None:
        system_paasta_config = load_system_paasta_config()
    marathon_servers = get_marathon_servers(system_paasta_config=system_paasta_config)
    if marathon_clients is None:
        marathon_clients = get_marathon_clients(marathon_servers=marathon_servers, cached=False)

    dashboard_links: Dict = system_paasta_config.get_dashboard_links()
    marathon_links = dashboard_links.get(cluster, {}).get('Marathon RO')

    # e.g. 'http://10.64.97.75:5052': 'http://marathon-norcal-prod.yelpcorp.com'
    shard_url_to_marathon_link_dict: Dict[str, str] = {}
    if isinstance(marathon_links, list):
        # Sanity check and log error if necessary
        if len(marathon_links) != len(marathon_servers.current):
            log.error('len(marathon_links) != len(marathon_servers.current). This may be cause for concern.')
        for shard_number, shard in enumerate(marathon_servers.current):
            shard_url_to_marathon_link_dict[shard.url[0]] = marathon_links[shard_number]
    elif isinstance(marathon_links, str):
        # In this case, the shard url will be the same for every service instance
        static_shard_url = marathon_links.split(' ')[0]
        return {cluster: [{'service': si[0], 'instance': si[1], 'shard_url': static_shard_url} for si in instances]}

    # Key by service, since we instantiate one PSCL per service below.
    service_instances_dict: Dict[str, Set[str]] = defaultdict(set)
    for si in instances:
        service, instance = si[0], si[1]
        service_instances_dict[service].add(instance)

    for service, instance_set in service_instances_dict.items():
        pscl = PaastaServiceConfigLoader(
            service=service,
            soa_dir=soa_dir,
            load_deployments=False,
        )
        for marathon_service_config in pscl.instance_configs(cluster, MarathonServiceConfig):
            if marathon_service_config.get_instance() in instance_set:
                client: MarathonClient = marathon_clients.get_current_client_for_service(
                    job_config=marathon_service_config)
                ip_url: str = client.servers[0]
                # Convert to a marathon link if possible, else default to the original IP address
                shard_url: str = shard_url_to_marathon_link_dict.get(ip_url, ip_url)
                service_info: Marathon_Dashboard_Item = {
                    'service': service,
                    'instance': marathon_service_config.get_instance(),
                    'shard_url': shard_url,
                }
                dashboard[cluster].append(service_info)
    return dashboard
Example #10
def wait_for_deployment(service, deploy_group, git_sha, soa_dir, timeout):
    # Currently only 'marathon' instances are supported for wait_for_deployment
    # because they are the only things worth waiting on.
    service_configs = PaastaServiceConfigLoader(service=service,
                                                soa_dir=soa_dir,
                                                load_deployments=False)

    total_instances = 0
    clusters_data = []
    api_endpoints = load_system_paasta_config().get_api_endpoints()
    for cluster in service_configs.clusters:
        if cluster not in api_endpoints:
            paasta_print(
                PaastaColors.red(
                    'Cluster %s is NOT in paasta-api endpoints config.' %
                    cluster))
            raise NoSuchCluster

        instances_queue = Queue()
        for instance_config in service_configs.instance_configs(
                cluster=cluster,
                instance_type_class=MarathonServiceConfig,
        ):
            if instance_config.get_deploy_group() == deploy_group:
                instances_queue.put(instance_config)
                total_instances += 1

        if not instances_queue.empty():
            clusters_data.append(
                ClusterData(
                    cluster=cluster,
                    service=service,
                    git_sha=git_sha,
                    instances_queue=instances_queue,
                ))

    if not clusters_data:
        _log(
            service=service,
            component='deploy',
            line=("Couldn't find any marathon instances for service {} "
                  "in deploy group {}. Exiting.".format(service, deploy_group)),
            level='event',
        )
        return

    paasta_print("Waiting for deployment of {} for '{}' to complete...".format(
        git_sha, deploy_group))

    deadline = time.time() + timeout
    green_light = Event()
    green_light.set()
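    # green_light is the stop signal shared with _query_clusters: once cleared,
    # the loop below treats it as an interrupt.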

    with progressbar.ProgressBar(maxval=total_instances) as bar:
        while time.time() < deadline:
            _query_clusters(clusters_data, green_light)
            if not green_light.is_set():
                raise KeyboardInterrupt

            bar.update(total_instances - sum((c.instances_queue.qsize()
                                              for c in clusters_data)))

            if all((cluster.instances_queue.empty()
                    for cluster in clusters_data)):
                sys.stdout.flush()
                return 0
            else:
                time.sleep(min(60, timeout))
            sys.stdout.flush()

    _log(
        service=service,
        component='deploy',
        line=compose_timeout_message(clusters_data, timeout, deploy_group,
                                     service, git_sha),
        level='event',
    )
    raise TimeoutError
Example #11
def main() -> None:
    args = parse_args()
    if args.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.WARNING)

    instances = []
    return_codes = []
    command = args.command
    if args.service_instance:
        service_instance = args.service_instance
        service, instance, _, __ = decompose_job_id(service_instance)
        instances.append(instance)
    elif args.service and args.instances:
        service = args.service
        instances = args.instances.split(',')
    else:
        log.error(
            "The name of service or the name of instance to inspect is missing. Exiting."
        )
        sys.exit(1)

    # Setting up transparent cache for http API calls
    requests_cache.install_cache("paasta_serviceinit", backend="memory")

    cluster = load_system_paasta_config().get_cluster()
    actual_deployments = get_actual_deployments(service, args.soa_dir)
    clients = PaastaClients(cached=(command == 'status'))
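    # Responses are only cached for the read-only 'status' command.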

    instance_types = ['marathon', 'chronos', 'paasta_native', 'adhoc']
    instance_types_map: Dict[str, List[str]] = {it: [] for it in instance_types}
    for instance in instances:
        try:
            instance_type = validate_service_instance(
                service,
                instance,
                cluster,
                args.soa_dir,
            )
        except Exception:
            log.error(
                'Exception raised while looking at service {} instance {}:'.format(
                    service, instance))
            log.error(traceback.format_exc())
            return_codes.append(1)
            continue

        if instance_type not in instance_types:
            log.error(
                ("I calculated an instance_type of {} for {} which I don't "
                 "know how to handle.").format(
                     instance_type,
                     compose_job_id(service, instance),
                 ))
            return_codes.append(1)
        else:
            instance_types_map[instance_type].append(instance)

    remote_run_frameworks = None
    if len(instance_types_map['adhoc']) > 0:
        remote_run_frameworks = paasta_remote_run.remote_run_frameworks()

    service_config_loader = PaastaServiceConfigLoader(service)

    for instance_type in instance_types:

        if instance_type == 'marathon':
            job_configs = {
                jc.instance: jc
                for jc in service_config_loader.instance_configs(
                    cluster=cluster,
                    instance_type_class=marathon_tools.MarathonServiceConfig,
                )
            }

        for instance in instance_types_map[instance_type]:
            try:
                version = get_deployment_version(
                    actual_deployments,
                    cluster,
                    instance,
                )
                paasta_print('instance: %s' % PaastaColors.blue(instance))
                paasta_print('Git sha:    %s (desired)' % version)

                if instance_type == 'marathon':
                    return_code = marathon_serviceinit.perform_command(
                        command=command,
                        service=service,
                        instance=instance,
                        cluster=cluster,
                        verbose=args.verbose,
                        soa_dir=args.soa_dir,
                        app_id=args.app_id,
                        clients=clients.marathon(),
                        job_config=job_configs[instance],
                    )
                elif instance_type == 'chronos':
                    return_code = chronos_serviceinit.perform_command(
                        command=command,
                        service=service,
                        instance=instance,
                        cluster=cluster,
                        verbose=args.verbose,
                        soa_dir=args.soa_dir,
                        client=clients.chronos(),
                    )
                elif instance_type == 'paasta_native':
                    return_code = paasta_native_serviceinit.perform_command(
                        command=command,
                        service=service,
                        instance=instance,
                        cluster=cluster,
                        verbose=args.verbose,
                        soa_dir=args.soa_dir,
                    )
                elif instance_type == 'adhoc':
                    if command != 'status':
                        raise NotImplementedError
                    paasta_remote_run.remote_run_list_report(
                        service=service,
                        instance=instance,
                        cluster=cluster,
                        frameworks=remote_run_frameworks,
                    )
                    return_code = 0
            except Exception:
                log.error('Exception raised while looking at service {} '
                          'instance {}:'.format(service, instance))
                log.error(traceback.format_exc())
                return_code = 1

            return_codes.append(return_code)

    sys.exit(max(return_codes))
Example #12
def sync_boto_secrets(
    kube_client: KubeClient,
    cluster: str,
    service: str,
    secret_provider_name: str,
    vault_cluster_config: Mapping[str, str],
    soa_dir: str,
    namespace: str,
) -> bool:
    # Update boto key secrets
    config_loader = PaastaServiceConfigLoader(service=service, soa_dir=soa_dir)
    for instance_config in config_loader.instance_configs(
            cluster=cluster, instance_type_class=KubernetesDeploymentConfig):
        instance = instance_config.instance
        boto_keys = instance_config.config_dict.get("boto_keys", [])
        if not boto_keys:
            continue
        boto_keys.sort()
        secret_data = {}
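        # Gather every rendering of each boto key (sh/yaml/json/cfg) into one
        # secret, base64-encoding the file contents.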
        for key in boto_keys:
            for filetype in ["sh", "yaml", "json", "cfg"]:
                this_key = key + "." + filetype
                sanitised_key = this_key.replace(".", "-").replace("_", "--")
                try:
                    with open(f"/etc/boto_cfg_private/{this_key}") as f:
                        secret_data[sanitised_key] = base64.b64encode(
                            f.read().encode("utf-8")).decode("utf-8")
                except IOError:
                    log.warning(
                        f"Boto key {this_key} required for {service} could not be found."
                    )
        if not secret_data:
            continue
        # In order to prevent slamming the k8s API, add some artificial delay here
        time.sleep(0.3)
        app_name = get_kubernetes_app_name(service, instance)
        secret = limit_size_with_hash(f"paasta-boto-key-{app_name}")
        hashable_data = "".join([secret_data[key] for key in secret_data])
        signature = hashlib.sha1(hashable_data.encode("utf-8")).hexdigest()
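        # Compare a content hash against the stored signature so the Kubernetes
        # API is only written to when the secret's contents actually changed.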
        kubernetes_signature = get_kubernetes_secret_signature(
            kube_client=kube_client,
            secret=secret,
            service=service,
            namespace=namespace,
        )
        if not kubernetes_signature:
            log.info(
                f"{secret} for {service} in {namespace} not found, creating")
            try:
                create_plaintext_dict_secret(
                    kube_client=kube_client,
                    secret_name=secret,
                    secret_data=secret_data,
                    service=service,
                    namespace=namespace,
                )
            except ApiException as e:
                if e.status == 409:
                    log.warning(
                        f"Secret {secret} for {service} already exists in {namespace} but no signature found. Updating secret and signature."
                    )
                    update_plaintext_dict_secret(
                        kube_client=kube_client,
                        secret_name=secret,
                        secret_data=secret_data,
                        service=service,
                        namespace=namespace,
                    )
                else:
                    raise
            create_kubernetes_secret_signature(
                kube_client=kube_client,
                secret=secret,
                service=service,
                secret_signature=signature,
                namespace=namespace,
            )
        elif signature != kubernetes_signature:
            log.info(
                f"{secret} for {service} in {namespace} needs updating as signature changed"
            )
            update_plaintext_dict_secret(
                kube_client=kube_client,
                secret_name=secret,
                secret_data=secret_data,
                service=service,
                namespace=namespace,
            )
            update_kubernetes_secret_signature(
                kube_client=kube_client,
                secret=secret,
                service=service,
                secret_signature=signature,
                namespace=namespace,
            )
        else:
            log.info(f"{secret} for {service} in {namespace} up to date")
    return True