def check_service_replication(client, service, instance, cluster, soa_dir):
    """Monitor replication levels for one service instance.

    Dispatches on how the instance is meant to be monitored: instances with a
    configured proxy port are checked through smartstack, everything else by
    counting healthy Marathon tasks.

    :param client: marathon client, used only for the task-count check
    :param service: Service name, like "example_service"
    :param instance: Instance name, like "main" or "canary"
    :param cluster: name of the cluster
    :param soa_dir: The SOA configuration directory to read from
    """
    full_job_id = compose_job_id(service, instance)
    try:
        wanted = marathon_tools.get_expected_instance_count_for_namespace(
            service, instance, soa_dir=soa_dir,
        )
    except NoDeploymentsAvailable:
        # Nothing deployed yet; there is no meaningful replication target.
        log.info("deployments.json missing for %s. Skipping replication monitoring." % full_job_id)
        return
    if wanted is None:
        return
    log.info("Expecting %d total tasks for %s" % (wanted, full_job_id))
    smartstack_monitored = marathon_tools.get_proxy_port_for_instance(
        service, instance, soa_dir=soa_dir,
    ) is not None
    if smartstack_monitored:
        check_smartstack_replication_for_instance(
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=soa_dir,
            expected_count=wanted,
        )
    else:
        check_healthy_marathon_tasks_for_service_instance(
            client=client,
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=soa_dir,
            expected_count=wanted,
        )
def perform_command(command, service, instance, cluster, verbose, soa_dir, app_id=None, delta=None):
    """Performs a start/stop/restart/status on an instance

    :param command: String of start, stop, restart, status
    :param service: service name
    :param instance: instance name, like "main" or "canary"
    :param cluster: cluster name
    :param verbose: int verbosity level
    :param app_id: marathon app id; resolved from the job config when omitted
    :param delta: unused here; kept for a compatible signature with callers
    :returns: A unix-style return code
    """
    system_config = load_system_paasta_config()
    marathon_config = marathon_tools.load_marathon_config()
    job_config = marathon_tools.load_marathon_service_config(service, instance, cluster, soa_dir=soa_dir)
    if not app_id:
        try:
            app_id = job_config.format_marathon_app_dict()['id']
        except NoDockerImageError:
            # No image in deployments.json means nothing can be acted on.
            job_id = compose_job_id(service, instance)
            print "Docker image for %s not in deployments.json. Exiting. Has Jenkins deployed it?" % job_id
            return 1
    normal_instance_count = job_config.get_instances()
    # NOTE(review): cluster is passed positionally here; presumably the third
    # positional parameter of get_expected_instance_count_for_namespace -- confirm.
    normal_smartstack_count = marathon_tools.get_expected_instance_count_for_namespace(service, instance, cluster)
    proxy_port = marathon_tools.get_proxy_port_for_instance(service, instance, cluster, soa_dir=soa_dir)
    client = marathon_tools.get_marathon_client(marathon_config.get_url(), marathon_config.get_username(),
                                                marathon_config.get_password())
    if command == 'restart':
        restart_marathon_job(service, instance, app_id, client, cluster)
    elif command == 'status':
        print status_desired_state(service, instance, client, job_config)
        print status_marathon_job(service, instance, app_id, normal_instance_count, client)
        tasks, out = status_marathon_job_verbose(service, instance, client)
        if verbose > 0:
            print out
        print status_mesos_tasks(service, instance, normal_instance_count)
        if verbose > 0:
            # Higher verbosity tails more log lines per task.
            tail_lines = calculate_tail_lines(verbose_level=verbose)
            print status_mesos_tasks_verbose(
                job_id=app_id,
                get_short_task_id=get_short_task_id,
                tail_lines=tail_lines,
            )
        if proxy_port is not None:
            # Only smartstack-registered instances have backend status to show.
            print status_smartstack_backends(
                service=service,
                instance=instance,
                cluster=cluster,
                job_config=job_config,
                tasks=tasks,
                expected_count=normal_smartstack_count,
                soa_dir=soa_dir,
                verbose=verbose > 0,
                synapse_port=system_config.get_synapse_port(),
                synapse_haproxy_url_format=system_config.get_synapse_haproxy_url_format(),
            )
    else:
        # The command parser shouldn't have let us get this far...
        raise NotImplementedError("Command %s is not implemented!" % command)
    return 0
def check_service_replication(client, service, instance, cluster, soa_dir):
    """Verify a service instance's replication using the appropriate strategy.

    Smartstack-registered instances (those with a proxy port) are checked via
    synapse backends; all others by counting healthy Marathon tasks.

    :param client: marathon client for the task-based check
    :param service: Service name, like "example_service"
    :param instance: Instance name, like "main" or "canary"
    :param cluster: name of the cluster
    :param soa_dir: The SOA configuration directory to read from
    """
    job_id = compose_job_id(service, instance)
    try:
        target_count = marathon_tools.get_expected_instance_count_for_namespace(
            service, instance, soa_dir=soa_dir)
    except NoDeploymentsAvailable:
        # Not deployed yet -- skip quietly at debug level.
        log.debug('deployments.json missing for %s. Skipping replication monitoring.' % job_id)
        return
    if target_count is None:
        return
    log.info("Expecting %d total tasks for %s" % (target_count, job_id))
    if marathon_tools.get_proxy_port_for_instance(service, instance, soa_dir=soa_dir) is None:
        check_healthy_marathon_tasks_for_service_instance(
            client=client,
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=soa_dir,
            expected_count=target_count,
        )
    else:
        check_smartstack_replication_for_instance(
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=soa_dir,
            expected_count=target_count,
        )
def perform_command(command, service, instance, cluster, verbose, soa_dir, app_id=None, delta=None):
    """Performs a start/stop/restart/status on an instance

    :param command: String of start, stop, restart, status
    :param service: service name
    :param instance: instance name, like "main" or "canary"
    :param cluster: cluster name
    :param verbose: int verbosity level
    :param app_id: marathon app id; derived from the job config when omitted
    :param delta: unused in this version; kept for signature compatibility
    :returns: A unix-style return code
    """
    system_config = load_system_paasta_config()
    marathon_config = marathon_tools.load_marathon_config()
    job_config = marathon_tools.load_marathon_service_config(service, instance, cluster, soa_dir=soa_dir)
    if not app_id:
        try:
            app_id = job_config.format_marathon_app_dict()['id']
        except NoDockerImageError:
            # Cannot do anything without a deployed docker image.
            job_id = compose_job_id(service, instance)
            print "Docker image for %s not in deployments.json. Exiting. Has Jenkins deployed it?" % job_id
            return 1
    normal_instance_count = job_config.get_instances()
    # NOTE(review): cluster passed positionally -- presumably matches the
    # third positional parameter of the helper; confirm against marathon_tools.
    normal_smartstack_count = marathon_tools.get_expected_instance_count_for_namespace(service, instance, cluster)
    proxy_port = marathon_tools.get_proxy_port_for_instance(service, instance, cluster, soa_dir=soa_dir)
    client = marathon_tools.get_marathon_client(marathon_config.get_url(), marathon_config.get_username(),
                                                marathon_config.get_password())
    if command == 'restart':
        restart_marathon_job(service, instance, app_id, client, cluster)
    elif command == 'status':
        print status_desired_state(service, instance, client, job_config)
        print status_marathon_job(service, instance, app_id, normal_instance_count, client)
        tasks, out = status_marathon_job_verbose(service, instance, client)
        if verbose > 0:
            print out
        print status_mesos_tasks(service, instance, normal_instance_count)
        if verbose > 0:
            # More verbosity -> more tailed log lines per task.
            tail_lines = calculate_tail_lines(verbose_level=verbose)
            print status_mesos_tasks_verbose(
                job_id=app_id,
                get_short_task_id=get_short_task_id,
                tail_lines=tail_lines,
            )
        if proxy_port is not None:
            # Only smartstack-registered instances expose backend status.
            print status_smartstack_backends(
                service=service,
                instance=instance,
                cluster=cluster,
                job_config=job_config,
                tasks=tasks,
                expected_count=normal_smartstack_count,
                soa_dir=soa_dir,
                verbose=verbose > 0,
                synapse_port=system_config.get_synapse_port(),
                synapse_haproxy_url_format=system_config.get_synapse_haproxy_url_format(),
            )
    else:
        # The command parser shouldn't have let us get this far...
        raise NotImplementedError("Command %s is not implemented!" % command)
    return 0
def synapse_replication_is_low(service, instance, system_paasta_config, local_backends):
    """Report whether the local synapse sees too few backends for this instance.

    Resolves the registration the instance actually advertises under, derives
    the expected backend count for a single location, and compares it against
    what the local synapse reports.  "Too few" means below the 80% critical
    threshold applied by utils.is_under_replicated.
    """
    threshold = 80
    # We only actually care about the replication of where we're registering
    registration = read_registration_for_service_instance(
        service=service,
        instance=instance,
    )
    service, namespace, _, _ = utils.decompose_job_id(registration)
    replication_info = load_smartstack_info_for_service(
        service=service,
        namespace=namespace,
        blacklist=[],
        system_paasta_config=system_paasta_config,
    )
    total_expected = get_expected_instance_count_for_namespace(
        service=service, namespace=namespace)
    expected_here = int(total_expected / len(replication_info))
    synapse_name = utils.compose_job_id(service, namespace)
    replication_by_name = get_replication_for_services(
        synapse_host=system_paasta_config.get_default_synapse_host(),
        synapse_port=system_paasta_config.get_synapse_port(),
        synapse_haproxy_url_format=system_paasta_config.get_synapse_haproxy_url_format(),
        services=[synapse_name],
    )
    available = replication_by_name.get(synapse_name, 0)
    is_low, _ = utils.is_under_replicated(available, expected_here, threshold)
    log.info('Service %s.%s has %d out of %d expected instances' % (
        service, instance, available, expected_here))
    return is_low
def synapse_replication_is_low(service, instance, system_paasta_config, local_backends):
    """Check the local synapse for under-replication of this instance's registration.

    Returns True when the locally visible backend count falls below 80% of the
    per-location expected instance count.
    """
    crit_pct = 80
    reg_svc, reg_ns, _, _ = utils.decompose_job_id(
        read_registration_for_service_instance(service=service, instance=instance))
    # We only actually care about the replication of where we're registering
    service = reg_svc
    namespace = reg_ns
    location_info = load_smartstack_info_for_service(
        service=service,
        namespace=namespace,
        blacklist=[],
        system_paasta_config=system_paasta_config,
    )
    per_location = int(
        get_expected_instance_count_for_namespace(service=service, namespace=namespace)
        / len(location_info)
    )
    name_in_synapse = utils.compose_job_id(service, namespace)
    counts = get_replication_for_services(
        synapse_host=system_paasta_config.get_default_synapse_host(),
        synapse_port=system_paasta_config.get_synapse_port(),
        synapse_haproxy_url_format=system_paasta_config.get_synapse_haproxy_url_format(),
        services=[name_in_synapse],
    )
    seen = counts.get(name_in_synapse, 0)
    low, _ratio = utils.is_under_replicated(seen, per_location, crit_pct)
    log.info('Service %s.%s has %d out of %d expected instances' %
             (service, instance, seen, per_location))
    return low
def perform_command(command, service, instance, cluster, verbose, soa_dir, app_id=None, delta=None):
    """Performs a start/stop/restart/status/scale on an instance

    :param command: String of start, stop, restart, status or scale
    :param service: service name
    :param instance: instance name, like "main" or "canary"
    :param cluster: cluster name
    :param verbose: bool if the output should be verbose or not
    :param app_id: marathon app id; computed from config when omitted
    :param delta: instance-count delta, used only by the 'scale' command
    :returns: A unix-style return code
    """
    marathon_config = marathon_tools.load_marathon_config()
    job_config = marathon_tools.load_marathon_service_config(service, instance, cluster, soa_dir=soa_dir)
    if not app_id:
        try:
            app_id = marathon_tools.create_complete_config(service, instance, marathon_config, soa_dir=soa_dir)['id']
        except NoDockerImageError:
            # Without a deployed image there is no app to operate on.
            job_id = compose_job_id(service, instance)
            print "Docker image for %s not in deployments.json. Exiting. Has Jenkins deployed it?" % job_id
            return 1
    normal_instance_count = job_config.get_instances()
    normal_smartstack_count = marathon_tools.get_expected_instance_count_for_namespace(service, instance)
    proxy_port = marathon_tools.get_proxy_port_for_instance(service, instance, soa_dir=soa_dir)
    client = marathon_tools.get_marathon_client(marathon_config.get_url(), marathon_config.get_username(),
                                                marathon_config.get_password())
    if command == 'start':
        start_marathon_job(service, instance, app_id, normal_instance_count, client, cluster)
    elif command == 'stop':
        stop_marathon_job(service, instance, app_id, client, cluster)
    elif command == 'restart':
        restart_marathon_job(service, instance, app_id, normal_instance_count, client, cluster)
    elif command == 'status':
        # Setting up transparent cache for http API calls
        requests_cache.install_cache('paasta_serviceinit', backend='memory')
        print status_desired_state(service, instance, client, job_config)
        print status_marathon_job(service, instance, app_id, normal_instance_count, client)
        tasks, out = status_marathon_job_verbose(service, instance, client)
        if verbose:
            print out
        print status_mesos_tasks(service, instance, normal_instance_count)
        if verbose:
            print status_mesos_tasks_verbose(app_id, get_short_task_id)
        if proxy_port is not None:
            # Only smartstack-registered instances have backend status.
            print status_smartstack_backends(
                service=service,
                instance=instance,
                cluster=cluster,
                job_config=job_config,
                tasks=tasks,
                expected_count=normal_smartstack_count,
                soa_dir=soa_dir,
                verbose=verbose,
            )
    elif command == 'scale':
        scale_marathon_job(service, instance, app_id, delta, client, cluster)
    else:
        # The command parser shouldn't have let us get this far...
        raise NotImplementedError("Command %s is not implemented!" % command)
    return 0
def synapse_replication_is_low(service, instance, system_paasta_config, local_backends):
    """True when the local synapse advertises fewer than 80% of the expected
    per-location backends for this service instance's nerve namespace.
    """
    crit = 80
    namespace = read_namespace_for_service_instance(service=service, instance=instance)
    info_by_location = load_smartstack_info_for_service(
        service=service,
        namespace=namespace,
        blacklist=[],
        system_paasta_config=system_paasta_config,
    )
    expected_total = get_expected_instance_count_for_namespace(
        service=service, namespace=namespace)
    expected_local = int(expected_total / len(info_by_location))
    synapse_name = "%s.%s" % (service, namespace)
    replication = get_replication_for_services(
        synapse_host=system_paasta_config.get_default_synapse_host(),
        synapse_port=system_paasta_config.get_synapse_port(),
        synapse_haproxy_url_format=system_paasta_config.get_synapse_haproxy_url_format(),
        services=[synapse_name],
    )
    have = replication.get(synapse_name, 0)
    low, _ = utils.is_under_replicated(have, expected_local, crit)
    log.info('Service %s.%s has %d out of %d expected instances' %
             (service, instance, have, expected_local))
    return low
def smartstack_status(
    service: str,
    instance: str,
    job_config: LongRunningServiceConfig,
    service_namespace_config: ServiceNamespaceConfig,
    pods: Sequence[V1Pod],
    settings: Any,
    should_return_individual_backends: bool = False,
) -> Mapping[str, Any]:
    """Build a smartstack status summary for a Kubernetes-deployed instance.

    Queries one synapse host per allowed location and matches the reported
    haproxy backends against the given pods.

    :param service: service name
    :param instance: instance name; also used as the nerve namespace here
    :param job_config: the instance's long-running service configuration
    :param service_namespace_config: nerve namespace config (unused in this body)
    :param pods: pods to match against the discovered backends
    :param settings: request-scoped settings object providing kubernetes_client,
        system_paasta_config and cluster
    :param should_return_individual_backends: include per-backend detail in
        each location dict when True
    :returns: mapping with "registration", "expected_backends_per_location"
        and a "locations" list
    """
    # NOTE(review): assumes at least one registration exists -- confirm callers
    # guarantee a non-empty list, else this raises IndexError.
    registration = job_config.get_registrations()[0]
    instance_pool = job_config.get_pool()
    smartstack_replication_checker = KubeSmartstackReplicationChecker(
        nodes=kubernetes_tools.get_all_nodes(settings.kubernetes_client),
        system_paasta_config=settings.system_paasta_config,
    )
    node_hostname_by_location = smartstack_replication_checker.get_allowed_locations_and_hosts(
        job_config
    )
    expected_smartstack_count = marathon_tools.get_expected_instance_count_for_namespace(
        service=service,
        namespace=instance,
        cluster=settings.cluster,
        instance_type_class=KubernetesDeploymentConfig,
    )
    # NOTE(review): raises ZeroDivisionError if no locations are allowed --
    # verify upstream guarantees at least one.
    expected_count_per_location = int(
        expected_smartstack_count / len(node_hostname_by_location)
    )
    smartstack_status: MutableMapping[str, Any] = {
        "registration": registration,
        "expected_backends_per_location": expected_count_per_location,
        "locations": [],
    }
    for location, hosts in node_hostname_by_location.items():
        # One representative synapse host per location is enough to read state.
        synapse_host = smartstack_replication_checker.get_first_host_in_pool(
            hosts, instance_pool
        )
        sorted_backends = sorted(
            smartstack_tools.get_backends(
                registration,
                synapse_host=synapse_host,
                synapse_port=settings.system_paasta_config.get_synapse_port(),
                synapse_haproxy_url_format=settings.system_paasta_config.get_synapse_haproxy_url_format(),
            ),
            key=lambda backend: backend["status"],
            reverse=True,  # put 'UP' backends above 'MAINT' backends
        )
        matched_backends_and_pods = match_backends_and_pods(sorted_backends, pods)
        location_dict = smartstack_tools.build_smartstack_location_dict(
            location, matched_backends_and_pods, should_return_individual_backends
        )
        smartstack_status["locations"].append(location_dict)

    return smartstack_status
def marathon_smartstack_status(
    service: str,
    instance: str,
    job_config: marathon_tools.MarathonServiceConfig,
    service_namespace_config: ServiceNamespaceConfig,
    tasks: Sequence[MarathonTask],
    should_return_individual_backends: bool = False,
) -> Mapping[str, Any]:
    """Build a smartstack status summary for a Marathon-deployed instance.

    Groups non-blacklisted mesos slaves by the namespace's discovery attribute,
    queries one synapse host per location, and matches reported backends
    against the given Marathon tasks.

    :param service: service name
    :param instance: instance name
    :param job_config: the instance's Marathon service configuration
    :param service_namespace_config: nerve namespace config (for discovery type)
    :param tasks: Marathon tasks to match against discovered backends
    :param should_return_individual_backends: include per-backend detail when True
    :returns: mapping with "registration", "expected_backends_per_location"
        and a "locations" list
    """
    # NOTE(review): assumes at least one registration -- IndexError otherwise.
    registration = job_config.get_registrations()[0]
    discover_location_type = service_namespace_config.get_discover()
    monitoring_blacklist = job_config.get_monitoring_blacklist(
        system_deploy_blacklist=settings.system_paasta_config.get_deploy_blacklist()
    )
    filtered_slaves = get_all_slaves_for_blacklist_whitelist(
        blacklist=monitoring_blacklist, whitelist=None
    )
    grouped_slaves = get_mesos_slaves_grouped_by_attribute(
        slaves=filtered_slaves, attribute=discover_location_type
    )

    # rebuild the dict, replacing the slave object with just their hostname
    slave_hostname_by_location = {
        attribute_value: [slave["hostname"] for slave in slaves]
        for attribute_value, slaves in grouped_slaves.items()
    }

    expected_smartstack_count = marathon_tools.get_expected_instance_count_for_namespace(
        service, instance, settings.cluster
    )
    # NOTE(review): ZeroDivisionError if no locations matched -- confirm
    # upstream filtering always leaves at least one.
    expected_count_per_location = int(
        expected_smartstack_count / len(slave_hostname_by_location)
    )
    smartstack_status: MutableMapping[str, Any] = {
        "registration": registration,
        "expected_backends_per_location": expected_count_per_location,
        "locations": [],
    }
    for location, hosts in slave_hostname_by_location.items():
        # Any host in the location can serve as the synapse query target.
        synapse_host = hosts[0]
        sorted_backends = sorted(
            get_backends(
                registration,
                synapse_host=synapse_host,
                synapse_port=settings.system_paasta_config.get_synapse_port(),
                synapse_haproxy_url_format=settings.system_paasta_config.get_synapse_haproxy_url_format(),
            ),
            key=lambda backend: backend["status"],
            reverse=True,  # put 'UP' backends above 'MAINT' backends
        )
        matched_backends_and_tasks = match_backends_and_tasks(sorted_backends, tasks)
        location_dict = build_smartstack_location_dict(
            location, matched_backends_and_tasks, should_return_individual_backends
        )
        smartstack_status["locations"].append(location_dict)

    return smartstack_status
def marathon_service_mesh_status(
    service: str,
    service_mesh: pik.ServiceMesh,
    instance: str,
    job_config: marathon_tools.MarathonServiceConfig,
    service_namespace_config: ServiceNamespaceConfig,
    tasks: Sequence[MarathonTask],
    should_return_individual_backends: bool = False,
) -> Mapping[str, Any]:
    """Build a service-mesh (smartstack or envoy) status summary per location.

    :param service: service name
    :param service_mesh: which mesh to query (SMARTSTACK or ENVOY)
    :param instance: instance name
    :param job_config: the instance's Marathon service configuration
    :param service_namespace_config: nerve namespace config (for discovery type)
    :param tasks: Marathon tasks to match against discovered backends
    :param should_return_individual_backends: include per-backend detail when True
    :returns: mapping with "registration", "expected_backends_per_location"
        and a "locations" list
    """
    # NOTE(review): assumes a non-empty registrations list -- IndexError otherwise.
    registration = job_config.get_registrations()[0]
    discover_location_type = service_namespace_config.get_discover()

    grouped_slaves = get_mesos_slaves_grouped_by_attribute(
        slaves=get_slaves(), attribute=discover_location_type
    )

    # rebuild the dict, replacing the slave object with just their hostname
    slave_hostname_by_location = {
        attribute_value: [slave["hostname"] for slave in slaves]
        for attribute_value, slaves in grouped_slaves.items()
    }

    expected_instance_count = marathon_tools.get_expected_instance_count_for_namespace(
        service, instance, settings.cluster
    )
    # NOTE(review): ZeroDivisionError if no locations -- confirm upstream.
    expected_count_per_location = int(
        expected_instance_count / len(slave_hostname_by_location)
    )
    service_mesh_status: MutableMapping[str, Any] = {
        "registration": registration,
        "expected_backends_per_location": expected_count_per_location,
        "locations": [],
    }

    for location, hosts in slave_hostname_by_location.items():
        # One representative host per location answers for the whole location.
        if service_mesh == pik.ServiceMesh.SMARTSTACK:
            service_mesh_status["locations"].append(
                _build_smartstack_location_dict_for_backends(
                    synapse_host=hosts[0],
                    registration=registration,
                    tasks=tasks,
                    location=location,
                    should_return_individual_backends=should_return_individual_backends,
                )
            )
        elif service_mesh == pik.ServiceMesh.ENVOY:
            service_mesh_status["locations"].append(
                _build_envoy_location_dict_for_backends(
                    envoy_host=hosts[0],
                    registration=registration,
                    tasks=tasks,
                    location=location,
                    should_return_individual_backends=should_return_individual_backends,
                )
            )

    return service_mesh_status
def perform_command(command, service, instance, cluster, verbose, soa_dir, app_id=None, delta=None):
    """Performs a start/stop/restart/status/scale on an instance

    :param command: String of start, stop, restart, status or scale
    :param service: service name
    :param instance: instance name, like "main" or "canary"
    :param cluster: cluster name
    :param verbose: bool if the output should be verbose or not
    :param app_id: marathon app id; computed from config when omitted
    :param delta: instance-count delta, used only by the 'scale' command
    :returns: A unix-style return code
    """
    marathon_config = marathon_tools.load_marathon_config()
    job_config = marathon_tools.load_marathon_service_config(service, instance, cluster, soa_dir=soa_dir)
    if not app_id:
        try:
            app_id = marathon_tools.create_complete_config(
                service, instance, marathon_config, soa_dir=soa_dir)['id']
        except NoDockerImageError:
            # No deployed image -> nothing to operate on.
            job_id = compose_job_id(service, instance)
            print "Docker image for %s not in deployments.json. Exiting. Has Jenkins deployed it?" % job_id
            return 1
    normal_instance_count = job_config.get_instances()
    normal_smartstack_count = marathon_tools.get_expected_instance_count_for_namespace(
        service, instance)
    proxy_port = marathon_tools.get_proxy_port_for_instance(service, instance, soa_dir=soa_dir)
    client = marathon_tools.get_marathon_client(marathon_config.get_url(), marathon_config.get_username(),
                                                marathon_config.get_password())
    if command == 'start':
        start_marathon_job(service, instance, app_id, normal_instance_count, client, cluster)
    elif command == 'stop':
        stop_marathon_job(service, instance, app_id, client, cluster)
    elif command == 'restart':
        restart_marathon_job(service, instance, app_id, normal_instance_count, client, cluster)
    elif command == 'status':
        # Setting up transparent cache for http API calls
        requests_cache.install_cache('paasta_serviceinit', backend='memory')
        print status_desired_state(service, instance, client, job_config)
        print status_marathon_job(service, instance, app_id, normal_instance_count, client)
        tasks, out = status_marathon_job_verbose(service, instance, client)
        if verbose:
            print out
        print status_mesos_tasks(service, instance, normal_instance_count)
        if verbose:
            print status_mesos_tasks_verbose(app_id, get_short_task_id)
        if proxy_port is not None:
            # Only smartstack-registered instances have backends to report.
            print status_smartstack_backends(
                service=service,
                instance=instance,
                cluster=cluster,
                job_config=job_config,
                tasks=tasks,
                expected_count=normal_smartstack_count,
                soa_dir=soa_dir,
                verbose=verbose,
            )
    elif command == 'scale':
        scale_marathon_job(service, instance, app_id, delta, client, cluster)
    else:
        # The command parser shouldn't have let us get this far...
        raise NotImplementedError("Command %s is not implemented!" % command)
    return 0
def mesh_status(
    service: str,
    service_mesh: ServiceMesh,
    instance: str,
    job_config: LongRunningServiceConfig,
    service_namespace_config: ServiceNamespaceConfig,
    pods: Sequence[V1Pod],
    settings: Any,
    should_return_individual_backends: bool = False,
) -> Mapping[str, Any]:
    """Build a per-location service-mesh status for a Kubernetes instance.

    Queries one host per allowed location -- synapse for SMARTSTACK, the envoy
    admin endpoint for ENVOY -- and matches backends against the given pods.

    :param service: service name
    :param service_mesh: which mesh to query (SMARTSTACK or ENVOY)
    :param instance: instance name
    :param job_config: the instance's long-running service configuration
    :param service_namespace_config: nerve namespace config (unused in this body)
    :param pods: pods to match against the discovered backends
    :param settings: request-scoped settings providing kubernetes_client,
        system_paasta_config and cluster
    :param should_return_individual_backends: include per-backend detail when True
    :returns: mapping with "registration", "expected_backends_per_location"
        and a "locations" list
    """
    # NOTE(review): assumes at least one registration -- IndexError otherwise.
    registration = job_config.get_registrations()[0]
    instance_pool = job_config.get_pool()

    replication_checker = KubeSmartstackEnvoyReplicationChecker(
        nodes=kubernetes_tools.get_all_nodes(settings.kubernetes_client),
        system_paasta_config=settings.system_paasta_config,
    )
    node_hostname_by_location = replication_checker.get_allowed_locations_and_hosts(
        job_config
    )

    expected_smartstack_count = marathon_tools.get_expected_instance_count_for_namespace(
        service=service,
        namespace=job_config.get_nerve_namespace(),
        cluster=settings.cluster,
        instance_type_class=KubernetesDeploymentConfig,
    )
    # NOTE(review): ZeroDivisionError if no allowed locations -- confirm upstream.
    expected_count_per_location = int(
        expected_smartstack_count / len(node_hostname_by_location)
    )
    mesh_status: MutableMapping[str, Any] = {
        "registration": registration,
        "expected_backends_per_location": expected_count_per_location,
        "locations": [],
    }

    for location, hosts in node_hostname_by_location.items():
        # One representative host per location is queried for mesh state.
        host = replication_checker.get_first_host_in_pool(hosts, instance_pool)
        if service_mesh == ServiceMesh.SMARTSTACK:
            mesh_status["locations"].append(
                _build_smartstack_location_dict(
                    synapse_host=host,
                    synapse_port=settings.system_paasta_config.get_synapse_port(),
                    synapse_haproxy_url_format=settings.system_paasta_config.get_synapse_haproxy_url_format(),
                    registration=registration,
                    pods=pods,
                    location=location,
                    should_return_individual_backends=should_return_individual_backends,
                )
            )
        elif service_mesh == ServiceMesh.ENVOY:
            mesh_status["locations"].append(
                _build_envoy_location_dict(
                    envoy_host=host,
                    envoy_admin_port=settings.system_paasta_config.get_envoy_admin_port(),
                    envoy_admin_endpoint_format=settings.system_paasta_config.get_envoy_admin_endpoint_format(),
                    registration=registration,
                    pods=pods,
                    location=location,
                    should_return_individual_backends=should_return_individual_backends,
                )
            )

    return mesh_status
def perform_command(
    command: str,
    service: str,
    instance: str,
    cluster: str,
    verbose: int,
    soa_dir: str,
    clients: marathon_tools.MarathonClients,
    job_config: marathon_tools.MarathonServiceConfig,
    app_id: str = None,
) -> int:
    """Performs a start/stop/restart/status on an instance

    :param command: String of start, stop, restart, status
    :param service: service name
    :param instance: instance name, like "main" or "canary"
    :param cluster: cluster name
    :param verbose: int verbosity level
    :param soa_dir: SOA configuration directory to read from
    :param clients: MarathonClients collection for this deployment
    :param job_config: the instance's preloaded Marathon service configuration
    :param app_id: marathon app id; derived from job_config when omitted
    :returns: A unix-style return code
    """
    system_config = load_system_paasta_config()

    if not app_id:
        try:
            app_id = job_config.format_marathon_app_dict()["id"]
        except NoDockerImageError:
            # No image in deployments.json -> nothing to act on.
            job_id = compose_job_id(service, instance)
            paasta_print(
                "Docker image for %s not in deployments.json. Exiting. Has Jenkins deployed it?"
                % job_id
            )
            return 1

    normal_instance_count = job_config.get_instances()

    current_client = clients.get_current_client_for_service(job_config)

    if command == "restart":
        restart_marathon_job(service, instance, app_id, current_client, cluster)
    elif command == "status":
        paasta_print(
            status_desired_state(service, instance, current_client, job_config)
        )
        dashboards = get_marathon_dashboard_links(clients, system_config)
        tasks, out = status_marathon_job(
            service=service,
            instance=instance,
            cluster=cluster,
            soa_dir=soa_dir,
            dashboards=dashboards,
            normal_instance_count=normal_instance_count,
            clients=clients,
            job_config=job_config,
            desired_app_id=app_id,
            verbose=verbose,
        )
        paasta_print(out)
        service_namespace_config = marathon_tools.load_service_namespace_config(
            service=service, namespace=job_config.get_nerve_namespace(), soa_dir=soa_dir
        )

        paasta_print(
            status_mesos_tasks(service, instance, normal_instance_count, verbose)
        )

        proxy_port = service_namespace_config.get("proxy_port")
        if proxy_port is not None:
            # Only smartstack-registered instances have backend status to show.
            normal_smartstack_count = marathon_tools.get_expected_instance_count_for_namespace(
                service, instance, cluster
            )
            paasta_print(
                status_smartstack_backends(
                    service=service,
                    instance=instance,
                    cluster=cluster,
                    job_config=job_config,
                    service_namespace_config=service_namespace_config,
                    tasks=tasks,
                    expected_count=normal_smartstack_count,
                    soa_dir=soa_dir,
                    verbose=verbose > 0,
                    synapse_port=system_config.get_synapse_port(),
                    synapse_haproxy_url_format=system_config.get_synapse_haproxy_url_format(),
                    system_deploy_blacklist=system_config.get_deploy_blacklist(),
                    system_deploy_whitelist=system_config.get_deploy_whitelist(),
                )
            )
    else:
        # The command parser shouldn't have let us get this far...
        raise NotImplementedError("Command %s is not implemented!" % command)
    return 0