def instance_set_state(request) -> None:
    """Set the desired state of a service instance through ``pik``.

    Reads ``service``, ``instance`` and ``desired_state`` from
    ``request.swagger_data``, resolves the instance type with
    ``validate_service_instance`` and, when ``pik.can_set_state`` accepts that
    type, forwards the new state to ``pik.set_cr_desired_state``.

    Raises:
        ApiFailure: 404 when ``validate_service_instance`` raises
            ``NoConfigurationForServiceError``; 500 when it fails in any other
            way, when the instance type does not support ``set_state``, or
            when setting the state raises ``RuntimeError``.
    """
    params = request.swagger_data
    service = params.get("service")
    instance = params.get("instance")
    desired_state = params.get("desired_state")

    try:
        instance_type = validate_service_instance(
            service, instance, settings.cluster, settings.soa_dir
        )
    except NoConfigurationForServiceError:
        deployment_key = ".".join([settings.cluster, instance])
        raise ApiFailure("deployment key %s not found" % deployment_key, 404)
    except Exception:
        raise ApiFailure(traceback.format_exc(), 500)

    # Guard clause: reject instance types that cannot have their state set.
    if not pik.can_set_state(instance_type):
        raise ApiFailure(
            f"instance_type {instance_type} of {service}.{instance} doesn't "
            "support set_state",
            500,
        )

    try:
        pik.set_cr_desired_state(
            kube_client=settings.kubernetes_client,
            service=service,
            instance=instance,
            instance_type=instance_type,
            desired_state=desired_state,
        )
    except RuntimeError as err:
        raise ApiFailure(err, 500)
def bounce_status(request):
    """Return the bounce status of a service instance.

    Reads ``service`` and ``instance`` from ``request.swagger_data``. For
    instances of type ``"kubernetes"`` the result of ``pik.bounce_status`` is
    returned; for every other type an empty response with status 204 is
    returned.

    Raises:
        ApiFailure: 404 when ``validate_service_instance`` raises
            ``NoConfigurationForServiceError``; 500 when validation or the
            bounce status lookup fails in any other way.
    """
    params = request.swagger_data
    service = params.get("service")
    instance = params.get("instance")

    try:
        instance_type = validate_service_instance(
            service, instance, settings.cluster, settings.soa_dir
        )
    except NoConfigurationForServiceError:
        not_found = no_configuration_for_service_message(
            settings.cluster, service, instance,
        )
        raise ApiFailure(not_found, 404)
    except Exception:
        raise ApiFailure(traceback.format_exc(), 500)

    if instance_type == "kubernetes":
        try:
            return pik.bounce_status(service, instance, settings)
        except Exception as err:
            raise ApiFailure(err, 500)

    # We are using HTTP 204 to indicate that the instance exists but has
    # no bounce status to be returned. The client should just mark the
    # instance as bounced.
    no_content = Response()
    no_content.status_int = 204
    return no_content
def instance_set_state(request) -> None:
    """Set the desired state of a flink service instance.

    Reads ``service``, ``instance`` and ``desired_state`` from
    ``request.swagger_data``. Only instances whose type is ``"flink"`` are
    handled; the state is applied with ``set_flink_desired_state`` using a
    freshly created ``KubeClient``.

    Raises:
        ApiFailure: 404 when ``validate_service_instance`` raises
            ``NoConfigurationForServiceError`` or the instance type is not
            ``"flink"``; 500 when validation fails in any other way or when
            setting the state raises ``ApiException``.
    """
    params = request.swagger_data
    service = params.get("service")
    instance = params.get("instance")
    desired_state = params.get("desired_state")

    try:
        instance_type = validate_service_instance(
            service, instance, settings.cluster, settings.soa_dir
        )
    except NoConfigurationForServiceError:
        deployment_key = ".".join([settings.cluster, instance])
        raise ApiFailure("deployment key %s not found" % deployment_key, 404)
    except Exception:
        raise ApiFailure(traceback.format_exc(), 500)

    # Guard clause: anything other than flink is rejected up front.
    if instance_type != "flink":
        raise ApiFailure(
            f"Unknown instance_type {instance_type} of {service}.{instance}", 404
        )

    try:
        client = KubeClient()
        set_flink_desired_state(
            kube_client=client,
            service=service,
            instance=instance,
            desired_state=desired_state,
        )
    except ApiException as err:
        raise ApiFailure(
            f"Error while setting state {desired_state} of {service}.{instance}: {err}",
            500,
        )
def update_autoscaler_count(request):
    """Set the autoscaled instance count for a service instance.

    Reads ``service`` and ``instance`` from ``request.swagger_data`` and
    ``desired_instances`` from its ``json_body``. The requested count is
    clamped to the instance config's ``[min_instances, max_instances]`` range
    before it is applied with ``set_autoscaled_instances``.

    Returns:
        A ``Response`` with status 202 whose JSON body holds the applied
        ``desired_instances`` and a ``status`` string (``"SUCCESS"`` or a
        warning describing the clamping).

    Raises:
        ApiFailure: 500 when ``desired_instances`` is not an ``int`` or when
            applying the count fails; 501 when the instance config is neither
            a ``KubernetesDeploymentConfig`` nor a ``MarathonServiceConfig``;
            404 when ``get_max_instances()`` returns ``None``.
    """
    params = request.swagger_data
    service = params.get("service")
    instance = params.get("instance")
    cluster = settings.cluster
    soa_dir = settings.soa_dir

    json_body = params.get("json_body")
    desired_instances = json_body["desired_instances"]
    if not isinstance(desired_instances, int):
        raise ApiFailure(
            'The provided body does not have an integer value for "desired_instances": {}'.format(
                params.get("json_body")
            ),
            500,
        )

    instance_config = get_instance_config(service, instance, cluster, soa_dir, True)
    supported_configs = (KubernetesDeploymentConfig, MarathonServiceConfig)
    if not isinstance(instance_config, supported_configs):
        raise ApiFailure(
            f"Autoscaling is not supported for {service}.{instance} because instance type is not "
            f"marathon or kubernetes.",
            501,
        )

    max_instances = instance_config.get_max_instances()
    if max_instances is None:
        raise ApiFailure(f"Autoscaling is not enabled for {service}.{instance}", 404)
    min_instances = instance_config.get_min_instances()

    # Clamp the request into the configured range; the upper bound is checked
    # first, exactly as before.
    applied_count, status = desired_instances, "SUCCESS"
    if desired_instances > max_instances:
        applied_count = max_instances
        status = (
            "WARNING desired_instances is greater than max_instances %d"
            % max_instances
        )
    elif desired_instances < min_instances:
        applied_count = min_instances
        status = (
            "WARNING desired_instances is less than min_instances %d" % min_instances
        )

    try:
        scale_kwargs = {"instance_count": applied_count}
        # Only kubernetes deployments are given a kube client.
        if isinstance(instance_config, KubernetesDeploymentConfig):
            scale_kwargs["kube_client"] = settings.kubernetes_client
        instance_config.set_autoscaled_instances(**scale_kwargs)
    except Exception as err:
        raise ApiFailure(err, 500)

    return Response(
        json_body={"desired_instances": applied_count, "status": status},
        status_code=202,
    )
def instance_status(request):
    """Build the status dictionary for a service instance.

    Reads ``service``, ``instance`` and ``verbose`` (default ``False``) from
    ``request.swagger_data``. The result always contains ``service``,
    ``instance`` and ``git_sha``, plus one entry keyed by the instance type
    (``marathon``, ``chronos``, ``adhoc`` or ``kubernetes``) holding the
    output of the matching ``*_instance_status`` helper.

    Returns:
        The populated status dictionary.

    Raises:
        ApiFailure: 404 when no deployment version is found for
            ``<cluster>.<instance>`` or the instance type is unknown; 500 when
            loading deployments, validating the instance or collecting the
            type-specific status fails unexpectedly.
    """
    params = request.swagger_data
    service = params.get("service")
    instance = params.get("instance")
    verbose = params.get("verbose", False)

    result: Dict[str, Any] = {"service": service, "instance": instance}

    try:
        actual_deployments = get_actual_deployments(service, settings.soa_dir)
    except Exception:
        raise ApiFailure(traceback.format_exc(), 500)

    version = get_deployment_version(actual_deployments, settings.cluster, instance)
    # exit if the deployment key is not found
    if not version:
        error_message = "deployment key %s not found" % ".".join(
            [settings.cluster, instance]
        )
        raise ApiFailure(error_message, 404)
    result["git_sha"] = version

    try:
        instance_type = validate_service_instance(
            service, instance, settings.cluster, settings.soa_dir
        )
        if instance_type == "marathon":
            result["marathon"] = marathon_instance_status(
                result, service, instance, verbose
            )
        elif instance_type == "chronos":
            result["chronos"] = chronos_instance_status(
                result, service, instance, verbose
            )
        elif instance_type == "adhoc":
            result["adhoc"] = adhoc_instance_status(result, service, instance, verbose)
        elif instance_type == "kubernetes":
            result["kubernetes"] = kubernetes_instance_status(
                result, service, instance, verbose
            )
        else:
            error_message = (
                f"Unknown instance_type {instance_type} of {service}.{instance}"
            )
            raise ApiFailure(error_message, 404)
    except ApiFailure:
        # A deliberate API error (such as the 404 above) must reach the client
        # unchanged instead of being rewrapped as a 500 by the handler below.
        raise
    except Exception:
        raise ApiFailure(traceback.format_exc(), 500)

    return result
def update_autoscaler_count(request):
    """Record a new desired instance count for a marathon service instance.

    Reads ``service`` and ``instance`` from ``request.swagger_data`` and
    ``desired_instances`` from its ``json_body``. The raw requested count is
    passed to ``set_instances_for_marathon_service``; the count reported back
    is clamped to the service config's ``[min_instances, max_instances]``.

    Returns:
        A ``Response`` with status 202 whose JSON body holds the clamped
        ``desired_instances`` and a ``status`` string.

    Raises:
        ApiFailure: 500 when ``desired_instances`` is not an ``int``; 404 when
            the marathon service config cannot be loaded or
            ``get_max_instances()`` returns ``None``.
    """
    params = request.swagger_data
    service = params.get("service")
    instance = params.get("instance")

    json_body = params.get("json_body")
    desired_instances = json_body["desired_instances"]
    if not isinstance(desired_instances, int):
        raise ApiFailure(
            'The provided body does not have an integer value for "desired_instances": {}'.format(
                params.get("json_body")
            ),
            500,
        )

    try:
        service_config = load_marathon_service_config(
            service=service,
            instance=instance,
            cluster=settings.cluster,
            soa_dir=settings.soa_dir,
            load_deployments=False,
        )
    except Exception:
        raise ApiFailure(f"Unable to load service config for {service}.{instance}", 404)

    max_instances = service_config.get_max_instances()
    if max_instances is None:
        raise ApiFailure(f"Autoscaling is not enabled for {service}.{instance}", 404)
    min_instances = service_config.get_min_instances()

    # Dump whatever number from the client to zk. get_instances() will limit
    # readings from zk to [min_instances, max_instances].
    set_instances_for_marathon_service(
        service=service, instance=instance, instance_count=desired_instances
    )

    reported_count, status = desired_instances, "SUCCESS"
    if desired_instances > max_instances:
        reported_count = max_instances
        status = (
            "WARNING desired_instances is greater than max_instances %d"
            % max_instances
        )
    elif desired_instances < min_instances:
        reported_count = min_instances
        status = (
            "WARNING desired_instances is less than min_instances %d" % min_instances
        )

    return Response(
        json_body={"desired_instances": reported_count, "status": status},
        status_code=202,
    )
def delete_service_autoscaler_pause(request):
    """Delete the autoscaler pause node from ZooKeeper.

    The path ``ZK_PAUSE_AUTOSCALE_PATH`` is ensured to exist and then deleted
    using a client obtained from ``ZookeeperPool``. ``request`` is not read.

    Raises:
        ApiFailure: 500 wrapping any exception raised by the ZooKeeper calls.
    """
    with ZookeeperPool() as zk_client:
        try:
            pause_path = ZK_PAUSE_AUTOSCALE_PATH
            zk_client.ensure_path(pause_path)
            zk_client.delete(pause_path)
        except Exception as err:
            raise ApiFailure(err, 500)
def get_service_autoscaler_pause(request):
    """Return the value stored at the autoscaler pause node in ZooKeeper.

    The first element of ``zk.get(ZK_PAUSE_AUTOSCALE_PATH)`` is decoded as
    UTF-8 and returned. When the read raises ``NoNodeError`` or ``ValueError``
    the string ``"0"`` is returned instead. ``request`` is not read.

    Raises:
        ApiFailure: 500 wrapping any other exception raised while reading.
    """
    with ZookeeperPool() as zk_client:
        try:
            node = zk_client.get(ZK_PAUSE_AUTOSCALE_PATH)
            raw_value = node[0]
            pause_until = raw_value.decode("utf8")
        except (NoNodeError, ValueError):
            pause_until = "0"
        except Exception as err:
            raise ApiFailure(err, 500)
    return pause_until
def instance_task(request):
    """Return the items of a single marathon task of a service instance.

    The instance status is obtained from ``instance_status(request)``; its
    ``marathon`` entry supplies the ``app_id`` used to look the task up with
    ``get_task`` (run through ``a_sync.block``). When ``verbose`` is truthy
    the task is enriched with ``add_slave_info`` and then
    ``add_executor_info``.

    Returns:
        The task's ``_Task__items`` attribute.

    Raises:
        ApiFailure: 400 when the status has no ``marathon`` entry; 404 when
            ``TaskNotFound`` is raised; 500 for any other lookup failure.
    """
    status = instance_status(request)
    params = request.swagger_data
    task_id = params.get("task_id", None)
    verbose = params.get("verbose", False)

    try:
        marathon_status = status["marathon"]
    except KeyError:
        raise ApiFailure("Only marathon tasks supported", 400)

    try:
        task = a_sync.block(get_task, task_id, app_id=marathon_status["app_id"])
    except TaskNotFound:
        raise ApiFailure(f"Task with id {task_id} not found", 404)
    except Exception:
        raise ApiFailure(traceback.format_exc(), 500)

    if verbose:
        # Slave info is added first, executor info second.
        task = add_executor_info(add_slave_info(task))
    return task._Task__items
def update_autoscaler_count(request):
    """Record a new desired instance count for a marathon service instance.

    Reads ``service`` and ``instance`` from ``request.swagger_data`` and
    ``desired_instances`` from its ``json_body``. The raw requested count is
    passed to ``set_instances_for_marathon_service``; the count reported back
    is clamped to the service config's ``[min_instances, max_instances]``.

    Returns:
        A ``Response`` with status 202 whose JSON body holds the clamped
        ``desired_instances`` and a ``status`` string.

    Raises:
        ApiFailure: 404 when the marathon service config cannot be loaded or
            ``get_max_instances()`` returns ``None``.
    """
    params = request.swagger_data
    service = params.get("service")
    instance = params.get("instance")
    desired_instances = params.get("json_body")["desired_instances"]

    try:
        service_config = load_marathon_service_config(
            service=service,
            instance=instance,
            cluster=settings.cluster,
            soa_dir=settings.soa_dir,
            load_deployments=False,
        )
    except Exception:
        raise ApiFailure(
            "Unable to load service config for %s.%s" % (service, instance), 404
        )

    max_instances = service_config.get_max_instances()
    if max_instances is None:
        raise ApiFailure(
            "Autoscaling is not enabled for %s.%s" % (service, instance), 404
        )
    min_instances = service_config.get_min_instances()

    # Dump whatever number from the client to zk. get_instances() will limit
    # readings from zk to [min_instances, max_instances].
    set_instances_for_marathon_service(
        service=service, instance=instance, instance_count=desired_instances
    )

    reported_count, status = desired_instances, "SUCCESS"
    if desired_instances > max_instances:
        reported_count = max_instances
        status = (
            "WARNING desired_instances is greater than max_instances %d"
            % max_instances
        )
    elif desired_instances < min_instances:
        reported_count = min_instances
        status = (
            "WARNING desired_instances is less than min_instances %d" % min_instances
        )

    return Response(
        json_body={"desired_instances": reported_count, "status": status},
        status_code=202,
    )
def update_service_autoscaler_pause(request):
    """Store an autoscaler pause expiry timestamp in ZooKeeper.

    Reads ``minutes`` from the request's ``json_body``, adds that many minutes
    (as seconds) to ``time.time()`` and writes the result, as a UTF-8 encoded
    string, to ``ZK_PAUSE_AUTOSCALE_PATH``.

    Raises:
        ApiFailure: 500 wrapping any exception raised while writing.
    """
    minutes = request.swagger_data.get("json_body")["minutes"]
    expiry_time = time.time() + minutes * 60

    with ZookeeperPool() as zk_client:
        try:
            zk_client.ensure_path(ZK_PAUSE_AUTOSCALE_PATH)
            encoded_expiry = str(expiry_time).encode("utf-8")
            zk_client.set(ZK_PAUSE_AUTOSCALE_PATH, encoded_expiry)
        except Exception as err:
            raise ApiFailure(err, 500)
def instance_mesh_status(request):
    """Return the mesh status of a service instance.

    Reads ``service``, ``instance``, ``include_smartstack`` and
    ``include_envoy`` from ``request.swagger_data``. The result starts with
    the ``service`` and ``instance`` keys and is updated with whatever
    ``pik.kubernetes_mesh_status`` returns.

    Raises:
        ApiFailure: 404 when ``validate_service_instance`` raises
            ``NoConfigurationForServiceError``; 405 when the mesh status call
            raises ``RuntimeError``; 500 for any other failure.
    """
    params = request.swagger_data
    service = params.get("service")
    instance = params.get("instance")
    include_smartstack = params.get("include_smartstack")
    include_envoy = params.get("include_envoy")

    mesh_result: Dict[str, Any] = {"service": service, "instance": instance}

    try:
        instance_type = validate_service_instance(
            service, instance, settings.cluster, settings.soa_dir
        )
    except NoConfigurationForServiceError:
        raise ApiFailure(
            f"No instance named '{compose_job_id(service, instance)}' has been "
            f"configured to run in the {settings.cluster} cluster",
            404,
        )
    except Exception:
        raise ApiFailure(traceback.format_exc(), 500)

    try:
        mesh_status = pik.kubernetes_mesh_status(
            service=service,
            instance=instance,
            instance_type=instance_type,
            settings=settings,
            include_smartstack=include_smartstack,
            include_envoy=include_envoy,
        )
        mesh_result.update(mesh_status)
    except RuntimeError as err:
        raise ApiFailure(str(err), 405)
    except Exception:
        raise ApiFailure(traceback.format_exc(), 500)

    return mesh_result
def instance_set_state(request) -> None:
    """Set the desired state of a service instance's custom resource.

    Reads ``service``, ``instance`` and ``desired_state`` from
    ``request.swagger_data``. For instance types listed in
    ``INSTANCE_TYPES_WITH_SET_STATE`` the custom resource id is computed with
    the function returned by ``cr_id_fn_for_instance_type`` and the state is
    applied through ``kubernetes_tools.set_cr_desired_state``.

    Raises:
        ApiFailure: 404 when ``validate_service_instance`` raises
            ``NoConfigurationForServiceError`` or the instance type is not in
            ``INSTANCE_TYPES_WITH_SET_STATE``; 500 when validation fails in
            any other way or when setting the state raises ``ApiException``.
    """
    params = request.swagger_data
    service = params.get("service")
    instance = params.get("instance")
    desired_state = params.get("desired_state")

    try:
        instance_type = validate_service_instance(
            service, instance, settings.cluster, settings.soa_dir
        )
    except NoConfigurationForServiceError:
        deployment_key = ".".join([settings.cluster, instance])
        raise ApiFailure("deployment key %s not found" % deployment_key, 404)
    except Exception:
        raise ApiFailure(traceback.format_exc(), 500)

    # Guard clause: unsupported instance types are rejected up front.
    if instance_type not in INSTANCE_TYPES_WITH_SET_STATE:
        raise ApiFailure(
            f"instance_type {instance_type} of {service}.{instance} doesn't "
            f"support set_state, must be in INSTANCE_TYPES_WITH_SET_STATE, "
            f"currently: {INSTANCE_TYPES_WITH_SET_STATE}",
            404,
        )

    try:
        make_cr_id = cr_id_fn_for_instance_type(instance_type)
        client = KubeClient()
        cr_id = make_cr_id(service=service, instance=instance)
        kubernetes_tools.set_cr_desired_state(
            kube_client=client, cr_id=cr_id, desired_state=desired_state,
        )
    except ApiException as err:
        raise ApiFailure(
            f"Error while setting state {desired_state} of "
            f"{service}.{instance}: {err}",
            500,
        )
def instance_tasks(request):
    """Return the items of every marathon task of a service instance.

    The instance status is obtained from ``instance_status(request)``; its
    ``marathon`` entry supplies the ``app_id`` passed to
    ``get_tasks_from_app_id`` together with the optional ``slave_hostname``
    request parameter. When ``verbose`` is truthy every task is enriched with
    ``add_executor_info`` and then every task with ``add_slave_info``.

    Returns:
        A list with the ``_Task__items`` attribute of each task.

    Raises:
        ApiFailure: 400 when the status has no ``marathon`` entry.
    """
    status = instance_status(request)
    params = request.swagger_data
    slave_hostname = params.get("slave_hostname", None)
    verbose = params.get("verbose", False)

    try:
        marathon_status = status["marathon"]
    except KeyError:
        raise ApiFailure("Only marathon tasks supported", 400)

    tasks = get_tasks_from_app_id(
        marathon_status["app_id"], slave_hostname=slave_hostname
    )
    if verbose:
        # Two separate passes: executor info for all tasks, then slave info.
        tasks = list(map(add_executor_info, tasks))
        tasks = list(map(add_slave_info, tasks))

    return [each._Task__items for each in tasks]
def instance_tasks(request):
    """Return the items of every marathon task of a service instance.

    The instance status is obtained from ``instance_status(request)``; its
    ``marathon`` entry supplies the ``desired_app_id`` passed to
    ``get_tasks_from_app_id`` (run through ``a_sync.block``) together with the
    optional ``slave_hostname`` request parameter. When ``verbose`` is truthy
    every task is enriched with ``add_executor_info`` and then every task
    with ``add_slave_info``.

    Returns:
        A list with the ``_Task__items`` attribute of each task.

    Raises:
        ApiFailure: 400 when the status has no ``marathon`` entry.
    """
    status = instance_status(request)
    params = request.swagger_data
    slave_hostname = params.get("slave_hostname", None)
    verbose = params.get("verbose", False)

    try:
        marathon_status = status["marathon"]
    except KeyError:
        raise ApiFailure("Only marathon tasks supported", 400)

    tasks = a_sync.block(
        get_tasks_from_app_id,
        marathon_status["desired_app_id"],
        slave_hostname=slave_hostname,
    )
    if verbose:
        # Two separate passes: executor info for all tasks, then slave info.
        tasks = list(map(add_executor_info, tasks))
        tasks = list(map(add_slave_info, tasks))

    return [each._Task__items for each in tasks]
def get_autoscaler_count(request):
    """Return the desired instance count of a marathon service instance.

    Reads ``service`` and ``instance`` from ``request.swagger_data`` and loads
    the marathon service config for the configured cluster and soa dir.

    Returns:
        A ``Response`` with status 200 whose JSON body holds
        ``desired_instances`` (the config's ``get_instances()``).

    Raises:
        ApiFailure: 404 when the service config cannot be loaded.
    """
    params = request.swagger_data
    service = params.get("service")
    instance = params.get("instance")

    try:
        service_config = load_marathon_service_config(
            service=service,
            instance=instance,
            cluster=settings.cluster,
            soa_dir=settings.soa_dir,
            load_deployments=False,
        )
    except Exception:
        raise ApiFailure(
            "Unable to load service config for %s.%s" % (service, instance), 404
        )

    desired = service_config.get_instances()
    return Response(json_body={"desired_instances": desired}, status_code=200)
def get_autoscaler_count(request):
    """Return the desired and calculated instance counts of an instance.

    Reads ``service`` and ``instance`` from ``request.swagger_data`` and
    obtains the instance config with ``get_instance_config``.

    Returns:
        A ``Response`` with status 200 whose JSON body holds
        ``desired_instances`` (``get_instances()``) and
        ``calculated_instances`` (``get_instances(with_limit=False)``).

    Raises:
        ApiFailure: 501 when the instance config is neither a
            ``KubernetesDeploymentConfig`` nor a ``MarathonServiceConfig``.
    """
    params = request.swagger_data
    service = params.get("service")
    instance = params.get("instance")

    instance_config = get_instance_config(
        service, instance, settings.cluster, settings.soa_dir
    )
    supported_configs = (KubernetesDeploymentConfig, MarathonServiceConfig)
    if not isinstance(instance_config, supported_configs):
        raise ApiFailure(
            f"Autoscaling is not supported for {service}.{instance} because instance type is not "
            f"marathon or kubernetes.",
            501,
        )

    desired = instance_config.get_instances()
    calculated = instance_config.get_instances(with_limit=False)
    return Response(
        json_body={"desired_instances": desired, "calculated_instances": calculated},
        status_code=200,
    )
def get_autoscaler_count(request):
    """Return the desired and calculated instance counts of a marathon instance.

    Reads ``service`` and ``instance`` from ``request.swagger_data`` and loads
    the marathon service config for the configured cluster and soa dir.

    Returns:
        A ``Response`` with status 200 whose JSON body holds
        ``desired_instances`` (``get_instances()``) and
        ``calculated_instances`` (``get_instances(with_limit=False)``).

    Raises:
        ApiFailure: 404 when the service config cannot be loaded.
    """
    params = request.swagger_data
    service = params.get("service")
    instance = params.get("instance")

    try:
        service_config = load_marathon_service_config(
            service=service,
            instance=instance,
            cluster=settings.cluster,
            soa_dir=settings.soa_dir,
            load_deployments=False,
        )
    except Exception:
        raise ApiFailure(f"Unable to load service config for {service}.{instance}", 404)

    desired = service_config.get_instances()
    calculated = service_config.get_instances(with_limit=False)
    return Response(
        json_body={"desired_instances": desired, "calculated_instances": calculated},
        status_code=200,
    )
def instance_status(request):
    """Build the status dictionary for a service instance.

    Reads ``service``, ``instance``, ``verbose`` (falsy values become ``0``),
    ``new`` (falsy values become ``False``) and the ``include_smartstack`` /
    ``include_envoy`` / ``include_mesos`` flags (each treated as ``True`` when
    ``None``) from ``request.swagger_data``.

    The result always contains ``service``, ``instance`` and ``git_sha``
    (an empty string for ``tron`` instances), plus the type-specific status:
    a ``marathon``, ``adhoc`` or ``tron`` entry, or the keys returned by
    ``pik.instance_status`` for types ``pik.can_handle`` accepts.

    Returns:
        The populated status dictionary.

    Raises:
        ApiFailure: 404 when the instance has no configuration, no deployment
            version is found, or the instance type is unknown; 500 when any
            step fails unexpectedly.
    """
    params = request.swagger_data
    service = params.get("service")
    instance = params.get("instance")
    verbose = params.get("verbose") or 0
    use_new = params.get("new") or False

    include_smartstack = params.get("include_smartstack")
    if include_smartstack is None:
        include_smartstack = True
    include_envoy = params.get("include_envoy")
    if include_envoy is None:
        include_envoy = True
    include_mesos = params.get("include_mesos")
    if include_mesos is None:
        include_mesos = True

    result: Dict[str, Any] = {"service": service, "instance": instance}

    try:
        instance_type = validate_service_instance(
            service, instance, settings.cluster, settings.soa_dir
        )
    except NoConfigurationForServiceError:
        error_message = no_configuration_for_service_message(
            settings.cluster, service, instance,
        )
        raise ApiFailure(error_message, 404)
    except Exception:
        raise ApiFailure(traceback.format_exc(), 500)

    if instance_type != "tron":
        try:
            actual_deployments = get_actual_deployments(service, settings.soa_dir)
        except Exception:
            raise ApiFailure(traceback.format_exc(), 500)

        version = get_deployment_version(
            actual_deployments, settings.cluster, instance
        )
        # exit if the deployment key is not found
        if not version:
            error_message = (
                "Deployment key %s not found. Try to execute the corresponding pipeline if it's a fresh instance"
                % ".".join([settings.cluster, instance])
            )
            raise ApiFailure(error_message, 404)
        result["git_sha"] = version
    else:
        result["git_sha"] = ""

    try:
        if instance_type == "marathon":
            result["marathon"] = marathon_instance_status(
                result,
                service,
                instance,
                verbose,
                include_smartstack=include_smartstack,
                include_envoy=include_envoy,
                include_mesos=include_mesos,
            )
        elif instance_type == "adhoc":
            result["adhoc"] = adhoc_instance_status(result, service, instance, verbose)
        elif pik.can_handle(instance_type):
            result.update(
                pik.instance_status(
                    service=service,
                    instance=instance,
                    verbose=verbose,
                    include_smartstack=include_smartstack,
                    include_envoy=include_envoy,
                    use_new=use_new,
                    instance_type=instance_type,
                    settings=settings,
                )
            )
        elif instance_type == "tron":
            result["tron"] = tron_instance_status(result, service, instance, verbose)
        else:
            error_message = (
                f"Unknown instance_type {instance_type} of {service}.{instance}"
            )
            raise ApiFailure(error_message, 404)
    except ApiFailure:
        # A deliberate API error (such as the 404 above) must reach the client
        # unchanged instead of being rewrapped as a 500 by the handler below.
        raise
    except Exception:
        raise ApiFailure(traceback.format_exc(), 500)

    return result
def cr_id_fn_for_instance_type(instance_type: str):
    """Look up the custom-resource id function registered for an instance type.

    Args:
        instance_type: key to look up in ``INSTANCE_TYPE_CR_ID``.

    Returns:
        The value stored in ``INSTANCE_TYPE_CR_ID`` for ``instance_type``.

    Raises:
        ApiFailure: 500 when ``instance_type`` is not in
            ``INSTANCE_TYPE_CR_ID``.
    """
    if instance_type in INSTANCE_TYPE_CR_ID:
        return INSTANCE_TYPE_CR_ID[instance_type]
    raise ApiFailure(f"Error looking up cr_id function for {instance_type}", 500)
def instance_status(request):
    """Build the status dictionary for a service instance.

    Reads ``service``, ``instance``, ``verbose`` (falsy values become ``0``)
    and the ``include_smartstack`` / ``include_mesos`` flags (each treated as
    ``True`` when ``None``) from ``request.swagger_data``.

    The result always contains ``service``, ``instance`` and ``git_sha``
    (an empty string for ``tron`` instances), plus one entry keyed by the
    instance type. For types in ``INSTANCE_TYPES_K8S`` that entry holds the
    custom resource ``status`` and ``metadata`` when they are not ``None``;
    ``cassandracluster`` instances additionally get a ``kubernetes`` entry.

    Returns:
        The populated status dictionary.

    Raises:
        ApiFailure: 404 when the instance has no configuration, no deployment
            version is found, or the instance type is unknown; 500 when any
            step fails unexpectedly.
    """
    params = request.swagger_data
    service = params.get("service")
    instance = params.get("instance")
    verbose = params.get("verbose") or 0

    include_smartstack = params.get("include_smartstack")
    if include_smartstack is None:
        include_smartstack = True
    include_mesos = params.get("include_mesos")
    if include_mesos is None:
        include_mesos = True

    result: Dict[str, Any] = {"service": service, "instance": instance}

    try:
        instance_type = validate_service_instance(
            service, instance, settings.cluster, settings.soa_dir
        )
    except NoConfigurationForServiceError:
        error_message = (
            "Deployment key %s not found. Try to execute the corresponding pipeline if it's a fresh instance"
            % ".".join([settings.cluster, instance])
        )
        raise ApiFailure(error_message, 404)
    except Exception:
        raise ApiFailure(traceback.format_exc(), 500)

    if instance_type != "tron":
        try:
            actual_deployments = get_actual_deployments(service, settings.soa_dir)
        except Exception:
            raise ApiFailure(traceback.format_exc(), 500)

        version = get_deployment_version(
            actual_deployments, settings.cluster, instance
        )
        # exit if the deployment key is not found
        if not version:
            error_message = (
                "Deployment key %s not found. Try to execute the corresponding pipeline if it's a fresh instance"
                % ".".join([settings.cluster, instance])
            )
            raise ApiFailure(error_message, 404)
        result["git_sha"] = version
    else:
        result["git_sha"] = ""

    try:
        if instance_type == "marathon":
            result["marathon"] = marathon_instance_status(
                result,
                service,
                instance,
                verbose,
                include_smartstack=include_smartstack,
                include_mesos=include_mesos,
            )
        elif instance_type == "adhoc":
            result["adhoc"] = adhoc_instance_status(result, service, instance, verbose)
        elif instance_type == "kubernetes":
            result["kubernetes"] = kubernetes_instance_status(
                result,
                service,
                instance,
                verbose,
                include_smartstack=include_smartstack,
                instance_type=instance_type,
            )
        elif instance_type == "tron":
            result["tron"] = tron_instance_status(result, service, instance, verbose)
        elif instance_type in INSTANCE_TYPES_K8S:
            cr_id_fn = cr_id_fn_for_instance_type(instance_type)
            cr_id = cr_id_fn(service, instance)
            cr_status = kubernetes_cr_status(cr_id, verbose)
            cr_metadata = kubernetes_cr_metadata(cr_id, verbose)
            result[instance_type] = {}
            if cr_status is not None:
                result[instance_type]["status"] = cr_status
            if cr_metadata is not None:
                result[instance_type]["metadata"] = cr_metadata
        else:
            error_message = (
                f"Unknown instance_type {instance_type} of {service}.{instance}"
            )
            raise ApiFailure(error_message, 404)

        if instance_type == "cassandracluster":
            result["kubernetes"] = kubernetes_instance_status(
                result,
                service,
                instance,
                verbose,
                include_smartstack=include_smartstack,
                instance_type=instance_type,
            )
    except ApiFailure:
        # A deliberate API error (such as the 404 above) must reach the client
        # unchanged instead of being rewrapped as a 500 by the handler below.
        raise
    except Exception:
        raise ApiFailure(traceback.format_exc(), 500)

    return result
def instance_status(request):
    """Build the status dictionary for a service instance.

    Reads ``service``, ``instance``, ``verbose`` (falsy values become ``0``)
    and the ``omit_smartstack`` / ``omit_mesos`` flags (falsy values become
    ``False``) from ``request.swagger_data``.

    The result always contains ``service``, ``instance`` and ``git_sha``
    (an empty string for ``flink`` and ``tron`` instances), plus one entry
    keyed by the instance type (``marathon``, ``chronos``, ``adhoc``,
    ``kubernetes``, ``tron`` or ``flink``).

    Returns:
        The populated status dictionary.

    Raises:
        ApiFailure: 404 when the instance has no configuration, no deployment
            version is found, or the instance type is unknown; 500 when any
            step fails unexpectedly.
    """
    params = request.swagger_data
    service = params.get("service")
    instance = params.get("instance")
    verbose = params.get("verbose") or 0
    omit_smartstack = params.get("omit_smartstack") or False
    omit_mesos = params.get("omit_mesos") or False

    result: Dict[str, Any] = {"service": service, "instance": instance}

    try:
        instance_type = validate_service_instance(
            service, instance, settings.cluster, settings.soa_dir
        )
    except NoConfigurationForServiceError:
        error_message = "deployment key %s not found" % ".".join(
            [settings.cluster, instance]
        )
        raise ApiFailure(error_message, 404)
    except Exception:
        raise ApiFailure(traceback.format_exc(), 500)

    if instance_type not in ("flink", "tron"):
        try:
            actual_deployments = get_actual_deployments(service, settings.soa_dir)
        except Exception:
            raise ApiFailure(traceback.format_exc(), 500)

        version = get_deployment_version(
            actual_deployments, settings.cluster, instance
        )
        # exit if the deployment key is not found
        if not version:
            error_message = "deployment key %s not found" % ".".join(
                [settings.cluster, instance]
            )
            raise ApiFailure(error_message, 404)
        result["git_sha"] = version
    else:
        result["git_sha"] = ""

    try:
        if instance_type == "marathon":
            result["marathon"] = marathon_instance_status(
                result,
                service,
                instance,
                verbose,
                omit_smartstack=omit_smartstack,
                omit_mesos=omit_mesos,
            )
        elif instance_type == "chronos":
            # chronos_instance_status is only ever given 0 or 1 here.
            result["chronos"] = chronos_instance_status(
                service, instance, 1 if verbose else 0
            )
        elif instance_type == "adhoc":
            result["adhoc"] = adhoc_instance_status(result, service, instance, verbose)
        elif instance_type == "kubernetes":
            result["kubernetes"] = kubernetes_instance_status(
                result, service, instance, verbose
            )
        elif instance_type == "tron":
            result["tron"] = tron_instance_status(result, service, instance, verbose)
        elif instance_type == "flink":
            flink_status = flink_instance_status(result, service, instance, verbose)
            if flink_status is not None:
                result["flink"] = {"status": flink_status}
            else:
                result["flink"] = {}
        else:
            error_message = (
                f"Unknown instance_type {instance_type} of {service}.{instance}"
            )
            raise ApiFailure(error_message, 404)
    except ApiFailure:
        # A deliberate API error (such as the 404 above) must reach the client
        # unchanged instead of being rewrapped as a 500 by the handler below.
        raise
    except Exception:
        raise ApiFailure(traceback.format_exc(), 500)

    return result