def test_external_storage_namespace_isolation(shutdown_only):
    """KV entries written under one external_storage_namespace must be
    invisible from another, and must survive a restart into the same one."""

    def _connect(storage_ns):
        # Boot a fresh local cluster bound to the given external storage
        # namespace and return a GCS client talking to it.
        info = ray.init(
            namespace="a",
            _system_config={"external_storage_namespace": storage_ns})
        return GcsClient(address=info.address_info["address"])

    # Seed a value under namespace "c1".
    client = _connect("c1")
    assert client.internal_kv_put(b"ABC", b"DEF", True, None) == 1
    assert client.internal_kv_get(b"ABC", None) == b"DEF"
    ray.shutdown()

    # A different storage namespace starts empty, and its writes stay
    # isolated from "c1".
    client = _connect("c2")
    assert client.internal_kv_get(b"ABC", None) is None
    assert client.internal_kv_put(b"ABC", b"XYZ", True, None) == 1
    assert client.internal_kv_get(b"ABC", None) == b"XYZ"
    ray.shutdown()

    # Returning to "c1" sees the original value, untouched by "c2".
    client = _connect("c1")
    assert client.internal_kv_get(b"ABC", None) == b"DEF"
def __init__(
        self,
        address: str,
        autoscaling_config: Union[str, Callable[[], Dict[str, Any]]],
        redis_password: Optional[str] = None,
        prefix_cluster_info: bool = False,
        monitor_ip: Optional[str] = None,
        stop_event: Optional[Event] = None,
        retry_on_failure: bool = True,
):
    """Initialize the monitor: connect to the GCS, wire up internal_kv,
    and (optionally) start the autoscaler metrics server.

    Args:
        address: GCS address ("ip:port").
        autoscaling_config: Path to the autoscaling config file, or a
            callable returning the config dict.
        redis_password: Deprecated; a warning is logged if provided.
        prefix_cluster_info: Whether to prefix cluster info (stored on self).
        monitor_ip: If set, the metrics endpoint is published to internal_kv
            and a Prometheus HTTP server is started.
        stop_event: Optional event used to signal graceful exit from the
            monitor loop.
        retry_on_failure: Whether the monitor loop retries on failure.
    """
    gcs_address = address
    options = (("grpc.enable_http_proxy", 0), )
    gcs_channel = ray._private.utils.init_grpc_channel(
        gcs_address, options)
    # TODO: Use gcs client for this
    self.gcs_node_resources_stub = (
        gcs_service_pb2_grpc.NodeResourceInfoGcsServiceStub(gcs_channel))
    self.gcs_node_info_stub = gcs_service_pb2_grpc.NodeInfoGcsServiceStub(
        gcs_channel)
    if redis_password is not None:
        logger.warning("redis_password has been deprecated.")
    # Set the redis client and mode so _internal_kv works for autoscaler.
    worker = ray.worker.global_worker
    gcs_client = GcsClient(address=gcs_address)
    _initialize_internal_kv(gcs_client)
    # Publish the metrics endpoint exactly once. (The original code
    # performed this identical put twice, before and after
    # _initialize_internal_kv; the duplicate has been removed.)
    if monitor_ip:
        monitor_addr = f"{monitor_ip}:{AUTOSCALER_METRIC_PORT}"
        gcs_client.internal_kv_put(b"AutoscalerMetricsAddress",
                                   monitor_addr.encode(), True, None)
    worker.mode = 0
    head_node_ip = gcs_address.split(":")[0]

    self.load_metrics = LoadMetrics()
    self.last_avail_resources = None
    self.event_summarizer = EventSummarizer()
    self.prefix_cluster_info = prefix_cluster_info
    # Can be used to signal graceful exit from monitor loop.
    self.stop_event = stop_event  # type: Optional[Event]
    self.retry_on_failure = retry_on_failure
    self.autoscaling_config = autoscaling_config
    self.autoscaler = None
    # If set, we are in a manually created cluster (non-autoscaling) and
    # simply mirroring what the GCS tells us the cluster node types are.
    self.readonly_config = None

    self.prom_metrics = AutoscalerPrometheusMetrics()
    if monitor_ip and prometheus_client:
        # If monitor_ip wasn't passed in, then don't attempt to start the
        # metric server to keep behavior identical to before metrics were
        # introduced
        try:
            logger.info(
                "Starting autoscaler metrics server on port {}".format(
                    AUTOSCALER_METRIC_PORT))
            prometheus_client.start_http_server(
                port=AUTOSCALER_METRIC_PORT,
                addr="127.0.0.1" if head_node_ip == "127.0.0.1" else "",
                registry=self.prom_metrics.registry,
            )
        except Exception:
            logger.exception(
                "An exception occurred while starting the metrics server.")
    elif not prometheus_client:
        logger.warning(
            "`prometheus_client` not found, so metrics will not be exported."
        )

    logger.info("Monitor: Started")
class RayInternalKVStore(KVStoreBase):
    """Wraps ray's internal_kv with a namespace to avoid collisions.

    Supports string keys and bytes values, caller must handle serialization.
    """

    def __init__(
        self,
        namespace: str = None,
    ):
        """Create a KV store scoped to ``namespace``.

        Args:
            namespace: Optional prefix applied to every storage key.
                Defaults to the empty string.

        Raises:
            TypeError: If ``namespace`` is neither None nor a string.
        """
        if namespace is not None and not isinstance(namespace, str):
            # Fixed error-message typo: previously read "must a string".
            raise TypeError("namespace must be a string, got: {}.".format(
                type(namespace)))

        self.gcs_client = GcsClient(
            address=ray.get_runtime_context().gcs_address)
        self.timeout = RAY_SERVE_KV_TIMEOUT_S
        self.namespace = namespace or ""

    def get_storage_key(self, key: str) -> str:
        """Return ``key`` prefixed with this store's namespace."""
        return "{ns}-{key}".format(ns=self.namespace, key=key)

    def put(self, key: str, val: bytes) -> bool:
        """Put the key-value pair into the store.

        Args:
            key (str)
            val (bytes)

        Raises:
            TypeError: If key is not a string or val is not bytes.
            KVStoreError: If the underlying GCS call fails.
        """
        if not isinstance(key, str):
            raise TypeError("key must be a string, got: {}.".format(type(key)))
        if not isinstance(val, bytes):
            raise TypeError("val must be bytes, got: {}.".format(type(val)))

        try:
            return self.gcs_client.internal_kv_put(
                self.get_storage_key(key).encode(),
                val,
                overwrite=True,
                namespace=ray_constants.KV_NAMESPACE_SERVE,
                timeout=self.timeout,
            )
        except Exception as e:
            # NOTE(review): assumes e is a gRPC RpcError exposing .code();
            # a non-gRPC exception here would raise AttributeError — confirm.
            raise KVStoreError(e.code()) from e

    def get(self, key: str) -> Optional[bytes]:
        """Get the value associated with the given key from the store.

        Args:
            key (str)

        Returns:
            The bytes value. If the key wasn't found, returns None.

        Raises:
            TypeError: If key is not a string.
            KVStoreError: If the underlying GCS call fails.
        """
        if not isinstance(key, str):
            raise TypeError("key must be a string, got: {}.".format(type(key)))

        try:
            return self.gcs_client.internal_kv_get(
                self.get_storage_key(key).encode(),
                namespace=ray_constants.KV_NAMESPACE_SERVE,
                timeout=self.timeout,
            )
        except Exception as e:
            # NOTE(review): assumes a gRPC error with .code() — see put().
            raise KVStoreError(e.code()) from e

    def delete(self, key: str):
        """Delete the value associated with the given key from the store.

        Args:
            key (str)

        Raises:
            TypeError: If key is not a string.
            KVStoreError: If the underlying GCS call fails.
        """
        if not isinstance(key, str):
            raise TypeError("key must be a string, got: {}.".format(type(key)))

        try:
            return self.gcs_client.internal_kv_del(
                self.get_storage_key(key).encode(),
                False,
                namespace=ray_constants.KV_NAMESPACE_SERVE,
                timeout=self.timeout,
            )
        except Exception as e:
            # NOTE(review): assumes a gRPC error with .code() — see put().
            raise KVStoreError(e.code()) from e
def __init__(self,
             address,
             autoscaling_config,
             redis_password=None,
             prefix_cluster_info=False,
             monitor_ip=None,
             stop_event: Optional[Event] = None):
    """Initialize the monitor, bootstrapping from either Redis or the GCS.

    Args:
        address: Redis address ("ip:port") in legacy mode, or the GCS
            address when ``use_gcs_for_bootstrap()`` is true.
        autoscaling_config: Path to the autoscaling config file, or a
            callable returning the config dict.
        redis_password: Password for the Redis client (legacy mode only).
        prefix_cluster_info: Whether to prefix cluster info (stored on self).
        monitor_ip: If set, the metrics endpoint is published (to internal_kv
            or Redis) and a Prometheus HTTP server is started.
        stop_event: Optional event used to signal graceful exit from the
            monitor loop.
    """
    if not use_gcs_for_bootstrap():
        # Initialize the Redis clients.
        redis_address = address
        self.redis = ray._private.services.create_redis_client(
            redis_address, password=redis_password)
        # Sanity-check the address format ("ip:port"); values are unused.
        (ip, port) = address.split(":")
        # Initialize the gcs stub for getting all node resource usage.
        gcs_address = self.redis.get("GcsServerAddress").decode("utf-8")
    else:
        gcs_address = address
        redis_address = None
    options = (("grpc.enable_http_proxy", 0), )
    gcs_channel = ray._private.utils.init_grpc_channel(
        gcs_address, options)
    # TODO: Use gcs client for this
    self.gcs_node_resources_stub = \
        gcs_service_pb2_grpc.NodeResourceInfoGcsServiceStub(gcs_channel)
    self.gcs_node_info_stub = \
        gcs_service_pb2_grpc.NodeInfoGcsServiceStub(gcs_channel)

    # Set the redis client and mode so _internal_kv works for autoscaler.
    worker = ray.worker.global_worker
    if use_gcs_for_bootstrap():
        gcs_client = GcsClient(address=gcs_address)
    else:
        worker.redis_client = self.redis
        gcs_client = GcsClient.create_from_redis(self.redis)
    _initialize_internal_kv(gcs_client)
    # Publish the metrics endpoint exactly once. (The original code
    # performed this identical publication twice, before and after
    # _initialize_internal_kv; the duplicate has been removed.)
    if monitor_ip:
        monitor_addr = f"{monitor_ip}:{AUTOSCALER_METRIC_PORT}"
        if use_gcs_for_bootstrap():
            gcs_client.internal_kv_put(b"AutoscalerMetricsAddress",
                                       monitor_addr.encode(), True, None)
        else:
            self.redis.set("AutoscalerMetricsAddress", monitor_addr)
    worker.mode = 0
    if use_gcs_for_bootstrap():
        head_node_ip = gcs_address.split(":")[0]
    else:
        head_node_ip = redis_address.split(":")[0]
    self.redis_address = redis_address
    self.redis_password = redis_password

    self.load_metrics = LoadMetrics()
    self.last_avail_resources = None
    self.event_summarizer = EventSummarizer()
    self.prefix_cluster_info = prefix_cluster_info
    # Can be used to signal graceful exit from monitor loop.
    self.stop_event = stop_event  # type: Optional[Event]
    self.autoscaling_config = autoscaling_config
    self.autoscaler = None
    # If set, we are in a manually created cluster (non-autoscaling) and
    # simply mirroring what the GCS tells us the cluster node types are.
    self.readonly_config = None

    self.prom_metrics = AutoscalerPrometheusMetrics()
    if monitor_ip and prometheus_client:
        # If monitor_ip wasn't passed in, then don't attempt to start the
        # metric server to keep behavior identical to before metrics were
        # introduced
        try:
            logger.info(
                "Starting autoscaler metrics server on port {}".format(
                    AUTOSCALER_METRIC_PORT))
            prometheus_client.start_http_server(
                port=AUTOSCALER_METRIC_PORT,
                addr="127.0.0.1" if head_node_ip == "127.0.0.1" else "",
                registry=self.prom_metrics.registry)
        except Exception:
            logger.exception(
                "An exception occurred while starting the metrics server.")
    elif not prometheus_client:
        logger.warning("`prometheus_client` not found, so metrics will "
                       "not be exported.")

    logger.info("Monitor: Started")