def setUp(self):
    """Launch a miniature local cluster for the tests.

    Starts one Redis server, one global scheduler, and
    NUM_CLUSTER_NODES (plasma store, plasma manager, local scheduler)
    triples, all bound to the loopback interface. Process handles and
    client connections are stashed on ``self`` for tearDown/tests.
    """
    self.node_ip_address = "127.0.0.1"
    redis_address, redis_shards = services.start_redis(
        self.node_ip_address)
    redis_port = services.get_port(redis_address)
    # Give Redis a moment to come up before any client connects.
    time.sleep(0.1)

    # Client for reading the global state store.
    self.state = state.GlobalState()
    self.state._initialize_global_state(self.node_ip_address, redis_port)

    # A single global scheduler serves the whole simulated cluster.
    self.p1 = global_scheduler.start_global_scheduler(
        redis_address, self.node_ip_address, use_valgrind=USE_VALGRIND)

    self.plasma_store_pids = []
    self.plasma_manager_pids = []
    self.local_scheduler_pids = []
    self.plasma_clients = []
    self.local_scheduler_clients = []

    for _ in range(NUM_CLUSTER_NODES):
        # Plasma store; its name is randomly generated by the plasma
        # module.
        plasma_store_name, store_proc = plasma.start_plasma_store()
        self.plasma_store_pids.append(store_proc)

        # Plasma manager; name and port are likewise randomly generated.
        (plasma_manager_name, manager_proc,
         plasma_manager_port) = plasma.start_plasma_manager(
             plasma_store_name, redis_address)
        self.plasma_manager_pids.append(manager_proc)

        plasma_address = "{}:{}".format(self.node_ip_address,
                                        plasma_manager_port)
        self.plasma_clients.append(
            pa.plasma.connect(plasma_store_name, plasma_manager_name, 64))

        # Local scheduler for this node, advertising 10 CPUs.
        local_scheduler_name, scheduler_proc = (
            local_scheduler.start_local_scheduler(
                plasma_store_name,
                plasma_manager_name=plasma_manager_name,
                plasma_address=plasma_address,
                redis_address=redis_address,
                static_resources={"CPU": 10}))

        # Connect a client to the scheduler we just started.
        self.local_scheduler_clients.append(
            local_scheduler.LocalSchedulerClient(
                local_scheduler_name, NIL_WORKER_ID, False,
                random_task_id(), False))
        self.local_scheduler_pids.append(scheduler_proc)
def __init__(self, redis_address, autoscaling_config, redis_password=None):
    """Connect the monitor to Redis and optionally set up autoscaling.

    Args:
        redis_address (str): The "ip:port" address of the primary Redis
            server.
        autoscaling_config: Path to the autoscaling config file, or None
            to disable the autoscaler.
        redis_password (str): Optional password for the Redis servers.
    """
    # Initialize the Redis clients.
    self.state = ray.experimental.state.GlobalState()
    # Bug fix: parse the `redis_address` parameter, not the module-level
    # `args`, which only exists when this file is run as a script
    # (referencing it here raised NameError for library callers).
    redis_ip_address = get_ip_address(redis_address)
    redis_port = get_port(redis_address)
    self.state._initialize_global_state(
        redis_ip_address, redis_port, redis_password=redis_password)
    self.redis = ray.services.create_redis_client(
        redis_address, password=redis_password)
    # Setup subscriptions to the primary Redis server and the Redis
    # shards.
    self.primary_subscribe_client = self.redis.pubsub(
        ignore_subscribe_messages=True)
    # Keep a mapping from raylet client ID to IP address to use
    # for updating the load metrics.
    self.raylet_id_to_ip_map = {}
    self.load_metrics = LoadMetrics()
    if autoscaling_config:
        self.autoscaler = StandardAutoscaler(autoscaling_config,
                                             self.load_metrics)
    else:
        self.autoscaler = None

    # Experimental feature: GCS flushing.
    self.issue_gcs_flushes = "RAY_USE_NEW_GCS" in os.environ
    self.gcs_flush_policy = None
    if self.issue_gcs_flushes:
        # Data is stored under the first data shard, so we issue flushes
        # to that redis server.
        # NOTE(review): assumes "RedisShards" is non-empty here; an empty
        # list would raise IndexError below — confirm against startup
        # ordering.
        addr_port = self.redis.lrange("RedisShards", 0, -1)
        if len(addr_port) > 1:
            logger.warning(
                "Monitor: "
                "TODO: if launching > 1 redis shard, flushing needs to "
                "touch shards in parallel.")
            self.issue_gcs_flushes = False
        else:
            addr_port = addr_port[0].split(b":")
            self.redis_shard = redis.StrictRedis(
                host=addr_port[0], port=addr_port[1],
                password=redis_password)
            try:
                # Probe the flush command once; disable flushing if the
                # shard's Redis module does not support it.
                self.redis_shard.execute_command("HEAD.FLUSH 0")
            except redis.exceptions.ResponseError as e:
                logger.info(
                    "Monitor: "
                    "Turning off flushing due to exception: {}".format(
                        str(e)))
                self.issue_gcs_flushes = False
def __init__(self, redis_address, autoscaling_config, redis_password=None):
    """Connect the monitor to Redis and optionally set up autoscaling.

    Args:
        redis_address (str): The "ip:port" address of the primary Redis
            server.
        autoscaling_config: Path to the autoscaling config file, or None
            to disable the autoscaler.
        redis_password (str): Optional password for the Redis servers.
    """
    # Initialize the Redis clients.
    self.state = ray.experimental.state.GlobalState()
    # Bug fix: parse the `redis_address` parameter, not the module-level
    # `args`, which only exists when this file is run as a script
    # (referencing it here raised NameError for library callers).
    redis_ip_address = get_ip_address(redis_address)
    redis_port = get_port(redis_address)
    self.state._initialize_global_state(
        redis_ip_address, redis_port, redis_password=redis_password)
    self.redis = ray.services.create_redis_client(
        redis_address, password=redis_password)
    # Setup subscriptions to the primary Redis server and the Redis
    # shards.
    self.primary_subscribe_client = self.redis.pubsub(
        ignore_subscribe_messages=True)
    # Keep a mapping from local scheduler client ID to IP address to use
    # for updating the load metrics.
    self.local_scheduler_id_to_ip_map = {}
    self.load_metrics = LoadMetrics()
    if autoscaling_config:
        self.autoscaler = StandardAutoscaler(autoscaling_config,
                                             self.load_metrics)
    else:
        self.autoscaler = None

    # Experimental feature: GCS flushing.
    self.issue_gcs_flushes = "RAY_USE_NEW_GCS" in os.environ
    self.gcs_flush_policy = None
    if self.issue_gcs_flushes:
        # Data is stored under the first data shard, so we issue flushes
        # to that redis server.
        # NOTE(review): assumes "RedisShards" is non-empty here; an empty
        # list would raise IndexError below — confirm against startup
        # ordering.
        addr_port = self.redis.lrange("RedisShards", 0, -1)
        if len(addr_port) > 1:
            logger.warning(
                "Monitor: "
                "TODO: if launching > 1 redis shard, flushing needs to "
                "touch shards in parallel.")
            self.issue_gcs_flushes = False
        else:
            addr_port = addr_port[0].split(b":")
            self.redis_shard = redis.StrictRedis(
                host=addr_port[0], port=addr_port[1],
                password=redis_password)
            try:
                # Probe the flush command once; disable flushing if the
                # shard's Redis module does not support it.
                self.redis_shard.execute_command("HEAD.FLUSH 0")
            except redis.exceptions.ResponseError as e:
                logger.info(
                    "Monitor: "
                    "Turning off flushing due to exception: {}".format(
                        str(e)))
                self.issue_gcs_flushes = False
def setUp(self):
    """Bring up a small test cluster on 127.0.0.1.

    One Redis server and one global scheduler are started, followed by
    NUM_CLUSTER_NODES sets of plasma store + plasma manager + local
    scheduler. All spawned process handles and connected clients are
    recorded on ``self`` so they can be inspected and torn down.
    """
    self.node_ip_address = "127.0.0.1"
    redis_address, redis_shards = services.start_redis(
        self.node_ip_address)
    redis_port = services.get_port(redis_address)
    # Brief pause so Redis is accepting connections.
    time.sleep(0.1)

    # Global-state client backed by the Redis server above.
    self.state = state.GlobalState()
    self.state._initialize_global_state(self.node_ip_address, redis_port)

    # One global scheduler for the whole cluster.
    self.p1 = global_scheduler.start_global_scheduler(
        redis_address, self.node_ip_address, use_valgrind=USE_VALGRIND)

    self.plasma_store_pids = []
    self.plasma_manager_pids = []
    self.local_scheduler_pids = []
    self.plasma_clients = []
    self.local_scheduler_clients = []

    for _node in range(NUM_CLUSTER_NODES):
        # Plasma store — its socket name is generated by the plasma
        # module.
        store_name, store_proc = plasma.start_plasma_store()
        self.plasma_store_pids.append(store_proc)

        # Plasma manager — name and port are also auto-generated.
        manager_name, manager_proc, manager_port = (
            plasma.start_plasma_manager(store_name, redis_address))
        self.plasma_manager_pids.append(manager_proc)

        manager_address = "{}:{}".format(self.node_ip_address,
                                         manager_port)
        self.plasma_clients.append(
            pa.plasma.connect(store_name, manager_name, 64))

        # Local scheduler for this node with 10 CPUs of static
        # resources.
        scheduler_name, scheduler_proc = (
            local_scheduler.start_local_scheduler(
                store_name,
                plasma_manager_name=manager_name,
                plasma_address=manager_address,
                redis_address=redis_address,
                static_resources={"CPU": 10}))

        # Attach a client to the new scheduler.
        self.local_scheduler_clients.append(
            local_scheduler.LocalSchedulerClient(
                scheduler_name, NIL_WORKER_ID, NIL_ACTOR_ID, False, 0))
        self.local_scheduler_pids.append(scheduler_proc)
# NOTE(review): this sleep looks like the tail of Monitor.run's polling
# loop that was separated from its def by formatting damage — confirm its
# intended placement before relying on this reconstruction.
time.sleep(ray._config.heartbeat_timeout_milliseconds() * 1e-3)


# TODO(rkn): This infinite loop should be inside of a try/except block,
# and if an exception is thrown we should push an error message to all
# drivers.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=("Parse Redis server for the monitor to connect "
                     "to."))
    parser.add_argument("--redis-address", required=True, type=str,
                        help="the address to use for Redis")
    parser.add_argument("--autoscaling-config", required=False, type=str,
                        help="the path to the autoscaling config file")
    args = parser.parse_args()

    if args.autoscaling_config:
        autoscaling_config = os.path.expanduser(args.autoscaling_config)
    else:
        autoscaling_config = None

    # Bug fix: Monitor.__init__ takes the combined "ip:port" string and
    # parses the IP and port itself; passing the pre-split
    # (ip, port, config) triple did not match its
    # (redis_address, autoscaling_config, ...) signature.
    monitor = Monitor(args.redis_address, autoscaling_config)
    monitor.run()
type=str, default=ray_constants.LOGGER_LEVEL, choices=ray_constants.LOGGER_LEVEL_CHOICES, help=ray_constants.LOGGER_LEVEL_HELP) parser.add_argument( "--logging-format", required=False, type=str, default=ray_constants.LOGGER_FORMAT, help=ray_constants.LOGGER_FORMAT_HELP) args = parser.parse_args() level = logging.getLevelName(args.logging_level.upper()) logging.basicConfig(level=level, format=args.logging_format) redis_ip_address = get_ip_address(args.redis_address) redis_port = get_port(args.redis_address) if args.autoscaling_config: autoscaling_config = os.path.expanduser(args.autoscaling_config) else: autoscaling_config = None monitor = Monitor( redis_ip_address, redis_port, autoscaling_config, redis_password=args.redis_password) try: monitor.run() except Exception as e: