def __init__(self, redis_address, autoscaling_config, redis_password=None):
    """Set up the Redis clients, load metrics, autoscaler and GCS flushing.

    Args:
        redis_address: Address of the primary Redis server. It is split with
            ``get_ip_address`` / ``get_port`` to initialize the global state
            and passed unchanged to ``ray.services.create_redis_client``.
        autoscaling_config: Handed to ``StandardAutoscaler`` when truthy;
            otherwise ``self.autoscaler`` is set to ``None``.
        redis_password: Optional password forwarded to every Redis client
            created here.
    """
    # Initialize the Redis clients.
    self.state = ray.experimental.state.GlobalState()
    # Derive host and port from the constructor argument. The module-level
    # ``args`` object only exists when this file runs as a script, so
    # relying on it here would raise NameError for any other caller.
    redis_ip_address = get_ip_address(redis_address)
    redis_port = get_port(redis_address)
    self.state._initialize_global_state(
        redis_ip_address, redis_port, redis_password=redis_password)
    self.redis = ray.services.create_redis_client(
        redis_address, password=redis_password)
    # Setup subscriptions to the primary Redis server and the Redis shards.
    self.primary_subscribe_client = self.redis.pubsub(
        ignore_subscribe_messages=True)
    # Keep a mapping from raylet client ID to IP address to use
    # for updating the load metrics.
    self.raylet_id_to_ip_map = {}
    self.load_metrics = LoadMetrics()
    if autoscaling_config:
        self.autoscaler = StandardAutoscaler(autoscaling_config,
                                             self.load_metrics)
    else:
        self.autoscaler = None

    # Experimental feature: GCS flushing. Enabled only when the
    # RAY_USE_NEW_GCS environment variable is present.
    self.issue_gcs_flushes = "RAY_USE_NEW_GCS" in os.environ
    self.gcs_flush_policy = None
    if self.issue_gcs_flushes:
        # Data is stored under the first data shard, so we issue flushes to
        # that redis server.
        addr_port = self.redis.lrange("RedisShards", 0, -1)
        if len(addr_port) > 1:
            logger.warning(
                "Monitor: "
                "TODO: if launching > 1 redis shard, flushing needs to "
                "touch shards in parallel.")
            self.issue_gcs_flushes = False
        else:
            # The shard entry is a bytes "host:port" pair.
            addr_port = addr_port[0].split(b":")
            self.redis_shard = redis.StrictRedis(
                host=addr_port[0],
                port=addr_port[1],
                password=redis_password)
            try:
                # Probe the flush command; a ResponseError from the server
                # turns flushing off.
                self.redis_shard.execute_command("HEAD.FLUSH 0")
            except redis.exceptions.ResponseError as e:
                logger.info(
                    "Monitor: "
                    "Turning off flushing due to exception: {}".format(
                        str(e)))
                self.issue_gcs_flushes = False
def __init__(self, redis_address, autoscaling_config, redis_password=None):
    """Set up the Redis clients, load metrics, autoscaler and GCS flushing.

    Args:
        redis_address: Address of the primary Redis server. It is split with
            ``get_ip_address`` / ``get_port`` to initialize the global state
            and passed unchanged to ``ray.services.create_redis_client``.
        autoscaling_config: Handed to ``StandardAutoscaler`` when truthy;
            otherwise ``self.autoscaler`` is set to ``None``.
        redis_password: Optional password forwarded to every Redis client
            created here.
    """
    # Initialize the Redis clients.
    self.state = ray.experimental.state.GlobalState()
    # Derive host and port from the constructor argument. The module-level
    # ``args`` object only exists when this file runs as a script, so
    # relying on it here would raise NameError for any other caller.
    redis_ip_address = get_ip_address(redis_address)
    redis_port = get_port(redis_address)
    self.state._initialize_global_state(
        redis_ip_address, redis_port, redis_password=redis_password)
    self.redis = ray.services.create_redis_client(
        redis_address, password=redis_password)
    # Setup subscriptions to the primary Redis server and the Redis shards.
    self.primary_subscribe_client = self.redis.pubsub(
        ignore_subscribe_messages=True)
    # Keep a mapping from local scheduler client ID to IP address to use
    # for updating the load metrics.
    self.local_scheduler_id_to_ip_map = {}
    self.load_metrics = LoadMetrics()
    if autoscaling_config:
        self.autoscaler = StandardAutoscaler(autoscaling_config,
                                             self.load_metrics)
    else:
        self.autoscaler = None

    # Experimental feature: GCS flushing. Enabled only when the
    # RAY_USE_NEW_GCS environment variable is present.
    self.issue_gcs_flushes = "RAY_USE_NEW_GCS" in os.environ
    self.gcs_flush_policy = None
    if self.issue_gcs_flushes:
        # Data is stored under the first data shard, so we issue flushes to
        # that redis server.
        addr_port = self.redis.lrange("RedisShards", 0, -1)
        if len(addr_port) > 1:
            logger.warning(
                "Monitor: "
                "TODO: if launching > 1 redis shard, flushing needs to "
                "touch shards in parallel.")
            self.issue_gcs_flushes = False
        else:
            # The shard entry is a bytes "host:port" pair.
            addr_port = addr_port[0].split(b":")
            self.redis_shard = redis.StrictRedis(
                host=addr_port[0],
                port=addr_port[1],
                password=redis_password)
            try:
                # Probe the flush command; a ResponseError from the server
                # turns flushing off.
                self.redis_shard.execute_command("HEAD.FLUSH 0")
            except redis.exceptions.ResponseError as e:
                logger.info(
                    "Monitor: "
                    "Turning off flushing due to exception: {}".format(
                        str(e)))
                self.issue_gcs_flushes = False
time.sleep(ray._config.heartbeat_timeout_milliseconds() * 1e-3)

# TODO(rkn): This infinite loop should be inside of a try/except block,
# and if an exception is thrown we should push an error message to all
# drivers.

if __name__ == "__main__":
    # Script entry point: read the command-line options, build the monitor
    # from them and start it.
    parser = argparse.ArgumentParser(
        description=("Parse Redis server for the "
                     "monitor to connect to."))
    parser.add_argument(
        "--redis-address",
        type=str,
        required=True,
        help="the address to use for Redis")
    parser.add_argument(
        "--autoscaling-config",
        type=str,
        required=False,
        help="the path to the autoscaling config file")
    args = parser.parse_args()

    # Split the single address option into its host and port parts.
    redis_ip_address = get_ip_address(args.redis_address)
    redis_port = get_port(args.redis_address)

    # Expand "~" in the config path; without the option there is no config.
    autoscaling_config = (
        os.path.expanduser(args.autoscaling_config)
        if args.autoscaling_config else None)

    monitor = Monitor(redis_ip_address, redis_port, autoscaling_config)
    monitor.run()
required=False, type=str, default=ray_constants.LOGGER_LEVEL, choices=ray_constants.LOGGER_LEVEL_CHOICES, help=ray_constants.LOGGER_LEVEL_HELP) parser.add_argument( "--logging-format", required=False, type=str, default=ray_constants.LOGGER_FORMAT, help=ray_constants.LOGGER_FORMAT_HELP) args = parser.parse_args() level = logging.getLevelName(args.logging_level.upper()) logging.basicConfig(level=level, format=args.logging_format) redis_ip_address = get_ip_address(args.redis_address) redis_port = get_port(args.redis_address) if args.autoscaling_config: autoscaling_config = os.path.expanduser(args.autoscaling_config) else: autoscaling_config = None monitor = Monitor( redis_ip_address, redis_port, autoscaling_config, redis_password=args.redis_password) try: monitor.run()