def _get_resources_per_node():
    """
    Maps node id to available resources on that node

    :return: dict with available :class:`Resources` for each node
    """

    def _is_node_key(k):
        return k.startswith(ray.resource_spec.NODE_ID_PREFIX)

    # Only consider active/alive nodes
    nodes = filter(lambda node: node["Alive"], ray.nodes())
    resources = map(lambda node: node["Resources"], nodes)

    # Group resources by node
    resources_by_node = {}
    for item in resources:
        node_id = next(filter(_is_node_key, item.keys()))
        item = item.copy()

        num_cpus = item.pop("CPU", 0)
        num_gpus = item.pop("GPU", 0)
        memory = ray_constants.from_memory_units(item.pop("memory", 0))
        object_store_memory = ray_constants.from_memory_units(
            item.pop("object_store_memory", 0))
        custom_resources = item

        resources_by_node[node_id] = Resources(
            int(num_cpus),
            int(num_gpus),
            memory=int(memory),
            object_store_memory=int(object_store_memory),
            custom_resources=custom_resources)

    return resources_by_node
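
# Hedged usage sketch (not part of the original module): it summarizes the
# per-node map returned above. It assumes the Resources class exposes `cpu`
# and `gpu` attributes, matching the positional arguments used in its
# constructor, and that `logger` is the module-level logger used elsewhere.
def _summarize_node_resources(resources_by_node):
    """Log total CPUs/GPUs across all alive nodes; illustrative only."""
    total_cpus = sum(res.cpu for res in resources_by_node.values())
    total_gpus = sum(res.gpu for res in resources_by_node.values())
    logger.debug("Detected %s nodes with %s CPUs and %s GPUs in total",
                 len(resources_by_node), total_cpus, total_gpus)
    return total_cpus, total_gpus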

def _update_avail_resources(self, num_retries=5):
    resources = None
    for i in range(num_retries):
        if i > 0:
            logger.warning(
                "Cluster resources not detected or are 0. Attempt #"
                "%s...", i + 1)
            time.sleep(0.5)
        try:
            resources = ray.cluster_resources()
        except Exception:
            # TODO(rliaw): Remove this when local mode is fixed.
            # https://github.com/ray-project/ray/issues/4147
            logger.debug("Using resources for local machine.")
            resources = ResourceSpec().resolve(True).to_resource_dict()
        if resources:
            break

    if not resources:
        # NOTE: This hides the possibility that Ray may be waiting for
        # clients to connect.
        resources.setdefault("CPU", 0)
        resources.setdefault("GPU", 0)
        logger.warning("Cluster resources cannot be detected or are 0. "
                       "You can resume this experiment by passing in "
                       "`resume=True` to `run`.")

    resources = resources.copy()
    num_cpus = resources.pop("CPU", 0)
    num_gpus = resources.pop("GPU", 0)
    memory = ray_constants.from_memory_units(resources.pop("memory", 0))
    object_store_memory = ray_constants.from_memory_units(
        resources.pop("object_store_memory", 0))
    custom_resources = resources

    if num_gpus == 0:
        warnings.warn(
            "No GPU resources found, assuming local test, using CPU "
            "resources instead")
        # local test
        num_gpus = num_cpus
        self._fake_gpus = True
    else:
        self._fake_gpus = False

    avail_resources = Resources(
        int(num_cpus),
        int(num_gpus),
        memory=int(memory),
        object_store_memory=int(object_store_memory),
        custom_resources=custom_resources,
    )

    assert (self.idle_resources.is_nonnegative()), \
        "Cluster removed resources from running trials!"

    self._avail_resources = avail_resources
    self._last_resource_refresh = time.time()
    self._resources_initialized = True
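
# Hedged sketch (not from the original source) of the fake-GPU fallback used
# in the method above: when no GPUs are detected, each CPU is treated as a
# stand-in GPU so GPU-requesting trials can still be scheduled in a local
# test. The free-function form and its name are illustrative assumptions.
def _substitute_fake_gpus(num_cpus, num_gpus):
    """Return (num_gpus, fake_gpus_flag) mirroring the fallback above."""
    if num_gpus == 0:
        # Pretend each CPU is a GPU so the resource math still works locally.
        return num_cpus, True
    return num_gpus, False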

def _update_avail_resources(self, num_retries=5):
    if time.time() - self._last_resource_refresh < self._refresh_period:
        return
    logger.debug("Checking Ray cluster resources.")
    resources = None
    for i in range(num_retries):
        if i > 0:
            logger.warning(
                "Cluster resources not detected or are 0. Attempt #"
                "%s...", i + 1)
            time.sleep(0.5)
        try:
            resources = ray.cluster_resources()
        except Exception as exc:
            # TODO(rliaw): Remove this when local mode is fixed.
            # https://github.com/ray-project/ray/issues/4147
            logger.debug(f"{exc}: Using resources for local machine.")
            resources = ResourceSpec().resolve(True).to_resource_dict()
        if resources:
            break

    if not resources:
        # NOTE: This hides the possibility that Ray may be waiting for
        # clients to connect.
        resources.setdefault("CPU", 0)
        resources.setdefault("GPU", 0)
        logger.warning("Cluster resources cannot be detected or are 0. "
                       "You can resume this experiment by passing in "
                       "`resume=True` to `run`.")

    resources = resources.copy()
    num_cpus = resources.pop("CPU", 0)
    num_gpus = resources.pop("GPU", 0)
    memory = ray_constants.from_memory_units(resources.pop("memory", 0))
    object_store_memory = ray_constants.from_memory_units(
        resources.pop("object_store_memory", 0))
    custom_resources = resources

    self._avail_resources = Resources(
        int(num_cpus),
        int(num_gpus),
        memory=int(memory),
        object_store_memory=int(object_store_memory),
        custom_resources=custom_resources)
    self._last_resource_refresh = time.time()
    self._resources_initialized = True

def _update_avail_resources(self, num_retries=5):
    if time.time() - self._last_resource_refresh < self._refresh_period:
        return
    logger.debug("Checking Ray cluster resources.")
    resources = None
    for i in range(num_retries):
        if i > 0:
            logger.warning(
                "Cluster resources not detected or are 0. Attempt #"
                "%s...",
                i + 1,
            )
            time.sleep(0.5)
        resources = ray.cluster_resources()
        if resources:
            break

    if not resources:
        # NOTE: This hides the possibility that Ray may be waiting for
        # clients to connect.
        resources.setdefault("CPU", 0)
        resources.setdefault("GPU", 0)
        logger.warning(
            "Cluster resources cannot be detected or are 0. "
            "You can resume this experiment by passing in "
            "`resume=True` to `run`."
        )

    resources = resources.copy()
    num_cpus = resources.pop("CPU", 0)
    num_gpus = resources.pop("GPU", 0)
    memory = ray_constants.from_memory_units(resources.pop("memory", 0))
    object_store_memory = ray_constants.from_memory_units(
        resources.pop("object_store_memory", 0)
    )
    custom_resources = resources

    self._avail_resources = Resources(
        int(num_cpus),
        int(num_gpus),
        memory=int(memory),
        object_store_memory=int(object_store_memory),
        custom_resources=custom_resources,
    )
    self._last_resource_refresh = time.time()
    self._resources_initialized = True
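
# Hedged, standalone sketch (not from the original source) of the dict
# splitting pattern shared by the variants above: the standard keys are
# popped off a copy of the cluster resources dict and whatever remains is
# treated as custom resources. The function name and sample values are
# illustrative only.
def _split_standard_and_custom(resources):
    """Split a cluster resources dict into (standard, custom) dicts.

    >>> _split_standard_and_custom(
    ...     {"CPU": 8.0, "GPU": 1.0, "accelerator_type:V100": 1.0})[1]
    {'accelerator_type:V100': 1.0}
    """
    resources = dict(resources)
    standard = {
        "CPU": resources.pop("CPU", 0),
        "GPU": resources.pop("GPU", 0),
        "memory": resources.pop("memory", 0),
        "object_store_memory": resources.pop("object_store_memory", 0),
    }
    # Anything left over is a custom resource (e.g. accelerator types).
    return standard, resources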

def limit_input_deltas(
        input_deltas: List[Dict[str, Any]],
        cluster_resources: Dict[str, float],
        user_hash_bucket_count: int,
        user_hash_bucket_chunk_size: int,
        round_completion_info: Dict[str, Any],
        deltacat_storage=unimplemented_deltacat_storage):

    # TODO (pdames): when row counts are available in metadata, use them
    # instead of bytes - memory consumption depends more on the number of
    # input delta records than on bytes.

    # Inflation multiplier from snappy-compressed parquet to pyarrow.
    # This should be kept larger than actual average inflation multipliers.
    # Note that this is a very rough guess, since the actual observed pyarrow
    # inflation multiplier for snappy-compressed parquet is about 5.45X for
    # all rows, but here we're trying to guess the inflation multiplier for
    # just a primary key SHA1 digest and sort key columns (which could be all
    # columns of the table in the worst case, but here we're assuming that
    # they represent no more than ~1/4th of the total table bytes).
    PYARROW_INFLATION_MULTIPLIER = 1.5

    # We assume here that we're running on a fixed-size cluster.
    # This assumption could be removed, but we'd still need to know the max
    # resources we COULD get for this cluster, and the amount of memory
    # available per CPU should remain fixed across the cluster.
    worker_cpus = int(cluster_resources["CPU"])
    worker_obj_store_mem = ray_constants.from_memory_units(
        cluster_resources["object_store_memory"])
    logger.info(f"Total worker object store memory: {worker_obj_store_mem}")
    worker_obj_store_mem_per_task = worker_obj_store_mem / worker_cpus
    logger.info(f"Worker object store memory/task: "
                f"{worker_obj_store_mem_per_task}")
    worker_task_mem = ray_constants.from_memory_units(
        cluster_resources["memory"])
    logger.info(f"Total worker memory: {worker_task_mem}")
    # TODO (pdames): ensure fixed memory per CPU in heterogeneous clusters
    worker_mem_per_task = worker_task_mem / worker_cpus
    logger.info(f"Cluster worker memory/task: {worker_mem_per_task}")

    hash_bucket_count = 0
    if round_completion_info:
        hash_bucket_count = round_completion_info["hash_buckets"]
        logger.info(f"Prior hash bucket count: {hash_bucket_count}")
    if not hash_bucket_count:
        hash_bucket_count = user_hash_bucket_count
    elif user_hash_bucket_count and hash_bucket_count != user_hash_bucket_count:
        raise ValueError(
            f"Given hash bucket count ({user_hash_bucket_count}) "
            f"does not match the existing compacted hash bucket "
            f"count ({hash_bucket_count}). To resolve this "
            f"problem either omit a hash bucket count when "
            f"running compaction or rehash your existing "
            f"compacted dataset.")

    delta_bytes = 0
    delta_bytes_pyarrow = 0
    latest_stream_position = -1
    limited_input_delta_manifests = []
    for delta in input_deltas:
        delta_manifest = deltacat_storage.get_delta_manifest(delta)
        # TODO (pdames): ensure pyarrow object fits in per-task obj store mem
        position = dl.get_stream_position(dm.get_delta_locator(delta_manifest))
        manifest_entries = rsm.get_entries(dm.get_manifest(delta_manifest))
        for entry in manifest_entries:
            # TODO: Fetch s3_obj["Size"] if entry content length undefined?
            delta_bytes += rsmm.get_content_length(rsme.get_meta(entry))
        delta_bytes_pyarrow = delta_bytes * PYARROW_INFLATION_MULTIPLIER
        latest_stream_position = max(position, latest_stream_position)
        if delta_bytes_pyarrow > worker_obj_store_mem:
            logger.info(
                f"Input delta manifests limited to "
                f"{len(limited_input_delta_manifests)} by object store mem "
                f"({delta_bytes_pyarrow} > {worker_obj_store_mem})")
            break
        limited_input_delta_manifests.append(
            dma.from_delta_manifest(delta_manifest))

    logger.info(f"Input delta manifests to compact this round: "
                f"{len(limited_input_delta_manifests)}")
    logger.info(f"Input delta manifest bytes to compact: {delta_bytes}")
    logger.info(
        f"Latest input delta stream position: {latest_stream_position}")

    if not limited_input_delta_manifests:
        raise RuntimeError("No input deltas to compact!")

    # TODO (pdames): determine min hash buckets from size of all deltas
    # (not just deltas for this round)
    min_hash_bucket_count = math.ceil(
        delta_bytes_pyarrow / worker_obj_store_mem_per_task)
    logger.info(f"Minimum recommended hash buckets: {min_hash_bucket_count}")

    if hash_bucket_count <= 0:
        # TODO (pdames): calc default hash buckets from table growth rate...
        # as this stands, we don't know whether we're provisioning
        # insufficient hash buckets for the next 5 minutes of deltas or more
        # than enough for the next 10 years
        hash_bucket_count = min_hash_bucket_count
        logger.info(f"Default hash buckets: {hash_bucket_count}")

    if hash_bucket_count < min_hash_bucket_count:
        logger.warning(
            f"Provided hash bucket count ({hash_bucket_count}) "
            f"is less than the min recommended ({min_hash_bucket_count}). "
            f"This compaction job run may run out of memory. To resolve this "
            f"problem either specify a larger number of hash buckets when "
            f"running compaction, omit a custom hash bucket count when "
            f"running compaction, or provision workers with more task "
            f"memory per CPU.")

    hash_bucket_chunk_size = user_hash_bucket_chunk_size
    max_hash_bucket_chunk_size = math.ceil(
        worker_obj_store_mem_per_task / PYARROW_INFLATION_MULTIPLIER)
    logger.info(f"Max hash bucket chunk size: {max_hash_bucket_chunk_size}")
    if hash_bucket_chunk_size > max_hash_bucket_chunk_size:
        # TODO (pdames): note type of memory to increase (task or object store)
        logger.warning(
            f"Provided hash bucket chunk size "
            f"({user_hash_bucket_chunk_size}) is greater than the max "
            f"recommended ({max_hash_bucket_chunk_size}). This compaction "
            f"job may run out of memory. To resolve this problem either "
            f"specify a smaller hash bucket chunk size when running "
            f"compaction, omit a custom hash bucket chunk size when running "
            f"compaction, or provision workers with more task and object "
            f"store memory per CPU.")
    elif not hash_bucket_chunk_size:
        hash_bucket_chunk_size = math.ceil(max_hash_bucket_chunk_size)
        logger.info(
            f"Default hash bucket chunk size: {hash_bucket_chunk_size}")

    sized_delta_manifests = dma.size_limited_groups(
        limited_input_delta_manifests,
        hash_bucket_chunk_size,
    )

    logger.info(f"Hash bucket chunk size: {hash_bucket_chunk_size}")
    logger.info(f"Hash bucket count: {hash_bucket_count}")
    logger.info(f"Input delta manifest count: {len(sized_delta_manifests)}")

    return sized_delta_manifests, hash_bucket_count, latest_stream_position
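
# Hedged, self-contained sketch (not from the original source) of the sizing
# arithmetic used by limit_input_deltas above. Only the formulas mirror the
# code above; the function name and the example numbers are illustrative.
import math


def _estimate_sizing(delta_bytes, worker_cpus, worker_obj_store_mem,
                     pyarrow_inflation_multiplier=1.5):
    """Return (min_hash_bucket_count, max_hash_bucket_chunk_size)."""
    obj_store_mem_per_task = worker_obj_store_mem / worker_cpus
    delta_bytes_pyarrow = delta_bytes * pyarrow_inflation_multiplier
    # Each hash bucket's inflated bytes must fit in one task's share of
    # object store memory, so the minimum bucket count is the ratio of the
    # two, rounded up.
    min_hash_bucket_count = math.ceil(
        delta_bytes_pyarrow / obj_store_mem_per_task)
    # A chunk's inflated pyarrow size must also fit in one task's share,
    # which caps the on-disk chunk size.
    max_hash_bucket_chunk_size = math.ceil(
        obj_store_mem_per_task / pyarrow_inflation_multiplier)
    return min_hash_bucket_count, max_hash_bucket_chunk_size


# Example: 100 GiB of compressed input on a 64-CPU cluster with a 256 GiB
# object store gives 4 GiB of object store memory per task, an inflated size
# of 150 GiB, and therefore ceil(150 / 4) = 38 hash buckets at minimum.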