def has_resources(self, resources: Resources) -> bool:
    """Check whether the requested resources fit into the free capacity.

    Requests that carry a placement group are delegated to
    ``self._pg_manager.can_stage()`` and skip the numeric comparison.

    For every other request, ``self._update_avail_resources()`` is called
    first (per the original documentation, this refreshes the Ray cluster
    resources once ``self._refresh_period`` has elapsed since the last
    update). Free capacity is then computed as ``self._avail_resources``
    minus ``self._committed_resources``. The check assumes the cluster is
    not resizing very frequently.

    Args:
        resources: The resource request to check.

    Returns:
        For placement-group requests, whatever ``can_stage()`` returns.
        Otherwise True if the CPU, GPU, memory, object store memory and
        every custom resource total of the request is within the free
        capacity, and False as soon as one of them is not.
    """
    if resources.has_placement_group:
        return self._pg_manager.can_stage()

    self._update_avail_resources()
    free = Resources.subtract(self._avail_resources,
                              self._committed_resources)

    # Built-in resource kinds are checked one at a time, in a fixed order,
    # bailing out on the first one that does not fit.
    if not (resources.cpu_total() <= free.cpu):
        return False
    if not (resources.gpu_total() <= free.gpu):
        return False
    if not (resources.memory_total() <= free.memory):
        return False
    if not (
        resources.object_store_memory_total() <= free.object_store_memory
    ):
        return False

    # Custom resources are looked up by name on the free-capacity object.
    for res_name in resources.custom_resources:
        if not (resources.get_res_total(res_name) <= free.get(res_name)):
            return False

    return True
def has_resources(self, resources: Resources) -> bool:
    """Decide whether a trial with the given resource request may start.

    Requests that carry a placement group are delegated to
    ``self._pg_manager.can_stage()``; that path neither reads nor writes
    ``self._trial_queued``.

    For every other request, ``self._update_avail_resources()`` is called
    first (per the original documentation, this refreshes the Ray cluster
    resources once ``self._refresh_period`` has elapsed since the last
    update). Free capacity is then computed as ``self._avail_resources``
    minus ``self._committed_resources``. The check assumes the cluster is
    not resizing very frequently.

    If the request does not fit, it may still be admitted once: when
    ``self._queue_trials`` is truthy and ``self._trial_queued`` is falsy,
    the flag is set, a warning is logged and True is returned. The flag is
    cleared again the next time a request fits.

    Args:
        resources: The resource request to check.

    Returns:
        For placement-group requests, whatever ``can_stage()`` returns.
        Otherwise True if the request fits or is allowed to over-commit,
        and False if it does not fit and over-committing is not allowed.
    """
    if resources.has_placement_group:
        return self._pg_manager.can_stage()

    self._update_avail_resources()
    free = Resources.subtract(self._avail_resources,
                              self._committed_resources)

    builtin_fit = (
        resources.cpu_total() <= free.cpu
        and resources.gpu_total() <= free.gpu
        and resources.memory_total() <= free.memory
        and resources.object_store_memory_total() <= free.object_store_memory
    )
    # Custom resources are only consulted when all built-in kinds fit.
    fits = builtin_fit and all(
        resources.get_res_total(res_name) <= free.get(res_name)
        for res_name in resources.custom_resources
    )

    if fits:
        # Enough room: clear the flag so a later over-commit is possible.
        # The working assumption is that all trials are blocked while one
        # trial is queued.
        self._trial_queued = False
        return True

    # Over-committing needs queueing enabled and no trial already queued.
    if not self._queue_trials or self._trial_queued:
        return False

    self._trial_queued = True
    logger.warning(
        "Allowing trial to start even though the "
        "cluster does not have enough free resources. Trial actors "
        "may appear to hang until enough resources are added to the "
        "cluster (e.g., via autoscaling). You can disable this "
        "behavior by specifying `queue_trials=False` in "
        "ray.tune.run().")
    return True