def execute(self, context):
    """Ensure an Airflow pool named ``self.name`` exists.

    If the pool already exists it is logged and left untouched;
    otherwise a new pool is created with ``self.slots`` slots and
    ``self.description``.
    """
    try:
        pool = get_pool(name=self.name)
    except PoolNotFound:
        # Pool is missing — create it from the operator's configuration.
        pool = create_pool(name=self.name, slots=self.slots, description=self.description)
        self.log(f'Created pool: {pool}')
    else:
        # BUGFIX: the original fell through to the "Created pool" log when
        # get_pool() returned a falsy value without raising; only log
        # creation when a pool was actually created.
        if pool:
            self.log(f'Pool exists: {pool}')
def get_pool(name):
    """Get pool by a given name."""
    try:
        found = pool_api.get_pool(name=name)
    except AirflowException as exc:
        # Turn the API error into an HTTP error response carrying the
        # exception's own status code.
        log.error(exc)
        response = jsonify(error="{}".format(exc))
        response.status_code = exc.status_code
        return response
    # Success path: serialize the pool record.
    return jsonify(found.to_json())
def get_pool(name):
    """Get pool by a given name."""
    try:
        pool = pool_api.get_pool(name=name)
    except AirflowException as e:
        _log.error(e)
        response = jsonify(error="{}".format(e))
        # BUGFIX: AirflowException exposes ``status_code`` (default 500),
        # not ``status`` — the old getattr(e, 'status', 500) always fell
        # back to 500 and discarded the exception's real HTTP status.
        # This also matches the sibling endpoint that reads status_code.
        response.status_code = getattr(e, 'status_code', 500)
        return response
    else:
        return jsonify(pool.to_json())
def test_get_pool_bad_name(self):
    """Empty and whitespace-only pool names must be rejected."""
    for bad_name in ('', ' '):
        with pytest.raises(AirflowBadRequest, match="^Pool name shouldn't be empty$"):
            pool_api.get_pool(name=bad_name)
def test_get_pool_non_existing(self):
    """Looking up an unknown pool must raise PoolNotFound."""
    expected_message = "^Pool 'test' doesn't exist$"
    with pytest.raises(PoolNotFound, match=expected_message):
        pool_api.get_pool(name='test')
def test_get_pool(self):
    """Fetching an existing pool returns a record with the same name."""
    expected_name = self.pools[0].pool
    fetched = pool_api.get_pool(name=expected_name)
    assert fetched.pool == expected_name
def get_pool(self, name):
    """Look up a pool and return its (name, slots, description) triple."""
    found = pool.get_pool(name=name)
    return found.pool, found.slots, found.description
def compute_allocated_resources(logger=None):
    """Refresh NodeUtil's class-level resource statistics from the cluster.

    For every node returned by the Kubernetes API this sums the CPU and
    memory requests/limits of its non-terminated pods, derives free
    capacity, queries GPU memory figures, publishes the results as
    Airflow Variables, and keeps the "GPU_COUNT" pool in sync with the
    detected GPU device count.

    :param logger: optional logger used for warnings when the GPU
        Prometheus queries fail; may be None.
    :return: True on success, False if any exception occurred (errors
        are printed, not re-raised).
    """
    # Quantity constructor (NodeUtil.ureg is presumably a pint
    # UnitRegistry configured to parse k8s quantities like "500m"/"16Gi"
    # — TODO confirm).
    Q_ = NodeUtil.ureg.Quantity
    data = {}  # per-node stats dict, keyed by node name
    NodeUtil.last_update = datetime.now()
    try:
        for node in NodeUtil.core_v1.list_node().items:
            stats = {}
            node_name = node.metadata.name
            capacity = node.status.capacity
            allocatable = node.status.allocatable
            conditions = node.status.conditions
            # Translate the node's pressure conditions into booleans.
            stats["memory_pressure"] = False
            stats["disk_pressure"] = False
            stats["pid_pressure"] = False
            for condition in conditions:
                if condition.type == "MemoryPressure":
                    stats["memory_pressure"] = True if condition.status == "True" else False
                elif condition.type == "DiskPressure":
                    stats["disk_pressure"] = True if condition.status == "True" else False
                elif condition.type == "PIDPressure":
                    stats["pid_pressure"] = True if condition.status == "True" else False
            # Page limit for the pod listing: 1.5x the node's allocatable
            # pod count, so all running pods fit in a single page.
            max_pods = int(int(allocatable["pods"]) * 1.5)
            # Only pods scheduled on this node that are not terminated.
            field_selector = (
                "status.phase!=Succeeded,status.phase!=Failed," +
                "spec.nodeName=" + node_name)
            stats["cpu_alloc"] = Q_(allocatable["cpu"])
            stats["mem_alloc"] = Q_(allocatable["memory"])
            # GPU devices only appear in capacity/allocatable when the
            # NVIDIA device plugin is installed; default to 0 otherwise.
            stats["gpu_dev_count"] = Q_(
                capacity["nvidia.com/gpu"] if "nvidia.com/gpu" in capacity else 0)
            stats["gpu_dev_free"] = Q_(
                allocatable["nvidia.com/gpu"] if "nvidia.com/gpu" in allocatable else 0)
            pods = NodeUtil.core_v1.list_pod_for_all_namespaces(
                limit=max_pods, field_selector=field_selector).items
            # compute the allocated resources
            cpureqs, cpulmts, memreqs, memlmts = [], [], [], []
            for pod in pods:
                for container in pod.spec.containers:
                    res = container.resources
                    # requests/limits may be None; defaultdict yields 0
                    # for missing "cpu"/"memory" keys.
                    reqs = defaultdict(lambda: 0, res.requests or {})
                    lmts = defaultdict(lambda: 0, res.limits or {})
                    cpureqs.append(Q_(reqs["cpu"]))
                    memreqs.append(Q_(reqs["memory"]))
                    cpulmts.append(Q_(lmts["cpu"]))
                    memlmts.append(Q_(lmts["memory"]))
            stats["cpu_req"] = sum(cpureqs)
            stats["cpu_lmt"] = sum(cpulmts)
            stats["cpu_req_per"] = (stats["cpu_req"] / stats["cpu_alloc"] * 100)
            stats["cpu_lmt_per"] = (stats["cpu_lmt"] / stats["cpu_alloc"] * 100)
            stats["mem_req"] = sum(memreqs)
            stats["mem_lmt"] = sum(memlmts)
            stats["mem_req_per"] = (stats["mem_req"] / stats["mem_alloc"] * 100)
            stats["mem_lmt_per"] = (stats["mem_lmt"] / stats["mem_alloc"] * 100)
            data[node_name] = stats
        # NOTE(review): only the first node's stats are published below,
        # which silently assumes a single-node cluster — confirm.
        node_info = next(iter(data.values()))
        # CPU figures scaled by 1000 (presumably cores -> millicores).
        NodeUtil.cpu_alloc = node_info["cpu_alloc"].to_base_units(
        ).magnitude * 1000
        NodeUtil.cpu_req = node_info["cpu_req"].to_base_units(
        ).magnitude * 1000
        NodeUtil.cpu_lmt = node_info["cpu_lmt"].to_base_units(
        ).magnitude * 1000
        # NOTE(review): these percentages are also multiplied by 1000,
        # unlike mem_req_per/mem_lmt_per below — looks inconsistent;
        # confirm intended.
        NodeUtil.cpu_req_per = int(
            node_info["cpu_req_per"].to_base_units().magnitude * 1000)
        NodeUtil.cpu_lmt_per = int(
            node_info["cpu_lmt_per"].to_base_units().magnitude * 1000)
        # Memory figures divided down (presumably bytes -> MiB — confirm
        # the registry's base unit).
        NodeUtil.mem_alloc = int(
            node_info["mem_alloc"].to_base_units().magnitude // 1024 // 1024)
        NodeUtil.mem_req = int(
            node_info["mem_req"].to_base_units().magnitude // 1024 // 1024)
        NodeUtil.mem_lmt = int(
            node_info["mem_lmt"].to_base_units().magnitude // 1024 // 1024)
        NodeUtil.mem_req_per = int(
            node_info["mem_req_per"].to_base_units().magnitude)
        NodeUtil.mem_lmt_per = int(
            node_info["mem_lmt_per"].to_base_units().magnitude)
        NodeUtil.gpu_dev_count = int(node_info["gpu_dev_count"])
        NodeUtil.gpu_dev_free = int(node_info["gpu_dev_free"])
        NodeUtil.memory_pressure = node_info["memory_pressure"]
        NodeUtil.disk_pressure = node_info["disk_pressure"]
        NodeUtil.pid_pressure = node_info["pid_pressure"]
        # Keep the Airflow "GPU_COUNT" pool in sync with the device count.
        # NOTE(review): if pool_api.get_pool raises for a missing pool
        # (instead of returning None), this drops into the broad except
        # below on first run — confirm.
        gpu_count_pool = pool_api.get_pool(name="GPU_COUNT")
        if gpu_count_pool is None or gpu_count_pool.slots != NodeUtil.gpu_dev_count:
            pool_api.create_pool(name="GPU_COUNT",
                                 slots=NodeUtil.gpu_dev_count,
                                 description="Count of GPUs of the node")
        if NodeUtil.gpu_dev_count > 0:
            # GPU memory figures come from Prometheus queries.
            NodeUtil.gpu_mem_alloc, return_code = NodeUtil.get_node_info(
                query=NodeUtil.gpu_mem_available_query, logger=logger)
            if not return_code and logger is not None:
                logger.warning(
                    "############################################# Could not fetch gpu_alloc utilization from prometheus!!"
                )
                # NOTE(review): the None reset only happens when a logger
                # was passed — on failure without a logger the previous
                # value is kept; confirm intended.
                NodeUtil.gpu_mem_alloc = None
            NodeUtil.gpu_mem_used, return_code = NodeUtil.get_node_info(
                query=NodeUtil.gpu_mem_used_query, logger=logger)
            if not return_code and logger is not None:
                logger.warning(
                    "############################################# Could not fetch gpu_used utilization from prometheus!!"
                )
                NodeUtil.gpu_mem_used = None
        else:
            # No GPUs on the node: report zeroed GPU memory.
            NodeUtil.gpu_mem_alloc = 0
            NodeUtil.gpu_mem_used = 0
        # Derived free-capacity figures.
        NodeUtil.cpu_available_req = NodeUtil.cpu_alloc - NodeUtil.cpu_req
        NodeUtil.cpu_available_limit = NodeUtil.cpu_alloc - NodeUtil.cpu_lmt
        NodeUtil.memory_available_req = NodeUtil.mem_alloc - NodeUtil.mem_req
        NodeUtil.memory_available_limit = NodeUtil.mem_alloc - NodeUtil.mem_lmt
        NodeUtil.gpu_memory_available = None if (
            NodeUtil.gpu_mem_alloc is None or NodeUtil.gpu_mem_used is None
        ) else (NodeUtil.gpu_mem_alloc - NodeUtil.gpu_mem_used)
        # Publish the figures as Airflow Variables.
        # NOTE(review): CPU_NODE reports the limit sum while RAM_NODE
        # reports the request sum — confirm this asymmetry is intended.
        Variable.set("CPU_NODE", "{}/{}".format(NodeUtil.cpu_lmt, NodeUtil.cpu_alloc))
        Variable.set("CPU_FREE", "{}".format(NodeUtil.cpu_available_req))
        Variable.set("RAM_NODE", "{}/{}".format(NodeUtil.mem_req, NodeUtil.mem_alloc))
        Variable.set("RAM_FREE", "{}".format(NodeUtil.memory_available_req))
        Variable.set(
            "GPU_DEV",
            "{}/{}".format(NodeUtil.gpu_dev_free, NodeUtil.gpu_dev_count))
        Variable.set("GPU_DEV_FREE", "{}".format(NodeUtil.gpu_dev_free))
        Variable.set(
            "GPU_MEM",
            "{}/{}".format(NodeUtil.gpu_mem_used, NodeUtil.gpu_mem_alloc))
        Variable.set("GPU_MEM_FREE", "{}".format(NodeUtil.gpu_memory_available))
        Variable.set("UPDATED", datetime.utcnow())
        # Variable.set("cpu_alloc", "{}".format(NodeUtil.cpu_alloc))
        # Variable.set("cpu_req", "{}".format(NodeUtil.cpu_req))
        # Variable.set("cpu_lmt", "{}".format(NodeUtil.cpu_lmt))
        # Variable.set("cpu_req_per", "{}".format(NodeUtil.cpu_req_per))
        # Variable.set("cpu_lmt_per", "{}".format(NodeUtil.cpu_lmt_per))
        # Variable.set("cpu_available_req", "{}".format(NodeUtil.cpu_available_req))
        # Variable.set("cpu_available_limit", "{}".format(NodeUtil.cpu_available_limit))
        # Variable.set("mem_alloc", "{}".format(NodeUtil.mem_alloc))
        # Variable.set("mem_req", "{}".format(NodeUtil.mem_req))
        # Variable.set("mem_lmt", "{}".format(NodeUtil.mem_lmt))
        # Variable.set("mem_req_per", "{}".format(NodeUtil.mem_req_per))
        # Variable.set("mem_lmt_per", "{}".format(NodeUtil.mem_lmt_per))
        # Variable.set("memory_available_req", "{}".format(NodeUtil.memory_available_req))
        # Variable.set("memory_available_limit", "{}".format(NodeUtil.memory_available_limit))
        # Variable.set("gpu_count", "{}".format(NodeUtil.gpu_count))
        # Variable.set("gpu_alloc", "{}".format(NodeUtil.gpu_alloc))
        # Variable.set("gpu_used", "{}".format(NodeUtil.gpu_used))
        # Variable.set("gpu_memory_available", "{}".format(NodeUtil.gpu_memory_available))
        return True
    except Exception as e:
        # Broad catch: any failure above (API, parsing, pool sync) just
        # prints the error and reports False instead of raising.
        print(
            "+++++++++++++++++++++++++++++++++++++++++ COULD NOT FETCH NODES!"
        )
        print(e)
        return False
def test_get_pool(self):
    """Fetching an existing pool returns a record with a matching name."""
    expected_name = self.pools[0].pool
    fetched = pool_api.get_pool(name=expected_name)
    self.assertEqual(fetched.pool, expected_name)
def get_pool(self, name):
    """Return (name, slots, description) of the pool called *name*."""
    record = pool.get_pool(name=name)
    return (record.pool, record.slots, record.description)
def test_get_pool(self):
    """get_pool with an explicit session returns the matching pool."""
    expected = self.pools[0].pool
    result = pool_api.get_pool(name=expected, session=self.session)
    self.assertEqual(result.pool, expected)
def get_pool(self, name):
    """Fetch pool *name* and expose it as a (pool, slots, description) tuple."""
    entry = pool.get_pool(name=name)
    return entry.pool, entry.slots, entry.description
from kaapana.operators.LocalWorkflowCleanerOperator import LocalWorkflowCleanerOperator from kaapana.operators.ResampleOperator import ResampleOperator from nnunet.NnUnetOperator import NnUnetOperator from nnunet.LocalSegCheckOperator import LocalSegCheckOperator TASK_NAME = f"Task{random.randint(100,999):03}_Training" seg_filter = "" prep_modalities = "CT" train_network = "3d_lowres" train_network_trainer = "nnUNetTrainerV2" study_uid = pydicom.uid.generate_uid() gpu_count_pool = pool_api.get_pool(name="GPU_COUNT") gpu_count = int(gpu_count_pool.slots) if gpu_count_pool is not None else 1 cpu_count_pool = pool_api.get_pool(name="CPU") prep_threads = int(cpu_count_pool.slots//8) if cpu_count_pool is not None else 4 prep_threads = 2 if prep_threads < 2 else prep_threads prep_threads = 9 if prep_threads > 9 else prep_threads ui_forms = { "publication_form": { "type": "object", "properties": { "title": { "title": "Title", "default": "Automated Design of Deep Learning Methods\n for Biomedical Image Segmentation", "type": "string", "readOnly": True,