예제 #1
0
 def execute(self, context):
     """Idempotently ensure the pool exists; create it when lookup fails."""
     try:
         pool = get_pool(name=self.name)
     except PoolNotFound:
         # No such pool yet -- create it from the operator's settings.
         pool = create_pool(name=self.name,
                            slots=self.slots,
                            description=self.description)
         self.log(f'Created pool: {pool}')
     else:
         if pool:
             self.log(f'Pool exists: {pool}')
             return
예제 #2
0
def get_pool(name):
    """Look up a pool by name and return it as a JSON response."""
    try:
        found = pool_api.get_pool(name=name)
    except AirflowException as err:
        # Surface the API failure as a JSON error payload carrying the
        # exception's own status code.
        log.error(err)
        error_response = jsonify(error="{}".format(err))
        error_response.status_code = err.status_code
        return error_response
    return jsonify(found.to_json())
예제 #3
0
def get_pool(name):
    """Return the pool with the given name as JSON, or a JSON error."""
    try:
        result = pool_api.get_pool(name=name)
    except AirflowException as e:
        # Log and convert the failure into an error response; fall back
        # to HTTP 500 when the exception carries no status attribute.
        _log.error(e)
        resp = jsonify(error="{}".format(e))
        resp.status_code = getattr(e, 'status', 500)
        return resp
    return jsonify(result.to_json())
예제 #4
0
 def test_get_pool_bad_name(self):
     """Empty or whitespace-only pool names must be rejected."""
     bad_names = ('', '    ')
     for bad_name in bad_names:
         with pytest.raises(AirflowBadRequest,
                            match="^Pool name shouldn't be empty$"):
             pool_api.get_pool(name=bad_name)
예제 #5
0
 def test_get_pool_non_existing(self):
     """Looking up an unknown pool raises PoolNotFound."""
     expected_message = "^Pool 'test' doesn't exist$"
     with pytest.raises(PoolNotFound, match=expected_message):
         pool_api.get_pool(name='test')
예제 #6
0
 def test_get_pool(self):
     """An existing pool can be fetched by its name."""
     expected_name = self.pools[0].pool
     fetched = pool_api.get_pool(name=expected_name)
     assert fetched.pool == expected_name
예제 #7
0
 def get_pool(self, name):
     """Return the (name, slots, description) triple for the named pool."""
     found = pool.get_pool(name=name)
     return found.pool, found.slots, found.description
예제 #8
0
    def compute_allocated_resources(logger=None):
        """Query the Kubernetes API for node capacity and per-pod resource
        requests/limits, cache the results on NodeUtil class attributes,
        and publish a summary via Airflow Variables.

        Args:
            logger: optional logger used for warnings when GPU memory
                metrics cannot be fetched from Prometheus.

        Returns:
            True on success, False if anything in the body raised (the
            whole body is guarded by a broad try/except).
        """
        # pint Quantity constructor -- parses k8s resource strings such
        # as "500m" (CPU) or "2Gi" (memory) into unit-aware quantities.
        Q_ = NodeUtil.ureg.Quantity
        data = {}
        NodeUtil.last_update = datetime.now()

        try:
            for node in NodeUtil.core_v1.list_node().items:
                stats = {}
                node_name = node.metadata.name
                capacity = node.status.capacity
                allocatable = node.status.allocatable
                conditions = node.status.conditions
                # Pressure flags default to False; a condition only flips
                # its flag when reported with status "True".
                stats["memory_pressure"] = False
                stats["disk_pressure"] = False
                stats["pid_pressure"] = False
                for condition in conditions:
                    if condition.type == "MemoryPressure":
                        stats[
                            "memory_pressure"] = True if condition.status == "True" else False
                    elif condition.type == "DiskPressure":
                        stats[
                            "disk_pressure"] = True if condition.status == "True" else False
                    elif condition.type == "PIDPressure":
                        stats[
                            "pid_pressure"] = True if condition.status == "True" else False

                # List at most 1.5x the node's allocatable pod count, and
                # only pods on this node that are not Succeeded/Failed
                # (terminated pods hold no resources).
                max_pods = int(int(allocatable["pods"]) * 1.5)
                field_selector = (
                    "status.phase!=Succeeded,status.phase!=Failed," +
                    "spec.nodeName=" + node_name)

                stats["cpu_alloc"] = Q_(allocatable["cpu"])
                stats["mem_alloc"] = Q_(allocatable["memory"])
                # GPU counts default to 0 when the nvidia.com/gpu resource
                # is not reported for this node.
                stats["gpu_dev_count"] = Q_(
                    capacity["nvidia.com/gpu"] if "nvidia.com/gpu" in
                    capacity else 0)
                stats["gpu_dev_free"] = Q_(
                    allocatable["nvidia.com/gpu"] if "nvidia.com/gpu" in
                    allocatable else 0)

                pods = NodeUtil.core_v1.list_pod_for_all_namespaces(
                    limit=max_pods, field_selector=field_selector).items
                # compute the allocated resources
                cpureqs, cpulmts, memreqs, memlmts = [], [], [], []
                for pod in pods:
                    for container in pod.spec.containers:
                        res = container.resources
                        # Containers may omit requests/limits entirely;
                        # defaultdict maps the missing keys to 0.
                        reqs = defaultdict(lambda: 0, res.requests or {})
                        lmts = defaultdict(lambda: 0, res.limits or {})
                        cpureqs.append(Q_(reqs["cpu"]))
                        memreqs.append(Q_(reqs["memory"]))
                        cpulmts.append(Q_(lmts["cpu"]))
                        memlmts.append(Q_(lmts["memory"]))

                stats["cpu_req"] = sum(cpureqs)
                stats["cpu_lmt"] = sum(cpulmts)
                stats["cpu_req_per"] = (stats["cpu_req"] / stats["cpu_alloc"] *
                                        100)
                stats["cpu_lmt_per"] = (stats["cpu_lmt"] / stats["cpu_alloc"] *
                                        100)
                stats["mem_req"] = sum(memreqs)
                stats["mem_lmt"] = sum(memlmts)
                stats["mem_req_per"] = (stats["mem_req"] / stats["mem_alloc"] *
                                        100)
                stats["mem_lmt_per"] = (stats["mem_lmt"] / stats["mem_alloc"] *
                                        100)
                data[node_name] = stats

            # NOTE(review): although all nodes were scanned above, only the
            # FIRST node's stats are propagated to the NodeUtil class
            # attributes -- presumably a single-node cluster; confirm
            # before relying on this in a multi-node setup.
            node_info = next(iter(data.values()))
            # CPU figures are scaled by 1000 (base units -> millicores).
            NodeUtil.cpu_alloc = node_info["cpu_alloc"].to_base_units(
            ).magnitude * 1000
            NodeUtil.cpu_req = node_info["cpu_req"].to_base_units(
            ).magnitude * 1000
            NodeUtil.cpu_lmt = node_info["cpu_lmt"].to_base_units(
            ).magnitude * 1000
            # NOTE(review): multiplying a percentage by 1000 looks odd --
            # confirm the intended unit of cpu_req_per / cpu_lmt_per.
            NodeUtil.cpu_req_per = int(
                node_info["cpu_req_per"].to_base_units().magnitude * 1000)
            NodeUtil.cpu_lmt_per = int(
                node_info["cpu_lmt_per"].to_base_units().magnitude * 1000)
            # Memory figures are converted from bytes to MiB.
            NodeUtil.mem_alloc = int(
                node_info["mem_alloc"].to_base_units().magnitude // 1024 //
                1024)
            NodeUtil.mem_req = int(
                node_info["mem_req"].to_base_units().magnitude // 1024 // 1024)
            NodeUtil.mem_lmt = int(
                node_info["mem_lmt"].to_base_units().magnitude // 1024 // 1024)
            NodeUtil.mem_req_per = int(
                node_info["mem_req_per"].to_base_units().magnitude)
            NodeUtil.mem_lmt_per = int(
                node_info["mem_lmt_per"].to_base_units().magnitude)
            NodeUtil.gpu_dev_count = int(node_info["gpu_dev_count"])
            NodeUtil.gpu_dev_free = int(node_info["gpu_dev_free"])
            NodeUtil.memory_pressure = node_info["memory_pressure"]
            NodeUtil.disk_pressure = node_info["disk_pressure"]
            NodeUtil.pid_pressure = node_info["pid_pressure"]
            # Keep the Airflow "GPU_COUNT" pool in sync with the detected
            # GPU device count.
            gpu_count_pool = pool_api.get_pool(name="GPU_COUNT")
            if gpu_count_pool is None or gpu_count_pool.slots != NodeUtil.gpu_dev_count:
                pool_api.create_pool(name="GPU_COUNT",
                                     slots=NodeUtil.gpu_dev_count,
                                     description="Count of GPUs of the node")

            if NodeUtil.gpu_dev_count > 0:
                # GPU memory figures come from Prometheus queries.
                NodeUtil.gpu_mem_alloc, return_code = NodeUtil.get_node_info(
                    query=NodeUtil.gpu_mem_available_query, logger=logger)
                # NOTE(review): when logger is None a failed fetch does NOT
                # reset gpu_mem_alloc/gpu_mem_used to None -- confirm this
                # asymmetry is intended.
                if not return_code and logger is not None:
                    logger.warning(
                        "############################################# Could not fetch gpu_alloc utilization from prometheus!!"
                    )
                    NodeUtil.gpu_mem_alloc = None
                NodeUtil.gpu_mem_used, return_code = NodeUtil.get_node_info(
                    query=NodeUtil.gpu_mem_used_query, logger=logger)
                if not return_code and logger is not None:
                    logger.warning(
                        "############################################# Could not fetch gpu_used utilization from prometheus!!"
                    )
                    NodeUtil.gpu_mem_used = None
            else:
                NodeUtil.gpu_mem_alloc = 0
                NodeUtil.gpu_mem_used = 0

            # Derived "free" figures (allocatable minus requested/limited).
            NodeUtil.cpu_available_req = NodeUtil.cpu_alloc - NodeUtil.cpu_req
            NodeUtil.cpu_available_limit = NodeUtil.cpu_alloc - NodeUtil.cpu_lmt
            NodeUtil.memory_available_req = NodeUtil.mem_alloc - NodeUtil.mem_req
            NodeUtil.memory_available_limit = NodeUtil.mem_alloc - NodeUtil.mem_lmt
            NodeUtil.gpu_memory_available = None if (
                NodeUtil.gpu_mem_alloc is None or NodeUtil.gpu_mem_used is None
            ) else (NodeUtil.gpu_mem_alloc - NodeUtil.gpu_mem_used)

            # Publish the summary as Airflow Variables.
            Variable.set("CPU_NODE", "{}/{}".format(NodeUtil.cpu_lmt,
                                                    NodeUtil.cpu_alloc))
            Variable.set("CPU_FREE", "{}".format(NodeUtil.cpu_available_req))
            Variable.set("RAM_NODE", "{}/{}".format(NodeUtil.mem_req,
                                                    NodeUtil.mem_alloc))
            Variable.set("RAM_FREE",
                         "{}".format(NodeUtil.memory_available_req))
            Variable.set(
                "GPU_DEV", "{}/{}".format(NodeUtil.gpu_dev_free,
                                          NodeUtil.gpu_dev_count))
            Variable.set("GPU_DEV_FREE", "{}".format(NodeUtil.gpu_dev_free))
            Variable.set(
                "GPU_MEM", "{}/{}".format(NodeUtil.gpu_mem_used,
                                          NodeUtil.gpu_mem_alloc))
            Variable.set("GPU_MEM_FREE",
                         "{}".format(NodeUtil.gpu_memory_available))
            Variable.set("UPDATED", datetime.utcnow())

            # Variable.set("cpu_alloc", "{}".format(NodeUtil.cpu_alloc))
            # Variable.set("cpu_req", "{}".format(NodeUtil.cpu_req))
            # Variable.set("cpu_lmt", "{}".format(NodeUtil.cpu_lmt))
            # Variable.set("cpu_req_per", "{}".format(NodeUtil.cpu_req_per))
            # Variable.set("cpu_lmt_per", "{}".format(NodeUtil.cpu_lmt_per))
            # Variable.set("cpu_available_req", "{}".format(NodeUtil.cpu_available_req))
            # Variable.set("cpu_available_limit", "{}".format(NodeUtil.cpu_available_limit))
            # Variable.set("mem_alloc", "{}".format(NodeUtil.mem_alloc))
            # Variable.set("mem_req", "{}".format(NodeUtil.mem_req))
            # Variable.set("mem_lmt", "{}".format(NodeUtil.mem_lmt))
            # Variable.set("mem_req_per", "{}".format(NodeUtil.mem_req_per))
            # Variable.set("mem_lmt_per", "{}".format(NodeUtil.mem_lmt_per))
            # Variable.set("memory_available_req", "{}".format(NodeUtil.memory_available_req))
            # Variable.set("memory_available_limit", "{}".format(NodeUtil.memory_available_limit))
            # Variable.set("gpu_count", "{}".format(NodeUtil.gpu_count))
            # Variable.set("gpu_alloc", "{}".format(NodeUtil.gpu_alloc))
            # Variable.set("gpu_used", "{}".format(NodeUtil.gpu_used))
            # Variable.set("gpu_memory_available", "{}".format(NodeUtil.gpu_memory_available))
            return True

        except Exception as e:
            # Best-effort: any failure is printed and reported as False
            # instead of propagating to the caller.
            print(
                "+++++++++++++++++++++++++++++++++++++++++ COULD NOT FETCH NODES!"
            )
            print(e)
            return False
 def test_get_pool(self):
     """Fetching an existing pool by name returns that pool."""
     expected = self.pools[0].pool
     fetched = pool_api.get_pool(name=expected)
     self.assertEqual(fetched.pool, expected)
예제 #10
0
 def get_pool(self, name):
     """Return the pool's name, slots and description as a tuple."""
     record = pool.get_pool(name=name)
     return record.pool, record.slots, record.description
예제 #11
0
 def test_get_pool(self):
     """get_pool returns the stored pool when queried via the session."""
     expected = self.pools[0].pool
     fetched = pool_api.get_pool(name=expected, session=self.session)
     self.assertEqual(fetched.pool, expected)
예제 #12
0
 def get_pool(self, name):
     """Look up a pool and return its (name, slots, description)."""
     entry = pool.get_pool(name=name)
     return entry.pool, entry.slots, entry.description
예제 #13
0
from kaapana.operators.LocalWorkflowCleanerOperator import LocalWorkflowCleanerOperator
from kaapana.operators.ResampleOperator import ResampleOperator


from nnunet.NnUnetOperator import NnUnetOperator
from nnunet.LocalSegCheckOperator import LocalSegCheckOperator

# Task name with a random 3-digit id, e.g. "Task123_Training".
TASK_NAME = f"Task{random.randint(100,999):03}_Training"
# nnU-Net training defaults for this DAG.
seg_filter = ""
prep_modalities = "CT"
train_network = "3d_lowres"
train_network_trainer = "nnUNetTrainerV2"

# Fresh DICOM study UID generated at DAG-definition time.
study_uid = pydicom.uid.generate_uid()

# Derive parallelism from Airflow pools; fall back to defaults when the
# pools are not defined (get_pool returning None).
gpu_count_pool = pool_api.get_pool(name="GPU_COUNT")
gpu_count = int(gpu_count_pool.slots) if gpu_count_pool is not None else 1
cpu_count_pool = pool_api.get_pool(name="CPU")
# One preprocessing thread per 8 CPU slots, clamped to the range [2, 9].
prep_threads = int(cpu_count_pool.slots//8) if cpu_count_pool is not None else 4
prep_threads = 2 if prep_threads < 2 else prep_threads
prep_threads = 9 if prep_threads > 9 else prep_threads

ui_forms = {
    "publication_form": {
        "type": "object",
        "properties": {
            "title": {
                "title": "Title",
                "default": "Automated Design of Deep Learning Methods\n for Biomedical Image Segmentation",
                "type": "string",
                "readOnly": True,