def test_create_pool_existing(self):
    """Check ``create_pool`` when it is given the name of ``self.pools[0]``.

    The returned pool must carry that same name together with the requested
    slots (5) and empty description, and the ``Pool`` table queried through
    ``self.session`` must contain exactly two rows afterwards.
    """
    result = pool_api.create_pool(
        name=self.pools[0].pool,
        slots=5,
        description='',
        session=self.session,
    )

    # Attributes of the returned pool.
    self.assertEqual(result.pool, self.pools[0].pool)
    self.assertEqual(result.slots, 5)
    self.assertEqual(result.description, '')

    # Number of rows in the Pool table after the call.
    row_count = self.session.query(models.Pool).count()
    self.assertEqual(row_count, 2)
Beispiel #2
0
 def test_create_pool(self):
     """Create a pool named ``'foo'`` and verify the result.

     The returned object must expose the requested name, slots and
     description, and a fresh session must then count three ``Pool`` rows.
     """
     created = pool_api.create_pool(name='foo', slots=5, description='')

     # Checked in the same order as the attributes were requested.
     expectations = (('pool', 'foo'), ('slots', 5), ('description', ''))
     for attribute, wanted in expectations:
         self.assertEqual(getattr(created, attribute), wanted)

     with create_session() as session:
         pool_rows = session.query(models.Pool).count()
         self.assertEqual(pool_rows, 3)
Beispiel #3
0
 def test_create_pool_existing(self):
     """Check ``create_pool`` when it is given the name of ``self.pools[0]``.

     The returned pool must carry that name with 5 slots and an empty
     description, and the number of ``Pool`` rows seen through
     ``self.session`` must equal ``self.TOTAL_POOL_COUNT``.
     """
     result = pool_api.create_pool(
         name=self.pools[0].pool,
         slots=5,
         description='',
         session=self.session,
     )

     self.assertEqual(result.pool, self.pools[0].pool)
     self.assertEqual(result.slots, 5)
     self.assertEqual(result.description, '')

     row_count = self.session.query(models.Pool).count()
     self.assertEqual(row_count, self.TOTAL_POOL_COUNT)
Beispiel #4
0
 def execute(self, context):
     """Make sure a pool named ``self.name`` is present.

     The pool is looked up with ``get_pool``. A truthy result is reported
     through ``self.log`` and nothing else happens. When ``get_pool`` raises
     ``PoolNotFound`` the pool is created from ``self.name``, ``self.slots``
     and ``self.description`` and the new pool is reported instead.
     ``context`` is not used. The method always returns ``None``.

     NOTE(review): ``self.log`` is invoked as a callable here. If this class
     inherits Airflow's ``LoggingMixin``, ``self.log`` is a ``Logger`` and
     ``self.log.info(...)`` was probably intended -- confirm against the
     class definition.
     """
     try:
         existing = get_pool(name=self.name)
         if not existing:
             # A falsy lookup result is neither reported nor replaced.
             return
         self.log(f'Pool exists: {existing}')
     except PoolNotFound:
         created = create_pool(
             name=self.name,
             slots=self.slots,
             description=self.description,
         )
         self.log(f'Created pool: {created}')
Beispiel #5
0
def create_pool():
    """Create a pool from the JSON body of the current request.

    The decoded body (``get_json`` is called with ``force=True``) is passed
    as keyword arguments to ``pool_api.create_pool``. If that raises
    ``AirflowException`` the error is logged and returned as a JSON
    ``error`` payload whose status code is the exception's ``status_code``.
    Otherwise the created pool's ``to_json()`` result is returned as JSON.
    """
    payload = request.get_json(force=True)

    try:
        created = pool_api.create_pool(**payload)
    except AirflowException as err:
        log.error(err)
        failure = jsonify(error="{}".format(err))
        failure.status_code = err.status_code
        return failure

    return jsonify(created.to_json())
Beispiel #6
0
def create_pool():
    """Create a pool from the JSON body of the current request.

    The decoded body is forwarded as keyword arguments to
    ``pool_api.create_pool``. An ``AirflowException`` is logged and turned
    into a JSON ``error`` reply; its status code is the exception's
    ``status`` attribute, or 500 when the exception has none. On success
    the new pool's ``to_json()`` result is returned as JSON.
    """
    body = request.get_json(force=True)

    try:
        new_pool = pool_api.create_pool(**body)
    except AirflowException as exc:
        _log.error(exc)
        error_reply = jsonify(error="{}".format(exc))
        error_reply.status_code = getattr(exc, 'status', 500)
        return error_reply

    return jsonify(new_pool.to_json())
Beispiel #7
0
def init_pools():
    """Create the MEMORY, CPU, GPU_MEM and GPU_COUNT pools when MEMORY is absent.

    The names of all pools returned by ``pool_api.get_pools()`` are
    collected first. If a pool called ``"MEMORY"`` already exists nothing
    is done. Otherwise the node figures are read through ``get_node_info``
    (memory, CPU cores, available GPU memory, GPU count) and the four pools
    are created in the order MEMORY, CPU, GPU_MEM, GPU_COUNT.

    Any exception raised while creating the pools is printed and the
    process is terminated with exit status 1. Exceptions raised while
    reading the node figures are not caught here.

    Returns:
        None.
    """
    # ``exit()`` is a helper injected by the ``site`` module and is not
    # guaranteed to exist (e.g. ``python -S`` or frozen applications);
    # ``sys.exit`` raises the same ``SystemExit`` and is always available.
    import sys

    existing_names = {entry.pool for entry in pool_api.get_pools()}
    if "MEMORY" in existing_names:
        return

    print()
    print("Get node resources...")
    print()
    node_memory = get_node_info(query=memory_query)
    node_cpu = get_node_info(query=cpu_core_query)
    node_gpu_mem = get_node_info(query=gpu_mem_available_query)
    node_gpu_count = get_node_info(query=gpu_query)

    try:
        print("+++++++++++++++++++++++++++++++++++++++++++++ CREATING MEMORY POOL")
        pool_api.create_pool(
            name="MEMORY",
            # 10000 is subtracted from the reported memory; abs() keeps the
            # slot count non-negative if the node reports less than that.
            slots=abs(node_memory - 10000),
            description="Memory of the node in MB",
        )

        print("+++++++++++++++++++++++++++++++++++++++++++++ CREATING CPU POOL")
        pool_api.create_pool(
            name="CPU",
            slots=node_cpu,
            description="Count of CPU-cores of the node",
        )

        print("+++++++++++++++++++++++++++++++++++++++++++++ CREATING GPU POOL")
        pool_api.create_pool(
            name="GPU_MEM",
            slots=node_gpu_mem,
            description="Memory of all GPUs of the node in MB",
        )
        pool_api.create_pool(
            name="GPU_COUNT",
            slots=node_gpu_count,
            description="Count of GPUs of the node",
        )
    except Exception as e:
        # Deliberate catch-all: any failure during pool creation aborts the
        # process after reporting the error.
        print("++++++++++++++++++++++++++++++++++++++ Error @ creating pools!")
        print(e)
        sys.exit(1)
 def create_pool(self, name, slots, description):
     """Create a pool and return its ``(pool, slots, description)`` values.

     The three arguments are forwarded by keyword to ``pool.create_pool``;
     the returned tuple is read from the object that call hands back.
     """
     created = pool.create_pool(
         name=name,
         slots=slots,
         description=description,
     )
     return (created.pool, created.slots, created.description)
Beispiel #9
0
    def compute_allocated_resources(logger=None):
        """Collect per-node resource figures and publish them on ``NodeUtil``.

        For every node returned by ``NodeUtil.core_v1.list_node()`` this
        gathers the pressure conditions, the allocatable CPU and memory, the
        GPU device counts, and the summed container requests/limits of the
        pods on that node whose phase is neither Succeeded nor Failed.
        Only the figures of the first collected node are then copied to
        ``NodeUtil`` class attributes. ``pool_api.create_pool`` is called for
        ``GPU_COUNT`` when ``get_pool`` returns ``None`` or the slot count
        differs, GPU memory figures are fetched through
        ``NodeUtil.get_node_info`` when at least one GPU is reported, and a
        set of ``Variable`` entries is written.

        NOTE(review): no ``self``/``cls`` parameter is declared; presumably
        this is decorated as a static method outside the visible lines --
        confirm.

        Args:
            logger: Optional logger. It is passed on to
                ``NodeUtil.get_node_info`` and used for the GPU warnings.

        Returns:
            ``True`` when every step completed, ``False`` when any
            ``Exception`` was raised (the exception is printed).
        """
        Q_ = NodeUtil.ureg.Quantity
        data = {}
        # NOTE(review): local time here, while the "UPDATED" variable below
        # uses datetime.utcnow() -- confirm the difference is intended.
        NodeUtil.last_update = datetime.now()

        try:
            for node in NodeUtil.core_v1.list_node().items:
                stats = {}
                node_name = node.metadata.name
                capacity = node.status.capacity
                allocatable = node.status.allocatable
                conditions = node.status.conditions
                # Pressure flags default to False and are overwritten only if
                # the matching condition type is present on the node.
                stats["memory_pressure"] = False
                stats["disk_pressure"] = False
                stats["pid_pressure"] = False
                for condition in conditions:
                    # condition.status is compared against the string "True".
                    if condition.type == "MemoryPressure":
                        stats[
                            "memory_pressure"] = True if condition.status == "True" else False
                    elif condition.type == "DiskPressure":
                        stats[
                            "disk_pressure"] = True if condition.status == "True" else False
                    elif condition.type == "PIDPressure":
                        stats[
                            "pid_pressure"] = True if condition.status == "True" else False

                # Upper bound for the pod listing: 1.5 times the allocatable
                # pod count of the node.
                max_pods = int(int(allocatable["pods"]) * 1.5)
                # Select pods on this node that are neither Succeeded nor Failed.
                field_selector = (
                    "status.phase!=Succeeded,status.phase!=Failed," +
                    "spec.nodeName=" + node_name)

                stats["cpu_alloc"] = Q_(allocatable["cpu"])
                stats["mem_alloc"] = Q_(allocatable["memory"])
                # GPU figures fall back to 0 when the "nvidia.com/gpu" key is
                # missing. "gpu_dev_count" is read from capacity and
                # "gpu_dev_free" from allocatable.
                stats["gpu_dev_count"] = Q_(
                    capacity["nvidia.com/gpu"] if "nvidia.com/gpu" in
                    capacity else 0)
                stats["gpu_dev_free"] = Q_(
                    allocatable["nvidia.com/gpu"] if "nvidia.com/gpu" in
                    allocatable else 0)

                pods = NodeUtil.core_v1.list_pod_for_all_namespaces(
                    limit=max_pods, field_selector=field_selector).items
                # compute the allocated resources
                cpureqs, cpulmts, memreqs, memlmts = [], [], [], []
                for pod in pods:
                    for container in pod.spec.containers:
                        res = container.resources
                        # Missing requests/limits (None) and missing keys
                        # both count as 0.
                        reqs = defaultdict(lambda: 0, res.requests or {})
                        lmts = defaultdict(lambda: 0, res.limits or {})
                        cpureqs.append(Q_(reqs["cpu"]))
                        memreqs.append(Q_(reqs["memory"]))
                        cpulmts.append(Q_(lmts["cpu"]))
                        memlmts.append(Q_(lmts["memory"]))

                # Totals and their share of the allocatable amount in percent.
                stats["cpu_req"] = sum(cpureqs)
                stats["cpu_lmt"] = sum(cpulmts)
                stats["cpu_req_per"] = (stats["cpu_req"] / stats["cpu_alloc"] *
                                        100)
                stats["cpu_lmt_per"] = (stats["cpu_lmt"] / stats["cpu_alloc"] *
                                        100)
                stats["mem_req"] = sum(memreqs)
                stats["mem_lmt"] = sum(memlmts)
                stats["mem_req_per"] = (stats["mem_req"] / stats["mem_alloc"] *
                                        100)
                stats["mem_lmt_per"] = (stats["mem_lmt"] / stats["mem_alloc"] *
                                        100)
                data[node_name] = stats

            # Only the first collected node is published; with no nodes,
            # next() raises StopIteration, which the handler below catches.
            node_info = next(iter(data.values()))
            # CPU figures are scaled by 1000 after conversion to base units
            # (presumably cores -> millicores; confirm against the registry).
            NodeUtil.cpu_alloc = node_info["cpu_alloc"].to_base_units(
            ).magnitude * 1000
            NodeUtil.cpu_req = node_info["cpu_req"].to_base_units(
            ).magnitude * 1000
            NodeUtil.cpu_lmt = node_info["cpu_lmt"].to_base_units(
            ).magnitude * 1000
            # NOTE(review): the CPU percentages are also scaled by 1000 while
            # the memory percentages below are not -- confirm this is intended.
            NodeUtil.cpu_req_per = int(
                node_info["cpu_req_per"].to_base_units().magnitude * 1000)
            NodeUtil.cpu_lmt_per = int(
                node_info["cpu_lmt_per"].to_base_units().magnitude * 1000)
            # Memory figures are floor-divided by 1024 twice (presumably
            # bytes -> MiB; confirm against the registry).
            NodeUtil.mem_alloc = int(
                node_info["mem_alloc"].to_base_units().magnitude // 1024 //
                1024)
            NodeUtil.mem_req = int(
                node_info["mem_req"].to_base_units().magnitude // 1024 // 1024)
            NodeUtil.mem_lmt = int(
                node_info["mem_lmt"].to_base_units().magnitude // 1024 // 1024)
            NodeUtil.mem_req_per = int(
                node_info["mem_req_per"].to_base_units().magnitude)
            NodeUtil.mem_lmt_per = int(
                node_info["mem_lmt_per"].to_base_units().magnitude)
            NodeUtil.gpu_dev_count = int(node_info["gpu_dev_count"])
            NodeUtil.gpu_dev_free = int(node_info["gpu_dev_free"])
            NodeUtil.memory_pressure = node_info["memory_pressure"]
            NodeUtil.disk_pressure = node_info["disk_pressure"]
            NodeUtil.pid_pressure = node_info["pid_pressure"]
            # Keep the GPU_COUNT pool in step with the detected device count.
            # NOTE(review): if get_pool raises for a missing pool instead of
            # returning None, the exception ends up in the handler below and
            # the pool is never created here -- verify get_pool's contract.
            gpu_count_pool = pool_api.get_pool(name="GPU_COUNT")
            if gpu_count_pool is None or gpu_count_pool.slots != NodeUtil.gpu_dev_count:
                pool_api.create_pool(name="GPU_COUNT",
                                     slots=NodeUtil.gpu_dev_count,
                                     description="Count of GPUs of the node")

            if NodeUtil.gpu_dev_count > 0:
                # get_node_info returns a (value, return_code) pair here.
                # NOTE(review): the reset to None below only happens when a
                # logger was supplied; with logger=None the returned value is
                # kept even if return_code is falsy -- confirm.
                NodeUtil.gpu_mem_alloc, return_code = NodeUtil.get_node_info(
                    query=NodeUtil.gpu_mem_available_query, logger=logger)
                if not return_code and logger is not None:
                    logger.warning(
                        "############################################# Could not fetch gpu_alloc utilization from prometheus!!"
                    )
                    NodeUtil.gpu_mem_alloc = None
                NodeUtil.gpu_mem_used, return_code = NodeUtil.get_node_info(
                    query=NodeUtil.gpu_mem_used_query, logger=logger)
                if not return_code and logger is not None:
                    logger.warning(
                        "############################################# Could not fetch gpu_used utilization from prometheus!!"
                    )
                    NodeUtil.gpu_mem_used = None
            else:
                # No GPU reported: GPU memory figures are set to 0.
                NodeUtil.gpu_mem_alloc = 0
                NodeUtil.gpu_mem_used = 0

            # Remaining capacity relative to requests and to limits.
            NodeUtil.cpu_available_req = NodeUtil.cpu_alloc - NodeUtil.cpu_req
            NodeUtil.cpu_available_limit = NodeUtil.cpu_alloc - NodeUtil.cpu_lmt
            NodeUtil.memory_available_req = NodeUtil.mem_alloc - NodeUtil.mem_req
            NodeUtil.memory_available_limit = NodeUtil.mem_alloc - NodeUtil.mem_lmt
            # Stays None when either GPU memory figure is None.
            NodeUtil.gpu_memory_available = None if (
                NodeUtil.gpu_mem_alloc is None or NodeUtil.gpu_mem_used is None
            ) else (NodeUtil.gpu_mem_alloc - NodeUtil.gpu_mem_used)

            # Publish the figures as Variable entries. Note that CPU_NODE
            # shows limit/allocatable while RAM_NODE shows
            # request/allocatable.
            Variable.set("CPU_NODE", "{}/{}".format(NodeUtil.cpu_lmt,
                                                    NodeUtil.cpu_alloc))
            Variable.set("CPU_FREE", "{}".format(NodeUtil.cpu_available_req))
            Variable.set("RAM_NODE", "{}/{}".format(NodeUtil.mem_req,
                                                    NodeUtil.mem_alloc))
            Variable.set("RAM_FREE",
                         "{}".format(NodeUtil.memory_available_req))
            Variable.set(
                "GPU_DEV", "{}/{}".format(NodeUtil.gpu_dev_free,
                                          NodeUtil.gpu_dev_count))
            Variable.set("GPU_DEV_FREE", "{}".format(NodeUtil.gpu_dev_free))
            Variable.set(
                "GPU_MEM", "{}/{}".format(NodeUtil.gpu_mem_used,
                                          NodeUtil.gpu_mem_alloc))
            Variable.set("GPU_MEM_FREE",
                         "{}".format(NodeUtil.gpu_memory_available))
            Variable.set("UPDATED", datetime.utcnow())

            # Variable.set("cpu_alloc", "{}".format(NodeUtil.cpu_alloc))
            # Variable.set("cpu_req", "{}".format(NodeUtil.cpu_req))
            # Variable.set("cpu_lmt", "{}".format(NodeUtil.cpu_lmt))
            # Variable.set("cpu_req_per", "{}".format(NodeUtil.cpu_req_per))
            # Variable.set("cpu_lmt_per", "{}".format(NodeUtil.cpu_lmt_per))
            # Variable.set("cpu_available_req", "{}".format(NodeUtil.cpu_available_req))
            # Variable.set("cpu_available_limit", "{}".format(NodeUtil.cpu_available_limit))
            # Variable.set("mem_alloc", "{}".format(NodeUtil.mem_alloc))
            # Variable.set("mem_req", "{}".format(NodeUtil.mem_req))
            # Variable.set("mem_lmt", "{}".format(NodeUtil.mem_lmt))
            # Variable.set("mem_req_per", "{}".format(NodeUtil.mem_req_per))
            # Variable.set("mem_lmt_per", "{}".format(NodeUtil.mem_lmt_per))
            # Variable.set("memory_available_req", "{}".format(NodeUtil.memory_available_req))
            # Variable.set("memory_available_limit", "{}".format(NodeUtil.memory_available_limit))
            # Variable.set("gpu_count", "{}".format(NodeUtil.gpu_count))
            # Variable.set("gpu_alloc", "{}".format(NodeUtil.gpu_alloc))
            # Variable.set("gpu_used", "{}".format(NodeUtil.gpu_used))
            # Variable.set("gpu_memory_available", "{}".format(NodeUtil.gpu_memory_available))
            return True

        except Exception as e:
            # Catch-all: any failure above (not only node fetching) is
            # reported with this message and signalled by returning False.
            print(
                "+++++++++++++++++++++++++++++++++++++++++ COULD NOT FETCH NODES!"
            )
            print(e)
            return False
Beispiel #10
0
 def create_pool(self, name, slots, description):
     """Create a pool via ``pool.create_pool`` and return a summary tuple.

     Returns:
         ``(pool, slots, description)`` read from the created object.
     """
     new_pool = pool.create_pool(name=name,
                                 slots=slots,
                                 description=description)
     pool_name = new_pool.pool
     pool_slots = new_pool.slots
     pool_description = new_pool.description
     return pool_name, pool_slots, pool_description
def create_pools():
    """Create ``default_pool`` (128 slots) and ``large_pool`` (1024 slots).

    Each pool is created with one positional call to ``create_pool``, in
    the order listed.
    """
    pool_specs = (
        ("default_pool", 128, "default pool for tasks"),
        ("large_pool", 1024,
         "large pool to handle tasks parallelism surgically"),
    )
    for pool_name, slot_count, summary in pool_specs:
        create_pool(pool_name, slot_count, summary)
Beispiel #12
0
 def create_pool(self, name, slots, description):
     """Create a pool and report the stored values.

     The arguments are handed to ``pool.create_pool`` by keyword. The
     ``pool``, ``slots`` and ``description`` attributes of the object it
     returns are given back as a 3-tuple, in that order.
     """
     result = pool.create_pool(name=name, slots=slots, description=description)
     return tuple(
         getattr(result, field) for field in ("pool", "slots", "description")
     )