Python DataHandler.GetALLJobsCount Examples

Programming Language: Python

Namespace/Package Name: DataHandler

Class/Type: DataHandler

Method/Function: GetALLJobsCount

Examples at hotexamples.com: 1

Python DataHandler.GetALLJobsCount - 1 example found. This is the top-rated real-world Python example of DataHandler.DataHandler.GetALLJobsCount extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

DataHandler(30)

Close(30)

AddandGetJobRetries(10)

loadActiveForums(5)

GetClusterStatus(4)

AddCommand(3)

AddJob(3)

GetDeadEndpoints(3)

GetCommands(3)

GetAcl(3)

DeleteResourceAcl(3)

ApproveJob(3)

DeleteAce(3)

GetActiveJobsCount(2)

AddVC(2)

FinishCommand(2)

DeleteTemplate(2)

makeLD(1)

sampleUsers(1)

getTokenizedUserMonthForumCSV(1)

getUserDataForDivergence(1)

getUserInitialData(1)

getUserMaturedData(1)

getUserMonths(1)

get_train(1)

get_test(1)

getTokenizedCSV(1)

printActiveForums(1)

preprocessVocab(1)

mapLD2Uni(1)

makeDist(1)

getTokenizedUserMonthCSV(1)

getActiveForum(1)

getPostingFreq(1)

GetFDInfo(1)

AddStorage(1)

AddUser(1)

DeleteStorage(1)

DeleteVC(1)

GetALLJobsCount(1)

GetActiveJobList(1)

addLangTags(1)

getPost2User(1)

exec_request(1)

getBasicUserMonthRecord(1)

getDoc2Post(1)

getForumInitialData(1)

getForumMaturedData(1)

getPost2Month(1)

update_table(1)

Example #1

Show file

File: node_manager.py Project: wuyuebupt/DLWorkspace

def get_cluster_status():
    """Build a GPU usage summary of the cluster and store it when it changed.

    Runs ``kubectl get nodes -o yaml`` and ``kubectl get pods -o yaml``
    through ``k8sUtils.kubectl_exec`` and aggregates the parsed output into:

    * per-node entries (name, labels, GPU capacity/allocatable/used,
      InternalIP, pod descriptions, active service labels, unschedulable flag),
    * per-user GPU request totals (keyed by the pod label ``userName``),
    * cluster-wide GPU totals.

    Job counts read from ``DataHandler`` are added afterwards. The result is
    passed to ``DataHandler.UpdateClusterStatus`` only when
    ``check_cluster_status_change`` reports a difference from the copy cached
    in ``config["cluster_status"]``; the cache is refreshed on every call.

    Collection is best-effort: if querying or parsing fails, the error is
    logged and the returned dict contains only the job counts.

    Returns:
        dict: the cluster status. Key spellings such as ``gpu_avaliable`` and
        ``AvaliableJobNum`` are kept as-is because consumers read them.
    """
    cluster_status = {}
    gpuStr = "alpha.kubernetes.io/nvidia-gpu"
    try:
        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary Python objects. The input here is kubectl output; consider
        # yaml.safe_load if that output cannot be fully trusted.
        output = k8sUtils.kubectl_exec(" get nodes -o yaml")
        nodeInfo = yaml.load(output)
        nodes_status = {}
        user_status = {}

        if "items" in nodeInfo:
            for node in nodeInfo["items"]:
                status = node["status"]
                node_status = {}
                node_status["name"] = node["metadata"]["name"]
                node_status["labels"] = node["metadata"]["labels"]
                # A node that does not advertise the GPU resource counts as 0.
                node_status["gpu_allocatable"] = int(
                    status["allocatable"].get(gpuStr, 0))
                node_status["gpu_capacity"] = int(
                    status["capacity"].get(gpuStr, 0))
                node_status["gpu_used"] = 0
                node_status["InternalIP"] = "unknown"
                node_status["pods"] = []

                for addr in status.get("addresses") or []:
                    if addr["type"] == "InternalIP":
                        node_status["InternalIP"] = addr["address"]

                # Labels whose value is "active" name services scheduled on
                # this node; "all" and "default" are excluded.
                # .items() (not .iteritems()) behaves the same here and also
                # runs on Python 3.
                node_status["scheduled_service"] = [
                    label
                    for label, state in node_status["labels"].items()
                    if state == "active" and label not in ("all", "default")
                ]

                node_status["unschedulable"] = bool(
                    node["spec"].get("unschedulable"))

                # Fix: iterate the *conditions list*. The previous code looped
                # over the keys of node["status"], so a node whose Ready
                # condition is "Unknown" was never marked unschedulable.
                for condi in status.get("conditions") or []:
                    if (condi.get("type") == "Ready"
                            and condi.get("status") == "Unknown"):
                        node_status["unschedulable"] = True

                nodes_status[node_status["name"]] = node_status

        output = k8sUtils.kubectl_exec(" get pods -o yaml")
        podsInfo = yaml.load(output)
        if "items" in podsInfo:
            for pod in podsInfo["items"]:
                gpus = 0
                username = None
                if "metadata" in pod and "labels" in pod[
                        "metadata"] and "userName" in pod["metadata"]["labels"]:
                    username = pod["metadata"]["labels"]["userName"]
                # Only pods already bound to a node contribute GPU usage.
                if "spec" in pod and "nodeName" in pod["spec"]:
                    node_name = pod["spec"]["nodeName"]
                    pod_name = pod["metadata"]["name"]
                    if username is not None:
                        pod_name += " : " + username
                    gpuUsage = get_job_gpu_usage(pod["metadata"]["name"])
                    if gpuUsage is not None:
                        pod_name += " (gpu usage:" + str(gpuUsage) + "%)"
                        # Visually flag pods with low GPU utilisation.
                        if gpuUsage <= 25:
                            pod_name += "!!!!!!"
                    for container in pod["spec"].get("containers") or []:
                        if "resources" in container and "requests" in container[
                                "resources"] and gpuStr in container[
                                    "resources"]["requests"]:
                            requested = container["resources"]["requests"][
                                gpuStr]
                            gpus += int(requested)
                            # str(): YAML may parse an unquoted count as int,
                            # which cannot be concatenated to a string.
                            pod_name += " (gpu #:" + str(requested) + ")"
                    if node_name in nodes_status:
                        nodes_status[node_name]["gpu_used"] += gpus
                        nodes_status[node_name]["pods"].append(pod_name)

                # Users are recorded even for unbound pods (with 0 GPUs).
                if username is not None:
                    user_status[username] = user_status.get(username,
                                                            0) + gpus

        gpu_avaliable = 0
        gpu_reserved = 0
        gpu_capacity = 0
        gpu_unschedulable = 0
        gpu_used = 0

        for node_status in nodes_status.values():
            reserved = (node_status["gpu_capacity"] -
                        node_status["gpu_allocatable"])
            if node_status["unschedulable"]:
                # The whole node is out of service.
                gpu_unschedulable += node_status["gpu_capacity"]
            else:
                gpu_avaliable += (node_status["gpu_allocatable"] -
                                  node_status["gpu_used"])
                # On a schedulable node only the non-allocatable part
                # counts as unschedulable.
                gpu_unschedulable += reserved

            gpu_reserved += reserved
            gpu_used += node_status["gpu_used"]
            gpu_capacity += node_status["gpu_capacity"]

        cluster_status["user_status"] = [{
            "userName": user_name,
            "userGPU": user_gpu
        } for user_name, user_gpu in user_status.items()]

        cluster_status["gpu_avaliable"] = gpu_avaliable
        cluster_status["gpu_capacity"] = gpu_capacity
        cluster_status["gpu_unschedulable"] = gpu_unschedulable
        cluster_status["gpu_used"] = gpu_used
        cluster_status["gpu_reserved"] = gpu_reserved
        cluster_status["node_status"] = list(nodes_status.values())

    except Exception:
        # Deliberately best-effort: keep going so the job counts are still
        # reported, but record the full traceback instead of a bare print.
        logging.exception("failed to collect the cluster status")

    dataHandler = DataHandler()
    try:
        cluster_status["AvaliableJobNum"] = dataHandler.GetActiveJobsCount()
        cluster_status["TotalJobNum"] = dataHandler.GetALLJobsCount()
        if "cluster_status" in config and check_cluster_status_change(
                config["cluster_status"], cluster_status):
            logging.info("updating the cluster status...")
            dataHandler.UpdateClusterStatus(cluster_status)
        else:
            logging.info(
                "nothing changed in cluster, skipping the cluster status update..."
            )
        config["cluster_status"] = copy.deepcopy(cluster_status)
    finally:
        # Release the handler even when one of the calls above raises.
        dataHandler.Close()
    return cluster_status