Example #1
0
def TakeJobActions(jobs):
    dataHandler = DataHandler()
    vcList = dataHandler.ListVCs()
    clusterStatus, _ = dataHandler.GetClusterStatus()
    dataHandler.Close()

    cluster_gpu_capacity = clusterStatus["gpu_capacity"]
    cluster_gpu_reserved = clusterStatus["gpu_reserved"]
    globalTotalRes = ResourceInfo(cluster_gpu_capacity)
    globalReservedRes = ResourceInfo(cluster_gpu_reserved)

    vc_resources = {}
    globalResInfo = ResourceInfo.Difference(globalTotalRes, globalReservedRes)

    priority_dict = get_priority_dict()
    logging.info("Job priority dict: {}".format(priority_dict))

    for vc in vcList:
        vcTotalRes = ResourceInfo(json.loads(vc["quota"]))
        clusterTotalRes = ResourceInfo(cluster_gpu_capacity)
        clusterReservedRes = ResourceInfo(cluster_gpu_reserved)
        vcReservedRes = clusterReservedRes.GetFraction(vcTotalRes,
                                                       clusterTotalRes)
        vc_resources[vc["vcName"]] = ResourceInfo.Difference(
            vcTotalRes, vcReservedRes)

    jobsInfo = []
    for job in jobs:
        if job["jobStatus"] in ["queued", "scheduling", "running"]:
            singleJobInfo = {}
            singleJobInfo["job"] = job
            job_params = json.loads(base64.b64decode(job["jobParams"]))
            singleJobInfo["preemptionAllowed"] = job_params[
                "preemptionAllowed"]
            singleJobInfo["jobId"] = job_params["jobId"]
            jobGpuType = "any"
            if "gpuType" in job_params:
                jobGpuType = job_params["gpuType"]
            singleJobInfo["globalResInfo"] = ResourceInfo(
                {jobGpuType: GetJobTotalGpu(job_params)})

            # Jobs are sorted by the criteria below, in order (a worked
            # ordering sketch follows this function):
            # 1. non-preemptible precedes preemptible
            # 2. running precedes scheduling, which precedes queued
            # 3. higher priority value precedes lower priority value
            # 4. earlier job time precedes later job time

            # Non-Preemptible jobs first
            preemptible = 1 if singleJobInfo["preemptionAllowed"] else 0

            # Job status
            job_status = 0
            if job["jobStatus"] == "scheduling":
                job_status = 1
            elif job["jobStatus"] == "queued":
                job_status = 2

            # Priority value
            reverse_priority = get_job_priority(priority_dict,
                                                singleJobInfo["jobId"])
            priority = 999999 - reverse_priority

            # Job time
            job_time = str(job["jobTime"])

            singleJobInfo["sortKey"] = "{}_{}_{:06d}_{}".format(
                preemptible, job_status, priority, job_time)

            singleJobInfo["allowed"] = False
            jobsInfo.append(singleJobInfo)

    jobsInfo.sort(key=lambda x: x["sortKey"])

    logging.info("TakeJobActions : local resources : %s" % (vc_resources))
    logging.info("TakeJobActions : global resources : %s" %
                 (globalResInfo.CategoryToCountMap))

    for sji in jobsInfo:
        logging.info("TakeJobActions : job : %s : %s : %s" %
                     (sji["jobId"], sji["globalResInfo"].CategoryToCountMap,
                      sji["sortKey"]))
        vc_name = sji["job"]["vcName"]
        vc_resource = vc_resources[vc_name]

        if vc_resource.CanSatisfy(sji["globalResInfo"]):
            vc_resource.Subtract(sji["globalResInfo"])
            globalResInfo.Subtract(sji["globalResInfo"])
            sji["allowed"] = True
            logging.info(
                "TakeJobActions : local assignment : %s : %s" %
                (sji["jobId"], sji["globalResInfo"].CategoryToCountMap))

    for sji in jobsInfo:
        if sji["preemptionAllowed"] and (sji["allowed"] is False):
            if globalResInfo.CanSatisfy(sji["globalResInfo"]):
                logging.info(
                    "TakeJobActions : job : %s : %s" %
                    (sji["jobId"], sji["globalResInfo"].CategoryToCountMap))
                # Strict FIFO is not required for global (bonus) tokens
                # since these jobs are preemptible anyway.
                globalResInfo.Subtract(sji["globalResInfo"])
                sji["allowed"] = True
                logging.info(
                    "TakeJobActions : global assignment : %s : %s" %
                    (sji["jobId"], sji["globalResInfo"].CategoryToCountMap))

    logging.info("TakeJobActions : global resources : %s" %
                 (globalResInfo.CategoryToCountMap))

    for sji in jobsInfo:
        try:
            if sji["job"]["jobStatus"] == "queued" and sji["allowed"]:
                SubmitJob(sji["job"])
                logging.info("TakeJobActions : submitting job : %s : %s" %
                             (sji["jobId"], sji["sortKey"]))
            elif (sji["preemptionAllowed"]
                  and sji["job"]["jobStatus"] in ["scheduling", "running"]
                  and not sji["allowed"]):
                KillJob(sji["job"]["jobId"], "queued")
                logging.info("TakeJobActions : pre-empting job : %s : %s" %
                             (sji["jobId"], sji["sortKey"]))
        except Exception:
            logging.error("Process job failed {}".format(sji["job"]),
                          exc_info=True)

    logging.info("TakeJobActions : job desired actions taken")
Example #2
0
def get_cluster_status():
    cluster_status = {}
    gpuStr = "nvidia.com/gpu"
    try:
        output = k8sUtils.kubectl_exec(" get nodes -o yaml")
        nodeInfo = yaml.safe_load(output)
        nodes_status = {}
        user_status = {}

        if "items" in nodeInfo:
            for node in nodeInfo["items"]:
                node_status = {}
                node_status["name"] = node["metadata"]["name"]
                node_status["labels"] = node["metadata"]["labels"]
                node_status["gpuType"] = ""

                node_status["scheduled_service"] = []
                for l, s in node_status["labels"].items():
                    if s == "active" and l != "all" and l != "default":
                        node_status["scheduled_service"].append(l)
                    if l == "gpuType":
                        node_status["scheduled_service"].append(s)
                        node_status["gpuType"] = s

                if (gpuStr in node["status"]["allocatable"]):
                    node_status["gpu_allocatable"] = ResourceInfo({
                        node_status["gpuType"]:
                        int(node["status"]["allocatable"][gpuStr])
                    }).ToSerializable()
                else:
                    node_status["gpu_allocatable"] = ResourceInfo(
                    ).ToSerializable()
                if (gpuStr in node["status"]["capacity"]):
                    node_status["gpu_capacity"] = ResourceInfo({
                        node_status["gpuType"]:
                        int(node["status"]["capacity"][gpuStr])
                    }).ToSerializable()
                else:
                    node_status["gpu_capacity"] = ResourceInfo(
                    ).ToSerializable()
                node_status["gpu_used"] = ResourceInfo().ToSerializable()
                node_status["InternalIP"] = "unknown"
                node_status["pods"] = []
                if "annotations" in node["metadata"]:
                    if "node.alpha/DeviceInformation" in node["metadata"][
                            "annotations"]:
                        node_info = json.loads(
                            node["metadata"]["annotations"]
                            ["node.alpha/DeviceInformation"])
                        if (int(node_info["capacity"]["alpha.gpu/numgpu"]) >
                                ResourceInfo(
                                    node_status["gpu_capacity"]).TotalCount()):
                            node_status["gpu_capacity"] = ResourceInfo({
                                node_status["gpuType"]:
                                int(node_info["capacity"]["alpha.gpu/numgpu"])
                            }).ToSerializable()
                        if (int(node_info["allocatable"]["alpha.gpu/numgpu"]) >
                                ResourceInfo(node_status["gpu_allocatable"]
                                             ).TotalCount()):
                            node_status["gpu_allocatable"] = ResourceInfo({
                                node_status["gpuType"]:
                                int(node_info["allocatable"]
                                    ["alpha.gpu/numgpu"])
                            }).ToSerializable()

                if "addresses" in node["status"]:
                    for addr in node["status"]["addresses"]:
                        if addr["type"] == "InternalIP":
                            node_status["InternalIP"] = addr["address"]

                if "unschedulable" in node["spec"] and node["spec"][
                        "unschedulable"]:
                    node_status["unschedulable"] = True
                else:
                    node_status["unschedulable"] = False

                if "status" in node and "conditions" in node["status"]:
                    for condi in node["status"]["conditions"]:
                        if "type" in condi and condi[
                                "type"] == "Ready" and "status" in condi and condi[
                                    "status"] == "Unknown":
                            node_status["unschedulable"] = True

                nodes_status[node_status["name"]] = node_status

        output = k8sUtils.kubectl_exec(" get pods -o yaml")
        podsInfo = yaml.safe_load(output)
        if "items" in podsInfo:
            for pod in podsInfo["items"]:
                gpus = 0
                username = pod.get("metadata", {}).get("labels",
                                                       {}).get("userName")
                if "spec" in pod and "nodeName" in pod["spec"]:
                    node_name = pod["spec"]["nodeName"]
                    pod_name = pod["metadata"]["name"]
                    if username is not None:
                        pod_name += " : " + username
                    gpuUsage = get_job_gpu_usage(pod["metadata"]["name"])
                    if gpuUsage is not None:
                        pod_name += " (gpu usage:" + str(gpuUsage) + "%)"
                        if gpuUsage <= 25:
                            pod_name += "!!!!!!"
                    pod_info_cont = {}
                    pod_info_initcont = {}
                    if "annotations" in pod["metadata"]:
                        if "pod.alpha/DeviceInformation" in pod["metadata"][
                                "annotations"]:
                            pod_info = json.loads(
                                pod["metadata"]["annotations"]
                                ["pod.alpha/DeviceInformation"])
                            if "runningcontainer" in pod_info:
                                pod_info_cont = pod_info["runningcontainer"]
                            if "initcontainer" in pod_info:
                                pod_info_initcont = pod_info["initcontainer"]
                    if "containers" in pod["spec"]:
                        for container in pod["spec"]["containers"]:
                            containerGPUs = 0
                            if "resources" in container and "requests" in container[
                                    "resources"] and gpuStr in container[
                                        "resources"]["requests"]:
                                containerGPUs = int(
                                    container["resources"]["requests"][gpuStr])
                            if container["name"] in pod_info_cont:
                                if "requests" in pod_info_cont[container[
                                        "name"]] and "alpha.gpu/numgpu" in pod_info_cont[
                                            container["name"]]["requests"]:
                                    containerGPUs = max(
                                        int(pod_info_cont[container["name"]]
                                            ["requests"]["alpha.gpu/numgpu"]),
                                        containerGPUs)
                            gpus += containerGPUs
                            pod_name += " (gpu #:" + str(containerGPUs) + ")"

                    if node_name in nodes_status:
                        nodes_status[node_name]["gpu_used"] = ResourceInfo(
                            nodes_status[node_name]["gpu_used"]).Add(
                                ResourceInfo(
                                    {nodes_status[node_name]["gpuType"]:
                                     gpus})).ToSerializable()
                        nodes_status[node_name]["pods"].append(pod_name)

                        if username is not None:
                            if username not in user_status:
                                user_status[username] = ResourceInfo(
                                    {nodes_status[node_name]["gpuType"]: gpus})
                            else:
                                user_status[username].Add(
                                    ResourceInfo({
                                        nodes_status[node_name]["gpuType"]:
                                        gpus
                                    }))

        gpu_avaliable = ResourceInfo()
        gpu_reserved = ResourceInfo()
        gpu_capacity = ResourceInfo()
        gpu_unschedulable = ResourceInfo()
        gpu_schedulable = ResourceInfo()
        gpu_used = ResourceInfo()

        for node_name, node_status in nodes_status.items():
            if node_status["unschedulable"]:
                gpu_unschedulable.Add(ResourceInfo(
                    node_status["gpu_capacity"]))
                gpu_reserved.Add(
                    ResourceInfo.Difference(
                        ResourceInfo(node_status["gpu_capacity"]),
                        ResourceInfo(node_status["gpu_used"])))
            else:
                gpu_avaliable.Add(
                    ResourceInfo.Difference(
                        ResourceInfo(node_status["gpu_allocatable"]),
                        ResourceInfo(node_status["gpu_used"])))
                gpu_schedulable.Add(ResourceInfo(node_status["gpu_capacity"]))
                gpu_unschedulable.Add(
                    ResourceInfo.Difference(
                        ResourceInfo(node_status["gpu_capacity"]),
                        ResourceInfo(node_status["gpu_allocatable"])))
                gpu_reserved.Add(
                    ResourceInfo.Difference(
                        ResourceInfo(node_status["gpu_capacity"]),
                        ResourceInfo(node_status["gpu_allocatable"])))

            gpu_used.Add(ResourceInfo(node_status["gpu_used"]))
            gpu_capacity.Add(ResourceInfo(node_status["gpu_capacity"]))

        cluster_status["user_status"] = []
        for user_name, user_gpu in user_status.items():
            cluster_status["user_status"].append({
                "userName": user_name,
                "userGPU": user_gpu.ToSerializable()
            })

        cluster_status["gpu_avaliable"] = gpu_avaliable.ToSerializable()
        cluster_status["gpu_capacity"] = gpu_capacity.ToSerializable()
        cluster_status["gpu_unschedulable"] = gpu_unschedulable.ToSerializable(
        )
        cluster_status["gpu_used"] = gpu_used.ToSerializable()
        cluster_status["gpu_reserved"] = gpu_reserved.ToSerializable()
        cluster_status["node_status"] = [
            node_status for node_name, node_status in nodes_status.iteritems()
        ]

    except Exception:
        logging.exception("get cluster status")

    dataHandler = DataHandler()
    cluster_status["AvaliableJobNum"] = dataHandler.GetActiveJobsCount()

    if "cluster_status" in config and check_cluster_status_change(
            config["cluster_status"], cluster_status):
        logging.info("updating the cluster status...")
        dataHandler.UpdateClusterStatus(cluster_status)
    else:
        logging.info(
            "nothing changed in cluster, skipping the cluster status update..."
        )

    config["cluster_status"] = copy.deepcopy(cluster_status)
    dataHandler.Close()
    return cluster_status
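
Every example here leans on ResourceInfo as a per-gpu-type counter. The real
class is not shown (it also implements GetFraction, category blocking, and
"any"-type substitution); the sketch below covers only the subset of the
interface these snippets assume:

class ResourceInfo(object):
    # Minimal sketch, not the scheduler's actual implementation.
    def __init__(self, category_to_count=None):
        self.CategoryToCountMap = dict(category_to_count or {})

    def Add(self, other):
        for category, count in other.CategoryToCountMap.items():
            self.CategoryToCountMap[category] = \
                self.CategoryToCountMap.get(category, 0) + count
        return self

    def Subtract(self, other):
        for category, count in other.CategoryToCountMap.items():
            self.CategoryToCountMap[category] = \
                self.CategoryToCountMap.get(category, 0) - count
        return self

    def CanSatisfy(self, other):
        return all(
            self.CategoryToCountMap.get(category, 0) >= count
            for category, count in other.CategoryToCountMap.items())

    @staticmethod
    def Difference(left, right):
        return ResourceInfo(left.CategoryToCountMap).Subtract(right)

    def TotalCount(self):
        return sum(self.CategoryToCountMap.values())

    def ToSerializable(self):
        return dict(self.CategoryToCountMap)
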
Example #3
0
def TakeJobActions(jobs):
    dataHandler = DataHandler()
    vcList = dataHandler.ListVCs()
    clusterStatus, _ = dataHandler.GetClusterStatus()
    dataHandler.Close()

    globalTotalRes = ResourceInfo(clusterStatus["gpu_capacity"])
    globalReservedRes = ResourceInfo(clusterStatus["gpu_unschedulable"])

    localResInfo = ResourceInfo()
    globalResInfo = ResourceInfo.Difference(globalTotalRes, globalReservedRes)

    for vc in vcList:
        vcTotalRes = ResourceInfo(json.loads(vc["quota"]), vc["vcName"])
        clusterTotalRes = ResourceInfo(clusterStatus["gpu_capacity"],
                                       vc["vcName"])
        clusterReservedRes = ResourceInfo(clusterStatus["gpu_unschedulable"],
                                          vc["vcName"])
        vcReservedRes = clusterReservedRes.GetFraction(vcTotalRes,
                                                       clusterTotalRes)
        localResInfo.Add(ResourceInfo.Difference(vcTotalRes, vcReservedRes))

    jobsInfo = []
    for job in jobs:
        if job["jobStatus"] == "queued" or job[
                "jobStatus"] == "scheduling" or job["jobStatus"] == "running":
            singleJobInfo = {}
            singleJobInfo["job"] = job
            singleJobInfo["jobParams"] = json.loads(
                base64.b64decode(job["jobParams"]))
            jobGpuType = "any"
            if "gpuType" in singleJobInfo["jobParams"]:
                jobGpuType = singleJobInfo["jobParams"]["gpuType"]
            singleJobInfo["localResInfo"] = ResourceInfo(
                {jobGpuType: GetJobTotalGpu(singleJobInfo["jobParams"])},
                job["vcName"])
            singleJobInfo["globalResInfo"] = ResourceInfo(
                {jobGpuType: GetJobTotalGpu(singleJobInfo["jobParams"])})
            singleJobInfo["sortKey"] = str(job["jobTime"])
            if singleJobInfo["jobParams"]["preemptionAllowed"]:
                singleJobInfo["sortKey"] = "1_" + singleJobInfo["sortKey"]
            else:
                singleJobInfo["sortKey"] = "0_" + singleJobInfo["sortKey"]
            singleJobInfo["allowed"] = False
            jobsInfo.append(singleJobInfo)

    jobsInfo.sort(key=JobInfoSorter)

    logging.info("TakeJobActions : local resources : %s" %
                 (localResInfo.CategoryToCountMap))
    logging.info("TakeJobActions : global resources : %s" %
                 (globalResInfo.CategoryToCountMap))

    for sji in jobsInfo:
        logging.info("TakeJobActions : job : %s : %s : %s" %
                     (sji["jobParams"]["jobName"],
                      sji["localResInfo"].CategoryToCountMap, sji["sortKey"]))
        if sji["jobParams"]["preemptionAllowed"]:
            localResInfo.UnblockResourceCategory(sji["localResInfo"])

        if localResInfo.CanSatisfy(sji["localResInfo"]):
            localResInfo.Subtract(sji["localResInfo"])
            globalResInfo.Subtract(sji["globalResInfo"])
            sji["allowed"] = True
            logging.info("TakeJobActions : local assignment : %s : %s" %
                         (sji["jobParams"]["jobName"],
                          sji["localResInfo"].CategoryToCountMap))
        elif not sji["jobParams"]["preemptionAllowed"]:
            localResInfo.BlockResourceCategory(
                sji["localResInfo"])  #FIFO scheduling

    #logging.info("TakeJobActions : local resources : %s" % (localResInfo.CategoryToCountMap))
    #logging.info("TakeJobActions : global resources : %s" % (globalResInfo.CategoryToCountMap))

    for sji in jobsInfo:
        if (sji["jobParams"]["preemptionAllowed"] and sji["allowed"] == False):
            if globalResInfo.CanSatisfy(sji["globalResInfo"]):
                logging.info("TakeJobActions : job : %s : %s" %
                             (sji["jobParams"]["jobName"],
                              sji["globalResInfo"].CategoryToCountMap))
                # Strict FIFO is not required for global (bonus) tokens
                # since these jobs are preemptible anyway.
                globalResInfo.Subtract(sji["globalResInfo"])
                sji["allowed"] = True
                logging.info("TakeJobActions : global assignment : %s : %s" %
                             (sji["jobParams"]["jobName"],
                              sji["globalResInfo"].CategoryToCountMap))

    logging.info("TakeJobActions : global resources : %s" %
                 (globalResInfo.CategoryToCountMap))

    for sji in jobsInfo:
        if sji["job"]["jobStatus"] == "queued" and sji["allowed"]:
            SubmitJob(sji["job"])
            logging.info("TakeJobActions : submitting job : %s : %s : %s" %
                         (sji["jobParams"]["jobName"],
                          sji["jobParams"]["jobId"], sji["sortKey"]))
        elif (sji["jobParams"]["preemptionAllowed"]
              and sji["job"]["jobStatus"] in ["scheduling", "running"]
              and not sji["allowed"]):
            KillJob(sji["job"], "queued")
            logging.info("TakeJobActions : pre-empting job : %s : %s : %s" %
                         (sji["jobParams"]["jobName"],
                          sji["jobParams"]["jobId"], sji["sortKey"]))

    logging.info("TakeJobActions : job desired actions taken")
Example #4
0
def GetVC(userName, vcName):
    ret = None

    clusterStatus, _ = DataManager.GetClusterStatus()
    clusterTotalRes = ResourceInfo(clusterStatus["gpu_capacity"])
    clusterReservedRes = ResourceInfo(clusterStatus["gpu_unschedulable"])

    user_status = {}

    vcList = DataManager.ListVCs()
    for vc in vcList:
        if vc["vcName"] == vcName and AuthorizationManager.HasAccess(userName, ResourceType.VC, vcName, Permission.User):
            vcTotalRes = ResourceInfo(json.loads(vc["quota"]))
            vcConsumedRes = ResourceInfo()
            jobs = DataManager.GetAllPendingJobs(vcName)
            for job in jobs:
                if job["jobStatus"] == "running":
                    username = job["userName"]
                    jobParam = json.loads(base64.b64decode(job["jobParams"]))
                    if "gpuType" in jobParam and not jobParam["preemptionAllowed"]:
                        vcConsumedRes.Add(ResourceInfo(
                            {jobParam["gpuType"]: GetJobTotalGpu(jobParam)}))
                        if username not in user_status:
                            user_status[username] = ResourceInfo()
                        user_status[username].Add(ResourceInfo(
                            {jobParam["gpuType"]: GetJobTotalGpu(jobParam)}))

            vcReservedRes = clusterReservedRes.GetFraction(
                vcTotalRes, clusterTotalRes)
            vcAvailableRes = ResourceInfo.Difference(
                ResourceInfo.Difference(vcTotalRes, vcConsumedRes),
                vcReservedRes)

            vc["gpu_capacity"] = vcTotalRes.ToSerializable()
            vc["gpu_used"] = vcConsumedRes.ToSerializable()
            vc["gpu_unschedulable"] = vcReservedRes.ToSerializable()
            vc["gpu_avaliable"] = vcAvailableRes.ToSerializable()
            vc["AvaliableJobNum"] = len(jobs)          
            vc["node_status"] = clusterStatus["node_status"]
            vc["user_status"] = []
            for user_name, user_gpu in user_status.items():
                # TODO: job_manager.getAlias should be put in a util file
                user_name = user_name.split("@")[0].strip()
                vc["user_status"].append({
                    "userName": user_name,
                    "userGPU": user_gpu.ToSerializable()
                })

            ret = vc
            break
    return ret
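
Putting the GetVC arithmetic together: available = (quota - running
non-preemptible usage) - proportional reserved share. With the hypothetical
numbers from the previous sketch:

# Hypothetical numbers: quota of 40 gpus, 25 held by running
# non-preemptible jobs, 4 reserved via the proportional split above.
vc_total, vc_consumed, vc_reserved = 40, 25, 4
vc_available = (vc_total - vc_consumed) - vc_reserved
print(vc_available)  # 11, reported as vc["gpu_avaliable"]
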
Example #5
0
def GetVC(userName, vcName):
    ret = None

    data_handler = DataHandler()

    cluster_status, _ = data_handler.GetClusterStatus()
    cluster_total = cluster_status["gpu_capacity"]
    cluster_available = cluster_status["gpu_avaliable"]
    cluster_reserved = cluster_status["gpu_reserved"]

    user_status = collections.defaultdict(ResourceInfo)
    user_status_preemptable = collections.defaultdict(ResourceInfo)

    vc_list = getClusterVCs()
    vc_info = {}
    vc_usage = collections.defaultdict(
        lambda: collections.defaultdict(int))
    vc_preemptable_usage = collections.defaultdict(
        lambda: collections.defaultdict(int))

    for vc in vc_list:
        vc_info[vc["vcName"]] = json.loads(vc["quota"])

    active_job_list = data_handler.GetActiveJobList()
    for job in active_job_list:
        jobParam = json.loads(base64.b64decode(job["jobParams"]))
        if "gpuType" in jobParam:
            if not jobParam["preemptionAllowed"]:
                vc_usage[job["vcName"]][jobParam["gpuType"]] += GetJobTotalGpu(
                    jobParam)
            else:
                vc_preemptable_usage[job["vcName"]][
                    jobParam["gpuType"]] += GetJobTotalGpu(jobParam)

    result = quota.calculate_vc_gpu_counts(cluster_total, cluster_available,
                                           cluster_reserved, vc_info, vc_usage)

    vc_total, vc_used, vc_available, vc_unschedulable = result

    for vc in vc_list:
        if vc["vcName"] == vcName and AuthorizationManager.HasAccess(
                userName, ResourceType.VC, vcName, Permission.User):

            num_active_jobs = 0
            for job in active_job_list:
                if job["vcName"] == vcName and job["jobStatus"] == "running":
                    num_active_jobs += 1
                    username = job["userName"]
                    jobParam = json.loads(base64.b64decode(job["jobParams"]))
                    if "gpuType" in jobParam:
                        if not jobParam["preemptionAllowed"]:
                            if username not in user_status:
                                user_status[username] = ResourceInfo()
                            user_status[username].Add(
                                ResourceInfo({
                                    jobParam["gpuType"]:
                                    GetJobTotalGpu(jobParam)
                                }))
                        else:
                            if username not in user_status_preemptable:
                                user_status_preemptable[
                                    username] = ResourceInfo()
                            user_status_preemptable[username].Add(
                                ResourceInfo({
                                    jobParam["gpuType"]:
                                    GetJobTotalGpu(jobParam)
                                }))

            vc["gpu_capacity"] = vc_total[vcName]
            vc["gpu_used"] = vc_used[vcName]
            vc["gpu_preemptable_used"] = vc_preemptable_usage[vcName]
            vc["gpu_unschedulable"] = vc_unschedulable[vcName]
            vc["gpu_avaliable"] = vc_available[vcName]
            vc["AvaliableJobNum"] = num_active_jobs
            vc["node_status"] = cluster_status["node_status"]
            vc["user_status"] = []
            for user_name, user_gpu in user_status.items():
                # TODO: job_manager.getAlias should be put in a util file
                user_name = user_name.split("@")[0].strip()
                vc["user_status"].append({
                    "userName": user_name,
                    "userGPU": user_gpu.ToSerializable()
                })

            vc["user_status_preemptable"] = []
            for user_name, user_gpu in user_status_preemptable.iteritems():
                user_name = user_name.split("@")[0].strip()
                vc["user_status_preemptable"].append({
                    "userName":
                    user_name,
                    "userGPU":
                    user_gpu.ToSerializable()
                })

            try:
                gpu_idle_url = config["gpu_reporter"] + '/gpu_idle'
                gpu_idle_params = {"vc": vcName}
                gpu_idle_response = requests.get(gpu_idle_url,
                                                 params=gpu_idle_params)
                gpu_idle_json = gpu_idle_response.json()
                vc["gpu_idle"] = gpu_idle_json
            except Exception:
                logger.exception("Failed to fetch gpu_idle from gpu-exporter")

            ret = vc
            break
    return ret
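
quota.calculate_vc_gpu_counts is external to these snippets; the callers
only rely on it returning four per-VC, per-gpu-type maps. The sketch below
is hypothetical and merely consistent with how the result is unpacked; the
real quota module may apportion reserved and available GPUs differently:

def calculate_vc_gpu_counts(cluster_total, cluster_available,
                            cluster_reserved, vc_info, vc_usage):
    # cluster_available is accepted but unused in this sketch.
    vc_total, vc_used = {}, {}
    vc_available, vc_unschedulable = {}, {}
    for vc_name, quota_map in vc_info.items():
        vc_total[vc_name] = dict(quota_map)
        vc_used[vc_name], vc_available[vc_name] = {}, {}
        vc_unschedulable[vc_name] = {}
        for gpu_type, total in quota_map.items():
            used = min(vc_usage.get(vc_name, {}).get(gpu_type, 0), total)
            capacity = cluster_total.get(gpu_type, 0)
            reserved = cluster_reserved.get(gpu_type, 0)
            # Proportional share of reserved gpus, as in GetFraction above.
            unschedulable = reserved * total // capacity if capacity else 0
            vc_used[vc_name][gpu_type] = used
            vc_unschedulable[vc_name][gpu_type] = unschedulable
            vc_available[vc_name][gpu_type] = max(
                total - used - unschedulable, 0)
    return vc_total, vc_used, vc_available, vc_unschedulable
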
Example #6
0
def TakeJobActions(data_handler, redis_conn, launcher, jobs):
    vc_list = data_handler.ListVCs()
    cluster_status, _ = data_handler.GetClusterStatus()
    cluster_total = cluster_status["gpu_capacity"]
    cluster_available = cluster_status["gpu_avaliable"]
    cluster_reserved = cluster_status["gpu_reserved"]

    vc_info = {}
    vc_usage = collections.defaultdict(
        lambda: collections.defaultdict(int))

    for vc in vc_list:
        vc_info[vc["vcName"]] = json.loads(vc["quota"])

    active_job_list = data_handler.GetActiveJobList()
    for job in active_job_list:
        jobParam = json.loads(base64.b64decode(job["jobParams"]))
        if "gpuType" in jobParam:
            vc_usage[job["vcName"]][jobParam["gpuType"]] += GetJobTotalGpu(
                jobParam)

    result = quota.calculate_vc_gpu_counts(cluster_total, cluster_available,
                                           cluster_reserved, vc_info, vc_usage)
    vc_total, vc_used, vc_available, vc_unschedulable = result

    global_total = ResourceInfo(cluster_total)
    global_unschedulable = ResourceInfo(cluster_status["gpu_unschedulable"])

    vc_resources = {}
    globalResInfo = ResourceInfo.Difference(global_total, global_unschedulable)

    priority_dict = get_priority_dict()
    logging.info("Job priority dict: {}".format(priority_dict))

    for vc in vc_list:
        vc_name = vc["vcName"]
        vc_schedulable = {}
        for gpu_type, total in vc_total[vc_name].items():
            vc_schedulable[gpu_type] = (
                total - vc_unschedulable[vc_name][gpu_type])
        vc_resources[vc_name] = ResourceInfo(vc_schedulable)

    jobsInfo = []
    for job in jobs:
        if job["jobStatus"] in ["queued", "scheduling", "running"]:
            singleJobInfo = {}
            singleJobInfo["job"] = job
            job_params = json.loads(base64.b64decode(job["jobParams"]))
            singleJobInfo["preemptionAllowed"] = job_params[
                "preemptionAllowed"]
            singleJobInfo["jobId"] = job_params["jobId"]
            jobGpuType = "any"
            if "gpuType" in job_params:
                jobGpuType = job_params["gpuType"]
            singleJobInfo["globalResInfo"] = ResourceInfo(
                {jobGpuType: GetJobTotalGpu(job_params)})

            # Jobs are sorted by the criteria below, in order:
            # 1. non-preemptible precedes preemptible
            # 2. running precedes scheduling, which precedes queued
            # 3. higher priority value precedes lower priority value
            # 4. earlier job time precedes later job time

            # Non-Preemptible jobs first
            preemptible = 1 if singleJobInfo["preemptionAllowed"] else 0

            # Job status
            job_status = 0
            if job["jobStatus"] == "scheduling":
                job_status = 1
            elif job["jobStatus"] == "queued":
                job_status = 2

            # Priority value
            reverse_priority = get_job_priority(priority_dict,
                                                singleJobInfo["jobId"])
            priority = 999999 - reverse_priority

            # Job time
            job_time = str(job["jobTime"])

            singleJobInfo["sortKey"] = "{}_{}_{:06d}_{}".format(
                preemptible, job_status, priority, job_time)

            singleJobInfo["allowed"] = False
            jobsInfo.append(singleJobInfo)

    jobsInfo.sort(key=lambda x: x["sortKey"])

    logging.info("TakeJobActions : local resources : %s" % (vc_resources))
    logging.info("TakeJobActions : global resources : %s" %
                 (globalResInfo.CategoryToCountMap))

    for sji in jobsInfo:
        logging.info("TakeJobActions : job : %s : %s : %s" %
                     (sji["jobId"], sji["globalResInfo"].CategoryToCountMap,
                      sji["sortKey"]))
        vc_name = sji["job"]["vcName"]
        vc_resource = vc_resources[vc_name]

        if (not sji["preemptionAllowed"]) and (vc_resource.CanSatisfy(
                sji["globalResInfo"])):
            vc_resource.Subtract(sji["globalResInfo"])
            globalResInfo.Subtract(sji["globalResInfo"])
            sji["allowed"] = True
            logging.info(
                "TakeJobActions : local assignment : %s : %s" %
                (sji["jobId"], sji["globalResInfo"].CategoryToCountMap))

    for sji in jobsInfo:
        if sji["preemptionAllowed"] and (sji["allowed"] is False):
            if globalResInfo.CanSatisfy(sji["globalResInfo"]):
                logging.info(
                    "TakeJobActions : job : %s : %s" %
                    (sji["jobId"], sji["globalResInfo"].CategoryToCountMap))
                # Strict FIFO is not required for global (bonus) tokens
                # since these jobs are preemptible anyway.
                globalResInfo.Subtract(sji["globalResInfo"])
                sji["allowed"] = True
                logging.info(
                    "TakeJobActions : global assignment : %s : %s" %
                    (sji["jobId"], sji["globalResInfo"].CategoryToCountMap))

    logging.info("TakeJobActions : global resources : %s" %
                 (globalResInfo.CategoryToCountMap))

    for sji in jobsInfo:
        try:
            if sji["job"]["jobStatus"] == "queued" and sji["allowed"]:
                launcher.submit_job(sji["job"])
                update_job_state_latency(redis_conn, sji["jobId"],
                                         "scheduling")
                logging.info("TakeJobActions : submitting job : %s : %s" %
                             (sji["jobId"], sji["sortKey"]))
            elif (sji["preemptionAllowed"]
                  and sji["job"]["jobStatus"] in ["scheduling", "running"]
                  and not sji["allowed"]):
                launcher.kill_job(sji["job"]["jobId"], "queued")
                logging.info("TakeJobActions : pre-empting job : %s : %s" %
                             (sji["jobId"], sji["sortKey"]))
        except Exception:
            logging.error("Process job failed {}".format(sji["job"]),
                          exc_info=True)

    logging.info("TakeJobActions : job desired actions taken")