Example #1
0
def create_fake_compute_instances(handle, parentIds, ciCount):
    """Create fake compute instances, assigned round-robin to GPU instances.

    Args:
        handle: DCGM handle forwarded to dcgmCreateFakeEntities.
        parentIds: Entity IDs of the GPU instances to use as parents. Parents
            are assigned in order, wrapping back to the first one when
            ciCount exceeds len(parentIds).
        ciCount: Number of fake compute instances to request.

    Returns:
        A dict mapping each compute-instance entity ID reported back by
        dcgmCreateFakeEntities to the entity ID of its parent. Empty when
        ciCount is not positive (no DCGM call is made in that case).

    Raises:
        IndexError: If ciCount is positive and parentIds is empty.
    """
    fakeCIMap = {}
    if ciCount <= 0:
        return fakeCIMap

    cfe = dcgm_structs_internal.c_dcgmCreateFakeEntities_v2()
    # Reset explicitly, as the sibling fake-entity helpers do.
    cfe.numToCreate = 0

    instanceIndex = 0
    for _ in range(ciCount):
        # Wrap *before* indexing. The comparison must be ">=": an index equal
        # to len(parentIds) is already out of range (the previous ">" check
        # raised IndexError as soon as ciCount exceeded len(parentIds)).
        if instanceIndex >= len(parentIds):
            instanceIndex = 0

        slot = cfe.entityList[cfe.numToCreate]
        slot.parent.entityGroupId = dcgm_fields.DCGM_FE_GPU_I
        slot.parent.entityId = parentIds[instanceIndex]
        slot.entity.entityGroupId = dcgm_fields.DCGM_FE_GPU_CI

        instanceIndex += 1
        cfe.numToCreate += 1

    updated = dcgm_agent_internal.dcgmCreateFakeEntities(handle, cfe)
    for i in range(updated.numToCreate):
        created = updated.entityList[i]
        # Only record entries that came back as compute instances.
        if created.entity.entityGroupId == dcgm_fields.DCGM_FE_GPU_CI:
            fakeCIMap[created.entity.entityId] = created.parent.entityId

    return fakeCIMap
Example #2
0
def create_fake_gpu_instances(handle, gpuIds, instanceCount):
    """Create fake GPU instances, assigned round-robin to the given GPUs.

    Args:
        handle: DCGM handle forwarded to dcgmCreateFakeEntities.
        gpuIds: Entity IDs of the GPUs to use as parents; the parent of the
            n-th requested instance is gpuIds[n % len(gpuIds)].
        instanceCount: Number of fake GPU instances to request.

    Returns:
        A dict mapping each GPU-instance entity ID reported back by
        dcgmCreateFakeEntities to the entity ID of its parent. Empty when
        instanceCount is not positive.
    """
    request = dcgm_structs_internal.c_dcgmCreateFakeEntities_v2()
    request.numToCreate = 0
    fakeInstanceMap = {}

    if instanceCount <= 0:
        return fakeInstanceMap

    for _ in range(instanceCount):
        slot = request.entityList[request.numToCreate]
        slot.parent.entityGroupId = dcgm_fields.DCGM_FE_GPU
        # Cycle through the supplied GPUs so instances are spread evenly.
        slot.parent.entityId = gpuIds[request.numToCreate % len(gpuIds)]
        slot.entity.entityGroupId = dcgm_fields.DCGM_FE_GPU_I
        request.numToCreate += 1

    # The instances are created up front so that callers can decide which GPU
    # any later compute instances end up on.
    response = dcgm_agent_internal.dcgmCreateFakeEntities(handle, request)
    for idx in range(response.numToCreate):
        created = response.entityList[idx]
        if created.entity.entityGroupId == dcgm_fields.DCGM_FE_GPU_I:
            fakeInstanceMap[created.entity.entityId] = created.parent.entityId

    return fakeInstanceMap
Example #3
0
def create_fake_gpus(handle, gpuCount):
    """Create fake GPUs and return their entity IDs.

    Args:
        handle: DCGM handle forwarded to dcgmCreateFakeEntities.
        gpuCount: Number of fake GPUs to request.

    Returns:
        A list with the entity ID of every GPU entry reported back by
        dcgmCreateFakeEntities, in the order they appear in the response.
    """
    request = dcgm_structs_internal.c_dcgmCreateFakeEntities_v2()
    request.numToCreate = 0

    for _ in range(gpuCount):
        slot = request.entityList[request.numToCreate]
        slot.entity.entityGroupId = dcgm_fields.DCGM_FE_GPU
        request.numToCreate += 1

    response = dcgm_agent_internal.dcgmCreateFakeEntities(handle, request)
    created = (response.entityList[idx] for idx in range(response.numToCreate))
    # Keep only the entries that came back as GPUs.
    return [
        item.entity.entityId
        for item in created
        if item.entity.entityGroupId == dcgm_fields.DCGM_FE_GPU
    ]
Example #4
0
def ensure_instance_ids(handle, gpuId, minInstances, minCIs):
    """Ensure a GPU has at least the requested GPU and compute instances.

    Reads the current hierarchy with dcgmGetGpuInstanceHierarchy, collects the
    GPU instances and compute instances found under gpuId, and creates fake
    entities to cover any shortfall.

    Args:
        handle: DCGM handle forwarded to the DCGM agent calls.
        gpuId: Entity ID of the GPU whose instances are counted and extended.
        minInstances: Minimum number of GPU instances wanted under gpuId.
        minCIs: Minimum number of compute instances wanted under gpuId.

    Returns:
        A tuple (instanceMap, ciMap). instanceMap maps GPU-instance entity ID
        to its parent entity ID; ciMap maps compute-instance entity ID to its
        parent entity ID. Both contain the pre-existing entries found in the
        hierarchy plus any fake entities created here.

    Raises:
        IndexError: If compute instances must be created but no parent GPU
            instance is known (see the review note on legalInstances below).
    """
    hierarchy = dcgm_agent.dcgmGetGpuInstanceHierarchy(handle)
    legalGpu = False
    instanceMap = {}
    ciMap = {}
    legalInstances = []

    # legalGpu tracks whether the most recently seen GPU instance belongs to
    # gpuId; compute instances are attributed to gpuId only while it is set.
    # Presumably the hierarchy lists each instance's compute instances right
    # after the instance itself -- TODO confirm against the DCGM API.
    for i in range(hierarchy.count):
        entity = hierarchy.entityList[i]
        groupId = entity.entity.entityGroupId
        if groupId == dcgm_fields.DCGM_FE_GPU_I:
            legalGpu = entity.parent.entityId == gpuId
            if legalGpu:
                instanceMap[entity.entity.entityId] = entity.parent.entityId
        elif groupId == dcgm_fields.DCGM_FE_GPU_CI and legalGpu:
            ciMap[entity.entity.entityId] = entity.parent.entityId
            # NOTE(review): existing instances enter legalInstances only via
            # their compute instances, so the list may hold duplicates and
            # omits instances that currently have no compute instance.
            legalInstances.append(entity.parent.entityId)

    instancesNeeded = minInstances - len(instanceMap)
    cisNeeded = minCIs - len(ciMap)

    if instancesNeeded > 0:
        cfe = dcgm_structs_internal.c_dcgmCreateFakeEntities_v2()
        cfe.numToCreate = 0
        for _ in range(instancesNeeded):
            slot = cfe.entityList[cfe.numToCreate]
            slot.parent.entityGroupId = dcgm_fields.DCGM_FE_GPU
            slot.parent.entityId = gpuId
            slot.entity.entityGroupId = dcgm_fields.DCGM_FE_GPU_I
            cfe.numToCreate += 1

        # Create the instances first so we can control which GPU the compute
        # instances are placed on.
        updated = dcgm_agent_internal.dcgmCreateFakeEntities(handle, cfe)
        for i in range(updated.numToCreate):
            created = updated.entityList[i]
            if created.entity.entityGroupId == dcgm_fields.DCGM_FE_GPU_I:
                instanceMap[created.entity.entityId] = created.parent.entityId
                legalInstances.append(created.entity.entityId)

    if cisNeeded > 0:
        cfe = dcgm_structs_internal.c_dcgmCreateFakeEntities_v2()
        cfe.numToCreate = 0
        for i in range(cisNeeded):
            # Use the i-th known instance as parent and fall back to the first
            # one once the list is exhausted. The check must be ">=": an index
            # equal to len(legalInstances) is already out of range (the
            # previous ">" check raised IndexError at exactly that index).
            instanceIndex = i
            if instanceIndex >= len(legalInstances):
                instanceIndex = 0

            slot = cfe.entityList[cfe.numToCreate]
            slot.parent.entityGroupId = dcgm_fields.DCGM_FE_GPU_I
            slot.parent.entityId = legalInstances[instanceIndex]
            slot.entity.entityGroupId = dcgm_fields.DCGM_FE_GPU_CI
            cfe.numToCreate += 1

        updated = dcgm_agent_internal.dcgmCreateFakeEntities(handle, cfe)
        for i in range(updated.numToCreate):
            created = updated.entityList[i]
            if created.entity.entityGroupId == dcgm_fields.DCGM_FE_GPU_CI:
                ciMap[created.entity.entityId] = created.parent.entityId

    return instanceMap, ciMap