Ejemplo n.º 1
0
def dcgmFieldGroupGetInfo(dcgm_handle, fieldGroupId):
    """
    Query the info structure of a field group.

    A c_dcgmFieldGroupInfo_v1 request is stamped with its version and the
    requested fieldGroupId, then passed by reference to the
    "dcgmFieldGroupGetInfo" entry point obtained from dcgmFP.

    Returns the same structure once the status code has been handed to
    dcgm_structs._dcgmCheckReturn (expected to raise on an error status).
    """
    info = dcgm_structs.c_dcgmFieldGroupInfo_v1()
    info.version = dcgm_structs.dcgmFieldGroupInfo_version1
    info.fieldGroupId = fieldGroupId

    status = dcgmFP("dcgmFieldGroupGetInfo")(dcgm_handle, byref(info))
    dcgm_structs._dcgmCheckReturn(status)
    return info
Ejemplo n.º 2
0
def dcgmGetValuesSince_v2(dcgm_handle, groupId, fieldGroupId, sinceTimestamp,
                          enumCB, userData):
    """
    Invoke the "dcgmGetValuesSince_v2" entry point and return the next
    timestamp to query from.

    sinceTimestamp is passed as a c_int64, enumCB is forwarded unchanged and
    userData is wrapped in a py_object. The library writes the follow-up
    timestamp into a c_int64 output parameter whose Python value is returned
    after the status code has been checked by dcgm_structs._dcgmCheckReturn.
    """
    api = dcgmFP("dcgmGetValuesSince_v2")
    next_since = c_int64()
    call_args = (dcgm_handle, groupId, fieldGroupId, c_int64(sinceTimestamp),
                 byref(next_since), enumCB, py_object(userData))
    status = api(*call_args)
    dcgm_structs._dcgmCheckReturn(status)
    return next_since.value
Ejemplo n.º 3
0
def dcgmGetAllDevices(dcgm_handle):
    """
    Return the GPU ids reported by the "dcgmGetAllDevices" entry point.

    A c_uint buffer with dcgm_structs.DCGM_MAX_NUM_DEVICES slots and a c_uint
    count output parameter are handed to the library. The status code is
    checked with dcgm_structs._dcgmCheckReturn (expected to raise on an error
    status).

    Returns:
        A Python list holding the first ``count`` entries of the id buffer.
    """
    c_count = c_uint()
    c_gpuid_list = (c_uint * dcgm_structs.DCGM_MAX_NUM_DEVICES)()
    fn = dcgmFP("dcgmGetAllDevices")
    ret = fn(dcgm_handle, c_gpuid_list, byref(c_count))
    dcgm_structs._dcgmCheckReturn(ret)
    # Slicing a ctypes array already produces a plain list of Python ints, so
    # no per-index comprehension (or slicing of a range) is required. A count
    # beyond the buffer capacity is clamped by the slice.
    return c_gpuid_list[:c_count.value]
Ejemplo n.º 4
0
def dcgmProfUnwatchFields(dcgmHandle, groupId):
    """
    Send a "dcgmProfUnwatchFields" request for groupId.

    Returns the c_dcgmProfUnwatchFields_v1 message that was passed by
    reference to the library, after the status code has been checked by
    dcgm_structs._dcgmCheckReturn (expected to raise on an error status).
    """
    request = dcgm_structs.c_dcgmProfUnwatchFields_v1()
    request.version = dcgm_structs.dcgmProfUnwatchFields_version1
    request.groupId = groupId

    status = dcgmFP("dcgmProfUnwatchFields")(dcgmHandle, byref(request))
    dcgm_structs._dcgmCheckReturn(status)
    return request
Ejemplo n.º 5
0
def dcgmGroupGetAllIds(dcgmHandle):
    """
    Return the group ids reported by the "dcgmGroupGetAllIds" entry point.

    A c_void_p buffer with dcgm_structs.DCGM_MAX_NUM_GROUPS slots and a
    c_uint count output parameter are handed to the library. The status code
    is checked with dcgm_structs._dcgmCheckReturn (expected to raise on an
    error status).

    Returns:
        A list with the first ``count`` entries of the id buffer.
    """
    fn = dcgmFP("dcgmGroupGetAllIds")
    c_count = c_uint()
    groupIdList = c_void_p * dcgm_structs.DCGM_MAX_NUM_GROUPS
    c_groupIdList = groupIdList()
    ret = fn(dcgmHandle, c_groupIdList, byref(c_count))
    dcgm_structs._dcgmCheckReturn(ret)
    # map(None, seq) only works on Python 2 (identity copy); on Python 3 it
    # yields a map object that raises TypeError once iterated. list() gives
    # the same list on both interpreters.
    return list(c_groupIdList[0:int(c_count.value)])
Ejemplo n.º 6
0
def dcgmProfGetSupportedMetricGroups(dcgmHandle, groupId):
    """
    Send a "dcgmProfGetSupportedMetricGroups" request for groupId.

    Returns the c_dcgmProfGetMetricGroups_v2 message that was passed by
    reference to the library, after the status code has been checked by
    dcgm_structs._dcgmCheckReturn (expected to raise on an error status).
    """
    request = dcgm_structs.c_dcgmProfGetMetricGroups_v2()
    # NOTE(review): a *_v2 structure is stamped with the *_version1 constant;
    # confirm against dcgm_structs that this pairing is intentional.
    request.version = dcgm_structs.dcgmProfGetMetricGroups_version1
    request.groupId = groupId

    status = dcgmFP("dcgmProfGetSupportedMetricGroups")(dcgmHandle,
                                                        byref(request))
    dcgm_structs._dcgmCheckReturn(status)
    return request
Ejemplo n.º 7
0
def dcgmDeleteMigEntity(dcgm_handle, entityGroupId, entityId, flags):
    """
    Send a "dcgmDeleteMigEntity" request built from the given arguments.

    The arguments are copied into a c_dcgmDeleteMigEntity_v1 structure that
    is passed by reference to the library. Nothing is returned; the status
    code is handed to dcgm_structs._dcgmCheckReturn (expected to raise on an
    error status).
    """
    api = dcgmFP("dcgmDeleteMigEntity")

    request = dcgm_structs.c_dcgmDeleteMigEntity_v1()
    request.version = dcgm_structs.c_dcgmDeleteMigEntity_version1
    request.entityGroupId = entityGroupId
    request.entityId = entityId
    request.flags = flags

    status = api(dcgm_handle, byref(request))
    dcgm_structs._dcgmCheckReturn(status)
Ejemplo n.º 8
0
def dcgmIntrospectGetHostengineCpuUtilization(dcgm_handle, waitIfNoData=True):
    """
    Call the "dcgmIntrospectGetHostengineCpuUtilization" entry point.

    waitIfNoData is forwarded to the library unchanged. Returns the
    c_dcgmIntrospectCpuUtil_v1 structure that was passed by reference, after
    the status code has been checked by dcgm_structs._dcgmCheckReturn
    (expected to raise on an error status).
    """
    api = dcgmFP("dcgmIntrospectGetHostengineCpuUtilization")

    result = dcgm_structs.c_dcgmIntrospectCpuUtil_v1()
    result.version = dcgm_structs.dcgmIntrospectCpuUtil_version1

    status = api(dcgm_handle, byref(result), waitIfNoData)
    dcgm_structs._dcgmCheckReturn(status)
    return result
Ejemplo n.º 9
0
def dcgmIntrospectGetHostengineMemoryUsage(dcgm_handle, waitIfNoData=True):
    """
    Call the "dcgmIntrospectGetHostengineMemoryUsage" entry point.

    waitIfNoData is forwarded to the library unchanged. Returns the
    c_dcgmIntrospectMemory_v1 structure that was passed by reference, after
    the status code has been checked by dcgm_structs._dcgmCheckReturn
    (expected to raise on an error status).
    """
    api = dcgmFP("dcgmIntrospectGetHostengineMemoryUsage")

    result = dcgm_structs.c_dcgmIntrospectMemory_v1()
    result.version = dcgm_structs.dcgmIntrospectMemory_version1

    status = api(dcgm_handle, byref(result), waitIfNoData)
    dcgm_structs._dcgmCheckReturn(status)
    return result
Ejemplo n.º 10
0
def dcgmConnect_v2(ip_address,
                   connectParams,
                   version=dcgm_structs.c_dcgmConnectV2Params_version):
    """
    Call the "dcgmConnect_v2" entry point and return the resulting handle.

    Note that connectParams is modified in place: its ``version`` field is
    overwritten with the ``version`` argument before the call.

    Returns the c_void_p handle filled in by the library, after the status
    code has been checked by dcgm_structs._dcgmCheckReturn (expected to
    raise on an error status).
    """
    connectParams.version = version
    handle = c_void_p()
    status = dcgmFP("dcgmConnect_v2")(ip_address, byref(connectParams),
                                      byref(handle))
    dcgm_structs._dcgmCheckReturn(status)
    return handle
Ejemplo n.º 11
0
def dcgmJobGetStats(dcgm_handle, jobid):
    """
    Call the "dcgmJobGetStats" entry point for jobid.

    Returns the c_dcgmJobInfo_v3 structure that was passed by reference to
    the library, after the status code has been checked by
    dcgm_structs._dcgmCheckReturn (expected to raise on an error status).
    """
    api = dcgmFP("dcgmJobGetStats")

    stats = dcgm_structs.c_dcgmJobInfo_v3()
    stats.version = dcgm_structs.dcgmJobInfo_version3

    status = api(dcgm_handle, jobid, byref(stats))
    dcgm_structs._dcgmCheckReturn(status)
    return stats
Ejemplo n.º 12
0
def dcgmEntityGetLatestValues(dcgmHandle, entityGroup, entityId, fieldIds):
    """
    Call the "dcgmEntityGetLatestValues" entry point for one entity.

    fieldIds is copied into a c_uint16 array, and an output array of
    c_dcgmFieldValue_v1 with one slot per field id is allocated for the
    library to fill.

    Returns the c_dcgmFieldValue_v1 array after the status code has been
    checked by dcgm_structs._dcgmCheckReturn (expected to raise on an error
    status).
    """
    api = dcgmFP("dcgmEntityGetLatestValues")

    num_fields = len(fieldIds)
    values = (dcgm_structs.c_dcgmFieldValue_v1 * num_fields)()
    ids = (c_uint16 * num_fields)(*fieldIds)

    status = api(
        dcgmHandle,
        c_uint(entityGroup),
        dcgm_fields.c_dcgm_field_eid_t(entityId),
        ids,
        c_uint(num_fields),
        values,
    )
    dcgm_structs._dcgmCheckReturn(status)
    return values
Ejemplo n.º 13
0
def dcgmFieldGroupCreate(dcgm_handle, fieldIds, fieldGroupName):
    """
    Call the "dcgmFieldGroupCreate" entry point and return the new group id.

    fieldIds is copied into a c_uint16 array that is passed by reference
    together with its length (as c_int32) and fieldGroupName.

    Returns the c_void_p output parameter filled in by the library, after
    the status code has been checked by dcgm_structs._dcgmCheckReturn
    (expected to raise on an error status).
    """
    new_group_id = c_void_p()
    num_fields = len(fieldIds)
    c_num_fields = c_int32(num_fields)
    c_ids = (c_uint16 * num_fields)(*fieldIds)

    api = dcgmFP("dcgmFieldGroupCreate")
    status = api(dcgm_handle, c_num_fields, byref(c_ids), fieldGroupName,
                 byref(new_group_id))
    dcgm_structs._dcgmCheckReturn(status)
    return new_group_id
Ejemplo n.º 14
0
def dcgmGetEntityGroupEntities(dcgm_handle, entityGroup, flags):
    """
    Call the "dcgmGetEntityGroupEntities" entry point for entityGroup.

    A c_uint32 buffer with dcgm_structs.DCGM_GROUP_MAX_ENTITIES slots is
    passed to the library along with a c_int32 count that is initialised to
    that capacity and passed by reference.

    Returns a list with the first ``count`` entries of the buffer, after the
    status code has been checked by dcgm_structs._dcgmCheckReturn (expected
    to raise on an error status).
    """
    max_entities = dcgm_structs.DCGM_GROUP_MAX_ENTITIES
    count = c_int32(max_entities)
    id_buffer = (c_uint32 * max_entities)()

    status = dcgmFP("dcgmGetEntityGroupEntities")(
        dcgm_handle, entityGroup, id_buffer, byref(count), flags)
    dcgm_structs._dcgmCheckReturn(status)
    return id_buffer[0:int(count.value)]
Ejemplo n.º 15
0
def dcgmGetPidInfo(dcgm_handle, groupId, pid):
    """
    Call the "dcgmGetPidInfo" entry point for pid within groupId.

    Returns the c_dcgmPidInfo_v2 structure (with its ``pid`` field set to
    the argument) that was passed by reference to the library, after the
    status code has been checked by dcgm_structs._dcgmCheckReturn (expected
    to raise on an error status).
    """
    api = dcgmFP("dcgmGetPidInfo")

    info = dcgm_structs.c_dcgmPidInfo_v2()
    info.version = dcgm_structs.dcgmPidInfo_version2
    info.pid = pid

    status = api(dcgm_handle, groupId, byref(info))
    dcgm_structs._dcgmCheckReturn(status)
    return info
Ejemplo n.º 16
0
def dcgmCreateMigEntity(dcgm_handle, parentId, profile, createOption, flags):
    """
    Send a "dcgmCreateMigEntity" request built from the given arguments.

    The arguments are copied into a c_dcgmCreateMigEntity_v1 structure that
    is passed by reference to the library. Nothing is returned; the status
    code is handed to dcgm_structs._dcgmCheckReturn (expected to raise on an
    error status).
    """
    api = dcgmFP("dcgmCreateMigEntity")

    request = dcgm_structs.c_dcgmCreateMigEntity_v1()
    request.version = dcgm_structs.c_dcgmCreateMigEntity_version1
    request.parentId = parentId
    request.createOption = createOption
    request.profile = profile
    request.flags = flags

    status = api(dcgm_handle, byref(request))
    dcgm_structs._dcgmCheckReturn(status)
Ejemplo n.º 17
0
def dcgmEntitiesGetLatestValues(dcgmHandle, entities, fieldIds, flags):
    """
    Call the "dcgmEntitiesGetLatestValues" entry point for several entities.

    entities is copied into a c_dcgmGroupEntityPair_t array and fieldIds
    into a c_uint16 array. The output array of c_dcgmFieldValue_v2 has
    len(fieldIds) * len(entities) slots.

    Returns the c_dcgmFieldValue_v2 array after the status code has been
    checked by dcgm_structs._dcgmCheckReturn (expected to raise on an error
    status).
    """
    api = dcgmFP("dcgmEntitiesGetLatestValues")

    num_fields = len(fieldIds)
    num_entities = len(entities)

    values = (dcgm_structs.c_dcgmFieldValue_v2 * (num_fields * num_entities))()
    entity_array = (dcgm_structs.c_dcgmGroupEntityPair_t *
                    num_entities)(*entities)
    field_array = (c_uint16 * num_fields)(*fieldIds)

    status = api(dcgmHandle, entity_array, c_uint(num_entities), field_array,
                 c_uint(num_fields), flags, values)
    dcgm_structs._dcgmCheckReturn(status)
    return values
Ejemplo n.º 18
0
def dcgmIntrospectGetFieldsMemoryUsage(dcgm_handle,
                                       introspectContext,
                                       waitIfNoData=True):
    """
    Call the "dcgmIntrospectGetFieldsMemoryUsage" entry point.

    introspectContext is passed by reference and waitIfNoData is forwarded
    unchanged. Returns the c_dcgmIntrospectFullMemory_v1 structure that was
    passed by reference, after the status code has been checked by
    dcgm_structs._dcgmCheckReturn (expected to raise on an error status).
    """
    api = dcgmFP("dcgmIntrospectGetFieldsMemoryUsage")

    usage = dcgm_structs.c_dcgmIntrospectFullMemory_v1()
    usage.version = dcgm_structs.dcgmIntrospectFullMemory_version1

    status = api(dcgm_handle, byref(introspectContext), byref(usage),
                 waitIfNoData)
    dcgm_structs._dcgmCheckReturn(status)
    return usage
Ejemplo n.º 19
0
def dcgmHealthCheck(dcgm_handle,
                    groupId,
                    version=dcgm_structs.dcgmHealthResponse_version4):
    """
    Call the "dcgmHealthCheck" entry point for groupId.

    Only dcgmHealthResponse_version4 is accepted: any other ``version`` is
    reported by passing DCGM_ST_VER_MISMATCH to
    dcgm_structs._dcgmCheckReturn (expected to raise on an error status).

    Returns the c_dcgmHealthResponse_v4 structure that was passed by
    reference to the library, after the status code has been checked.
    """
    supported = dcgm_structs.dcgmHealthResponse_version4
    if version != supported:
        dcgm_structs._dcgmCheckReturn(dcgm_structs.DCGM_ST_VER_MISMATCH)

    response = dcgm_structs.c_dcgmHealthResponse_v4()
    response.version = dcgm_structs.dcgmHealthResponse_version4

    status = dcgmFP("dcgmHealthCheck")(dcgm_handle, groupId, byref(response))
    dcgm_structs._dcgmCheckReturn(status)
    return response
Ejemplo n.º 20
0
def dcgmPolicyGet(dcgm_handle, group_id, count, status_handle):
    """
    Call the "dcgmPolicyGet" entry point for group_id.

    An array of ``count`` c_dcgmPolicy_v1 structures is allocated and each
    element's ``version`` field is set before the call. status_handle is
    forwarded to the library unchanged.

    Returns a list with the ``count`` array elements, after the status code
    has been checked by dcgm_structs._dcgmCheckReturn (expected to raise on
    an error status).
    """
    api = dcgmFP("dcgmPolicyGet")

    policies = (count * dcgm_structs.c_dcgmPolicy_v1)()
    for slot in range(count):
        policies[slot].version = dcgm_structs.dcgmPolicy_version1

    status = api(dcgm_handle, group_id, count, policies, status_handle)
    dcgm_structs._dcgmCheckReturn(status)
    return policies[0:count]
Ejemplo n.º 21
0
def dcgmIntrospectGetFieldsExecTime(dcgm_handle,
                                    introspectContext,
                                    waitIfNoData=True):
    """
    Call the "dcgmIntrospectGetFieldsExecTime" entry point.

    introspectContext is passed by reference and waitIfNoData is forwarded
    unchanged. Returns the c_dcgmIntrospectFullFieldsExecTime_v2 structure
    that was passed by reference, after the status code has been checked by
    dcgm_structs._dcgmCheckReturn (expected to raise on an error status).
    """
    api = dcgmFP("dcgmIntrospectGetFieldsExecTime")

    timing = dcgm_structs.c_dcgmIntrospectFullFieldsExecTime_v2()
    timing.version = dcgm_structs.dcgmIntrospectFullFieldsExecTime_version2

    status = api(dcgm_handle, byref(introspectContext), byref(timing),
                 waitIfNoData)
    dcgm_structs._dcgmCheckReturn(status)
    return timing
Ejemplo n.º 22
0
def dcgmHealthSet_v2(dcgm_handle, groupId, systems, updateInterval, maxKeepAge):
    """
    Send a "dcgmHealthSet_v2" request built from the given arguments.

    The arguments are copied into a c_dcgmHealthSetParams_v2 structure that
    is passed by reference to the library.

    Returns the status code from the library, after it has been checked by
    dcgm_structs._dcgmCheckReturn (expected to raise on an error status).
    """
    request = dcgm_structs.c_dcgmHealthSetParams_v2()
    request.version = dcgm_structs.dcgmHealthSetParams_version2
    request.groupId = groupId
    request.systems = systems
    request.updateInterval = updateInterval
    request.maxKeepAge = maxKeepAge

    status = dcgmFP("dcgmHealthSet_v2")(dcgm_handle, byref(request))
    dcgm_structs._dcgmCheckReturn(status)
    return status
Ejemplo n.º 23
0
def dcgmConfigGet(dcgm_handle, group_id, reqCfgType, count, status_handle):
    """
    Call the "dcgmConfigGet" entry point for group_id.

    An array of ``count`` c_dcgmDeviceConfig_v1 structures is allocated and
    each element's ``version`` field is set before the call. reqCfgType and
    status_handle are forwarded to the library unchanged. The status code is
    checked with dcgm_structs._dcgmCheckReturn (expected to raise on an
    error status).

    Returns:
        A list with the ``count`` array elements.
    """
    fn = dcgmFP("dcgmConfigGet")

    config_values_array = count * dcgm_structs.c_dcgmDeviceConfig_v1
    c_config_values = config_values_array()

    for index in range(count):
        c_config_values[index].version = dcgm_structs.dcgmDeviceConfig_version1

    ret = fn(dcgm_handle, group_id, reqCfgType, count, c_config_values,
             status_handle)
    dcgm_structs._dcgmCheckReturn(ret)
    # map(None, seq) only works on Python 2 (identity copy); on Python 3 it
    # yields a map object that raises TypeError once iterated. list() gives
    # the same list on both interpreters.
    return list(c_config_values[0:count])
Ejemplo n.º 24
0
    def GetLatestValues(self, fieldGroup):
        """
        Request the latest values of fieldGroup for this object's group.

        The request goes through dcgm_agent.dcgmGetLatestValues_v2 with
        helper_dcgm_field_values_since_entity_callback as the callback and
        this object as its user data; the values are meant to end up in the
        .values member variable.

        Note: fieldGroup is not watched automatically by this class. Set up
        the watch beforehand with dcgmGroup.samples.WatchFields()
        """
        status = dcgm_agent.dcgmGetLatestValues_v2(
            self._handle,
            self._groupId,
            fieldGroup.fieldGroupId,
            helper_dcgm_field_values_since_entity_callback,
            self,
        )
        # An error status is turned into an exception here
        dcgm_structs._dcgmCheckReturn(status)
Ejemplo n.º 25
0
def dcgmGetFieldSummary(dcgmHandle, fieldId, entityGroupType, entityId,
                        summaryMask, startTime, endTime):
    """
    Send a "dcgmGetFieldSummary" request built from the given arguments.

    The arguments are copied into a c_dcgmFieldSummaryRequest_v1 structure
    (summaryMask is stored in its ``summaryTypeMask`` field) that is passed
    by reference to the library.

    Returns that same request structure, after the status code has been
    checked by dcgm_structs._dcgmCheckReturn (expected to raise on an error
    status).
    """
    api = dcgmFP("dcgmGetFieldSummary")

    summary = dcgm_structs.c_dcgmFieldSummaryRequest_v1()
    summary.version = dcgm_structs.dcgmFieldSummaryRequest_version1
    summary.fieldId = fieldId
    summary.entityGroupType = entityGroupType
    summary.entityId = entityId
    summary.summaryTypeMask = summaryMask
    summary.startTime = startTime
    summary.endTime = endTime

    status = api(dcgmHandle, byref(summary))
    dcgm_structs._dcgmCheckReturn(status)
    return summary
Ejemplo n.º 26
0
def helperDiagCheckReturn(ret, response):
    """
    Check a diagnostic status code and enrich any resulting error.

    ret is handed to dcgm_structs._dcgmCheckReturn. If that raises a
    dcgm_structs.DCGMError and response.systemError.msg is not the empty
    string, the message is attached to the exception through
    SetAdditionalInfo before it propagates.

    Returns:
        response, unchanged, when no exception was raised.

    Raises:
        dcgm_structs.DCGMError: re-raised from _dcgmCheckReturn.
    """
    try:
        dcgm_structs._dcgmCheckReturn(ret)
    except dcgm_structs.DCGMError as e:
        if response.systemError.msg != "":
            # Add systemError information to the raised exception.
            e.SetAdditionalInfo("%s" % response.systemError.msg)
        # A bare raise re-raises the same (possibly annotated) exception
        # object in both cases.
        raise

    return response
Ejemplo n.º 27
0
def dcgmGroupGetInfo(dcgm_handle,
                     group_id,
                     version=dcgm_structs.c_dcgmGroupInfo_version2):
    """
    Call the "dcgmGroupGetInfo" entry point for group_id.

    Only c_dcgmGroupInfo_version2 is accepted: any other ``version`` is
    reported by passing DCGM_ST_VER_MISMATCH to
    dcgm_structs._dcgmCheckReturn (expected to raise on an error status).

    Returns:
        The c_dcgmGroupInfo_v2 structure that was passed by reference to the
        library, after the status code has been checked.
    """
    fn = dcgmFP("dcgmGroupGetInfo")

    # Reject unsupported versions up front so that the request structure is
    # always bound by the time it is used below.
    if version != dcgm_structs.c_dcgmGroupInfo_version2:
        dcgm_structs._dcgmCheckReturn(dcgm_structs.DCGM_ST_VER_MISMATCH)

    device_values = dcgm_structs.c_dcgmGroupInfo_v2()
    device_values.version = dcgm_structs.c_dcgmGroupInfo_version2

    ret = fn(dcgm_handle, group_id, byref(device_values))
    dcgm_structs._dcgmCheckReturn(ret)
    return device_values
Ejemplo n.º 28
0
def dcgmProfWatchFields(dcgmHandle, fieldIds, groupId, updateFreq, maxKeepAge,
                        maxKeepSamples):
    """
    Send a "dcgmProfWatchFields" request built from the given arguments.

    The scalar arguments are copied into a c_dcgmProfWatchFields_v1
    structure, ``numFieldIds`` is set to len(fieldIds) and each id is written
    into the structure's ``fieldIds`` array at its position.

    Returns that same structure, after the status code has been checked by
    dcgm_structs._dcgmCheckReturn (expected to raise on an error status).
    """
    request = dcgm_structs.c_dcgmProfWatchFields_v1()
    request.version = dcgm_structs.dcgmProfWatchFields_version1
    request.groupId = groupId
    request.updateFreq = updateFreq
    request.maxKeepAge = maxKeepAge
    request.maxKeepSamples = maxKeepSamples
    request.numFieldIds = c_uint32(len(fieldIds))
    for position, field_id in enumerate(fieldIds):
        request.fieldIds[position] = field_id

    status = dcgmFP("dcgmProfWatchFields")(dcgmHandle, byref(request))
    dcgm_structs._dcgmCheckReturn(status)
    return request
Ejemplo n.º 29
0
    def _WatchFieldGroup(self):
        """
        Start the host engine watch on this object's field group.

        After the watch is set up, dcgm_agent.dcgmUpdateAllFields is called
        and self.GetMore() is used to pull in the initial values.
        """
        watch_status = dcgm_agent.dcgmWatchFields(
            self._handle,
            self._groupId,
            self._fieldGroup.fieldGroupId,
            self._updateFreq,
            self._maxKeepAge,
            self._maxKeepSamples,
        )
        # An error status is turned into an exception here
        dcgm_structs._dcgmCheckReturn(watch_status)

        # Force a field update so that initial values are available to fetch
        update_status = dcgm_agent.dcgmUpdateAllFields(self._handle, 1)
        # An error status is turned into an exception here
        dcgm_structs._dcgmCheckReturn(update_status)

        # The first fetch starts from startTimestamp
        self.GetMore()
Ejemplo n.º 30
0
    def GetMore(self):
        """
        Retrieve further field values from DCGM.

        Returns how much self._numValuesSeen grew across the call, i.e. the
        number of field values that were retrieved.
        """
        seen_before = self._numValuesSeen

        # Manual operation mode requires an explicit field update first
        manual_mode = (
            self._oprationMode == dcgm_structs.DCGM_OPERATION_MODE_MANUAL)
        if manual_mode:
            update_status = dcgm_agent.dcgmUpdateAllFields(self._handle, 1)
            # An error status is turned into an exception here
            dcgm_structs._dcgmCheckReturn(update_status)

        # Remember where the next call should resume from
        self._nextSinceTimestamp = dcgm_agent.dcgmGetValuesSince_v2(
            self._handle,
            self._groupId,
            self._fieldGroup.fieldGroupId,
            self._nextSinceTimestamp,
            helper_dcgm_field_values_since_entity_callback,
            self,
        )
        return self._numValuesSeen - seen_before