def helper_test_inject_instance_fields(handle, gpuIds):
    '''
    Inject a value for a GPU and check it is not reported for its instances.

    Instance and compute-instance IDs for gpuIds[0] are obtained from
    ensure_instance_ids(); the first key of each returned mapping is used.
    The GPU, the instance and the compute instance are added to one group
    that watches DCGM_FI_DEV_ECC_DBE_VOL_TOTAL. The value 2 is then injected
    for the GPU entity only, and the latest values of all three entities are
    read back:
      - the GPU entity must report 2
      - every other entity must report a status of DCGM_ST_NO_DATA

    handle -- DCGM handle passed through to every dcgm_agent call
    gpuIds -- sequence of GPU IDs; only gpuIds[0] is used
    '''
    instances, cis = ensure_instance_ids(handle, gpuIds[0], 1, 1)
    # dict.keys() returns a non-subscriptable view on Python 3, so
    # materialise it before indexing.
    firstInstanceId = list(instances.keys())[0]
    lastCIId = list(cis.keys())[0]

    # Set up the watches on these groups
    groupId = dcgm_agent.dcgmGroupCreate(
        handle, dcgm_structs.DCGM_GROUP_EMPTY, 'tien')
    fieldGroupId = dcgm_agent.dcgmFieldGroupCreate(
        handle, [dcgm_fields.DCGM_FI_DEV_ECC_DBE_VOL_TOTAL], 'kal')

    dcgm_agent.dcgmGroupAddEntity(
        handle, groupId, dcgm_fields.DCGM_FE_GPU, gpuIds[0])
    dcgm_agent.dcgmGroupAddEntity(
        handle, groupId, dcgm_fields.DCGM_FE_GPU_I, firstInstanceId)
    dcgm_agent.dcgmGroupAddEntity(
        handle, groupId, dcgm_fields.DCGM_FE_GPU_CI, lastCIId)

    dcgm_agent.dcgmWatchFields(handle, groupId, fieldGroupId, 1, 100, 100)

    # Inject only for the GPU entity; the instance entities get nothing.
    dcgm_internal_helpers.inject_value(
        handle,
        gpuIds[0],
        dcgm_fields.DCGM_FI_DEV_ECC_DBE_VOL_TOTAL,
        2,
        5,
        isInt=True,
        verifyInsertion=True,
        entityType=dcgm_fields.DCGM_FE_GPU)

    # Read the values to make sure they were stored properly
    entitySpecs = [
        (dcgm_fields.DCGM_FE_GPU_I, firstInstanceId),
        (dcgm_fields.DCGM_FE_GPU_CI, lastCIId),
        (dcgm_fields.DCGM_FE_GPU, gpuIds[0]),
    ]
    entities = []
    for entityGroupId, entityId in entitySpecs:
        pair = dcgm_structs.c_dcgmGroupEntityPair_t()
        pair.entityGroupId = entityGroupId
        pair.entityId = entityId
        entities.append(pair)

    fieldIds = [dcgm_fields.DCGM_FI_DEV_ECC_DBE_VOL_TOTAL]

    values = dcgm_agent.dcgmEntitiesGetLatestValues(
        handle, entities, fieldIds, 0)
    for v in values:
        if v.entityGroupId == dcgm_fields.DCGM_FE_GPU:
            assert v.value.i64 == 2, "Failed to inject value 2 for entity %u from group %u" % (
                v.entityId, v.entityGroupId)
        else:
            # Nothing was injected for the instance / compute instance.
            assert (
                v.status == dcgm_structs.DCGM_ST_NO_DATA
            ), "Injected meaningless value %u for entity %u from group %u" % (
                v.value.i64, v.entityId, v.entityGroupId)
def __init__(self, dcgmHandle, name="", fieldIds=None, fieldGroupId=None):
    '''
    Wrap an existing field group or create a new one.

    dcgmHandle   -- handle wrapper object; its .handle attribute is passed to
                    dcgm_agent.dcgmFieldGroupCreate() when a group is created
    name         -- name given to a newly created field group
    fieldIds     -- list of field IDs for the group; defaults to a fresh
                    empty list for each instance
    fieldGroupId -- ID of an already existing field group. When given, no
                    new field group is created and this ID is stored as-is.
    '''
    # A literal [] default would be shared by every instance constructed
    # without fieldIds, so mutations on one object would leak into others.
    if fieldIds is None:
        fieldIds = []

    self.name = name
    self.fieldIds = fieldIds
    self._dcgmHandle = dcgmHandle

    if fieldGroupId is not None:
        self.fieldGroupId = fieldGroupId
    else:
        self.fieldGroupId = None  #Assign here so the destructor doesn't fail if the call below fails
        self.fieldGroupId = dcgm_agent.dcgmFieldGroupCreate(
            self._dcgmHandle.handle, fieldIds, name)
def test_dcgm_connection_client_cleanup(handle, gpuIds):
    '''
    Make sure that resources that were allocated by a client are cleaned up

    A second connection is opened with persistAfterDisconnect = 0, a group
    and a field group are created through it, and it is then disconnected.
    Looking up either object through the original handle must afterwards
    raise (DCGM_ST_NO_DATA for the field group, DCGM_ST_NOT_CONFIGURED for
    the group).

    handle -- DCGM handle of the primary connection, used for the lookups
    gpuIds -- not used by this test
    '''
    fieldGroupFieldIds = [dcgm_fields.DCGM_FI_DEV_GPU_TEMP]

    #Get a 2nd connection which we'll check for cleanup. Use the raw APIs so we can explicitly cleanup
    connectParams = dcgm_structs.c_dcgmConnectV2Params_v1()
    connectParams.version = dcgm_structs.c_dcgmConnectV2Params_version
    connectParams.persistAfterDisconnect = 0
    cleanupHandle = dcgm_agent.dcgmConnect_v2('localhost', connectParams)

    try:
        groupName = 'clientcleanupgroup'
        groupId = dcgm_agent.dcgmGroupCreate(
            cleanupHandle, dcgm_structs.DCGM_GROUP_EMPTY, groupName)

        fieldGroupName = 'clientcleanupfieldgroup'
        fieldGroupId = dcgm_agent.dcgmFieldGroupCreate(
            cleanupHandle, fieldGroupFieldIds, fieldGroupName)
    finally:
        #Disconnect our second handle. This should cause the cleanup to occur.
        #Done in a finally so the connection is not leaked if creation fails.
        dcgm_agent.dcgmDisconnect(cleanupHandle)

    time.sleep(1.0)  #Allow connection cleanup to occur since it's asynchronous

    #Try to retrieve the field group info. This should throw an exception
    with test_utils.assert_raises(
            dcgm_structs.dcgmExceptionClass(dcgm_structs.DCGM_ST_NO_DATA)):
        dcgm_agent.dcgmFieldGroupGetInfo(handle, fieldGroupId)

    #Try to retrieve the group info. This should throw an exception
    with test_utils.assert_raises(
            dcgm_structs.dcgmExceptionClass(
                dcgm_structs.DCGM_ST_NOT_CONFIGURED)):
        dcgm_agent.dcgmGroupGetInfo(handle, groupId)