Example #1
0
def test_dcgm_connection_versions(handle):
    '''
    Test that different versions of dcgmConnect_v2 work

    Opens one connection per supported connect-params struct version, issues a
    basic request on each, and disconnects every connection that was opened,
    even if a later step raises.
    '''
    localhostStr = "127.0.0.1"

    #(struct type, struct version) pairs to connect with, in connection order
    structVersions = (
        (dcgm_structs.c_dcgmConnectV2Params_v1,
         dcgm_structs.c_dcgmConnectV2Params_version1),
        (dcgm_structs.c_dcgmConnectV2Params_v2,
         dcgm_structs.c_dcgmConnectV2Params_version2),
    )

    openHandles = []
    try:
        for structType, structVersion in structVersions:
            connectParams = structType()
            connectParams.version = structVersion
            #These APIs throw exceptions on error
            openHandles.append(
                dcgm_agent.dcgmConnect_v2(localhostStr, connectParams,
                                          structVersion))

        #Do a basic request with each handle
        for openHandle in openHandles:
            dcgm_agent.dcgmGetAllSupportedDevices(openHandle)
    finally:
        #Clean up whichever handles were opened, in the order they were opened
        for openHandle in openHandles:
            dcgm_agent.dcgmDisconnect(openHandle)
Example #2
0
def vtDcgmConnect_v2(ip_address, connectParams, versionTest):
    """
    Call the raw dcgmConnect_v2 entry point with a v1 params struct whose
    version field is forced to versionTest.

    ip_address: passed unchanged as the first argument of dcgmConnect_v2.
    connectParams: not read; a fresh c_dcgmConnectV2Params_v1 is always
        built here, so fields set by the caller are not forwarded.
    versionTest: value written into the struct's version field for the call.

    Returns the c_void_p handle object passed by reference to the call. The
    return code is handed to dcgm_structs._dcgmCheckReturn.
    """
    params = dcgm_structs.c_dcgmConnectV2Params_v1()

    #The make_dcgm_version value is only logged; versionTest replaces it below
    params.version = dcgm_structs.make_dcgm_version(params, 1)
    logger.debug("Structure version: %d" % params.version)
    params.version = versionTest

    handleOut = c_void_p()
    connectFn = dcgmFP("dcgmConnect_v2")
    status = connectFn(ip_address, byref(params), byref(handleOut))
    dcgm_structs._dcgmCheckReturn(status)
    return handleOut
Example #3
0
def test_dcgm_connect_validate(handle, gpuIds):
    """
    Validates structure version

    Each bad version value passed to vtDcgmConnect_v2 is expected to raise
    the exception for DCGM_ST_VER_MISMATCH.
    """
    connectParams = dcgm_structs.c_dcgmConnectV2Params_v1()
    connectParams.persistAfterDisconnect = 0

    #0 is an invalid version; 50 is a random number version
    for versionTest in (0, 50):
        with test_utils.assert_raises(
                dcgmExceptionClass(dcgm_structs.DCGM_ST_VER_MISMATCH)):
            vtDcgmConnect_v2('localhost', connectParams, versionTest)
Example #4
0
def test_dcgm_connection_client_cleanup(handle, gpuIds):
    '''
    Make sure that resources that were allocated by a client are cleaned up

    A group and a field group are created over a second connection, that
    connection is closed, and both objects are then expected to be
    unreachable through the primary handle.
    '''
    fieldGroupFieldIds = [
        dcgm_fields.DCGM_FI_DEV_GPU_TEMP,
    ]

    #Get a 2nd connection which we'll check for cleanup. Use the raw APIs so we can explicitly cleanup
    connectParams = dcgm_structs.c_dcgmConnectV2Params_v1()
    connectParams.version = dcgm_structs.c_dcgmConnectV2Params_version
    connectParams.persistAfterDisconnect = 0
    cleanupHandle = dcgm_agent.dcgmConnect_v2('localhost', connectParams)

    try:
        groupName = 'clientcleanupgroup'
        groupId = dcgm_agent.dcgmGroupCreate(cleanupHandle,
                                             dcgm_structs.DCGM_GROUP_EMPTY,
                                             groupName)

        fieldGroupName = 'clientcleanupfieldgroup'
        fieldGroupId = dcgm_agent.dcgmFieldGroupCreate(cleanupHandle,
                                                       fieldGroupFieldIds,
                                                       fieldGroupName)
    finally:
        #Disconnect our second handle. This should cause the cleanup to occur.
        #Done in a finally block so the connection is not leaked if a create call fails
        dcgm_agent.dcgmDisconnect(cleanupHandle)

    time.sleep(1.0)  #Allow connection cleanup to occur since it's asynchronous

    #Try to retrieve the field group info. This should throw an exception
    with test_utils.assert_raises(
            dcgm_structs.dcgmExceptionClass(dcgm_structs.DCGM_ST_NO_DATA)):
        dcgm_agent.dcgmFieldGroupGetInfo(handle, fieldGroupId)

    #Try to retrieve the group info. This should throw an exception
    with test_utils.assert_raises(
            dcgm_structs.dcgmExceptionClass(
                dcgm_structs.DCGM_ST_NOT_CONFIGURED)):
        dcgm_agent.dcgmGroupGetInfo(handle, groupId)