Example #1
0
def test_dcgm_connection_versions(handle):
    '''
    Test that different versions of dcgmConnect_v2 work

    Opens one connection to 127.0.0.1 with a v1 connect-params struct and one
    with a v2 struct, issues a basic request on each, and disconnects both.
    The DCGM calls raise on error, so the test passes when nothing is raised.

    handle -- not referenced in the body; presumably supplied by a test
              decorator that guarantees a running host engine (TODO confirm)
    '''
    localhostStr = "127.0.0.1"

    v1Struct = dcgm_structs.c_dcgmConnectV2Params_v1()
    v1Struct.version = dcgm_structs.c_dcgmConnectV2Params_version1
    #These APIs throw exceptions on error
    v1Handle = dcgm_agent.dcgmConnect_v2(
        localhostStr, v1Struct, dcgm_structs.c_dcgmConnectV2Params_version1)
    try:
        v2Struct = dcgm_structs.c_dcgmConnectV2Params_v2()
        v2Struct.version = dcgm_structs.c_dcgmConnectV2Params_version2
        #These APIs throw exceptions on error
        v2Handle = dcgm_agent.dcgmConnect_v2(
            localhostStr, v2Struct,
            dcgm_structs.c_dcgmConnectV2Params_version2)
        try:
            #Do a basic request with each handle. Only success matters,
            #so the returned GPU id lists are not kept.
            dcgm_agent.dcgmGetAllSupportedDevices(v1Handle)
            dcgm_agent.dcgmGetAllSupportedDevices(v2Handle)
        finally:
            #Always release the v2 connection, even if a request failed
            dcgm_agent.dcgmDisconnect(v2Handle)
    finally:
        #Always release the v1 connection, even if the v2 connect failed
        dcgm_agent.dcgmDisconnect(v1Handle)
Example #2
0
 def __StopDcgm__(self):
     '''
     Tear down the DCGM session held in self.handle.

     When self.isEmbedded is truthy the embedded server is stopped via
     dcgmStopEmbedded; otherwise the connection to the hostengine is closed
     via dcgmDisconnect.
     '''
     #Pick the matching teardown call, then apply it to our handle
     shutdownCall = (dcgm_agent.dcgmStopEmbedded
                     if self.isEmbedded else dcgm_agent.dcgmDisconnect)
     shutdownCall(self.handle)
Example #3
0
def test_dcgm_connection_client_cleanup(handle, gpuIds):
    '''
    Make sure that resources that were allocated by a client are cleaned up

    A second connection is opened with persistAfterDisconnect = 0, a group and
    a field group are created through it, and it is then disconnected. After a
    short wait, looking up either object through the original connection is
    expected to raise.

    handle -- connection used to query for the group and field group after
              the second connection has been closed
    gpuIds -- not referenced in the body
    '''
    fieldGroupFieldIds = [
        dcgm_fields.DCGM_FI_DEV_GPU_TEMP,
    ]

    #Get a 2nd connection which we'll check for cleanup. Use the raw APIs so we can explicitly cleanup
    connectParams = dcgm_structs.c_dcgmConnectV2Params_v1()
    connectParams.version = dcgm_structs.c_dcgmConnectV2Params_version
    connectParams.persistAfterDisconnect = 0
    cleanupHandle = dcgm_agent.dcgmConnect_v2('localhost', connectParams)

    try:
        groupName = 'clientcleanupgroup'
        groupId = dcgm_agent.dcgmGroupCreate(cleanupHandle,
                                             dcgm_structs.DCGM_GROUP_EMPTY,
                                             groupName)

        fieldGroupName = 'clientcleanupfieldgroup'
        fieldGroupId = dcgm_agent.dcgmFieldGroupCreate(cleanupHandle,
                                                       fieldGroupFieldIds,
                                                       fieldGroupName)
    finally:
        #Disconnect our second handle. This should cause the cleanup to occur.
        #Done in a finally so the connection is not leaked if a create fails.
        dcgm_agent.dcgmDisconnect(cleanupHandle)

    time.sleep(1.0)  #Allow connection cleanup to occur since it's asynchronous

    #Try to retrieve the field group info. This should throw an exception
    with test_utils.assert_raises(
            dcgm_structs.dcgmExceptionClass(dcgm_structs.DCGM_ST_NO_DATA)):
        dcgm_agent.dcgmFieldGroupGetInfo(handle, fieldGroupId)

    #Try to retrieve the group info. This should throw an exception
    with test_utils.assert_raises(
            dcgm_structs.dcgmExceptionClass(
                dcgm_structs.DCGM_ST_NOT_CONFIGURED)):
        dcgm_agent.dcgmGroupGetInfo(handle, groupId)
Example #4
0
def _test_connection_helper(domainSocketName):
    '''
    Connect over a unix domain socket in two ways and issue a basic request
    on each: first through the raw dcgm_agent API, then through
    pydcgm.DcgmHandle.

    domainSocketName -- passed as the address to dcgmConnect_v2 (with
                        addressIsUnixSocket set) and as unixSocketPath to
                        pydcgm.DcgmHandle

    The DCGM calls raise on error, so returning normally means both
    connection methods worked.
    '''
    #Make sure the library is initialized
    dcgm_agent.dcgmInit()
    #First, try the raw method of using the dcgm_agent API directly
    v2Struct = dcgm_structs.c_dcgmConnectV2Params_v2()
    v2Struct.version = dcgm_structs.c_dcgmConnectV2Params_version2
    v2Struct.addressIsUnixSocket = 1
    v2Handle = dcgm_agent.dcgmConnect_v2(
        domainSocketName, v2Struct,
        dcgm_structs.c_dcgmConnectV2Params_version2)
    try:
        #Use the handle, which will throw an exception on error
        dcgm_agent.dcgmGetAllSupportedDevices(v2Handle)
    finally:
        #Release the raw connection even if the request failed
        dcgm_agent.dcgmDisconnect(v2Handle)

    #Now use the DcgmHandle method
    dcgmHandle = pydcgm.DcgmHandle(unixSocketPath=domainSocketName)
    dcgmSystem = dcgmHandle.GetSystem()

    #Only success matters here, so the returned id list is not kept
    dcgmSystem.discovery.GetAllGpuIds()

    #Try to disconnect cleanly from our domain socket
    #NOTE(review): if dcgmSystem keeps a reference back to the handle, the
    #handle is not released until this function returns - confirm
    del dcgmHandle