Beispiel #1
0
def test_dcgm_connection_versions(handle):
    '''
    Test that different versions of dcgmConnect_v2 work

    Connects to 127.0.0.1 once with the v1 connection-parameter struct and
    once with the v2 struct, issues a dcgmGetAllSupportedDevices request on
    each connection, and then disconnects both. There are no explicit
    asserts: the dcgm_agent APIs throw exceptions on error, so the test
    passes if nothing raises.

    handle is not used directly in this function.
           NOTE(review): presumably supplied by a test fixture that makes
           sure a host engine is running - confirm against the decorators.
    '''
    localhostStr = "127.0.0.1"

    #(struct type, version constant) pairs to exercise, oldest first
    paramVersions = (
        (dcgm_structs.c_dcgmConnectV2Params_v1,
         dcgm_structs.c_dcgmConnectV2Params_version1),
        (dcgm_structs.c_dcgmConnectV2Params_v2,
         dcgm_structs.c_dcgmConnectV2Params_version2),
    )

    openHandles = []
    try:
        for structType, structVersion in paramVersions:
            connectParams = structType()
            connectParams.version = structVersion
            #These APIs throw exceptions on error
            openHandles.append(dcgm_agent.dcgmConnect_v2(
                localhostStr, connectParams, structVersion))

        #Do a basic request with each handle
        for connHandle in openHandles:
            dcgm_agent.dcgmGetAllSupportedDevices(connHandle)
    finally:
        #Clean up every handle that was opened, even if a later connect or
        #request raised, so a failing test does not leak connections
        for connHandle in openHandles:
            dcgm_agent.dcgmDisconnect(connHandle)
Beispiel #2
0
    def __init__(self, handle=None, ipAddress=None,
                 opMode=dcgm_structs.DCGM_OPERATION_MODE_AUTO, persistAfterDisconnect=False,
                 unixSocketPath=None, timeoutMs=0):
        '''
        Constructor

        Wraps an existing DCGM handle, starts an embedded host engine, or
        connects to a running host engine, depending on the arguments.

        handle is an existing handle from dcgmInit(). Pass None if you want this object to handle
               DCGM initialization for you. When a handle is given, it is stored as-is and every
               other connection argument is ignored.
        ipAddress is the host to connect to. None (together with unixSocketPath=None) = start an
                  embedded host engine
        opMode is a dcgm_structs.DCGM_OPERATION_MODE_* constant for how the host engine should run
               (embedded mode only)
        persistAfterDisconnect (TCP-IP connections only) is whether the host engine should persist
                               all of our watches after we disconnect. 1=persist our watches.
                               0=clean up after our connection
        unixSocketPath is a path on the local filesystem to a unix socket that the host engine is
                       listening on. This option is mutually exclusive with ipAddress
        timeoutMs is how long to wait for TCP/IP or Unix domain connections to establish in ms.
                  0=Default timeout (5000ms)

        Raises dcgm_structs.dcgmExceptionClass(dcgm_structs.DCGM_ST_BADPARAM) if both ipAddress
        and unixSocketPath are provided.
        '''
        self._handleCreated = False
        self._persistAfterDisconnect = persistAfterDisconnect

        #A caller-supplied handle is adopted without any further setup
        if handle is not None:
            self.handle = handle
            return

        self._ipAddress = ipAddress

        haveIpAddress = ipAddress is not None
        haveUnixSocket = unixSocketPath is not None

        #Can't provide both unix socket and ip address
        if haveIpAddress and haveUnixSocket:
            raise dcgm_structs.dcgmExceptionClass(dcgm_structs.DCGM_ST_BADPARAM)

        #Initialize the DCGM client library
        dcgm_structs._dcgmInit()
        dcgm_agent.dcgmInit() #Not harmful to call this multiple times in a process

        if haveIpAddress or haveUnixSocket:
            #We're connecting to something. Set up connection parameters
            params = dcgm_structs.c_dcgmConnectV2Params_v2()
            params.version = dcgm_structs.c_dcgmConnectV2Params_version
            params.timeoutMs = timeoutMs
            params.persistAfterDisconnect = 1 if self._persistAfterDisconnect else 0
            params.addressIsUnixSocket = 0 if haveIpAddress else 1

            targetAddress = ipAddress if haveIpAddress else unixSocketPath
            self.handle = dcgm_agent.dcgmConnect_v2(targetAddress, params)
            self.isEmbedded = False
        else:
            #Neither ipAddress nor unixSocketPath is present: start an embedded host engine
            self.handle = dcgm_agent.dcgmStartEmbedded(opMode)
            self.isEmbedded = True

        #Only reached once this object has created the handle itself
        self._handleCreated = True
Beispiel #3
0
def test_dcgm_connection_client_cleanup(handle, gpuIds):
    '''
    Make sure that resources that were allocated by a client are cleaned up

    Opens a second connection to 'localhost' with persistAfterDisconnect=0,
    creates a group and a field group on it, disconnects it, and then checks
    through the primary handle that looking up either object raises.

    handle is the primary connection used to query for the objects after the
           second connection has been closed
    gpuIds is not used in this function
    '''
    fieldGroupFieldIds = [
        dcgm_fields.DCGM_FI_DEV_GPU_TEMP,
    ]

    #Get a 2nd connection which we'll check for cleanup. Use the raw APIs so we can explicitly cleanup
    #NOTE(review): this pairs the v1 struct with the unsuffixed version
    #constant - confirm that constant matches the v1 struct layout
    connectParams = dcgm_structs.c_dcgmConnectV2Params_v1()
    connectParams.version = dcgm_structs.c_dcgmConnectV2Params_version
    connectParams.persistAfterDisconnect = 0
    cleanupHandle = dcgm_agent.dcgmConnect_v2('localhost', connectParams)

    try:
        groupId = dcgm_agent.dcgmGroupCreate(cleanupHandle,
                                             dcgm_structs.DCGM_GROUP_EMPTY,
                                             'clientcleanupgroup')

        fieldGroupId = dcgm_agent.dcgmFieldGroupCreate(cleanupHandle,
                                                       fieldGroupFieldIds,
                                                       'clientcleanupfieldgroup')
    finally:
        #Disconnect our second handle. This should cause the cleanup to occur.
        #Doing it in finally also keeps the connection from leaking if one of
        #the create calls above raised
        dcgm_agent.dcgmDisconnect(cleanupHandle)

    time.sleep(1.0)  #Allow connection cleanup to occur since it's asynchronous

    #Try to retrieve the field group info. This should throw an exception
    with test_utils.assert_raises(
            dcgm_structs.dcgmExceptionClass(dcgm_structs.DCGM_ST_NO_DATA)):
        dcgm_agent.dcgmFieldGroupGetInfo(handle, fieldGroupId)

    #Try to retrieve the group info. This should throw an exception
    with test_utils.assert_raises(
            dcgm_structs.dcgmExceptionClass(
                dcgm_structs.DCGM_ST_NOT_CONFIGURED)):
        dcgm_agent.dcgmGroupGetInfo(handle, groupId)
Beispiel #4
0
def _test_connection_helper(domainSocketName):
    '''
    Exercise a unix domain socket connection to the host engine two ways

    First connects with the raw dcgm_agent API using a v2 parameter struct
    with addressIsUnixSocket=1, then connects again through pydcgm.DcgmHandle.
    Each connection is used for one GPU discovery request; the calls are
    expected to throw an exception on error, so nothing is asserted.

    domainSocketName is the address passed to dcgmConnect_v2 and to
                     DcgmHandle(unixSocketPath=...)
    '''
    #Make sure the library is initialized
    dcgm_agent.dcgmInit()

    #First, try the raw method of using the dcgm_agent API directly
    v2Struct = dcgm_structs.c_dcgmConnectV2Params_v2()
    v2Struct.version = dcgm_structs.c_dcgmConnectV2Params_version2
    v2Struct.addressIsUnixSocket = 1
    v2Handle = dcgm_agent.dcgmConnect_v2(
        domainSocketName, v2Struct,
        dcgm_structs.c_dcgmConnectV2Params_version2)
    try:
        #Use the handle, which will throw an exception on error
        dcgm_agent.dcgmGetAllSupportedDevices(v2Handle)
    finally:
        #Always release the raw connection, even if the request above raised
        dcgm_agent.dcgmDisconnect(v2Handle)

    #Now use the DcgmHandle method
    dcgmHandle = pydcgm.DcgmHandle(unixSocketPath=domainSocketName)
    dcgmSystem = dcgmHandle.GetSystem()

    dcgmSystem.discovery.GetAllGpuIds()

    #Try to disconnect cleanly from our domain socket
    #NOTE(review): presumably relies on DcgmHandle releasing its connection
    #when the object is destroyed - confirm nothing else still references it
    del dcgmHandle