def main():
    """Run the ProcessStatsStress test and tear everything down afterwards.

    When g_embeddedMode is false, an NvHostEngineApp is started first and
    waited on after the stress object has finished. When g_embeddedMode is
    true, no host engine application is started.

    Any exception raised while running propagates to the caller after the
    cleanup in the finally clause has executed.
    """
    # Bind these before entering the try block: the finally clause reads
    # heHandle, so it must exist even if option parsing fails.
    heHandle = None
    heAppRunner = None

    try:
        #Make sure logging stuff is bootstrapped
        option_parser.parse_options()
        option_parser.options.no_logging = True #Don't log anything

        dcgm_structs._LoadDcgmLibrary()

        if not g_embeddedMode:
            #Start host engine
            heAppRunner = apps.NvHostEngineApp()
            heAppRunner.start(timeout=1000000000)
            # NOTE(review): presumably gives the host engine time to come up
            # before dcgmInit() is called -- confirm.
            time.sleep(2.0)

        heHandle = dcgm_agent.dcgmInit()

        pssObj = ProcessStatsStress(g_embeddedMode, heHandle)
        pssObj.Run()
        del pssObj #Force destructor

        # No host engine app exists in embedded mode, so only wait when one
        # was actually started.
        if heAppRunner is not None:
            heAppRunner.wait()
    finally:
        apps.AppRunner.clean_all()
        # Only shut DCGM down if dcgmInit() was reached and returned a handle.
        if heHandle is not None:
            dcgm_agent.dcgmShutdown()
# NOTE(review): the statements down to dcgmStatusDestroy() are the tail of a
# definition whose header is outside this view; they are reproduced unchanged
# apart from restored line breaks.
assert config_values[x].mComputeMode == expected_compute_mode, "The compute mode value for gpuID %d is incorrect."\
    " Returned: %d Expected: %d" \
    % (x, config_values[x].mComputeMode, expected_compute_mode)

assert config_values[x].mEccMode == expected_ecc, "The ecc mode value for gpuID %d is incorrect."\
    " Returned: %d Expected: %d" \
    % (x, config_values[x].mEccMode, expected_ecc)

pass

ret = dcgm_agent.dcgmStatusDestroy(status_handle)
assert (ret == dcgm_structs.DCGM_ST_OK
        ), "Failed to remove status handler, error: %s" % ret

# Load the DCGM library and initialise it to obtain a handle.
dcgm_structs._LoadDcgmLibrary()
handle = dcgm_agent.dcgmInit()

# Keep only the devices for which the latest DCGM_FI_DEV_RETIRED_DBE value is
# something other than DCGM_INT64_NOT_SUPPORTED.
devices = dcgm_agent.dcgmGetAllDevices(handle)
validDevices = list()
for x in devices:
    fvSupported = dcgm_agent_internal.dcgmGetLatestValuesForFields(
        handle, x, [
            dcgm_fields.DCGM_FI_DEV_RETIRED_DBE,
        ])
    if fvSupported[0].value.i64 != dcgmvalue.DCGM_INT64_NOT_SUPPORTED:
        validDevices.append(x)

# Nothing to do without at least one qualifying device: report and exit with
# a non-zero status.
if not validDevices:
    # Parenthesised single-argument form prints the same text on Python 2 and
    # Python 3; the bare print statement is a SyntaxError on Python 3.
    print("Can only run if at least one GPU with ECC is present")
    sys.exit(1)