def _DownloadChapToVC(host, username, password):
    '''Download chap onto the VC host and make it executable.

    The binary is fetched with wget over SSH from the primary URL. When that
    download does not exit with 0, _AlternateChapInstall is tried instead and
    its result is returned.

    Args:
        host: host the commands are executed on through RunCmdOverSSH.
        username: SSH user name.
        password: SSH password.

    Returns:
        '/root/chap' when chap was downloaded and the chmod succeeded, None
        when the download or the chmod failed or a remote call raised.
    '''
    chap_url = 'http://engweb.eng.vmware.com/~tim/chap'
    chapPath = '/root/chap'
    chap_download_cmd = "wget -O %s %s" % (chapPath, chap_url)
    chap_grant_perm = 'chmod 777 %s' % chapPath
    try:
        (ret, stdout, stderr) = RunCmdOverSSH(chap_download_cmd, host,
                                              username, password)
        if ret != 0:
            # Primary server failed. Return the fallback's own verdict
            # instead of unconditionally claiming success: previously the
            # path was returned even when the fallback had failed as well.
            return _AlternateChapInstall(host, username, password)

        (ret, stdout, stderr) = RunCmdOverSSH(chap_grant_perm, host,
                                              username, password)
        if ret != 0:
            # Same failure signal _AlternateChapInstall uses when its chmod
            # does not succeed.
            return None
        return chapPath
    except Exception:
        # Best effort: any failure while talking to the host means "no chap".
        return None
def instalVpxdSymbol(vc, vcUser, vcPwd, version, build):
    '''Fetch the vpxd debuginfo RPM for a build onto the VC and install it.

    The RPM is downloaded with wget into /var/core on the VC and then
    installed with "rpm -i"; both commands run through RunCmdOverSSH.

    Args:
        vc: VC host the commands are executed on.
        vcUser: SSH user name.
        vcPwd: SSH password.
        version: version component of the RPM file name.
        build: build number; used in the RPM file name and the download URL.

    Returns:
        True once both the download and the installation exited with 0.

    Raises:
        Exception: if the download or the installation exits non-zero.
    '''
    rpmName = "".join(["VMware-vpxd-debuginfo-", str(version), "-",
                       str(build), ".x86_64.rpm"])
    rpmUrl = ("http://build-squid.eng.vmware.com/build/mts/release/bora-"
              + str(build) + "/publish/" + rpmName)

    # Step 1: pull the RPM onto the appliance.
    fetchCmd = 'wget -O /var/core/%s %s' % (rpmName, rpmUrl)
    (ret, stdout, stderr) = RunCmdOverSSH(fetchCmd, vc, vcUser, vcPwd,
                                          timeout=3600)
    if ret != 0:
        raise Exception("Failed to get debug file %s to VC %s due to %s" %
                        (rpmName, vc, str(stderr)))

    # Step 2: install it.
    rpmInstallCmd = "rpm -i /var/core/" + rpmName
    (ret, stdout, stderr) = RunCmdOverSSH(rpmInstallCmd, vc, vcUser, vcPwd,
                                          timeout=1800)
    if ret != 0:
        raise Exception("Failed to install %s in VC %s due to %s" %
                        (rpmName, vc, str(stderr)))
    return True
def RunPGAuditTool(vcHost, vcUser, vcPwd, log):
    '''Install and Enable Postgres Audit tool

    Mounts the tools share via MountVimDevTools, copies the pg_audit scripts
    to /storage/pg_audit on the VC, makes them executable, runs
    "main.sh install" (up to three attempts) and finally "main.sh enable".

    Args:
        vcHost: VC host the commands are executed on.
        vcUser: SSH user name.
        vcPwd: SSH password.
        log: logger-like object; only its info() method is used.

    Returns:
        0 when every step succeeded, 1 as soon as any step fails.
    '''
    from customSSH import RunCmdOverSSH

    def _RunLogged(cmd):
        # Run cmd on the VC, log its outcome and hand back the return code.
        (ret, stdout, stderr) = RunCmdOverSSH(cmd, vcHost, vcUser, vcPwd)
        # ret is formatted with %s, not %d: other callers in this file treat
        # ret == None as a possible outcome, and "%d" % None raises TypeError.
        log.info("cmd: %s,ret:%s,stdout:%s,stderr%s" %
                 (cmd, ret, stdout, stderr))
        return ret

    if MountVimDevTools(vcHost, vcUser, vcPwd, log) is False:
        log.info("Unable to mount %s" % VIMDEVTOOLSPATH)
        return 1

    # (command, message logged when the command fails)
    setupSteps = (
        ("mkdir -p /storage/pg_audit",
         "Creating direcotry failed, command run: %s"),
        ("cp %s/pg_audit/Prod/* /storage/pg_audit" % VIMDEVTOOLSPATH,
         "Copying pgaudit tool to VC host failed, command run: %s"),
        ("chmod 755 /storage/pg_audit/*.sh",
         "Could not change file persmission, command run: %s"),
    )
    for cmd, failureMsg in setupSteps:
        if _RunLogged(cmd) != 0:
            log.info(failureMsg % cmd)
            return 1

    # Retry installing pgaudit tool in case it fails. We saw deadlock issues
    # in past when installing pgaudit tool.
    install_cmd = "/storage/pg_audit/main.sh install"
    totalRetryInstall = 3
    for numretry in range(totalRetryInstall):
        if _RunLogged(install_cmd) == 0:
            break
        log.info("Failed to install postgres audit tool, command run: %s, "
                 "ATTEMPT# %s" % (install_cmd, numretry))
        # Wait for a minute before retrying; there is nothing to wait for
        # after the last attempt.
        if numretry + 1 < totalRetryInstall:
            time.sleep(60)
    else:
        log.info("Failed to install postgres audit tool, giving up ... ")
        return 1

    enableCDC_cmd = "/storage/pg_audit/main.sh enable"
    if _RunLogged(enableCDC_cmd) != 0:
        log.info("Failed to enable postgres audit tool: %s" % enableCDC_cmd)
        return 1
    return 0
def RunChap(service, ChapCmd, vc, vcUser, vcPwd):
    '''Execute a chap command on the VC host and print what it produced.

    The run counts as successful when stdout is non-empty and contains the
    word "allocations"; in that case stdout and stderr are printed,
    otherwise a fixed failure notice is printed.

    Args:
        service: service name, used only to label the printed output.
        ChapCmd: command line executed through RunCmdOverSSH.
        vc: VC host the command is executed on.
        vcUser: SSH user name.
        vcPwd: SSH password.

    Returns:
        The (ret, stdout, stderr) tuple obtained from RunCmdOverSSH.
    '''
    print("Running chap on service %s" % service)
    (ret, stdout, stderr) = RunCmdOverSSH(ChapCmd, vc, vcUser, vcPwd,
                                          timeout=3600)
    result = (ret, stdout, stderr)
    separator = '*' * 60

    if not (stdout and "allocations" in stdout):
        print("THREAD - %s - STDERR: \n %s \n" %
              (service, "CHAP didn't yield a success result.") + separator)
        return result

    print("THREAD - %s - STDOUT : \n %s" % (service, stdout))
    print("THREAD - %s - STDERR: \n %s \n" % (service, stderr) + separator)
    return result
def _AnalysisSteps(host, username, password, serviceName, dumpDir, jarPath, jmapPath, heap_analysis_pool, heap_analysis_result_pool, hprofname=None): try: _CreateAnalysisDirectory(dumpDir, serviceName, host, username, password) if hprofname: # This Snip triggers if HPROF is specified by User. new_location = dumpDir + serviceName try: # Copy the User Specified HPROF to Analysis Location. copyHprof = "cp " + hprofname + " " + new_location (ret, stdout, stderr) = RunCmdOverSSH(copyHprof, host, username, password) only_hprof = hprofname.split("/")[-1] hprofname = new_location + "/" + only_hprof except Exception, e: errorMsg = "Failure working with user specified HPROF %s" % str( e) final_result_dict[serviceName] = errorMsg else:
def _CreateAnalysisDirectory(dumpDir, serviceName, host, username, password):
    '''Recreate an empty, world-writable analysis directory for a service.

    The directory dumpDir + serviceName is removed, created again with
    "mkdir -p" and opened up with "chmod 777", all through RunCmdOverSSH.

    Args:
        dumpDir: parent directory; concatenated with serviceName as-is, so
            it is expected to end with a path separator.
        serviceName: name of the service the directory is for.
        host: host the commands are executed on.
        username: SSH user name.
        password: SSH password.

    Raises:
        Exception: if a remote call raises, or if the mkdir or chmod command
            does not exit with 0.
    '''
    analysisDir = dumpDir + serviceName
    try:
        # Clearing out a previous run is best effort; the result is ignored.
        RunCmdOverSSH("rm -rf " + analysisDir, host, username, password)

        # Creation and permissions are mandatory: report a failure instead
        # of letting later steps trip over a missing or unwritable directory.
        for cmd in ("mkdir -p " + analysisDir, "chmod 777 " + analysisDir):
            (ret, stdout, stderr) = RunCmdOverSSH(cmd, host, username,
                                                  password)
            if ret != 0:
                raise Exception("command '%s' returned %s: %s" %
                                (cmd, ret, stderr))
    except Exception as e:
        raise Exception("Problem while Creation of Analysis Directory: %s" %
                        str(e))
def RunAh64(ah64Cmd, vc, vcUser, vcPwd, vcVersion, vcBuild, corefile,
            getSymReqs=False):
    '''Run ah64 on VC Host and print the output

    The command is attempted up to three times. Whenever its stdout reports
    that symbolic information is missing and getSymReqs is True,
    GetVpxdSymbols is called before the next attempt.

    Args:
        ah64Cmd: command line executed through RunCmdOverSSH.
        vc: VC host the command is executed on.
        vcUser: SSH user name.
        vcPwd: SSH password.
        vcVersion: forwarded to GetVpxdSymbols.
        vcBuild: forwarded to GetVpxdSymbols.
        corefile: forwarded to GetVpxdSymbols.
        getSymReqs: when False the first result is accepted as-is.

    Returns:
        The (ret, stdout, stderr) tuple of the accepted attempt.

    Raises:
        Exception: if all three attempts still report missing symbols;
            exceptions from GetVpxdSymbols propagate unchanged.
    '''
    # Only consumed by debug output that is currently disabled.
    runTime = datetime.datetime.now().strftime("%d-%m-%y:%H:%M:%S")
    symbolsMissingMarker = "Symbolic information is not yet available"
    maxAttempts = 3

    for _attempt in range(maxAttempts):
        (ret, stdout, stderr) = RunCmdOverSSH(ah64Cmd, vc, vcUser, vcPwd,
                                              timeout=3600)
        if stdout and symbolsMissingMarker in stdout and getSymReqs is True:
            GetVpxdSymbols(vc, vcUser, vcPwd, corefile, vcVersion, vcBuild)

        # Another attempt is only worthwhile when symbols were missing and
        # the caller asked for them to be generated.
        if symbolsMissingMarker in str(stdout) and getSymReqs is not False:
            continue
        print(("STDERR: \n %s \n" % stderr) + ('*' * 60))
        break
    else:
        raise Exception(
            "Could not run Ah64 successfully with necessary symbols ")
    return (ret, stdout, stderr)
def _AlternateChapInstall(host, username, password):
    '''Download chap from the alternate server and make it executable.

    Args:
        host: host the commands are executed on through RunCmdOverSSH.
        username: SSH user name.
        password: SSH password.

    Returns:
        '/root/chap' when both the download and the chmod exited with 0,
        otherwise None (including when the chmod call raised).
    '''
    chapPath = '/root/chap'
    altChapUrl = "wget -O %s https://10.172.46.209/rip/static/Corefiles/chap --no-check-certificate" % chapPath
    (ret, stdout, stderr) = RunCmdOverSSH(altChapUrl, host, username, password)
    if ret != 0:
        return None

    changePermissionDir = "chmod 777 %s" % chapPath
    try:
        # One chmod is enough; the second identical call that used to follow
        # added nothing.
        (ret, stdout, stderr) = RunCmdOverSSH(changePermissionDir, host,
                                              username, password)
    except Exception:
        return None
    if ret == 0:
        return chapPath
    return None
def _PushMemoryJar(dumpDir, host, username, password):
    '''Download and unpack the Memory Analyzer archive into dumpDir.

    Any previous "mat" directory and "Mem*" files under dumpDir are removed
    first, then MemoryAnalyzer_Linux.zip is fetched with wget and unzipped
    in place. All commands run through RunCmdOverSSH.

    Args:
        dumpDir: target directory; concatenated with file names as-is, so it
            is expected to end with a path separator.
        host: host the commands are executed on.
        username: SSH user name.
        password: SSH password.

    Returns:
        dumpDir + "mat/" when the download and the unzip both exited with 0,
        otherwise None.

    Raises:
        Exception: if the unzip call itself raises.
    '''
    remJar = "rm -rf " + dumpDir + "/mat " + dumpDir + "/Mem*"
    # Cleanup is best effort; its result is not inspected.
    RunCmdOverSSH(remJar, host, username, password)

    getmemJarPath = "wget https://10.172.46.209/rip/static/Corefiles/MemoryAnalyzer_Linux.zip --no-check-certificate -P" + dumpDir
    (ret, stdout, stderr) = RunCmdOverSSH(getmemJarPath, host, username,
                                          password)
    if ret != 0:
        return None

    unizpMemJar = "unzip " + dumpDir + "MemoryAnalyzer_Linux.zip -d " + dumpDir
    try:
        (ret, stdout, stderr) = RunCmdOverSSH(unizpMemJar, host, username,
                                              password)
    except Exception as e:
        raise Exception(str(e))
    if ret == 0:
        return dumpDir + "mat/"
    return None
def MountVimDevTools(vcHost, vcUser, vcPwd, log):
    '''Mount tools folder on cloudvm

    Creates VIMDEVTOOLSPATH on the host and NFS-mounts the remote tools
    share onto it. A step counts as failed when it produces any stderr
    output.

    Args:
        vcHost: host the commands are executed on.
        vcUser: SSH user name.
        vcPwd: SSH password.
        log: logger-like object; only its info() method is used.

    Returns:
        True when both steps produced no stderr output, False otherwise.
    '''
    from customSSH import RunCmdOverSSH
    remoteToolsPath = "pa-group2.eng.vmware.com:/ifs/pa-group2/vimdevtools"

    # (command, announcement format, failure format), in execution order.
    steps = (
        # Create path for nfs mount
        ('mkdir -p %s' % VIMDEVTOOLSPATH,
         "Cloudvm: %s, command to be run: %s",
         "Could not create required directory, Failed to run command %s"),
        ("mount -t nfs %s %s -o nolock" % (remoteToolsPath, VIMDEVTOOLSPATH),
         "cloudvm: %s, Command to be run: %s",
         "Could not mount successfully, Failed to run command %s"),
    )
    for cmd, announceFmt, failureFmt in steps:
        log.info(announceFmt % (vcHost, cmd))
        (ret, stdout, stderr) = RunCmdOverSSH(cmd, vcHost, vcUser, vcPwd)
        log.info("ret=%s, stdout = %s ,stderr = %s" % (ret, stdout, stderr))
        if stderr:
            log.info(failureFmt % cmd)
            return False

    log.info("Tools folder mounted succesfully to %s" % VIMDEVTOOLSPATH)
    return True
def GetESXMemoryStats(host, user, pwd, log):
    '''Query the total memory figure reported by memstats on an ESX host.

    The value is read from the second-to-last newline-separated field of the
    command's stdout.

    Args:
        host: ESX host the command is executed on.
        user: SSH user name.
        pwd: SSH password.
        log: logger-like object; only its info() method is used.

    Returns:
        The parsed integer (the command is invoked with "-u mb"), or -1 when
        there was no stdout or it could not be parsed.
    '''
    from customSSH import RunCmdOverSSH
    (ret, stdout, stderr) = RunCmdOverSSH(
        'memstats -r comp-stats -u mb -s total', host, user, pwd, timeout=8)

    if stdout is None:
        log.info("Failed to query ESX memory size for host %s: ret=%s, stdout=%s, stderr=%s" % (host,ret,stdout, stderr))
        return -1

    try:
        # [-2]: the last element of the split is whatever follows the final
        # newline, so the value sits one position before it.
        return int(stdout.split('\n')[-2].strip())
    except Exception as e:
        log.info("Exception while getting esx memory for host=%s: %s" % (host, str(e)))
        return -1
def GenerateCoreFile(vcHost, vcUser, vcPwd, log, suffix=''):
    '''Generate a cloudvm core file using gcore

    Runs "gcore -o /var/core/liveVpxdCore[-suffix] `pidof vpxd`" on the host
    and extracts the resulting file name from the "Saved corefile ..." line
    in stdout.

    Args:
        vcHost: host the command is executed on.
        vcUser: SSH user name.
        vcPwd: SSH password.
        log: logger-like object; only its info() method is used.
        suffix: optional value appended to the core file base name after a
            dash; '' (the default) means no suffix.

    Returns:
        Full path of the generated core file, or None when the command did
        not exit with 0, produced no stdout, or the file name could not be
        found in stdout.
    '''
    from customSSH import RunCmdOverSSH
    suffix = '-'+str(suffix) if suffix != '' else ''
    corePath = '/var/core/liveVpxdCore'+suffix
    cmd = ("gcore -o %s `pidof vpxd`" % corePath)
    log.info("cloudvm: %s, command to be run: %s" % (vcHost, cmd))
    (ret, stdout, stderr) = RunCmdOverSSH(cmd , vcHost, vcUser, vcPwd, 180)
    # %s, not %d: other callers in this file treat ret == None as a possible
    # outcome, and "%d" % None raises TypeError before the check below runs.
    log.info("Return value : %s" % ret)
    if ret != 0 or not stdout:
        return None

    # Match exactly the base name that was requested, followed by ".<digits>".
    # Building the pattern from corePath also covers non-numeric suffixes,
    # which the former hard-coded "(-\d*)?" could not match.
    match = re.search(r"Saved corefile (%s\.\d*)" % re.escape(corePath),
                      stdout)
    if not match:
        log.info("Could not find core file name in stdout: %s" % stdout)
        return None
    coreFileName = match.group(1)
    log.info("core file full path is %s" % coreFileName)
    return coreFileName
def _TakeHeapDump(serviceName, host, username, password, jmapPath, dumpDir):
    '''Take a heap dump of a service with jmap and return the .hprof path.

    jmap is run through sudo as the user that solutionUserDict maps the
    service to, against the pid of "<serviceName>.launcher".

    Args:
        serviceName: service to dump; must be a key of solutionUserDict.
        host: host the command is executed on.
        username: SSH user name.
        password: SSH password.
        jmapPath: directory containing jmap; concatenated as-is, so it is
            expected to end with a path separator.
        dumpDir: parent of the per-service analysis directory; concatenated
            as-is as well.

    Returns:
        Path of the heap dump: dumpDir + serviceName + "/" + serviceName +
        ".hprof".

    Raises:
        Exception: if the service has no entry in solutionUserDict, if the
            remote call raises, or if jmap does not exit with 0 (the message
            is then the command's stderr).
    '''
    try:
        solutionUser = solutionUserDict[serviceName]
    except KeyError:
        # str(KeyError) is just the quoted key, which made a useless message.
        raise Exception("No solution user is known for service %s" %
                        serviceName)

    try:
        hprofFile = dumpDir + serviceName + "/" + serviceName + ".hprof"
        dumpCmd = "sudo -u "+ solutionUser + " " + jmapPath +"jmap -dump:format=b,file="+ \
            hprofFile +" `pidof "+ serviceName +".launcher`"
        (ret, stdout, stderr) = RunCmdOverSSH(dumpCmd, host, username,
                                              password, timeout=72000)
        if ret != 0:
            raise Exception(str(stderr))
        return hprofFile
    except Exception as e:
        raise Exception(str(e))
def GetDebugFileType(f, vc, vcUser, vcPwd):
    '''Get file type for a given file in VC

    Runs "file <f>" on the VC and classifies its stdout.

    Args:
        f: path of the file to inspect on the VC.
        vc: VC host the command is executed on.
        vcUser: SSH user name.
        vcPwd: SSH password.

    Returns:
        A dict with the keys:
            'name':   the path that was inspected,
            'exists': False when stdout reports "No such file or directory",
                      True otherwise,
            'ftype':  None (file missing), 'brokenSymbolicLink',
                      'symbolicLink' or 'regular'.

    Raises:
        Exception: if the command returned no stdout at all.
    '''
    checkFileCmd = 'file %s' % f
    (ret, stdout, stderr) = RunCmdOverSSH(checkFileCmd, vc, vcUser, vcPwd,
                                          timeout=1800)
    if stdout is None:
        # Fail with a readable message rather than a TypeError from the
        # substring tests below.
        raise Exception("Could not determine file type of %s on VC %s: "
                        "ret=%s, stderr=%s" % (f, vc, ret, stderr))

    fileInfo = {'name': '%s' % f, 'exists': True, 'ftype': ''}
    if 'No such file or directory' in stdout:
        fileInfo['exists'] = False
        fileInfo['ftype'] = None
    elif 'broken symbolic link' in stdout:
        fileInfo['ftype'] = 'brokenSymbolicLink'
    elif 'symbolic link to' in stdout:
        # Substring test, not startswith(): "file <path>" prefixes its output
        # with "<path>: ", so the text never starts with "symbolic link to".
        # Broken links were already handled by the branch above.
        fileInfo['ftype'] = 'symbolicLink'
    else:
        fileInfo['ftype'] = 'regular'
    return fileInfo
def mem_analysis_handler(host, username, password, service, chapPath,
                         core_analysis_pool, core_analysis_result_pool):
    '''Generate a live core for a service and queue its analysis.

    Runs "vmon-cli -d <service>" through RunCmdOverSSH. On success the core
    file path (the last whitespace-separated token of stdout) is handed to
    core_analysis_handler via core_analysis_pool.apply_async and the async
    result is appended to core_analysis_result_pool. Failures are recorded
    in dictionaries that are not defined in this function
    (long_running_dict, no_service_running_dict, exception_service_dict).

    Args:
        host: host the command is executed on.
        username: SSH user name.
        password: SSH password.
        service: name of the service to dump.
        chapPath: forwarded to core_analysis_handler.
        core_analysis_pool: object providing apply_async().
        core_analysis_result_pool: list-like object the async result is
            appended to.

    Returns:
        None. Exceptions are caught and recorded in exception_service_dict.
    '''
    try:
        generate_core_cmd = "/usr/lib/vmware-vmon/vmon-cli -d %s" % service
        (ret, stdout, stderr) = RunCmdOverSSH(generate_core_cmd, host,
                                              username, password)
        s = "Completed dump service livecore request"
        core_file_path = None
        if stdout and s in stdout and ret == 0:
            core_file_path = stdout.split()[-1]
        elif ret is None:
            # No return code at all is reported as a timeout.
            long_running_dict[
                service] = "Timeout while generating core. Proceed manually."
        elif ret == 4:
            # Return code 4 is reported as "service not running".
            no_service_running_dict[service] = "Service not running on VC"

        if core_file_path:
            core_analysis_result_pool.append(
                core_analysis_pool.apply_async(
                    core_analysis_handler,
                    (service, chapPath, core_file_path, host, username,
                     password)))
        else:
            exception_service_dict[
                service] = "Core file could not be generated."
    except Exception as e:
        exception_service_dict[service] = str(e)
def CheckDebugFilesInDevBuild(vc, vcUser, vcPwd, version, build, corefile):
    '''Check if dev build style VC has required files to generate debug symbols

    The file types of vpxd and vpxd.debug are obtained with GetDebugFileType.
    When the pair looks unusable the debug symbols are installed with
    instalVpxdSymbol; otherwise gdb is run on the VC with the core file's
    ".symreqs" script as input.

    Args:
        vc: VC host the commands are executed on.
        vcUser: SSH user name.
        vcPwd: SSH password.
        version: forwarded to instalVpxdSymbol.
        build: forwarded to instalVpxdSymbol.
        corefile: path of the core file on the VC; "<corefile>.symreqs" is
            sourced by gdb.

    Returns:
        The result of instalVpxdSymbol when symbols had to be installed,
        otherwise True when the gdb command exited with 0 and False when it
        did not.

    Raises:
        Exception: whatever GetDebugFileType or instalVpxdSymbol raise.
    '''
    vpxdInfo = GetDebugFileType('/usr/lib/vmware-vpx/vpxd', vc, vcUser, vcPwd)
    vpxdDebugInfo = GetDebugFileType(
        '/usr/lib/debug/usr/lib/vmware-vpx/vpxd.debug', vc, vcUser, vcPwd)

    # Every one of these situations is handled the same way: install the
    # debug symbols.
    #  - vpxd exists but vpxd.debug does not;
    #  - either file is a broken symbolic link;
    #  - one is a file and one is a link, so the symbols are probably not
    #    consistent with the binaries.
    # The former separate "vpxd is a link and vpxd.debug does not exist"
    # check is covered by the first and last conditions for every result
    # GetDebugFileType can produce.
    symbolsUnusable = (
        (vpxdInfo['exists'] and not vpxdDebugInfo['exists'])
        or vpxdInfo['ftype'] == 'brokenSymbolicLink'
        or vpxdDebugInfo['ftype'] == 'brokenSymbolicLink'
        or vpxdInfo['ftype'] != vpxdDebugInfo['ftype'])
    if symbolsUnusable:
        return instalVpxdSymbol(vc, vcUser, vcPwd, version, build)

    # Both files have the same file type from here on.
    if vpxdInfo['ftype'] == '':
        # Type never determined; report failure explicitly instead of
        # falling off the end of the function.
        return False

    symDefGenCmd = "echo source %s.symreqs | gdb -c %s /usr/lib/vmware-vpx/vpxd" % (
        corefile, corefile)
    (ret, stdout, stderr) = RunCmdOverSSH(symDefGenCmd, vc, vcUser, vcPwd,
                                          timeout=600)
    # NOTE(review): looks like leftover debug output; kept so stdout is
    # unchanged, but written as a call so it parses on Python 2 and 3.
    print("Coming Here")
    return ret == 0
def GetVpxdSymbols(vc, vcUser, vcPwd, corefile, version, vcBuild):
    '''Make a symdefs file available for the given vpxd core file.

    If /var/core on the VC already holds a "livecore*.<vpxd pid>.symdefs"
    file, "<corefile>.symdefs" is symlinked to it. Otherwise, when both
    version and vcBuild are given, CheckDebugFilesInDevBuild is used to
    generate the symbols.

    Args:
        vc: VC host the commands are executed on.
        vcUser: SSH user name.
        vcPwd: SSH password.
        corefile: path of the core file on the VC.
        version: VC version, forwarded to CheckDebugFilesInDevBuild.
        vcBuild: VC build, forwarded to CheckDebugFilesInDevBuild.

    Returns:
        True when an existing symdefs file was linked or
        CheckDebugFilesInDevBuild reported success.

    Raises:
        Exception: if version or vcBuild is missing, or if
            CheckDebugFilesInDevBuild reports failure.
    '''
    # check if symdefs file for the pid already exists, use that file.
    (ret, stdout, stderr) = RunCmdOverSSH('pidof vpxd', vc, vcUser, vcPwd,
                                          timeout=3600)
    # Command output ends with a newline; left in place it became part of
    # the pattern below and no file name could ever match.
    vpxdPid = (stdout or '').strip()

    (ret, stdout, stderr) = RunCmdOverSSH('ls /var/core', vc, vcUser, vcPwd,
                                          timeout=3600)
    symDefFile = None
    if vpxdPid:
        symDefPattern = re.compile(r'livecore(.)*\.%s\.symdefs' %
                                   re.escape(vpxdPid))
        # No stdout from ls is treated as an empty directory listing.
        for f in (stdout or '').split('\n'):
            if symDefPattern.match(f):
                symDefFile = f
                break

    if symDefFile:
        createSymlinkCmd = 'ln -s /var/core/%s %s.symdefs' % (symDefFile,
                                                               corefile)
        RunCmdOverSSH(createSymlinkCmd, vc, vcUser, vcPwd, timeout=3600)
        return True

    if not (vcBuild and version):
        # The message used to be raised with its %s placeholder unfilled.
        raise Exception(
            ("VC Build is not specified for VC %s. Symdefs file could not be generated."
             "Memory growth Analysis is quitting now.") % vc)

    # Check if the correct debug files exists
    reqdFileExists = CheckDebugFilesInDevBuild(vc, vcUser, vcPwd, version,
                                               vcBuild, corefile)
    if not reqdFileExists:
        raise Exception("Files necessary on the dev build VC does not exist,"\
                        "Please check logs for details")
    return True
def CheckMemGrowth(vc, vcUser, vcPwd, vcLocalUser, vcLocalPwd, vcVersion, vcBuild): '''Check for memory growth in VC''' MemGrowthMasterDict = {} try: try: remoteAh64Path = _DownloadAH64ToVC(vc, vcUser, vcPwd) except Exception as e: #print("Exception raised while getting ah64 : %s" % str(e)) #print("Traceback: %s" % traceback.format_exc()) raise # Inventory Object map with (obj type , obj name) records invtObjMap = {'vim.Datastore': 'Datastore', 'vim.Folder': 'Folder', \ 'vim.VirtualMachine': 'Vm', 'vim.HostSystem': 'Host', 'vim.Network': 'Network'} try: #print("Getting connection to Vcenter") si = GetSI(vc, vcLocalUser, vcLocalPwd) atexit.register(Disconnect, si) #print("Successfully got connection to VC %s"% vc) except Exception, e: return "Error while connecting: " + str(e) #print("Getting Inventory Objects count in VC using VMODL Query %s" % vc) invtCounts, moIdList = GetObjectsCountInVCInventory(si, invtObjMap) #print("Inventory Object count in %s VC is %s" % (vc, str(invtCounts))) totalRetry = 2 numRetry = 1 moCounts = None while (numRetry <= totalRetry): try: generate_core_cmd = "/usr/lib/vmware-vmon/vmon-cli -d vpxd" (ret, stdout, stderr) = RunCmdOverSSH(generate_core_cmd, vc, vcUser, vcPwd, timeout=1800) s = "Completed dump service livecore request" corefile = None if stdout and s in stdout and ret == 0: corefile = stdout.split()[-1] #print("THREAD- %s - The core file for service is at %s" % (vc, corefile)) except Exception as e: return "Exception raised while generating VC %s core: %s" % ( vc, str(e)) #print("Getting Managed Object count in VC from core file %s" % vc) moCounts = GetVCMoCounts(vc, vcUser, vcPwd, remoteAh64Path, corefile, vcVersion, vcBuild, invtObjMap) #print("Managed Object count in VC %s is %s" % (vc, moCounts)) if not moCounts: errMsg = ('\nFailed to run ah64 on %s, Managed Objects were returned '\ 'as None' % vc) #print("%s" % errMsg) return "%s" % errMsg countsMismatch, diffCounts = CompareCounts(moCounts, invtCounts) MemGrowthDict = 
{} if countsMismatch: #print("Managed Object counts and Inventory counts did not match, ATTEMPT# %s" %numRetry) #print("Extra objects found at the end of ATTEMPT# %s: %s" % (numRetry, diffCounts)) MemGrowthDict["MOR in VC"] = sorted(invtCounts.items()) MemGrowthDict["MOR in Core"] = vc, sorted(moCounts.items()) MemGrowthMasterDict[numRetry] = MemGrowthDict time.sleep(5) numRetry += 1 else: MemGrowthDict["MOR in VC"] = sorted(invtCounts.items()) MemGrowthDict["MOR in Core"] = vc, sorted(moCounts.items()) MemGrowthMasterDict[numRetry] = MemGrowthDict break if numRetry > totalRetry: memoryGrowthMsg = "MEMORY GROWTH FOUND" MemGrowthMasterDict["Analysis"] = memoryGrowthMsg #print("%s" % memoryGrowthMsg) else: noMemoryGrowthMsg = ( 'VC: %s - No Memory Growth found after ATTEMPT# %s' % (vc, numRetry)) MemGrowthMasterDict["Analysis"] = noMemoryGrowthMsg
def _AnalyzeHeapDump(serviceName, jarPath, dumpDir, hprofFile, host, username, password): # Logic tp parse the hprof output. resultDict = {} pattern1 = """ .*?(One | [\d,]+)\s+instance[s]{,1}.*?of #Find the Number of Instance .*?\"(.*?)\" #Find the instance name .*?\"(.*?)\" #Who invoked the class .*?([,\d]+) #Bytes occupied """ pattern2 = """ .*?The\s+class\s+\"(.*?)\" #The instance Name .*?\"(.*?)\" #Who invoked the class .*?(\d+.*)?\( #Bytes occupied .*?(\w+)\s+(?=instance).* #Number of Instance """ scriptPath = jarPath + "ParseHeapDump.sh" #Adding -vmargs -Xmx4g -XX:-UseGCOverheadLimit it seems vsphere-ui analysis is hitting the JVM overlimit generateAnalysis = scriptPath + " " + hprofFile if serviceName == "vsphere-ui": generateAnalysis = generateAnalysis + " -vmargs -Xmx4g -XX:-UseGCOverheadLimit" ###print("THREAD - %s - Generating Required Files for Heap Analysis." % (serviceName)) try: ## SSH Code for Files (ret, stdout, stderr) = RunCmdOverSSH(generateAnalysis, host, username, password, timeout=72000) if ret == 0: #Only in case of Return code Go Further to Generate Leak uspect try: leakSuspectCmd = scriptPath + " " + hprofFile + " " + "org.eclipse.mat.api:suspects" #print("THREAD - %s - Finding Leak Suspects in the Heap." % (serviceName)) ## SSH Code for Leak Suspects (ret, stdout, stderr) = RunCmdOverSSH(leakSuspectCmd, host, username, password, timeout=72000) if ret == 0: try: unzipSupects = "unzip " + dumpDir + serviceName + "/" + serviceName + \ "_Leak_Suspects.zip -d " + dumpDir + serviceName + "/" ## SSH Code for Unzipping Leak Suspects #print("THREAD - %s - Unzip command. %s" % (serviceName,unzipSupects)) (ret, stdout, stderr) = RunCmdOverSSH(unzipSupects, host, username, password, timeout=72000) if ret == 0: stdout = None ret = None stderr = None try: readIndexFile = "cat " + dumpDir + serviceName + "/index.html" #print("THREAD - %s - Reading Leak Suspects in the Heap Analyzer output." 
% (serviceName)) # SSH Code to read the Leak Suspects (ret, stdout, stderr) = RunCmdOverSSH(readIndexFile, host, username, password, timeout=72000) if ret == 0: ###print("THREAD - %s - Parsing leak analyzer output." % (serviceName)) # resultDict[serviceName] = serviceName tree = lxml.html.fromstring(stdout) problem_statement_box = tree.find_class( 'important') suspectCount = 0 if not problem_statement_box: leakSuspect = "Leak Suspect " + str( suspectCount) resultDict[ leakSuspect] = "No Leaks observed" else: for a_problem in problem_statement_box: data = a_problem.text_content() dataToWork = data.encode( 'ascii', 'ignore').decode('ascii') match1 = re.match( pattern1, dataToWork, re.X) match2 = re.match( pattern2, dataToWork, re.X) if match1: suspectCount = suspectCount + 1 leakSuspect = "Leak Suspect " + str( suspectCount) resultDict[leakSuspect] = { 'Number of Instance': match1.group(1), 'Instance Name': match1.group(2), 'Loaded By': match1.group(3), 'Bytes': match1.group(4) } elif match2: suspectCount = suspectCount + 1 leakSuspect = "Leak Suspect " + str( suspectCount) resultDict[leakSuspect] = { 'Number of Instance': match2.group(4), 'Instance Name': match2.group(1), 'Loaded By': match2.group(2), 'Bytes': match2.group(3) } else: leakSuspect = "Leak Suspect " + str( suspectCount) resultDict[ leakSuspect] = "No Leaks observed" else: #print("Failure while reading leak analyzer output. %s" % str(stderr) + serviceName) resultDict["Error"] = resultDict.get( "Error", " " ) + "Failure while reading leak analyzer output. %s" % str( stderr) except Exception, e: #Exception while parsing Leak Suspects #print("Failure while reading leak analyzer output. %s" % str(stderr) + serviceName) resultDict["Error"] = resultDict.get( serviceName, " " ) + " Failure while reading leak analyzer output. %s" % str( e) else: #Do not Proceed if there failure while unzipping Leak Suspects #print("Failure while Unzipping Leak Suspect. 
%s" % str(stderr) + serviceName) resultDict["Error"] = resultDict.get( serviceName, " " ) + "Failure while Unzipping Leak Suspect . %s" % str( stderr) except Exception, e: #Exception while Unzipping the Leak Suspects resultDict["Error"] = resultDict.get( "Error", " " ) + "Failure while Unzipping Leak Suspect . %s" % str( e) else: #Do not Proceed if there failure while getting Leak Suspects
heap_analysis_pool.close() heap_analysis_pool.join() except Exception, e: finalReturnResult["Failure"] = str(e) return finalReturnResult heapModuleResult = dict(final_result_dict) finalReturnResult["Result"] = heapModuleResult # Get Uptime Days and Hours try: uptimecmd = "uptime -p" (ret, stdout, stderr) = RunCmdOverSSH(uptimecmd, host, username, password, timeout=1800) if ret != 0: finalReturnResult["Uptime"] = str(stderr) else: finalReturnResult["Uptime"] = str(stdout) except Exception, e: finalReturnResult[ "Uptime"] = "Could not obtain duration of uptime %s." % str(e) #Get Build Details try: uptimecmd = "grep 'BUILDNUMBER' /etc/vmware/.buildInfo | cut -d\":\" -f2" (ret, stdout, stderr) = RunCmdOverSSH(uptimecmd,