def checkStatShareResTotal(res_name, num, timeout=30):
    '''
    Poll "jhosts -s <res_name>" until the total it reports equals num.

    The compared value is the second field of the last output line
    (extracted by the sed/awk pipeline in the command).

    res_name: resource name, converted with str().
    num:      expected total, converted with int().
    timeout:  maximum number of polls; every failed poll is followed by a
              2 second sleep.

    Returns the raw (unstripped) stdout of the poll that matched.
    Raises RuntimeError when the polls are exhausted without a match, when
    timeout is not positive, or when the wait is interrupted with Ctrl-C.
    '''
    res_name = str(res_name)
    num = int(num)
    timeout = int(timeout)
    query = "jhosts -s %s|sed -n '$p'|awk '{print $2}'" % res_name
    try:
        while timeout > 0:
            # Diagnostic output only: these two results are printed and
            # never used for the decision below.
            stdout0, stderr0, exitcode0 = execCommand(
                "ps -ef|grep $JHSCHEDULER_TOP")
            print("%s %s %s" % (stdout0, stderr0, exitcode0))
            print(timeout)
            stdout2, stderr2, exitcode2 = execCommand("jhosts -s", timeout=30)
            print("%s %s %s" % (stdout2, stderr2, exitcode2))

            stdout, stderr, exitcode = execCommand(query, timeout=30)
            print("stdout=%s,stderr=%s,exitcode=%s" % (stdout, stderr, exitcode))
            total = stdout.strip() if stdout else ''
            if total.isdigit() and int(total) == num:
                return stdout
            timeout -= 1
            time.sleep(2)
    except KeyboardInterrupt:
        print('')
        raise RuntimeError
    # Reaching this point means no poll matched. Raising unconditionally
    # also covers a negative timeout, which used to return None silently.
    raise RuntimeError
def checkClusterStatus(hoststatus='ok', checkTime=60):
    '''
    Wait until every "Status = ..." entry printed by both "jhosts stat -l"
    and "jhosts -l" equals hoststatus (case-insensitive).

    hoststatus: status text every host has to report.
    checkTime:  number of failed checks allowed; each failed check is
                followed by a 1 second sleep.

    Returns "cluster_ok" once all entries match.
    Raises RuntimeError when the checks are used up or on Ctrl-C.
    '''
    spendTime = int(checkTime)
    wanted = str(hoststatus).upper()
    status_pattern = r'\s+\bStatus\s*=\s*(.*)\s*\n'
    try:
        while True:
            print(spendTime)
            # Diagnostic output only; the job list does not affect the result.
            stdout0, stderr0, exitcode0 = execCommand("jjobs -u all", timeout=60)
            print("this is jjobs -u all output:begin")
            print("%s %s %s" % (stdout0, stderr0, exitcode0))
            print("jjobs -u all:end")

            stdout, stderr, exitcode = execCommand("jhosts stat -l", timeout=60)
            print("jhosts stat -l:\n%s" % stdout)
            stat_list_a = re.findall(status_pattern, stdout)
            stdout, stderr, exitcode = execCommand("jhosts -l", timeout=60)
            print("jhosts -l:\n%s" % stdout)
            stat_list_b = re.findall(status_pattern, stdout)
            print(stat_list_a)
            print(stat_list_b)

            # Both listings must be non-empty and of the same length. The old
            # test compared host_num_a with itself, so a mismatch was never
            # detected and ended in an IndexError.
            if (stat_list_a and len(stat_list_a) == len(stat_list_b)
                    and all(status.upper() == wanted
                            for status in stat_list_a + stat_list_b)):
                time.sleep(2)
                print("the cluster status is ok")
                return "cluster_ok"

            time.sleep(1)
            spendTime -= 1
            # "<= 0" so that a non-positive checkTime cannot loop forever.
            if spendTime <= 0:
                print("check cluster status fail")
                raise RuntimeError
    except KeyboardInterrupt:
        print('')
        raise RuntimeError
def postJadmin(timeout=60):
    '''
    Call killAllJob() and then run, as user jhadmin, the commands
    "jadmin hopen all", "jadmin qopen all" and "jadmin qact all".

    timeout: passed to execCommand for each command (converted with int()).

    Nothing is returned and no exception is raised on failure; for every
    command that exits non-zero its own stderr is printed.
    '''
    timeout = int(timeout)
    killAllJob()
    commands = (
        "su jhadmin -c 'jadmin hopen all'",
        "su jhadmin -c 'jadmin qopen all'",
        "su jhadmin -c 'jadmin qact all'",
    )
    # All three commands are executed before anything is reported, so a
    # failure of one never prevents the others from running.
    results = [execCommand(command, timeout) for command in commands]
    for stdout, stderr, exitcode in results:
        if exitcode:
            # Previously only the first command's stderr was printed, even
            # when the failing command was the second or third one.
            print("the error info of postJadmin is %s" % stderr)
def getStatShareResTotal(res):
    '''
    Return the total reported by "jhosts -s <res>".

    The value is the second field of the last output line (extracted by the
    sed/awk pipeline), with surrounding whitespace stripped.

    res: resource name, converted with str().

    Raises RuntimeError when the command exits non-zero.
    '''
    res = str(res)
    # this code for test:total begin
    # The diagnostic command used to end in a stray "'" which left an
    # unbalanced quote (assumes execCommand hands the string to a shell,
    # as the pipelines used below suggest).
    stdoutx, stderrx, exitcodex = execCommand("jhosts -s %s" % res, timeout=20)
    print("%s %s %s" % (stdoutx, stderrx, exitcodex))
    # this code for test:total end
    stdout, stderr, exitcode = execCommand(
        "jhosts -s %s|sed -n '$p'|awk '{print $2}'" % res, timeout=60)
    if exitcode:
        print('exec "jhosts -s %s" failed' % res)
        raise RuntimeError
    return stdout.strip()
def compareMem(master, slave, timeout=60):
    '''
    Order two hosts by the "Resource.mem = Total: ..." value printed by
    "jhosts -l <host>".

    the first host must be the one that run the autotest.

    master, slave: host names to compare.
    timeout:       passed to execCommand for each query; it does NOT bound
                   the number of retries.

    Both totals are converted with bit_change(). The pair is returned with
    the host having the larger total first: (slave, master) when slave's
    total is more than 5 above master's, (master, slave) when it is more
    than 5 below. While the difference stays within 5 the comparison is
    retried every 2 seconds; resumeMem("50") is called once, on the first
    inconclusive comparison.

    Raises RuntimeError when either query writes to stderr or on Ctrl-C
    during the wait, IndexError when the memory line is missing from either
    output, and checkError when bit_change() yields a value not above -1.
    '''
    print("begin compareMem")
    timeout = int(timeout)
    checkRange = 5
    mem_pattern = r'\bResource.mem\s*=\s*Total:\s*(.+)\s*,\s*Reserved:'
    attempt = 0
    while True:
        stdout, stderr, exitcode = execCommand("jhosts -l %s" % master, timeout)
        stdout2, stderr2, exitcode2 = execCommand("jhosts -l %s" % slave, timeout)
        print("%s %s" % (stderr, stderr2))
        if stderr or stderr2:
            raise RuntimeError

        ut_tmp = re.findall(mem_pattern, stdout)
        ut_tmp2 = re.findall(mem_pattern, stdout2)
        if not ut_tmp or not ut_tmp2:
            # Same exception type as the former bare indexing failure, but
            # with a message that names the hosts involved.
            raise IndexError(
                "cannot find Resource.mem of %s or %s" % (master, slave))

        ut1 = bit_change(str(ut_tmp[0]))
        ut2 = bit_change(str(ut_tmp2[0]))
        if not (ut1 > -1 and ut2 > -1):
            raise checkError("the format of param is error")

        res_compare = ut2 - ut1
        if res_compare > checkRange:
            return slave, master
        if res_compare < -checkRange:
            return master, slave

        # Difference too small to decide: adjust memory once, then retry.
        if attempt == 0:
            resumeMem("50")
        try:
            time.sleep(2)
        except KeyboardInterrupt:
            raise RuntimeError
        attempt += 1
def usedUt(host, file, ut_stat, timeout=60):
    '''
    Start a command in the background and wait until the
    "Resource.ut = Total: ..." value of host exceeds ut_stat.

    host:    host name queried with "jhosts -l <host>".
    file:    command line to start; newlines are removed and "&" is
             appended before it is handed to os.system().
    ut_stat: threshold; a surrounding "%" and whitespace are ignored.
    timeout: number of polls (one per second) and the timeout handed to
             execCommand.

    Returns the list of regex matches once the threshold is exceeded (after
    an additional 10 second sleep). Returns None when all polls are used up.
    Raises RuntimeError when os.system() reports a non-zero status or when
    a sleep is interrupted with Ctrl-C.
    '''
    print("begin usedUt")
    timeout = int(timeout)
    if os.system(file.replace('\n', '') + "&"):
        raise RuntimeError

    for attempt in range(timeout):
        print(attempt)
        stdout, stderr, exitcode = execCommand("jhosts -l %s" % host, timeout)
        ut_tmp = re.findall(
            r'\bResource.ut\s*=\s*Total:\s*(.+)\s*,\s*Reserved:', stdout)
        print("ut_tmp %s" % ut_tmp)
        print("ut_stat %s" % ut_stat)
        current = ut_tmp[0].strip().strip("%")
        ut_stat = str(ut_stat).strip().strip("%")
        reached = int(current) > int(ut_stat)

        # Long pause after success, short pause before the next poll.
        pause = 10 if reached else 1
        try:
            time.sleep(pause)
        except KeyboardInterrupt:
            raise RuntimeError
        if reached:
            return ut_tmp
def getServicePid(serv, cmd='jservice list', timeout=60):
    '''
    Look up the PID of a service in the output of cmd.

    The output is split on the text 'Service:' and the first piece that
    contains serv is searched for a "PID = <digits>" line.

    serv:    text identifying the service section.
    cmd:     command to run; newlines are removed first.
    timeout: passed to execCommand (converted with int()).

    Returns the PID as a string, or an empty list when the matching section
    has no PID line.
    Raises RuntimeError when the command exits non-zero or when no section
    contains serv.
    '''
    timeout = int(timeout)
    cmd = cmd.replace("\n", "")
    stdout, stderr, exitcode = execCommand(cmd, timeout)
    if exitcode != 0:
        print("execute the command of %s failed " % cmd)
        raise RuntimeError

    sections = stdout.strip("\n").strip("").split('Service:')
    print(sections)
    for section in sections:
        if serv not in section:
            continue
        print(section)
        pids = re.findall(r'\n\s*PID\s*=\s*(\d+)', section)
        print(pids)
        if not pids:
            return pids
        return pids[0].strip("\n").strip(" ")

    # No section mentioned the requested service.
    print("execute the command of %s failed " % cmd)
    raise RuntimeError
def getJobOutput(job):
    '''
    Run "jctrl peek <jobId>" as the job's user and return its stdout with
    every '<< output from stdout >>' marker removed.

    job: object providing the attributes jobId and jobUser.
    '''
    job_id, job_user = job.jobId, job.jobUser
    peek_cmd = "su %s -c 'jctrl peek %s'" % (job_user, job_id)
    output, _stderr, _exitcode = execCommand(peek_cmd, timeout=60)
    marker = '<< output from stdout >>'
    return output.replace(marker, '')
def queryMsgInfo(cmd):
    '''
    Run cmd (newlines removed) and load its stdout into a new msgLib object.

    Returns the msgLib object, or None when the command printed nothing.
    '''
    output, _stderr, _exitcode = execCommand(cmd.replace('\n', ''))
    if not output:
        return None
    info = msgLib()
    info.setBasicInfo(output)
    return info
def checkHistContain(cmd, num, checkTime=60):
    '''
    Re-run cmd until its stdout contains the text
    "Pending: Requeue the job for the next run" exactly num times.

    cmd:       command to run; newlines are removed first.
    num:       expected number of occurrences, converted with int().
    checkTime: number of failed checks allowed; each failed check is
               followed by a 1 second sleep.

    Returns the stdout of the run that matched.
    Raises RuntimeError when the checks are used up or on Ctrl-C.
    '''
    marker = "Pending: Requeue the job for the next run"
    checktime = int(checkTime)
    num = int(num)
    cmd = cmd.replace("\n", "")
    try:
        while True:
            print(checktime)
            stdout, stderr, exitcode = execCommand(cmd, timeout=60)
            print("%s %s %s" % (stdout, stderr, exitcode))
            # Counting directly (instead of find() followed by count()) also
            # lets an expected count of 0 succeed.
            num1 = stdout.count(marker)
            print(num1)
            if num1 == num:
                print(stdout)
                return stdout
            time.sleep(1)
            checktime -= 1
            # "<= 0" so that a non-positive checkTime cannot loop forever.
            if checktime <= 0:
                print("check string of 'Pending: Requeue the job for the next run' failed")
                raise RuntimeError
    except KeyboardInterrupt:
        print('')
        raise RuntimeError
def killUtProc(cmd, timeout=60):
    '''
    Repeatedly kill the first process listed by "ps -ef|grep <cmd>" until
    the match count drops to two or fewer.

    cmd:     text handed to grep; newlines are removed first.
    timeout: passed to execCommand for the counting command.

    Returns None when nothing is left to kill.
    Raises RuntimeError when the PID lookup exits non-zero.
    '''
    timeout = int(timeout)
    pattern = cmd.replace('\n', '')
    count_cmd = "ps -ef|grep %s " % pattern + " -wc"
    first_pid_cmd = "ps -ef|grep %s |awk '{print $2}'|sed -n '1p'" % pattern
    while True:
        stdout0, stderr0, exitcode0 = execCommand(count_cmd, timeout)
        # NOTE(review): two matches are discounted, presumably the grep
        # process and the shell running the pipeline - confirm.
        remaining = int(stdout0) - 2
        if remaining <= 0:
            return
        stdout, stderr, exitcode = execCommand(first_pid_cmd)
        if exitcode:
            raise RuntimeError
        victim = stdout.replace('\n', '').strip()
        killProcTree(victim, includingParent=True)
def checkRestartClusterStatus(checkTime=60):
    '''
    Wait until every "Status = ..." entry printed by both "jhosts stat -l"
    and "jhosts -l" is OK or CLOSED_FULL (case-insensitive).

    checkTime: number of failed checks allowed; each failed check is
               followed by a 1 second sleep.

    Returns "the cluster status is ok" once all entries are acceptable.
    Raises RuntimeError when the checks are used up or on Ctrl-C.
    '''
    spendTime = int(checkTime)
    accepted = ("OK", "CLOSED_FULL")
    status_pattern = r'\s+\bStatus\s*=\s*(.*)\s*\n'
    try:
        while True:
            print(spendTime)
            stdout, stderr, exitcode = execCommand("jhosts stat -l", timeout=60)
            print("jhosts stat -l:\n%s" % stdout)
            stat_list_a = re.findall(status_pattern, stdout)
            stdout, stderr, exitcode = execCommand("jhosts -l", timeout=60)
            print("jhosts -l:\n%s" % stdout)
            stat_list_b = re.findall(status_pattern, stdout)
            print(stat_list_a)
            print(stat_list_b)

            # Both listings must be non-empty, of equal length, and every
            # entry of BOTH must be acceptable. The old code compared
            # host_num_a with itself and tested stat_list_a twice, so the
            # "jhosts -l" statuses were never actually checked.
            if (stat_list_a and len(stat_list_a) == len(stat_list_b)
                    and all(status.upper() in accepted
                            for status in stat_list_a + stat_list_b)):
                time.sleep(2)
                print("the cluster status is ok")
                return "the cluster status is ok"

            time.sleep(1)
            spendTime -= 1
            # "<= 0" so that a non-positive checkTime cannot loop forever.
            if spendTime <= 0:
                print("check cluster status fail")
                raise RuntimeError
    except KeyboardInterrupt:
        print('')
        raise RuntimeError
def runCommandA(cmd_args, timeout=60, env=None, logger=None):
    '''
    Run cmd_args through execCommand and return all three results.

    cmd_args: command handed to execCommand.
    timeout:  passed to execCommand (converted with int()).
    env, logger: accepted but not used by this function.

    Returns the list [stderr, stdout, exitcode] (note: stderr comes first).
    '''
    timeout = int(timeout)
    stdout, stderr, exitcode = execCommand(cmd_args, timeout)
    print("the result of runCommand is \nstdout=%s\nstderr=%s\nexitcode=%s" % (
        stdout, stderr, exitcode))
    # The former "out = list[0]" subscripted the list type itself and raised
    # TypeError on every call; build the result list directly instead.
    return [stderr, stdout, exitcode]
def jadminSched(timeout=60):
    '''
    Run "echo y|jadmin schedreconfig" and report the outcome.

    timeout: passed to execCommand (converted with int()).

    Raises RuntimeError when the command exits non-zero.
    '''
    _out, _err, status = execCommand("echo y|jadmin schedreconfig", int(timeout))
    #print status
    if status == 0:
        print("execute jadmin schedreconfig sucessed")
        return
    print("execute jadmin schedreconfig failed")
    raise RuntimeError
def jadminJhds(timeout=60):
    '''
    Run "echo y|jadmin jhdsreconfig" and report the outcome.

    timeout: passed to execCommand (converted with int()).

    Raises RuntimeError when the command exits non-zero.
    '''
    _out, _err, status = execCommand("echo y|jadmin jhdsreconfig", int(timeout))
    #print status
    if status == 0:
        print("execute jadmin jhdsreconfig sucessed")
        return
    print("execute jadmin jhdsreconfig failed")
    raise RuntimeError
def queryUsrGroup(grpName):
    '''
    Run "jugroup -g <grpName>" and return a userGroupLib object filled from
    the command's stdout via setGrUserBasicInfo().
    '''
    group_cmd = "jugroup -g %s" % (grpName)
    output, _stderr, _exitcode = execCommand(group_cmd, timeout=60)
    group = userGroupLib()
    group.setGrUserBasicInfo(output)
    return group
def queryJobInfo(jobId, user="******"):
    '''
    Run "jjobs -l <jobId>" as user and return a jobLib object filled from
    the command's stdout via setBasicInfo().

    jobId: job identifier, converted with str().
    user:  account the command is run as through "su <user> -c".
    '''
    detail_cmd = "su %s -c 'jjobs -l %s'" % (user, str(jobId))
    output, _stderr, _exitcode = execCommand(detail_cmd, timeout=60)
    info = jobLib()
    info.setBasicInfo(output)
    return info
def queryUserJobInfo(command):
    '''
    Run command (newlines removed) and collect the text that follows every
    "Job ID:" up to the end of its line.

    Returns the list of captured strings (not job objects); the strings are
    not stripped.
    '''
    output, _stderr, _exitcode = execCommand(command.replace('\n', ''), timeout=60)
    job_ids = re.findall(r'(?<=\b)Job ID:(.*)\n', output)
    print(job_ids)
    return job_ids
def queryClusterInfo():
    '''
    Run "jcluster ;jversion" and return a clusterLib object filled from the
    combined stdout via setClusterInfo().
    '''
    output, _stderr, _exitcode = execCommand("jcluster ;jversion", timeout=60)
    cluster = clusterLib()
    cluster.setClusterInfo(output)
    return cluster
def jserviceRestartAll(timeout=60):
    '''
    Run "echo y|jservice restart all" and report the outcome.

    timeout: passed to execCommand (converted with int()).

    Raises RuntimeError when the command exits non-zero.
    '''
    _out, _err, status = execCommand("echo y|jservice restart all", int(timeout))
    #print status
    if status == 0:
        print("execute jservice restart all sucessed")
        return
    print("execute jservice restart all failed")
    raise RuntimeError
def getHostGroupMem(groupName):
    '''
    Run "jhostgroup -r <groupName>" and return the text of every
    "Hosts = ..." line found in its stdout.

    Returns a list of the captured strings.
    Raises RuntimeError when the command prints nothing on stdout.
    '''
    output, _stderr, _exitcode = execCommand(
        "jhostgroup -r %s" % groupName, timeout=60)
    if not output:
        raise RuntimeError
    return re.findall(r'\s*\bHosts\s*=\s*(.*)\s*\n', output)
def runCommand(cmd_args, timeout=60, env=None, logger=None):
    '''
    Run cmd_args through execCommand and return one stripped output stream.

    cmd_args: command handed to execCommand.
    timeout:  passed to execCommand (converted with int()).
    env, logger: accepted but not used by this function.

    Returns stripped stderr when stderr is non-empty (the exit code is
    printed as well), otherwise stripped stdout.
    '''
    out, err, code = execCommand(cmd_args, int(timeout))
    print("the result of runCommand is \nstdout=%s\nstderr=%s\nexitcode=%s" % (
        out, err, code))
    if not err:
        return out.strip()
    print(code)
    return err.strip()
def queryUserInfo(userName):
    '''
    Run "jusers <userName>" and return a userLib object filled from the
    command's stdout via setUsrBasicInfo().

    When userName is the empty string, "invalid user name" is printed and
    an unfilled userLib object is returned.
    '''
    user = userLib()
    # Validate before running anything: the command used to be executed even
    # for an empty name, although its output was then thrown away.
    if userName == '':
        print("invalid user name")
        return user
    stdout, stderr, exitcode = execCommand("jusers %s" % (userName), timeout=30)
    user.setUsrBasicInfo(stdout)
    return user
def queryAllUserGroup():
    '''
    Run "jusergroup -l ", take the name after every "Group:" in its stdout
    and return the list of queryUsrGroup() results, one per name.
    '''
    # the group names are taken from the command output
    output, _stderr, _exitcode = execCommand("jusergroup -l ", timeout=60)
    group_names = re.findall(r'\bGroup:\s+(.+)\s*\n', output)
    return [queryUsrGroup(name) for name in group_names]
def getJobIdAll(command):
    '''
    Run command piped through "sed 1d|awk '{print $1}'" and return the
    first field of every output line except the first.

    command: command to run; newlines are removed first.

    Returns a list of stripped strings, or an empty list when the pipeline
    exits non-zero or prints nothing.
    '''
    cmd_tmp = command.replace('\n', '')
    stdout, stderr, exitcode = execCommand(
        "%s|sed 1d|awk '{print $1}'" % cmd_tmp, timeout=60)
    print("%s %s %s" % (stdout, stderr, exitcode))
    if exitcode == 0 and stdout:
        print(stdout)
        # The former "for i in lista: i.strip()" threw the stripped value
        # away; build the stripped list explicitly.
        return [line.strip() for line in stdout.strip().split("\n")]
    return []
def checkUt(host):
    '''
    Run "jhosts -l <host>" and return the first
    "Resource.ut = Total: ..." value found in its stdout.

    Returns the captured string, or an empty list when the line is absent.
    Raises RuntimeError when the command writes anything to stderr.
    '''
    output, errors, _exitcode = execCommand("jhosts -l %s" % host)
    if errors:
        raise RuntimeError
    ut_tmp = re.findall(
        r'\bResource.ut\s*=\s*Total:\s*(.+)\s*,\s*Reserved:', output)
    return ut_tmp[0] if ut_tmp else ut_tmp
def QueryAllHostInfo():
    '''
    Run "jhosts -l ", take the text after every "Host:" in its stdout and
    return the list of QueryHostInfo() results, one per host name.
    '''
    output, _stderr, _exitcode = execCommand("jhosts -l ", timeout=60)
    host_names = re.findall(r'\bHost:\s*(.*)\n', output)
    return [QueryHostInfo(name) for name in host_names]
def querySubmitInfo(string):
    '''
    Run a job submission command and return a jobLib object filled from its
    stdout via setSubmitJob(). Both stdout and stderr are printed.

    E.g:
    string = 'jsub -P "test_project" -J "test_name" -R "rusage[mem=100]" sleep 10000'
    '''
    output, errors, _exitcode = execCommand(string.replace('\n', ''), timeout=60)
    print(output)
    print(errors)
    submitted = jobLib()
    submitted.setSubmitJob(output)
    return submitted
def QueryHostStaticInfo(hostname):
    '''
    Run "jhhosts metrics <hostname>" and return a hostLib object filled
    from its stdout via setHostStatBasInfo().

    Returns the empty string "" when the command exits non-zero.
    '''
    output, _stderr, status = execCommand(
        "jhhosts metrics %s" % hostname, timeout=60)
    if status:
        return ""
    info = hostLib()
    info.setHostStatBasInfo(output)
    return info
def getAndCheckJobExecHost(jobid, timeout=30):
    '''
    Poll a job once per second until getJobExecHost() reports an execution
    host for it.

    jobid:   job identifier.
    timeout: converted with int(); timeout - 1 polls are made.

    Each poll prints the stdout of "jjobs -l <jobid>" and then evaluates
    getJobExecHost(queryJobInfo(jobid)).

    Returns the first truthy execution host.
    Raises IndexError when no poll produced one (including the case where
    timeout is so small that no poll is made at all).
    '''
    timeout = int(timeout)
    cmd = "jjobs -l " + str(jobid)
    for _attempt in range(1, timeout):
        stdout, stderr, exitcode = execCommand(cmd, timeout=60)
        print(stdout)
        exec_host = getJobExecHost(queryJobInfo(jobid))
        if exec_host:
            return exec_host
        time.sleep(1)
    # Raised after the loop so that timeout <= 1 (no iterations) fails loudly
    # instead of returning None.
    raise IndexError("cannot get the exec host %s" % jobid)