Exemple #1
0
def checkStatShareResTotal(res_name,num,timeout=30):
    res_name=str(res_name)
    timeout=int(timeout)
    num=int(num)
    try:
        while timeout>0:
            stdout0, stderr0, exitcode0=execCommand("ps -ef|grep $JHSCHEDULER_TOP")
            print stdout0, stderr0, exitcode0
            print timeout
            stdout2, stderr2, exitcode2=execCommand("jhosts -s",timeout=30)
            print stdout2, stderr2, exitcode2
            stdout, stderr, exitcode=execCommand("jhosts -s %s|sed -n '$p'|awk '{print $2}'"%res_name,timeout=30)
            print "stdout=%s,stderr=%s,exitcode=%s"%(stdout, stderr, exitcode)
            if stdout and stdout.strip() and stdout.strip().isdigit() and int(stdout.strip())==num:
                return stdout
            else:
                timeout=timeout-1
                try:
                    time.sleep(2)
                except KeyboardInterrupt:
                    print ''
                    raise RuntimeError
    except KeyboardInterrupt:
        print ''
        raise RuntimeError
    if timeout==0:
        raise RuntimeError
Exemple #2
0
def checkClusterStatus(hoststatus='ok', checkTime=60):
    '''
    check cluster all of hosts status is ok in the special time.
    '''
    spendTime = int(checkTime)
    try:
        while True:
            print spendTime
            stdout0, stderr0, exitcode0 = execCommand("jjobs -u all",
                                                      timeout=60)
            print "this is jjobs -u all output:begin"
            print stdout0, stderr0, exitcode0
            print "jjobs -u all:end"
            stdout, stderr, exitcode = execCommand("jhosts stat -l",
                                                   timeout=60)
            print "jhosts stat -l:\n%s" % stdout
            stat_list_a = re.findall(r'\s+\bStatus\s*=\s*(.*)\s*\n', stdout)
            stdout, stderr, exitcode = execCommand("jhosts -l", timeout=60)
            print "jhosts -l:\n%s" % stdout
            stat_list_b = re.findall(r'\s+\bStatus\s*=\s*(.*)\s*\n', stdout)
            print stat_list_a
            print stat_list_b
            host_num_a = 0
            host_num_b = 0
            host_num_a = len(stat_list_a)
            host_num_b = len(stat_list_b)
            if host_num_a != host_num_a or host_num_a == 0 or host_num_b == 0:
                time.sleep(1)
                spendTime -= int(1)
            else:
                i = 0
                while i < host_num_a:
                    if stat_list_a[i].upper() == str(hoststatus).upper(
                    ) and stat_list_b[i].upper() == str(hoststatus).upper():
                        i = i + 1
                        if i == host_num_a:
                            try:
                                time.sleep(2)
                            except KeyboardInterrupt:
                                print ''
                                raise RuntimeError
                            print "the cluster status is ok"
                            return "cluster_ok"
                    else:
                        try:
                            time.sleep(1)
                        except KeyboardInterrupt:
                            print ''
                            raise RuntimeError
                        spendTime -= int(1)
                        break
            if spendTime == int(0):
                print "check cluster status fail"
                raise RuntimeError
    except KeyboardInterrupt:
        print ''
        raise RuntimeError
Exemple #3
0
def postJadmin(timeout=60):
    '''
    kill all jobs, then run 'jadmin hopen all', 'jadmin qopen all' and
    'jadmin qact all' as user jhadmin.

    All three commands are always executed; for each one that exits with a
    non-zero code its own stderr is printed (previously only the stderr of
    the first command was shown, whichever command had failed).

    timeout: timeout passed to every command (converted with int()).
    '''
    timeout = int(timeout)
    killAllJob()
    for action in ("hopen", "qopen", "qact"):
        stdout, stderr, exitcode = execCommand(
            "su jhadmin -c 'jadmin %s all'" % action, timeout)
        if exitcode:
            print "the error info of postJadmin is %s" % stderr
Exemple #4
0
def getStatShareResTotal(res):
    res=str(res)
    #this code for test:total begin
    stdoutx, stderrx, exitcodex=execCommand("jhosts -s %s'"%res,timeout=20)
    print stdoutx, stderrx, exitcodex
    #this code for test:total end
    stdout, stderr, exitcode=execCommand("jhosts -s %s|sed -n '$p'|awk '{print $2}'"%res,timeout=60)
    if exitcode:
        print 'exec "jhosts -s %s" failed'%res
        raise RuntimeError
    else:
        return stdout.strip()
Exemple #5
0
def compareMem(master, slave, timeout=60):
    '''
    the first host must be the one that run the autotest.
    '''
    print "begin compareMem"
    i = int(0)
    timeout = int(timeout)
    while True:
        cmd1 = "jhosts -l %s" % master
        stdout, stderr, exitcode = execCommand(cmd1, timeout)
        cmd2 = "jhosts -l %s" % slave
        stdout2, stderr2, exitcode2 = execCommand(cmd2, timeout)
        print stderr, stderr2
        if stderr or stderr2:
            raise RuntimeError
        else:
            ut_tmp = re.findall(
                r'\bResource.mem\s*=\s*Total:\s*(.+)\s*,\s*Reserved:', stdout)
            ut_tmp2 = re.findall(
                r'\bResource.mem\s*=\s*Total:\s*(.+)\s*,\s*Reserved:', stdout2)
            checkRange = int(5)
            ut1 = bit_change(str(ut_tmp[0]))
            ut2 = bit_change(str(ut_tmp2[0]))
            if (ut1 > int(-1)) and (ut2 > int(-1)):
                res_compare = (ut2 - ut1)
                if res_compare >= int(0):
                    if res_compare > checkRange:
                        return slave, master
                    else:
                        if i == int(0):
                            resumeMem("50")
                        try:
                            time.sleep(2)
                        except KeyboardInterrupt:
                            raise RuntimeError
                else:
                    if res_compare < int(-checkRange):
                        return master, slave
                    else:
                        if i == int(0):
                            resumeMem("50")
                        try:
                            time.sleep(2)
                        except KeyboardInterrupt:
                            raise RuntimeError
            else:
                raise checkError("the format of param is error")
            i = i + 1
Exemple #6
0
def usedUt(host, file, ut_stat, timeout=60):
    print "begin usedUt"
    timeout = int(timeout)
    cmd = file.replace('\n', '') + "&"
    exitcode = os.system(cmd)
    if exitcode:
        raise RuntimeError
    i = int(0)
    while i < timeout:
        print i
        stdout, stderr, exitcode = execCommand("jhosts -l %s" % host, timeout)
        ut_tmp = re.findall(
            r'\bResource.ut\s*=\s*Total:\s*(.+)\s*,\s*Reserved:', stdout)
        print "ut_tmp %s" % ut_tmp
        print "ut_stat %s" % ut_stat
        ut = ut_tmp[0].strip().strip("%")
        ut_stat = str(ut_stat).strip().strip("%")
        if int(ut) > int(ut_stat):
            try:
                time.sleep(10)
            except KeyboardInterrupt:
                raise RuntimeError
            return ut_tmp
        else:
            try:
                time.sleep(1)
            except KeyboardInterrupt:
                raise RuntimeError
            i = i + 1
Exemple #7
0
def getServicePid(serv, cmd='jservice list', timeout=60):
    timeout = int(timeout)
    cmd = cmd.replace("\n", "")
    stdout, stderr, exitcode = execCommand(cmd, timeout)
    if exitcode == 0:
        service_info = stdout.strip("\n").strip("").split('Service:')
        print service_info
        length = len(service_info)
        j = 0
        #print length
        for i in range(0, length):
            if serv in service_info[i]:
                print service_info[i]
                pid = re.findall(r'\n\s*PID\s*=\s*(\d+)', service_info[i])
                print pid
                if pid:
                    return pid[0].strip("\n").strip(" ")
                else:
                    return pid
            else:
                j = j + 1
        if j == length:
            print "execute the command of %s failed " % cmd
            raise RuntimeError
    else:
        print "execute the command of %s failed " % cmd
        raise RuntimeError
Exemple #8
0
def getJobOutput(job):
    '''
    return the stdout of 'jctrl peek <jobId>' executed as the job's user,
    with the '<< output from stdout >>' marker removed.

    job: object providing the attributes jobId and jobUser.
    '''
    job_id, owner = job.jobId, job.jobUser
    peek_out, peek_err, peek_code = execCommand(
        "su %s -c 'jctrl peek %s'" % (owner, job_id), timeout=60)
    return peek_out.replace('<< output from stdout >>', '')
Exemple #9
0
def queryMsgInfo(cmd):
    '''
    run cmd (newlines removed) and return a msgLib object filled from its
    stdout via setBasicInfo(); return None when stdout is empty.
    '''
    output, errors, status = execCommand(cmd.replace('\n', ''))
    if not output:
        return None
    info = msgLib()
    info.setBasicInfo(output)
    return info
Exemple #10
0
def checkHistContain(cmd, num, checkTime=60):
    '''
    check host status in the special time.
    E.g:hostName = 'win7' status = 'closed' checkTime = 10
    '''
    checktime = int(checkTime)
    num = int(num)
    cmd = cmd.replace("\n", "")
    try:
        while True:
            print checktime
            stdout, stderr, exitcode = execCommand(cmd, timeout=60)
            print stdout, stderr, exitcode
            if stdout.find("Pending: Requeue the job for the next run") != -1:
                num1 = stdout.count(
                    "Pending: Requeue the job for the next run")
                print num1
                if num1 != num:
                    time.sleep(1)
                    checktime -= int(1)
                else:
                    print num1
                    print stdout
                    return stdout
            else:
                time.sleep(1)
                checktime -= int(1)
            if checktime == int(0):
                print "check string of 'Pending: Requeue the job for the next run' failed"
                raise RuntimeError
    except KeyboardInterrupt:
        print ''
        raise RuntimeError
Exemple #11
0
def killUtProc(cmd, timeout=60):
    '''
    kill the processes whose 'ps -ef' line matches cmd until none is left.

    Each round counts the matching lines with grep -wc; while the count
    minus 2 is positive, the PID in the first matching line is passed to
    killProcTree() together with includingParent=True.
    NOTE(review): the 2 presumably accounts for the grep process and the
    shell that runs the pipeline - confirm.

    cmd: text to look for; newlines are removed.
    timeout: timeout of the counting command (converted with int()).

    raise RuntimeError when the PID lookup exits with a non-zero code.
    '''
    timeout = int(timeout)
    needle = cmd.replace('\n', '')
    count_cmd = "ps -ef|grep %s " % needle + " -wc"
    first_pid_cmd = "ps -ef|grep %s |awk '{print $2}'|sed -n '1p'" % needle
    while True:
        count_out, count_err, count_code = execCommand(count_cmd, timeout)
        if int(count_out) - 2 <= 0:
            return
        pid_out, pid_err, pid_code = execCommand(first_pid_cmd)
        if pid_code:
            raise RuntimeError
        killProcTree(pid_out.replace('\n', '').strip(), includingParent=True)
Exemple #12
0
def checkRestartClusterStatus(checkTime=60):
    '''
    check cluster all of hosts status is ok in the special time.
    '''
    spendTime = int(checkTime)
    try:
        while True:
            print spendTime
            stdout, stderr, exitcode = execCommand("jhosts stat -l",
                                                   timeout=60)
            print "jhosts stat -l:\n%s" % stdout
            stat_list_a = re.findall(r'\s+\bStatus\s*=\s*(.*)\s*\n', stdout)
            stdout, stderr, exitcode = execCommand("jhosts -l", timeout=60)
            print "jhosts -l:\n%s" % stdout
            stat_list_b = re.findall(r'\s+\bStatus\s*=\s*(.*)\s*\n', stdout)
            print stat_list_a
            print stat_list_b
            host_num_a = 0
            host_num_b = 0
            host_num_a = len(stat_list_a)
            host_num_b = len(stat_list_b)
            if host_num_a != host_num_a or host_num_a == 0 or host_num_b == 0:
                time.sleep(1)
                spendTime -= int(1)
            else:
                i = 0
                while i < host_num_a:
                    if (stat_list_a[i].upper() == "OK"
                            or stat_list_a[i].upper() == "CLOSED_FULL") and (
                                stat_list_a[i].upper() == "OK"
                                or stat_list_a[i].upper() == "CLOSED_FULL"):
                        i = i + 1
                        if i == host_num_a:
                            time.sleep(2)
                            print "the cluster status is ok"
                            return "the cluster status is ok"
                    else:
                        time.sleep(1)
                        spendTime -= int(1)
                        break
            if spendTime == int(0):
                print "check cluster status fail"
                raise RuntimeError
    except KeyboardInterrupt:
        print ''
        raise RuntimeError
Exemple #13
0
def runCommandA(cmd_args, timeout=60, env=None, logger=None):
    timeout = int(timeout)
    out = list[0]
    stdout, stderr, exitcode = execCommand(cmd_args, timeout)
    print "the result of runCommand is \nstdout=%s\nstderr=%s\nexitcode=%s" % (
        stdout, stderr, exitcode)
    out[0] = stderr
    out[1] = stdout
    out[2] = exitcode
    return out
Exemple #14
0
def jadminSched(timeout=60):
    timeout = int(timeout)
    stdout, stderr, result2 = execCommand("echo y|jadmin schedreconfig",
                                          timeout)
    #print result2
    if result2 != 0:
        print "execute jadmin schedreconfig failed"
        raise RuntimeError
    else:
        print "execute jadmin schedreconfig sucessed"
Exemple #15
0
def jadminJhds(timeout=60):
    timeout = int(timeout)
    stdout, stderr, result1 = execCommand("echo y|jadmin jhdsreconfig",
                                          timeout)
    #print result1
    if result1 != 0:
        print "execute jadmin jhdsreconfig failed"
        raise RuntimeError
    else:
        print "execute jadmin jhdsreconfig sucessed"
def queryUsrGroup(grpName):
    '''
    run 'jugroup -g <grpName>' and return a userGroupLib object filled from
    the command's stdout via setGrUserBasicInfo().
    '''
    group_cmd = "jugroup -g %s" % (grpName)
    group_out, group_err, group_code = execCommand(group_cmd, timeout=60)
    group = userGroupLib()
    group.setGrUserBasicInfo(group_out)
    return group
Exemple #17
0
def queryJobInfo(jobId, user="******"):
    '''
    run 'jjobs -l <jobId>' as the given user and return a jobLib object
    filled from the command's stdout via setBasicInfo().
    '''
    listing, errors, status = execCommand(
        "su %s -c 'jjobs -l %s'" % (user, str(jobId)), timeout=60)
    job_obj = jobLib()
    job_obj.setBasicInfo(listing)
    return job_obj
Exemple #18
0
def queryUserJobInfo(command):
    '''
    query the output of command 'jjobs -l' and return a list of all job objects.
    '''
    cmd_tmp = command.replace('\n', '')
    stdout, stderr, exitcode = execCommand(cmd_tmp, timeout=60)
    #print exitcode
    jobId = re.findall(r'(?<=\b)Job ID:(.*)\n', stdout)
    print jobId
    return jobId
def queryClusterInfo():
    '''
    run 'jcluster ;jversion' and return a clusterLib object filled from the
    combined stdout via setClusterInfo().
    '''
    output, errors, status = execCommand("jcluster ;jversion", timeout=60)
    cluster = clusterLib()
    cluster.setClusterInfo(output)
    return cluster
Exemple #20
0
def jserviceRestartAll(timeout=60):
    timeout = int(timeout)
    stdout, stderr, result = execCommand("echo y|jservice restart all",
                                         timeout)
    #print result2
    if result != 0:
        print "execute jservice restart all failed"
        raise RuntimeError
    else:
        print "execute jservice restart all sucessed"
Exemple #21
0
def getHostGroupMem(groupName):
    '''
    run 'jhostgroup -r <groupName>' and return the list of values found on
    the "Hosts = ..." lines of its stdout (one string per line).

    raise RuntimeError when the command prints nothing to stdout.
    '''
    output, errors, status = execCommand("jhostgroup -r %s" % groupName,
                                         timeout=60)
    if not output:
        raise RuntimeError
    return re.findall(r'\s*\bHosts\s*=\s*(.*)\s*\n', output)
Exemple #22
0
def runCommand(cmd_args, timeout=60, env=None, logger=None):
    timeout = int(timeout)
    stdout, stderr, exitcode = execCommand(cmd_args, timeout)
    print "the result of runCommand is \nstdout=%s\nstderr=%s\nexitcode=%s" % (
        stdout, stderr, exitcode)
    if stderr:
        print exitcode
        stderr = stderr.strip()
        return stderr
    else:
        stdout = stdout.strip()
        return stdout
Exemple #23
0
def queryUserInfo(userName):
    '''
    get user info from the param 'userName' and return a user object.
    '''
    user = userLib()
    stdout, stderr, exitcode = execCommand("jusers %s" % (userName),
                                           timeout=30)
    if userName != '':
        user.setUsrBasicInfo(stdout)
    else:
        print "invalid user name"
    return user
def queryAllUserGroup():
    '''
    run 'jusergroup -l', take the name from every "Group: <name>" line of
    its stdout and return the list of queryUsrGroup(<name>) results, in
    output order.
    '''
    listing, errors, status = execCommand("jusergroup -l ", timeout=60)
    group_names = re.findall(r'\bGroup:\s+(.+)\s*\n', listing)
    return [queryUsrGroup(name) for name in group_names]
Exemple #25
0
def getJobIdAll(command):
    lista = []
    cmd_tmp = command.replace('\n', '')
    stdout, stderr, exitcode = execCommand("%s|sed 1d|awk '{print $1}'" %
                                           cmd_tmp,
                                           timeout=60)
    print stdout, stderr, exitcode
    if exitcode == 0 and stdout:
        print stdout
        lista = stdout.strip().split("\n")
        for i in lista:
            i.strip()
    return lista
Exemple #26
0
def checkUt(host):
    '''
    run 'jhosts -l <host>' and return the first "Resource.ut ... Total"
    value found in its stdout (as matched, not stripped), or an empty list
    when there is none.

    raise RuntimeError when the command writes anything to stderr.
    '''
    output, errors, status = execCommand("jhosts -l %s" % host)
    if errors:
        raise RuntimeError
    ut_hits = re.findall(
        r'\bResource.ut\s*=\s*Total:\s*(.+)\s*,\s*Reserved:', output)
    return ut_hits[0] if ut_hits else ut_hits
Exemple #27
0
def QueryAllHostInfo():
    '''
    run 'jhosts -l ', take the name from every "Host: <name>" line of its
    stdout and return the list of QueryHostInfo(<name>) results, in output
    order.
    '''
    listing, errors, status = execCommand("jhosts -l ", timeout=60)
    host_names = re.findall(r'\bHost:\s*(.*)\n', listing)
    return [QueryHostInfo(name) for name in host_names]
Exemple #28
0
def querySubmitInfo(string):
    '''
    query the output of command 'jsub job' and return a job object.
    E.g:
    string = 'jsub -P "test_project" -J "test_name" -R "rusage[mem=100]" sleep 10000'
    '''
    cmd = string.replace('\n', '')
    stdout, stderr, exitcode = execCommand(cmd, timeout=60)
    print stdout
    print stderr
    job = jobLib()
    job.setSubmitJob(stdout)
    return job
Exemple #29
0
def QueryHostStaticInfo(hostname):
    '''
    run 'jhhosts metrics <hostname>' and return a hostLib object filled
    from its stdout via setHostStatBasInfo(); return "" when the command
    exits with a non-zero code.
    NOTE(review): other helpers in this file call 'jhosts'; confirm that
    'jhhosts' is the intended command name.
    '''
    output, errors, status = execCommand("jhhosts metrics %s" % hostname,
                                         timeout=60)
    if status:
        return ""
    info = hostLib()
    info.setHostStatBasInfo(output)
    return info
Exemple #30
0
def getAndCheckJobExecHost(jobid, timeout=30):
    timeout = int(timeout)
    for i in range(1, timeout):
        cmd = "jjobs -l " + str(jobid)
        stdout, stderr, exitcode = execCommand(cmd, timeout=60)
        print stdout
        job = queryJobInfo(jobid)
        exec_host = getJobExecHost(job)
        if exec_host:
            return exec_host
        else:
            time.sleep(1)
        if i == (timeout - 1):
            raise IndexError("cannot get the exec host %s" % jobid)