Beispiel #1
0
def main(args):
    """Run the pause/resume-OSD case while client IO is active.

    Sequence (as announced by the step log lines): confirm the cluster
    reports ``HEALTH_OK``, make sure IO is running on every client, pause
    the OSDs, re-check cluster health (with one retry), then resume the
    OSDs.  Client IO is checked for errors after every step.

    ``caseDescription`` and ``timeOut`` are module-level names defined
    outside this function.

    Args:
        args: Passed through unchanged to ``base.getClusterObj``.

    Exits the process with status -1 (via ``exit``) when the cluster is not
    ``HEALTH_OK`` before the case starts, or is still not ``HEALTH_OK`` on
    the second check after pausing.
    """
    # The case name is this script's file name without its extension.
    scriptPath = inspect.getfile(inspect.currentframe())
    caseName = os.path.basename(scriptPath).split('.')[0]
    log = logging.getLogger(caseName)
    log.info(caseDescription)
    clusterObj = base.getClusterObj(caseName, args)
    nodeObj = clusterObj.getFirstAvaNode(caseName)

    def verifyClientIO():
        # Pass 1: check every client for IO errors.
        for ioClient in clusterObj.getClients():
            ioClient.checkIOError(caseName)
        # Pass 2: (re)start IO on clients whose IO process check fails.
        for ioClient in clusterObj.getClients():
            if ioClient.checkIOProcess(caseName) == "error":
                base.startIO(caseName, ioClient, 'nbd')

    log.info("start to check cluster status before case running")
    status = clusterObj.getStatus(caseName, nodeObj, timeOut)
    if status != 'HEALTH_OK':
        log.error("health status is error")
        exit(-1)
    log.info("health status is OK")

    log.info("\nStep1: start IO from clients")
    verifyClientIO()
    sleep(60)

    log.info("\nStep2: pause all osds")
    clusterObj.pauseOsd(caseName)
    status = clusterObj.getStatus(caseName, nodeObj, timeOut)
    if status == 'HEALTH_OK':
        log.info("pause cluster successfully")
    else:
        log.error("status is %s" % status)
        log.error("print log for another 10 minutes")
        # Second and last chance for the cluster to settle.
        status = clusterObj.getStatus(caseName, nodeObj, timeOut)
        if status != 'HEALTH_OK':
            log.error("%s  runs failed" % caseName)
            exit(-1)
        log.info("resume cluster successfully")
    verifyClientIO()

    log.info("\nStep3: resume all osds")
    clusterObj.resumeOsd(caseName)
    verifyClientIO()
    log.info("\nCase runs successfully")
def main(args):
    """Stop and restart every OSD, one at a time, while client IO runs.

    For each OSD of each node: record the OSD pids on the node, shut the
    OSD down, check client IO, start the OSD again, wait for it to come up,
    then require the cluster to report ``HEALTH_OK`` (one retry allowed).

    ``caseDescription`` and ``timeOut`` are module-level names defined
    outside this function.

    Changes compared with the previous revision:
      * "cannot start" is logged only when the OSD is really still down
        after the retries (the old ``tryCount == 10`` test also fired when
        the 10th retry succeeded).
      * A successful second health check no longer ``break``s out of the
        OSD loop; the remaining OSDs of the node are still exercised and
        the client IO check still runs.

    Args:
        args: Passed through unchanged to ``base.getClusterObj``.

    Exits the process with status -1 (via ``exit``) when the cluster is not
    ``HEALTH_OK`` before the case starts, or stays unhealthy after the
    second status check that follows an OSD restart.
    """
    # The case name is this script's file name without its extension.
    caseName = os.path.basename(inspect.getfile(
        inspect.currentframe())).split('.')[0]
    log = logging.getLogger(caseName)
    log.info(caseDescription)
    log.info("the timeout is %d" % timeOut)
    clusterObj = base.getClusterObj(caseName, args)
    clusterObj.initOsdProcess(caseName)
    nodeList = clusterObj.getNodes()

    def checkClientIOErrors():
        for client in clusterObj.getClients():
            client.checkIOError(caseName)

    def restartFailedClientIO():
        for client in clusterObj.getClients():
            if client.checkIOProcess(caseName) == "error":
                base.startIO(caseName, client, 'nbd')

    log.info("start to check cluster status before case running")
    status = clusterObj.getStatus(caseName,
                                  clusterObj.getFirstAvaNode(caseName),
                                  timeOut)
    if status != 'HEALTH_OK':
        log.error("health status is error")
        exit(-1)
    log.info("health status is OK")

    log.info("\nStep 1: start IO from clients")
    checkClientIOErrors()
    restartFailedClientIO()
    sleep(60)

    log.info("\nStep 2: stop osd and check IO")
    for nodeObj in nodeList:
        log.info("\nNow operate osd on %s" % (nodeObj.gethostName()))
        for osdObj in nodeObj.getOsds():
            log.info("\nNow operate " + osdObj.getid())

            # Stop the OSD service.
            log.info("Set the " + osdObj.getid() + " pid for kill")
            nodeObj.setOsdPid(caseName)
            log.info("shutdown " + osdObj.getid() + " by kill")
            osdObj.shutdown(caseName, nodeObj)
            checkClientIOErrors()

            # Start the OSD service again and poll until it is up.  The loop
            # keeps polling while checkIfOsdStart returns 0, so 0 is treated
            # as "not started yet".
            log.info("start " + osdObj.getid())
            osdObj.start(caseName, nodeObj)
            returnCode = osdObj.checkIfOsdStart(caseName, nodeObj)
            tryCount = 0
            while returnCode == 0 and tryCount < 10:
                returnCode = osdObj.checkIfOsdStart(caseName, nodeObj)
                tryCount += 1
            if returnCode == 0:
                # Still down after all retries.
                log.error("%s cannot start" % osdObj.getid())

            # Check ceph health.
            sleep(30)
            checkClientIOErrors()
            status = clusterObj.getStatus(caseName, nodeObj, timeOut)
            if status == 'HEALTH_OK':
                log.info("stop %s in cluster successfully" % osdObj.getid())
            else:
                log.error("status is %s" % status)
                log.error("print log for another 10 minutes")
                # Second and last chance for the cluster to recover.
                status = clusterObj.getStatus(caseName, nodeObj, timeOut)
                if status != 'HEALTH_OK':
                    log.error("%s  runs failed" % caseName)
                    exit(-1)
                log.info("stop %s in cluster successfully" % osdObj.getid())

            checkClientIOErrors()
            restartFailedClientIO()

    log.info("%s runs complete" % caseName)
def main(args):
    """Mark every OSD out and back in, one at a time, while client IO runs.

    For each OSD of each node: take the OSD out of the cluster, check
    client IO, record the OSD pids on the node, put the OSD back in, then
    require the cluster to report ``HEALTH_OK`` (one retry allowed).

    ``caseDescription`` and ``timeOut`` are module-level names defined
    outside this function.

    Change compared with the previous revision: a successful second health
    check no longer ``break``s out of the OSD loop, so the remaining OSDs
    of the node are still exercised and the client IO check still runs.

    Args:
        args: Passed through unchanged to ``base.getClusterObj``.

    Exits the process with status -1 (via ``exit``) when the cluster is not
    ``HEALTH_OK`` before the case starts, or stays unhealthy after the
    second status check that follows an out/in cycle.
    """
    # The case name is this script's file name without its extension.
    caseName = os.path.basename(inspect.getfile(inspect.currentframe())).split('.')[0]
    log = logging.getLogger(caseName)
    log.info(caseDescription)
    clusterObj = base.getClusterObj(caseName, args)
    nodeList = clusterObj.getNodes()

    def checkClientIOErrors():
        for client in clusterObj.getClients():
            client.checkIOError(caseName)

    def restartFailedClientIO():
        for client in clusterObj.getClients():
            if client.checkIOProcess(caseName) == "error":
                base.startIO(caseName, client, 'nbd')

    log.info("start to check cluster status before case running")
    status = clusterObj.getStatus(caseName, clusterObj.getFirstAvaNode(caseName), timeOut)
    if status != 'HEALTH_OK':
        log.error("health status is error")
        exit(-1)
    log.info("health status is OK")

    log.info("\nStep 1: start IO from clients")
    checkClientIOErrors()
    restartFailedClientIO()
    sleep(60)

    log.info("\nStep 2: Out the osd and check IO")
    for nodeObj in nodeList:
        for osdObj in nodeObj.getOsds():
            # Take the OSD out of the cluster.
            log.info("\nNow operate " + nodeObj.gethostName())
            log.info(len(nodeObj.getOsds()))
            log.info("\nNow operate " + osdObj.getid())
            log.info("out " + osdObj.getid())
            osdObj.outCluster(caseName, nodeObj)
            log.info("check if IO error")
            checkClientIOErrors()

            # Only the pids are recorded here; the OSD is not stopped in
            # this case.
            log.info("Set the " + osdObj.getid() + " pid for kill")
            nodeObj.setOsdPid(caseName)

            # Put the OSD back into the cluster.
            osdObj.inCluster(caseName, nodeObj)

            # Check ceph health.
            sleep(30)
            checkClientIOErrors()
            status = clusterObj.getStatus(caseName, nodeObj, timeOut)
            if status == 'HEALTH_OK':
                log.info("stop %s in cluster successfully" % osdObj.getid())
            else:
                log.error("status is %s" % status)
                log.error("print log for another 10 minutes")
                # Second and last chance for the cluster to recover.
                status = clusterObj.getStatus(caseName, nodeObj, timeOut)
                if status != 'HEALTH_OK':
                    log.error("%s  runs failed" % caseName)
                    exit(-1)
                log.info("stop %s in cluster successfully" % osdObj.getid())

            checkClientIOErrors()
            restartFailedClientIO()

    log.info("%s runs complete" % caseName)
    
    
                
def main(args):
    """Force-kill all OSDs and monitors, restart them, and verify recovery.

    Sequence: make sure client IO is running, require ``HEALTH_OK``,
    force-kill every OSD on every node, restart and then force-kill every
    monitor, check client IO, start all OSDs and monitors again, wait ten
    minutes, and finally require ``HEALTH_OK`` once more before stopping
    client IO.

    ``caseDescription`` and ``timeOut`` are module-level names defined
    outside this function.

    Change compared with the previous revision: the final health verdict
    now re-queries the cluster status after the recovery wait.  Previously
    it re-tested the ``status`` value captured *before* the fault
    injection, which was necessarily ``HEALTH_OK``, so the final check
    could never fail.

    Args:
        args: Passed through unchanged to ``base.getClusterObj``.

    Exits the process with status -1 (via ``exit``) when the cluster is not
    ``HEALTH_OK`` before the fault injection or after the recovery wait.
    """
    # The case name is this script's file name without its extension.
    caseName = os.path.basename(inspect.getfile(inspect.currentframe())).split('.')[0]
    log = logging.getLogger(caseName)
    log.info(caseDescription)
    clusterObj = base.getClusterObj(caseName, args)
    clusterObj.initOsdProcess(caseName)

    def checkClientIOErrors():
        for client in clusterObj.getClients():
            client.checkIOError(caseName)

    def restartFailedClientIO():
        for client in clusterObj.getClients():
            if client.checkIOProcess(caseName) == "error":
                base.startIO(caseName, client, 'nbd')

    checkClientIOErrors()
    restartFailedClientIO()
    sleep(60)

    log.info("start to check cluster status before case running")
    status = clusterObj.getStatus(caseName, clusterObj.getFirstAvaNode(caseName), timeOut)
    if status != 'HEALTH_OK':
        log.error("health status is error")
        exit(-1)
    log.info("health status is OK")

    # Fault injection: kill every OSD on every node.
    for nodeObj in clusterObj.getNodes():
        nodeObj.setOsdPid(caseName)
        for osdObj in nodeObj.getOsds():
            osdObj.forceKill(caseName, nodeObj)

    # Restart each monitor, then record its pid and force-kill it.
    for monObj in clusterObj.getMonitors():
        monObj.shutdown(caseName)
        monObj.start(caseName)
    for monObj in clusterObj.getMonitors():
        monObj.setMonPid(caseName)
        monObj.forceKill(caseName)

    # TBD: check IO
    checkClientIOErrors()
    restartFailedClientIO()

    # Recovery: bring all OSDs and monitors back.
    for nodeObj in clusterObj.getNodes():
        for osdObj in nodeObj.getOsds():
            osdObj.start(caseName, nodeObj)
    for monObj in clusterObj.getMonitors():
        monObj.start(caseName)

    log.info("sleep 10 mins to wait cluster recover")
    sleep(600)
    checkClientIOErrors()

    # Re-query the status so the verdict reflects the recovered cluster
    # rather than the pre-fault snapshot.
    status = clusterObj.getStatus(caseName, clusterObj.getFirstAvaNode(caseName), timeOut)
    if status != 'HEALTH_OK':
        log.error("health status is error")
        exit(-1)
    log.info("health status is OK")

    for client in clusterObj.getClients():
        base.stopIO(caseName, client)
    log.info("case runs complete")
        
        
        
Beispiel #5
0
def main(args):
    """Restart the first monitor while client IO is active.

    Sequence: require ``HEALTH_OK``, make sure IO is running on every
    client, record the pid of the first monitor, shut it down, start it
    again, then require ``HEALTH_OK`` (one retry allowed) and check client
    IO once more.

    ``caseDescription`` and ``timeOut`` are module-level names defined
    outside this function.

    Args:
        args: Passed through unchanged to ``base.getClusterObj``.

    Exits the process with status -1 (via ``exit``) when the cluster is not
    ``HEALTH_OK`` before the case starts, or stays unhealthy after the
    second status check that follows the monitor restart.
    """
    # The case name is this script's file name without its extension.
    scriptPath = inspect.getfile(inspect.currentframe())
    caseName = os.path.basename(scriptPath).split('.')[0]
    log = logging.getLogger(caseName)
    log.info(caseDescription)
    clusterObj = base.getClusterObj(caseName, args)

    def verifyClientIO():
        # Pass 1: check every client for IO errors.
        for ioClient in clusterObj.getClients():
            ioClient.checkIOError(caseName)
        # Pass 2: (re)start IO on clients whose IO process check fails.
        for ioClient in clusterObj.getClients():
            if ioClient.checkIOProcess(caseName) == "error":
                base.startIO(caseName, ioClient, 'nbd')

    log.info("start to check cluster status before case running")
    firstNode = clusterObj.getFirstAvaNode(caseName)
    status = clusterObj.getStatus(caseName, firstNode, timeOut)
    if status != 'HEALTH_OK':
        log.error("health status is error")
        exit(-1)
    log.info("health status is OK")

    nodeObj = clusterObj.getFirstAvaNode(caseName)
    verifyClientIO()
    sleep(60)

    firstMon = clusterObj.getMonitors()[0]
    firstMon.setMonPid(caseName)
    firstMon.shutdown(caseName)
    sleep(30)
    # TBD: check that the IO processes still exist while the monitor is
    # down (not implemented).
    firstMon.start(caseName)
    firstMon.checkIfMonStart(caseName)
    sleep(30)

    status = clusterObj.getStatus(caseName, nodeObj, timeOut)
    if status == 'HEALTH_OK':
        log.info("stop mon service on %s in cluster successfully" %
                 nodeObj.gethostName())
    else:
        log.error("status is %s" % status)
        log.error("%s  runs failed" % caseName)
        # Second and last chance for the cluster to recover.
        status = clusterObj.getStatus(caseName, nodeObj, timeOut)
        if status != 'HEALTH_OK':
            log.error("%s  runs failed" % caseName)
            exit(-1)
        log.info("stop in cluster successfully")

    verifyClientIO()
    log.info("\ncase runs complete")
Beispiel #6
0
def main(args):
    """Force-kill and restart three OSDs per node while client IO runs.

    For each node: record the OSD pids, force-kill the node's first three
    OSDs, check client IO, start those OSDs again, wait for each to come
    up, then require the cluster to report ``HEALTH_OK`` (one retry
    allowed).

    ``caseDescription`` and ``timeOut`` are module-level names defined
    outside this function.

    Changes compared with the previous revision:
      * The three copy-pasted kill/start/wait sequences are loops over the
        first three OSDs.  A node with fewer than three OSDs no longer
        raises ``IndexError`` half-way through (after some OSDs had
        already been killed); whatever OSDs exist are exercised.
      * "cannot start" is logged only when the OSD is really still down
        after the retries (the old ``tryCount == 10`` test also fired when
        the 10th retry succeeded).  The "cannot starte" typo present in
        two of the three copies is gone.

    Args:
        args: Passed through unchanged to ``base.getClusterObj``.

    Exits the process with status -1 (via ``exit``) when the cluster is not
    ``HEALTH_OK`` before the case starts, or stays unhealthy after the
    second status check that follows a node's OSD restart.
    """
    # The case name is this script's file name without its extension.
    caseName = os.path.basename(inspect.getfile(
        inspect.currentframe())).split('.')[0]
    log = logging.getLogger(caseName)
    log.info(caseDescription)
    clusterObj = base.getClusterObj(caseName, args)
    nodeList = clusterObj.getNodes()
    # stop osd process and start with ceph-osd -i
    clusterObj.initOsdProcess(caseName)

    def checkClientIOErrors():
        for client in clusterObj.getClients():
            client.checkIOError(caseName)

    def restartFailedClientIO():
        for client in clusterObj.getClients():
            if client.checkIOProcess(caseName) == "error":
                base.startIO(caseName, client, 'nbd')

    log.info("start to check cluster status before case running")
    status = clusterObj.getStatus(caseName,
                                  clusterObj.getFirstAvaNode(caseName),
                                  timeOut)
    if status != 'HEALTH_OK':
        log.error("health status is error")
        exit(-1)
    log.info("health status is OK")

    log.info("\nStep1: start IO from clients")
    checkClientIOErrors()
    restartFailedClientIO()
    sleep(60)

    log.info("\nStep2: kill three osds ")
    for nodeObj in nodeList:
        # Only the first three OSDs of the node take part in the case.
        targetOsds = nodeObj.getOsds()[:3]
        log.info("\nNow operate " + nodeObj.gethostName())

        # Stop the OSD services.
        nodeObj.setOsdPid(caseName)
        log.info("shutdown three osds on node " + nodeObj.gethostName())
        for osdObj in targetOsds:
            osdObj.forceKill(caseName, nodeObj)
        checkClientIOErrors()

        # Start the OSD services again.
        log.info("start osd on node " + nodeObj.gethostName())
        for osdObj in targetOsds:
            osdObj.start(caseName, nodeObj)

        # Poll each OSD until it is up.  The loop keeps polling while
        # checkIfOsdStart returns 0, so 0 is treated as "not started yet".
        for osdObj in targetOsds:
            returnCode = osdObj.checkIfOsdStart(caseName, nodeObj)
            tryCount = 0
            while returnCode == 0 and tryCount < 10:
                returnCode = osdObj.checkIfOsdStart(caseName, nodeObj)
                tryCount += 1
            if returnCode == 0:
                # Still down after all retries.
                log.error("%s cannot start" % osdObj.getid())

        # Check ceph health.
        sleep(30)
        checkClientIOErrors()
        status = clusterObj.getStatus(caseName, nodeObj, timeOut)
        if status == 'HEALTH_OK':
            log.info("stop three osds in cluster successfully")
        else:
            log.error("status is %s" % status)
            log.error("%s  runs failed" % caseName)
            # Second and last chance for the cluster to recover.
            status = clusterObj.getStatus(caseName, nodeObj, timeOut)
            if status != 'HEALTH_OK':
                log.error("%s  runs failed" % caseName)
                exit(-1)
            log.info("kill in cluster successfully")

        checkClientIOErrors()
        restartFailedClientIO()

    log.info("%s runs complete" % caseName)
def main(args):
    """Delete and re-create the OSDs of one node ten times under client IO.

    After the initial health check and IO start-up, every OSD on the first
    available node is force-killed and started again via ``userStart``.
    Then, ten times over: each OSD on that node is deleted (its disk is
    remembered), and an OSD is created again on each remembered disk.  The
    cluster must report ``HEALTH_OK`` after every delete and every create
    (one retry allowed each time).

    ``caseDescription`` and ``timeOut`` are module-level names defined
    outside this function.

    Change compared with the previous revision: the "create succesfully"
    log line now names the disk the OSD was just created on.  It used to
    print ``osdObj.getid()``, where ``osdObj`` was a leftover loop variable
    from the delete loop, i.e. always the last *deleted* OSD, or an
    unbound name if the node had no OSDs.

    Args:
        args: Passed through unchanged to ``base.getClusterObj``.

    Returns:
        1 when the case runs to completion.

    Exits the process with status -1 (via ``exit``) when the cluster is not
    ``HEALTH_OK`` before the case starts, or stays unhealthy after the
    second status check that follows a delete or a create.
    """
    # The case name is this script's file name without its extension.
    caseName = os.path.basename(inspect.getfile(
        inspect.currentframe())).split('.')[0]
    log = logging.getLogger(caseName)
    log.info(caseDescription)
    clusterObj = base.getClusterObj(caseName, args)
    avaiNode = clusterObj.getFirstAvaNode(caseName)

    def checkClientIOErrors():
        for client in clusterObj.getClients():
            client.checkIOError(caseName)

    def restartFailedClientIO():
        for client in clusterObj.getClients():
            if client.checkIOProcess(caseName) == "error":
                base.startIO(caseName, client, 'nbd')

    def requireHealthyAfterRetry(status):
        # Shared failure path: log the bad status, query once more, and
        # give up if the cluster is still unhealthy.
        log.error("status is %s" % status)
        log.error("%s  runs failed" % caseName)
        status = clusterObj.getStatus(caseName, avaiNode, timeOut)
        if status != 'HEALTH_OK':
            log.error("%s  runs failed" % caseName)
            exit(-1)
        log.info("stop in cluster successfully")

    log.info("start to check cluster status before case running")
    status = clusterObj.getStatus(caseName,
                                  clusterObj.getFirstAvaNode(caseName),
                                  timeOut)
    if status != 'HEALTH_OK':
        log.error("health status is error")
        exit(-1)
    log.info("health status is OK")

    log.info("\nStep1: start IO from clients")
    checkClientIOErrors()
    restartFailedClientIO()

    avaiNode.uploadScript(caseName)
    for osdObj in avaiNode.getOsds():
        osdObj.forceKill(caseName, avaiNode)
        osdObj.userStart(caseName, avaiNode)

    sleep(60)
    log.info("\nStep2: remove osd and create them 10 times")
    for _ in range(10):
        avaiNode.setOsdDisk(caseName)
        disks = []
        log.info("start to delete osd on node %s " % avaiNode.gethostName())
        for osdObj in avaiNode.getOsds():
            # Remember the disk so an OSD can be re-created on it below.
            disks.append(osdObj.getDisk())
            osdObj.delete(caseName, avaiNode)
            # NOTE(review): the status is sampled before the 600 s wait and
            # evaluated after it; confirm whether it should be queried
            # after the sleep instead.
            status = clusterObj.getStatus(caseName, avaiNode, timeOut)
            log.info("sleep 600s to wait the pg transfer successfully")
            sleep(600)
            if status == 'HEALTH_OK':
                log.info("%s delete succesfully" % osdObj.getid())
            else:
                requireHealthyAfterRetry(status)
        checkClientIOErrors()
        clusterObj.updateCluster(avaiNode)
        log.info("all osds on node %s delete succesfully" %
                 avaiNode.gethostName())

        log.info("start to create osd on node %s " % avaiNode.gethostName())
        for disk in disks:
            avaiNode.createOsd(caseName, disk)
            status = clusterObj.getStatus(caseName, avaiNode, timeOut)
            if status == 'HEALTH_OK':
                # Name the disk just used; no OSD object for the newly
                # created OSD is available at this point.
                log.info("%s create succesfully" % (disk,))
            else:
                requireHealthyAfterRetry(status)

            checkClientIOErrors()
            restartFailedClientIO()

        clusterObj.updateCluster(avaiNode)
        log.info("all osd need to create on node %s create succesfully" %
                 avaiNode.gethostName())

    log.info("case runs complete")
    return 1