Exemple #1
0
 def failureSimulation(self, failure_interval, wait_before_start, servers, min_servers,
                       servers_to_fail_simultaneously, kill_method, initial_clean, junit_report):
   """
   Run the failure loop for a given role.

   Each round sleeps, stops the role's process on randomly sampled hosts,
   checks that it stopped, sleeps again, starts it back up and checks that it
   is running.  Every check is recorded as a junit test case and, when a
   report path is given, the cumulative report is rewritten after each round.
   Once the arguments have been validated the loop never terminates on its
   own; it only ends when an exception propagates or the process is stopped.

   Args:
     failure_interval: seconds to sleep at the beginning of every round.
     wait_before_start: seconds to sleep between stopping and restarting.
     servers: comma separated hostnames.  An empty string prints a message
       and returns immediately.
     min_servers: number of servers that must stay up; has to be lower than
       the number of servers in the cluster.
     servers_to_fail_simultaneously: how many hosts are sampled each round.
     kill_method: "restart" uses shutdownProcessOnHost, "kill" uses
       killProcessOnHost, any other value picks one of the two at random for
       each host.
     initial_clean: when true, cluster.cleanProcess(self.roleName) is called
       once before the loop starts.
     junit_report: path of the junit XML file to write; "" disables the report.

   Raises:
     ValueError: if min_servers is not lower than the cluster size, or if
       servers_to_fail_simultaneously exceeds the cluster size minus
       min_servers.
   """
   if servers == "":
     # Parenthesised single-argument form prints the same text on Python 2 and 3.
     print("--servers not specified!\n\n")
     return

   logging.debug("Failure interval: %s", failure_interval)
   logging.debug("Wait before start: %s", wait_before_start)
   logging.debug("Server list: %s", servers)
   logging.debug("Minimum number of servers: %s", min_servers)
   logging.debug("Number of servers to fail simultaneously: %s", servers_to_fail_simultaneously)
   logging.debug("Kill method: %s", kill_method)
   logging.debug("Initial clean: %s", initial_clean)
   logging.debug("Role name: %s", self.roleName)
   logging.debug("Junit Report: %s", junit_report)

   testCases = []
   testNum = 0
   suiteClass = self.roleName + 'FailureSimulator'

   serverArray = servers.split(",")
   cluster = Cluster().getServersByHostname(serverArray)
   numServers = cluster.getNumServers()

   if min_servers >= numServers:
     raise ValueError("Minimum Number of servers is too high!\nMinimum Servers to stay up: "
                      + str(min_servers) + "\nNumber of " + self.roleName
                      + " servers in cluster: " + str(numServers))

   if servers_to_fail_simultaneously > numServers - min_servers:
     raise ValueError("--servers_to_fail_simultaneously is set too high")

   if initial_clean:
     cluster.cleanProcess(self.roleName)

   while True:
     start = time()
     logging.debug("Sleeping for %s seconds", failure_interval)
     sleep(failure_interval)

     # Pick random servers to kill
     serversToKill = sample(serverArray, servers_to_fail_simultaneously)
     logging.debug("Servers selected to kill: %s", ','.join(serversToKill))

     # Stop the running process based on kill_method
     for hostname in serversToKill:
       if kill_method == "restart":
         graceful = True
       elif kill_method == "kill":
         graceful = False
       else:
         # Any other method: flip a coin per host
         graceful = randint(0, 1) == 0

       if graceful:
         logging.debug("Shutting down %s on %s", self.roleName, hostname)
         cluster.shutdownProcessOnHost(self.roleName, hostname)
       else:
         logging.debug("Killing %s on %s", self.roleName, hostname)
         cluster.killProcessOnHost(self.roleName, hostname)

     # Ensure the process has stopped
     for hostname in serversToKill:
       # Create basis for test case
       tc = TestCase('Test' + str(testNum), suiteClass, time() - start,
                     'Shutting down ' + self.roleName + " with kill_method " + kill_method
                     + " on host " + hostname, '')
       # If the process is still running, then try killing it one more time
       if cluster.isProcessRunningOnHost(self.roleName, hostname):
         logging.debug("Killing %s on %s one last time", self.roleName, hostname)
         cluster.killProcessOnHost(self.roleName, hostname)
         # If the process is *still* running then report a failure
         if cluster.isProcessRunningOnHost(self.roleName, hostname):
           tc.add_failure_info(self.roleName + " process is still running on " + hostname, "")
       testCases.append(tc)
       testNum += 1

     # Start the process again
     start = time()
     sleep(wait_before_start)
     for hostname in serversToKill:
       logging.debug("Starting %s on %s", self.roleName, hostname)
       cluster.startProcessOnHost(self.roleName, hostname)

     # Ensure the process has started, otherwise report a failure
     for hostname in serversToKill:
       tc = TestCase('Test' + str(testNum), suiteClass, time() - start,
                     'Starting ' + self.roleName + " on host: " + hostname, '')
       if not cluster.isProcessRunningOnHost(self.roleName, hostname):
         tc.add_failure_info(self.roleName + " process is not running on " + hostname, "")
       testCases.append(tc)
       testNum += 1

     if junit_report != "":
       logging.debug("Writing junit report to: %s", junit_report)
       # junit_report is a file path: only its parent directory may need
       # creating.  Creating the path itself as a directory would make the
       # open() below fail.
       reportDir = os.path.dirname(junit_report)
       if reportDir and not os.path.isdir(reportDir):
         os.makedirs(reportDir)
       # Serialize first so a failure here does not truncate the previous report.
       ts = TestSuite(self.roleName + " Test Suite", testCases)
       xml = TestSuite.to_xml_string([ts])
       with open(junit_report, 'w') as f:
         f.write(xml)