def main():
    """
    <Purpose>
      Check local disk usage of /dev/sda and notify the Seattle admins:
      critical (email + IRC) above 95% used, warning (email) above 90%.
    <Side Effects>
      May send email and post to IRC.
    """
    # Initialize the gmail module used for notifications; bail out
    # quietly if the credentials are not available.
    success, explanation_str = send_gmail.init_gmail()
    if not success:
        integrationtestlib.log(explanation_str)
        sys.exit(0)

    command = "df -h | grep '/dev/sda' | awk '{print $5}'"
    command_output_fd = os.popen(command)

    # Get the output and get rid of the extra lines and % sign.
    disk_use_percent = int(command_output_fd.read().split()[0][:-1])
    # BUG FIX: close the pipe; the original leaked the file descriptor.
    command_output_fd.close()

    hostname = socket.gethostname() + ".cs.washington.edu"
    subject = "High disk usage"

    if disk_use_percent > 95:
        message = "CRITICAL: Very High Disk Usage on %s: %s percent used" % (
            hostname, disk_use_percent)
        integrationtestlib.notify(message, subject)
        irc_seattlebot.send_msg(message)
    elif disk_use_percent > 90:
        message = "WARNING: High disk usage on %s: %s percent used" % (
            hostname, disk_use_percent)
        integrationtestlib.notify(message, subject)
def download_and_unpack_seattle():
    """
    <Purpose>
      Downloads and unpacks the Seattle installer tarball.  An existing
      copy in the test directory is overwritten.
    <Arguments>
      None.
    <Exceptions>
      None.
    <Side Effects>
      Downloads the seattle installer tar-gzip file and unpacks it.
    <Returns>
      None.
    """
    tarball_path = test_directory + "/seattle_linux.tgz"

    # Remove any stale copy so wget does not create a numbered duplicate.
    if os.path.isfile(tarball_path):
        os.remove(tarball_path)

    integrationtestlib.log("downloading distro for seattle_install_tester...")
    os.system("wget --no-check-certificate " + seattle_linux_url)

    integrationtestlib.log("unpacking...")
    os.system("tar -xzvf " + tarball_path)
def main(): # initialize the gmail module success, explanation_str = send_gmail.init_gmail() if not success: integrationtestlib.log(explanation_str) sys.exit(0) # PART 1 verify that there are at least 10 nat forwarders running on # each key notify_str = '' for nat_forwarder_key in nat_forwarder_keys: integrationtestlib.log("Looking up nat forwarders for " + repr(nat_forwarder_key)) nat_forwarders = [] try: nat_forwarders = advertise.advertise_lookup(nat_forwarder_key) except Exception, e: integrationtestlib.handle_exception( "Got exception when looking up nat forwarders", NAT_TEST_FAIL_NOTICE) return if len(nat_forwarders) < 10: notify_str += ('WARNING: only ' + str(len(nat_forwarders)) + ' nat forwarders are advertising under the key: ' + repr(nat_forwarder_key) + '\n' + "Advertising forwarders: " + str(nat_forwarders) + '\n')
def uninstall_remove():
    """
    <Purpose>
      Uninstalls a Seattle installation and removes the Seattle directory.
    <Arguments>
      None.
    <Exceptions>
      None.
    <Side Effects>
      Uninstalls Seattle and removes its directory and tarball.
    <Returns>
      None.
    """
    # Run the bundled uninstall script from inside the install directory.
    integrationtestlib.log("uninstalling")
    os.system("cd " + prefix
              + "/seattle/ && chmod +x ./uninstall.sh && ./uninstall.sh")

    # Remove every remaining trace of the installation.
    integrationtestlib.log("removing all files")
    os.system("rm -Rf " + prefix + "/seattle/")
    os.system("rm -Rf " + prefix + "/seattle_linux.tgz")
def main():
    """
    <Purpose>
      Initialize the gmail info that is needed to send email
      notifications, then check whether enough opendht servers are up.
    <Arguments>
      None
    <Exceptions>
      None
    <Side_Effects>
      None
    <Return>
      None
    """
    # Set up the gmail user/password used when sending email.
    init_ok, error_detail = send_gmail.init_gmail()
    if not init_ok:
        integrationtestlib.log(error_detail)
        sys.exit(0)

    # Run the test to see how many servers are up.
    check_opendht_servers()
def main():
    """
    Entry point: set up gmail notification, create a scratch directory,
    run the installer tests through the XML-RPC proxy, and report.
    """
    # Modify the global versions of these variables.
    global TEMP_DIR, XMLRPC_PROXY

    # Gmail setup must happen before we change directories.
    success, explanation = send_gmail.init_gmail()
    if not success:
        integrationtestlib.log("Failed to execute init_gmail(): " + explanation)
        sys.exit(1)

    # Add any extra error log recipients.
    integrationtestlib.notify_list.extend(NOTIFY_LIST)

    # Do all the work inside a fresh temporary directory.
    TEMP_DIR = tempfile.mkdtemp()
    os.chdir(TEMP_DIR)

    # A single proxy instance is reused by every test.
    XMLRPC_PROXY = xmlrpclib.ServerProxy(XMLRPC_PROXY_URL)

    # The main event!
    try:
        report_results(run_tests())
    finally:
        # Always remove the scratch directory, even if a test blew up.
        shutil.rmtree(TEMP_DIR)
def main():
    """
    Set up notifications and a scratch directory, run the installer
    test suite via XML-RPC, then report and clean up.
    """
    # Modify the global versions of these variables.
    global TEMP_DIR, XMLRPC_PROXY

    # The integration test library must be initialized before chdir.
    ok, explanation = send_gmail.init_gmail()
    if not ok:
        integrationtestlib.log('Failed to execute init_gmail(): ' + explanation)
        sys.exit(1)

    # Add any extra error log recipients.
    integrationtestlib.notify_list.extend(NOTIFY_LIST)

    # Establish a temporary directory to do our work in.
    TEMP_DIR = tempfile.mkdtemp()
    os.chdir(TEMP_DIR)

    # Each test can reuse this proxy.
    XMLRPC_PROXY = xmlrpclib.ServerProxy(XMLRPC_PROXY_URL)

    try:
        outcome = run_tests()
        report_results(outcome)
    finally:
        # Remove the temporary directory we've been working in.
        shutil.rmtree(TEMP_DIR)
def main(): # initialize the gmail module success,explanation_str = send_gmail.init_gmail() if not success: integrationtestlib.log(explanation_str) sys.exit(0) # PART 1 verify that there are at least 10 nat forwarders running on # each key notify_str = '' for nat_forwarder_key in nat_forwarder_keys: integrationtestlib.log("Looking up nat forwarders for " + repr(nat_forwarder_key)) nat_forwarders = [] try: nat_forwarders = advertise.advertise_lookup(nat_forwarder_key) except Exception, e: integrationtestlib.handle_exception("Got exception when looking up nat forwarders", NAT_TEST_FAIL_NOTICE) return if len(nat_forwarders) < 10: notify_str += ('WARNING: only '+ str(len(nat_forwarders)) + ' nat forwarders are advertising under the key: ' + repr(nat_forwarder_key) + '\n' + "Advertising forwarders: " + str(nat_forwarders) + '\n')
def build_installers():
    """
    Build installers for all platforms through the XML-RPC proxy.

    Returns:
      (success, build_results) -- build_results is the proxy's response
      dict on success, or None when the build call failed.
    """
    # Build data for complex installers.
    vessels = [
        {'percentage': 60, 'owner': 'alex', 'users': ['bob']},
        {'percentage': 20, 'owner': 'carl', 'users': ['darren']},
    ]
    user_data = {
        'alex': {'public_key': '12345 54321'},
        'darren': {'public_key': '67890 09876'},
    }

    # CLEANUP: removed the unused local `return_dict` and a stale
    # commented-out log line.
    try:
        build_results = XMLRPC_PROXY.build_installers(vessels, user_data)
    except:
        integrationtestlib.log('Failed to build installer')
        return False, None

    return True, build_results
def run_tests():
    """
    Build installers for every platform, then run the download,
    vesselinfo, and decompression checks for each one.

    Returns a dict mapping os_name to a dict of per-check results.
    """
    all_results = dict()

    integrationtestlib.log('Preparing an installer for all platforms.')
    build_success, build_results = build_installers()

    for os_name in PLATFORMS:
        integrationtestlib.log('Testing an installer for operating system \''
                               + os_name + '\'')

        # Only look up the URL when the build itself succeeded.
        installer_url = build_results['installers'][os_name] if build_success else None

        results = dict()
        results['download'], installer_filename = fetch_installer(installer_url)
        results['vessel_info'] = verify_vessel_info(installer_filename, os_name)
        results['decompress'] = decompress_installer(installer_filename, os_name)

        all_results[os_name] = results

    return all_results
def lookup_timedout():
    """
    <Purpose>
      Waits for lookup_done_event and notifies the folks on the
      notify_list (global var) of the lookup timeout.
    <Arguments>
      None.
    <Exceptions>
      None.
    <Side Effects>
      Sends an email to the notify_list folks.
    <Returns>
      None.
    """
    integrationtestlib.log("in lookup_timedout()")
    notify_msg = "Centralized lookup failed -- lookup_timedout() fired after 30 sec."

    # Wait for the event to be set, timing out after 30 minutes.
    wait_time = 1800
    started_at = nonportable.getruntime()
    lookup_done_event.wait(wait_time)
    t_waited = nonportable.getruntime() - started_at

    # If we waited (almost) the whole window, the lookup never finished.
    if abs(wait_time - t_waited) < 5:
        notify_msg += " And lookup stalled for over 30 minutes (max timeout value)."
    else:
        notify_msg += " And lookup stalled for " + str(t_waited) + " seconds"

    integrationtestlib.notify(notify_msg)
def build_installers():
    """
    Request installers for every platform from the XML-RPC proxy.

    Returns:
      (success, build_results) -- build_results is the proxy's response
      dict on success, or None when the build call failed.
    """
    # Build data for complex installers.
    vessels = [
        {'percentage': 60, 'owner': 'alex', 'users': ['bob']},
        {'percentage': 20, 'owner': 'carl', 'users': ['darren']},
    ]
    user_data = {
        'alex': {'public_key': '12345 54321'},
        'darren': {'public_key': '67890 09876'},
    }

    # CLEANUP: removed the unused local `return_dict` and a stale
    # commented-out log line.
    try:
        build_results = XMLRPC_PROXY.build_installers(vessels, user_data)
    except:
        integrationtestlib.log('Failed to build installers.')
        return False, None

    return True, build_results
def main():
    """
    <Purpose>
      Initialize the gmail info needed for email notifications and then
      run the opendht server-count check.
    <Arguments>
      None
    <Exceptions>
      None
    <Side_Effects>
      None
    <Return>
      None
    """
    # Setup the gmail user/password to use when sending email.
    ok, why = send_gmail.init_gmail()
    if not ok:
        integrationtestlib.log(why)
        sys.exit(0)

    # Run the test to see how many servers are up.
    check_opendht_servers()
def run(self):
    """
    <Purpose>
      The thread function that actually pings the machine at
      self.ipaddr, self.pingcount times, appending a (success, message)
      tuple to self.result_queue.
    <Exception>
      Raises MachineDoesNotExist if self.ipaddr cannot be resolved.
    <Side Effect>
      Logs a summary of the ping result.
    """
    try:
        socket.gethostbyname(self.ipaddr)
    except:
        self.result_queue.append(
            (False, "The machine/ipaddr: " + self.ipaddr + " does not exist:"))
        raise MachineDoesNotExist("The hostname/ipaddr " + self.ipaddr
                                  + " does not exist")

    # Ping the machine and capture the summary output.
    # BUG FIX: `command` used to be the tuple (cmd, "r") -- a leftover
    # from an os.popen() signature.  Popen(shell=True) needs the bare
    # command string.
    command = "ping -q -c" + str(self.pingcount) + " " + self.ipaddr
    pingresult, pingerror = subprocess.Popen(
        command, shell=True, stdout=subprocess.PIPE).communicate()

    # Split up the result in order to analyze it line by line.
    pingresult_formatted = pingresult.split('\n')

    # Find the packet-summary line and compute the received percentage.
    for ping_line in pingresult_formatted:
        packets_received = re.findall(r"(\d) received", ping_line)
        if packets_received:
            packets_received = int(packets_received[0]) * 100 / self.pingcount
            result = ("Pinging " + str(self.ipaddr) + ": packets received "
                      + str(packets_received) + "%")
            integrationtestlib.log(result)

            if packets_received == 0:
                self.result_queue.append((False, result))
            else:
                self.result_queue.append((True, result))
def main(): """ <Purpose> Call check_nodes with the two different servers: opendht and central. Retrieve the result and then notify developers if result is unusual <Exceptions> None <Side Effects> May send out an email or notify on irc. <Return> None """ #setup the gmail for sending notification success,explanation_str = send_gmail.init_gmail() if not success: integrationtestlib.log(explanation_str) sys.exit(0) notification_subject = "test_lookup_node_states() failed" max_acceptdonation_nodes = 50 max_canonical_nodes = 50 max_movingtoonepercent = 20 min_onepercentmanyevents_nodes = 300 central_results = check_nodes('central') integrationtestlib.log("Lookup results for central: "+ str(central_results)) #check to see if any of the results is not normal, and send notifications accordingly #also send a message to irc. if central_results['acceptdonation'] > max_acceptdonation_nodes: message="Too many nodes in acceptdonation state: "+str(central_results['acceptdonation'])+"\nResults from 'central' server:\n"+str(central_results) print message integrationtestlib.notify(message, notification_subject) elif central_results['canonical'] > max_canonical_nodes: message="Too many nodes in canonical state: "+str(central_results['canonical'])+"\nResults from 'central' server:\n"+str(central_results) print message integrationtestlib.notify(message, notification_subject) elif central_results['onepercent_manyevent'] < min_onepercentmanyevents_nodes: message="Too few nodes in onepercentmanyevents state: "+str(central_results['onepercent_manyevent'])+"\nResults from 'central' server:\n"+str(central_results) print message integrationtestlib.notify(message, notification_subject) opendht_results = check_nodes('opendht') print opendht_results
def fetch_installer(installer_url):
    """
    Download the installer at installer_url to a local temporary file.

    Returns (success, local_filename); local_filename is None when the
    URL is missing or the download failed.
    """
    if installer_url is None:
        return False, None

    try:
        local_path = urllib.urlretrieve(installer_url)[0]
    except:
        integrationtestlib.log("Failed to fetch installer at " + installer_url)
        return False, None

    return True, local_path
def test_invalid_input(test_function, function_name, reason_invalid):
    """
    Verify that test_function raises an exception when called (it is
    expected to be fed invalid input).

    Returns True when the function correctly failed, False (after
    logging an error) when it unexpectedly succeeded.
    """
    integrationtestlib.log(('Verifying that the \'' + function_name
                            + '\' function fails against invalid input.'))

    try:
        test_function()
    except:
        # An error occurred, so the invalid input was detected.
        return True

    # We didn't want success here!
    # TYPO FIX: the logged message previously read "succeded".
    log_error(function_name,
              'Function succeeded with invalid input: ' + reason_invalid)
    return False
def fetch_installer(installer_url):
    """
    Retrieve the installer file at installer_url.

    Returns a (success, filename) pair; filename is None on failure or
    when no URL was supplied.
    """
    # Nothing to fetch without a URL.
    if installer_url is None:
        return False, None

    try:
        downloaded_file = urllib.urlretrieve(installer_url)[0]
    except:
        integrationtestlib.log('Failed to fetch installer at ' + installer_url)
        return False, None

    return True, downloaded_file
def main(): # initialize the gmail module success,explanation_str = send_gmail.init_gmail() if not success: integrationtestlib.log(explanation_str) sys.exit(0) #add Eric Kimbrel to the email notify list integrationtestlib.notify_list.append("*****@*****.**") try: integrationtestlib.log("Looking up time_servers") # verify that there are at least 8 time servers running servers = advertise_lookup("time_server") if len(servers) < 8: integrationtestlib.log('WARNING: only '+str(len(servers))+' timeservers are running!') integrationtestlib.notify('WARNING: test_time_servers_running.py FAILED, only '+str(len(servers))+' timeservers are running!', "test_time_servers_running test failed") integrationtestlib.log("Finished looking up test servers... Test Passed") print "........................................................\n" except: integrationtestlib.notify("Test failed for an unknown reason:\n" + traceback.format_exc(), "test_time_servers_running test failed")
def main(): # initialize the gmail module success, explanation_str = send_gmail.init_gmail() if not success: integrationtestlib.log(explanation_str) sys.exit(0) #add Eric Kimbrel to the email notify list integrationtestlib.notify_list.append("*****@*****.**") try: integrationtestlib.log("Looking up time_servers") # verify that there are at least 8 time servers running servers = advertise_lookup("time_server") if len(servers) < 8: integrationtestlib.log('WARNING: only ' + str(len(servers)) + ' timeservers are running!') integrationtestlib.notify( 'WARNING: test_time_servers_running.py FAILED, only ' + str(len(servers)) + ' timeservers are running!', "test_time_servers_running test failed") integrationtestlib.log( "Finished looking up test servers... Test Passed") print "........................................................\n" except: integrationtestlib.notify( "Test failed for an unknown reason:\n" + traceback.format_exc(), "test_time_servers_running test failed")
def main(): # Initialize the gmail setup. success, explanation_str = send_gmail.init_gmail() if not success: integrationtestlib.log(explanation_str) sys.exit(0) print "Beginning query." success = True try: # Query zenodotus if not _dns_mapping_exists(zenodotus_servername, zenodotus_ipaddr): print "Zenodotus failed to respond properly!" # Query is invalid! success = False integrationtestlib.notify( "Error: Zenodotus has failed to correctly respond; the machine has likely been rebooted. Please restart the zenodotus server on zenodotus@blackbox. This report will be re-sent hourly while the problem persists.", "Cron: Zenodotus failure", ) # Check that advertised values work # Map an entirely random IP to a random DNS name. The mapped IP does # not have to actually exist (but should still be valid). random_ip_address = _generate_random_ip_address() random_publickey = rsa_gen_pubpriv_keys(1024)[0] random_publickey_string = rsa_publickey_to_string(random_publickey) random_subdomain = "test-" + sha_hexhash(random_publickey_string) random_dns_entry = random_subdomain + "." + zenodotus_servername print "Announcing", random_dns_entry, random_ip_address advertise_announce(random_dns_entry, random_ip_address, 60) if not _dns_mapping_exists(random_dns_entry, random_ip_address): print "Zenodotus failed to respond properly to advertised subdomain!" # Query is invalid! success = False integrationtestlib.notify( "Error: Zenodotus has failed to correctly respond to an advertised subdomain; there might be something wrong with the advertise server. This report will be re-sent hourly while the problem persists.", "Cron: Zenodotus failure", ) except Exception, e: print "Unknown error!" print str(e) success = False integrationtestlib.notify("Error: Zenodotus seems to be down! Error data: " + str(e), "Cron: Zenodotus failure")
def main(): # Initialize the gmail setup. success, explanation_str = send_gmail.init_gmail() if not success: integrationtestlib.log(explanation_str) sys.exit(0) print "Beginning query." success = True try: # Query zenodotus if not _dns_mapping_exists(zenodotus_servername, zenodotus_ipaddr): print "Zenodotus failed to respond properly!" # Query is invalid! success = False integrationtestlib.notify( "Error: Zenodotus has failed to correctly respond; the machine has likely been rebooted. Please restart the zenodotus server on zenodotus@blackbox. This report will be re-sent hourly while the problem persists.", "Cron: Zenodotus failure") # Check that advertised values work # Map an entirely random IP to a random DNS name. The mapped IP does # not have to actually exist (but should still be valid). random_ip_address = _generate_random_ip_address() random_publickey = rsa_gen_pubpriv_keys(1024)[0] random_publickey_string = rsa_publickey_to_string(random_publickey) random_subdomain = "test-" + sha_hexhash(random_publickey_string) random_dns_entry = random_subdomain + '.' + zenodotus_servername print "Announcing", random_dns_entry, random_ip_address advertise_announce(random_dns_entry, random_ip_address, 60) if not _dns_mapping_exists(random_dns_entry, random_ip_address): print "Zenodotus failed to respond properly to advertised subdomain!" # Query is invalid! success = False integrationtestlib.notify( "Error: Zenodotus has failed to correctly respond to an advertised subdomain; there might be something wrong with the advertise server. This report will be re-sent hourly while the problem persists.", "Cron: Zenodotus failure") except Exception, e: print "Unknown error!" print str(e) success = False integrationtestlib.notify( "Error: Zenodotus seems to be down! Error data: " + str(e), "Cron: Zenodotus failure")
def main(): """ <Purpose> Call check_nodes with the two different servers: opendht and central. Retrieve the result and then notify developers if result is unusual <Exceptions> None <Side Effects> May send out an email or notify on irc. <Return> None """ #setup the gmail for sending notification success,explanation_str = send_gmail.init_gmail() if not success: integrationtestlib.log(explanation_str) sys.exit(0) notification_subject = "test_lookup_node_states() failed" max_acceptdonation_nodes = 50 max_canonical_nodes = 50 max_movingtotwopercent = 20 min_twopercent_nodes = 300 central_results = check_nodes('central') integrationtestlib.log("Lookup results for central: "+ str(central_results)) #check to see if any of the results is not normal, and send notifications accordingly #also send a message to irc. if central_results['acceptdonation'] > max_acceptdonation_nodes: message="Too many nodes in acceptdonation state: "+str(central_results['acceptdonation'])+"\nResults from 'central' server:\n"+str(central_results) print message integrationtestlib.notify(message, notification_subject) elif central_results['canonical'] > max_canonical_nodes: message="Too many nodes in canonical state: "+str(central_results['canonical'])+"\nResults from 'central' server:\n"+str(central_results) print message integrationtestlib.notify(message, notification_subject) elif central_results['twopercent'] < min_twopercent_nodes: message="Too few nodes in twopercent state: "+str(central_results['twopercent'])+"\nResults from 'central' server:\n"+str(central_results) print message integrationtestlib.notify(message, notification_subject)
def report_results():
    """
    Summarize the global ERRORS dict: log success when it is empty,
    otherwise log and email the collected error messages.
    """
    # If there are no entries in the dictionary, then no errors occurred.
    if not ERRORS:
        integrationtestlib.log('All tests successful!')
        return

    # Otherwise, flatten every recorded error into one report.
    error_string = 'The following errors occurred:\n'
    for function in ERRORS:
        for error in ERRORS[function]:
            error_string += '\n[' + function + '] ' + error

    integrationtestlib.log(error_string)
    integrationtestlib.notify(error_string,
                              'Custom Installer Builder test failure')
def check_opendht_servers():
    """
    <Purpose>
      Checks how many servers are up for opendht advertisement.  If the
      count is below min_num_servers an email notification is sent to
      the Seattle developers.
    <Argument>
      None
    <Exception>
      None
    <Side_Effects>
      None
    <Returns>
      None
    """
    notify_message = "There aren't enough opendht servers up and running currently."
    subject = "opendht_servercount test failed."

    try:
        # Retrieve the list of servers for opendht
        opendht_server_list = openDHTadvertise_get_proxy_list(
            maxnumberofattempts=150)
    except:
        # openDHTadvertise_get_proxy_list() raises if there are no
        # opendht servers up at all.
        integrationtestlib.handle_exception(
            "There are no servers up for opendht!", subject)
        # BUG FIX: without this return, execution fell through to the
        # code below and raised NameError on the never-assigned
        # opendht_server_list.
        return

    integrationtestlib.log(
        "Retrieved the list of opendht servers up and running.")
    integrationtestlib.log(
        "There are " + str(len(opendht_server_list)) +
        " servers up and running for opendht advertisement.")

    # Check to see if there are less then 100 servers up and running.
    # If so, notify the seattle developers about it.
    if len(opendht_server_list) < min_num_servers:
        subject += " There are only " + str(
            len(opendht_server_list
                )) + " servers up and running for opendht advertisement."
        integrationtestlib.notify(notify_message, subject)
def run(self):
    """
    <Purpose>
      The thread function that actually pings the machine.  Pings
      self.ipaddr self.pingcount times and pushes a (success, message)
      tuple onto self.result_queue.
    <Exception>
      raises MachineDoesNotExist if the host cannot be resolved.
    <Side Effect>
      prints out a form of the ping result
    """
    try:
        socket.gethostbyname(self.ipaddr)
    except:
        self.result_queue.append(
            (False, "The machine/ipaddr: " + self.ipaddr + " does not exist:"))
        raise MachineDoesNotExist("The hostname/ipaddr " + self.ipaddr
                                  + " does not exist")

    # pings the machine and gets the result line back
    # BUG FIX: the command used to be the tuple (cmd, "r"), a leftover
    # from os.popen()'s signature; Popen(shell=True) expects the plain
    # command string.
    command = "ping -q -c" + str(self.pingcount) + " " + self.ipaddr
    pingresult, pingerror = subprocess.Popen(
        command, shell=True, stdout=subprocess.PIPE).communicate()

    # splits up the result in order to analyze the result
    pingresult_formatted = pingresult.split('\n')

    # Go through the result and pick out the right line to analyze
    for ping_line in pingresult_formatted:
        packets_received = re.findall(r"(\d) received", ping_line)
        if packets_received:
            packets_received = int(packets_received[0]) * 100 / self.pingcount
            result = ("Pinging " + str(self.ipaddr) + ": packets received "
                      + str(packets_received) + "%")
            integrationtestlib.log(result)

            if packets_received == 0:
                self.result_queue.append((False, result))
            else:
                self.result_queue.append((True, result))
def main():
    """
    <Purpose>
      Runs at regular time intervals to make sure that critical
      processes on a machine are still up and running.  If a critical
      process is not running then system admins are sent an email, as
      well as a message is posted on the IRC.
    <Exceptions>
      None
    <Usage>
      Takes one argument, -seattle or -seattleclearinghouse.  A typical
      use of this script is to have it run periodically using something
      like the following crontab line:
      */15 * * * * export GMAIL_USER='******' && export GMAIL_PWD='password' &&
      /usr/bin/python /home/seattle/monitor_scripts/monitor_processes.py >
      /home/seattle/monitor_scripts/cron_log.monitor_processes
    """
    # setup the gmail user/password to use when sending email
    success, explanation_str = send_gmail.init_gmail()
    if not success:
        integrationtestlib.log(explanation_str)
        sys.exit(0)

    #integrationtestlib.notify_list.append("*****@*****.**")

    # processes that should be running on seattle server
    seattle_process_list = ['advertiseserver.py']

    # The commands that should be run on seattle to get all the
    # required processes.
    seattle_command = ["ps auwx | grep python | grep -v grep | grep geni | awk '{print $14}'"]

    # processes that should be running on seattleclearinghouse server
    seattleclearinghouse_process_list = [
        'transition_donation_to_canonical.py',
        'transition_onepercentmanyevents_to_canonical.py',
        'transition_canonical_to_twopercent.py',
        'transition_twopercent_to_twopercent.py',
        'check_active_db_nodes.py',
        'apache2',
        '/usr/sbin/mysqld',
        'backend_daemon.py',
        'lockserver_daemon.py']

    # The commands that should be run on seattleclearinghouse to get
    # all the required processes.
    seattleclearinghouse_command = ["ps auwx | grep python | grep -v grep | grep clearinghouse | awk '{print $12}'"]
    seattleclearinghouse_command.append("ps auwx | grep apache | grep -v grep | grep root | awk '{print $11}'")
    seattleclearinghouse_command.append("ps auwx | grep mysqld | grep -v grep | awk '{print $11}'")
    seattleclearinghouse_command.append("ps auwx | grep python | grep -v grep | grep justinc | awk '{print $12}'")

    # ROBUSTNESS FIX: guard against a missing command-line argument,
    # which previously crashed with IndexError.
    if len(sys.argv) < 2:
        integrationtestlib.log(
            "Usage: monitor_processes.py -seattle | -seattleclearinghouse")
        sys.exit(1)

    # run monitor processes with the right command
    if sys.argv[1] == '-seattle':
        monitor_processes(seattle_process_list, seattle_command, "seattle")
    elif sys.argv[1] == '-seattleclearinghouse':
        monitor_processes(seattleclearinghouse_process_list,
                          seattleclearinghouse_command,
                          "seattleclearinghouse")
def check_nodes(server_lookup_type): """ <Purpse> Check for nodes that are in the central server and find the nodes in differen states. <Arguments> server_lookup_type - either central or opendht <Exception> AdvertiseLookup - raised if advertise_lookup gives an error. <Side Effects> None <Return> None """ #counter used to count the total number of nodes total_nodes = 0 node_result = {} integrationtestlib.log("Starting advertise_lookup() using only "+server_lookup_type+" lookup type") print server_lookup_type #go through all the possible node states and do an advertise lookup for node_state_name, node_state_pubkey in important_node_states: integrationtestlib.log("Printing "+node_state_name+" nodes:") #retrieve node list from advertise_lookup( try: node_list = advertise_lookup(node_state_pubkey, maxvals = 10*1024*1024, lookuptype=[server_lookup_type]) except: raise AdvertiseLookup, "advertise_lookup() failed when looking up key: "+ node_state_pubkey + " for "+server_lookup_type #keep track of total nodes total_nodes+=len(node_list) node_result[node_state_name] = len(node_list) #logg all the node lookup info for node in node_list: print node node_result['Total nodes'] = total_nodes return node_result
def check_nodes(server_lookup_type): """ <Purpse> Check for nodes that are in the central server and find the nodes in differen states. <Arguments> server_lookup_type - either central or opendht <Exception> AdvertiseLookup - raised if advertise_lookup gives an error. <Side Effects> None <Return> None """ #counter used to count the total number of nodes total_nodes = 0 node_result = {} integrationtestlib.log("Starting advertise_lookup() using only "+server_lookup_type+" lookup type") print server_lookup_type #go through all the possible node states and do an advertise lookup for node_state_name, node_state_pubkey in important_node_states: integrationtestlib.log("Printing "+node_state_name+" nodes:") #retrieve node list from advertise_lookup( try: node_list = advertise_lookup(node_state_pubkey, maxvals = 10*1024*1024, lookuptype=[server_lookup_type]) except: raise AdvertiseLookup("advertise_lookup() failed when looking up key: "+ rsa_publickey_to_string(node_state_pubkey) + " for "+server_lookup_type) #keep track of total nodes total_nodes+=len(node_list) node_result[node_state_name] = len(node_list) #logg all the node lookup info for node in node_list: print node node_result['Total nodes'] = total_nodes return node_result
def verify_vessel_info(installer_filename, os_name):
    """
    Check that the installer archive contains a vesselinfo file.

    Returns True when seattle/seattle_repy/vesselinfo appears in the
    archive listing, False otherwise (missing file, unknown platform,
    or no installer).
    """
    if installer_filename is None:
        return False

    # List the archive contents with the tool matching the platform's
    # archive format.
    if os_name in ZIP_PLATFORMS:
        process = subprocess.Popen(['unzip', '-l', installer_filename],
                                   stdout=subprocess.PIPE)
    elif os_name in TGZ_PLATFORMS:
        process = subprocess.Popen(['tar', 'tzf', installer_filename],
                                   stdout=subprocess.PIPE)
    else:
        # BUG FIX: an os_name in neither platform set previously fell
        # through and raised NameError on the unbound 'process'.
        integrationtestlib.log('Unknown operating system: ' + os_name)
        return False

    file_listing = process.communicate()[0]

    if file_listing.find('seattle/seattle_repy/vesselinfo') < 0:
        integrationtestlib.log('Installer ' + installer_filename +
                               ' does not have a vesselinfo file.')
        return False

    return True
def verify_vessel_info(installer_filename, os_name):
    """
    Verify the installer archive ships a vesselinfo file.

    Returns True when the listing contains
    seattle/seattle_repy/vesselinfo; False for a missing installer, an
    unrecognized platform, or a listing without the file.
    """
    if installer_filename is None:
        return False

    # Pick the listing tool by archive format.
    if os_name in ZIP_PLATFORMS:
        process = subprocess.Popen(["unzip", "-l", installer_filename],
                                   stdout=subprocess.PIPE)
    elif os_name in TGZ_PLATFORMS:
        process = subprocess.Popen(["tar", "tzf", installer_filename],
                                   stdout=subprocess.PIPE)
    else:
        # BUG FIX: previously an unknown os_name skipped both branches
        # and the next line raised NameError on the unbound 'process'.
        integrationtestlib.log("Unknown operating system: " + os_name)
        return False

    file_listing = process.communicate()[0]

    if file_listing.find("seattle/seattle_repy/vesselinfo") < 0:
        integrationtestlib.log("Installer " + installer_filename +
                               " does not have a vesselinfo file.")
        return False

    return True
def check_nodes(): """ <Purpse> Check for nodes advertising on the advertise services, and find nodes in different states. <Arguments> None. <Exception> AdvertiseLookup - raised if advertise_lookup gives an error. <Side Effects> None <Return> None """ total_nodes = 0 node_result = {} integrationtestlib.log("Starting advertise_lookup()") # Go through all the possible node states and do an advertise lookup for node_state_name, node_state_pubkey in important_node_states: integrationtestlib.log("Printing " + node_state_name + " nodes:") # Retrieve node list from advertise services try: node_list = advertise.advertise_lookup(node_state_pubkey, maxvals=10 * 1024 * 1024) except Exception, e: raise AdvertiseLookup( "advertise_lookup() failed with " + repr(e) + " when looking up key " + rsa.rsa_publickey_to_string(node_state_pubkey)) # Keep track of total nodes total_nodes += len(node_list) node_result[node_state_name] = len(node_list) # Log all the node lookup info for node in node_list: print node
def main():
  """
  <Purpose>
    Runs at regular time intervals to make sure that critical processes on a
    machine are still up and running. If a critical process is not running
    then system admins are sent an email, as well as a message is posted on
    the IRC.

  <Exceptions>
    None

  <Usage>
    This script takes one argument, -seattle or -seattleclearinghouse,
    selecting which machine's process list to check. A typical use of this
    script is to have it run periodically using something like the following
    crontab line:
    */15 * * * * export GMAIL_USER='******' && export GMAIL_PWD='password' &&
    /usr/bin/python /home/seattle/monitor_scripts/monitor_processes.py
    > /home/seattle/monitor_scripts/cron_log.monitor_processes
  """
  # setup the gmail user/password to use when sending email
  success,explanation_str = send_gmail.init_gmail()
  if not success:
    integrationtestlib.log(explanation_str)
    sys.exit(0)

  #processes that should be running on seattle server
  seattle_process_list=['advertiseserver.py']

  #The commands that should be run on seattle to get all the required processes
  seattle_command = ["ps auwx | grep python | grep -v grep | grep geni | awk '{print $14}'"]

  #processes that should be running on seattleclearinghouse server
  seattleclearinghouse_process_list=['transition_donation_to_canonical.py', 'transition_onepercentmanyevents_to_canonical.py', 'transition_canonical_to_twopercent.py', 'transition_twopercent_to_twopercent.py', 'check_active_db_nodes.py', 'apache2', '/usr/sbin/mysqld', 'backend_daemon.py', 'lockserver_daemon.py']

  #The commands that should be run on seattleclearinghouse to get all the required processes
  seattleclearinghouse_command = ["ps auwx | grep python | grep -v grep | grep geni | awk '{print $12}'"]
  seattleclearinghouse_command.append("ps auwx | grep apache | grep -v grep | grep root | awk '{print $11}'")
  seattleclearinghouse_command.append("ps auwx |grep mysqld |grep mysql | awk '{print $11}'")
  seattleclearinghouse_command.append("ps auwx | grep python | grep -v grep | grep justinc | awk '{print $12}'")

  # Fix: sys.argv[1] was previously read without checking that an argument
  # was supplied, so running the script bare died with an IndexError.
  if len(sys.argv) < 2:
    integrationtestlib.log("Usage: monitor_processes.py <-seattle | -seattleclearinghouse>")
    sys.exit(1)

  #run monitor processes with the right command
  if sys.argv[1] == '-seattle':
    monitor_processes(seattle_process_list, seattle_command, "seattle")
  elif sys.argv[1] == '-seattleclearinghouse':
    monitor_processes(seattleclearinghouse_process_list, seattleclearinghouse_command, "seattleclearinghouse")
def run_tests():
  """
  Build an installer for every platform, then run the per-platform checks
  (download, vesselinfo presence, decompression).

  Returns a dict mapping os_name -> {test_name: bool}; empty when the
  installer build itself failed.
  """
  test_results = dict()

  integrationtestlib.log("Preparing an installer for all platforms.")
  build_success, build_results = build_installers()

  # Fix: build_success was previously ignored, so a failed build left
  # build_results as None and the indexing below raised a TypeError.
  if not build_success:
    integrationtestlib.log("Installer build failed; skipping per-platform tests.")
    return test_results

  for os_name in PLATFORMS:
    integrationtestlib.log("Testing an installer for operating system '" + os_name + "'")
    installer_url = build_results["installers"][os_name]

    results = dict()
    results["download"], installer_filename = fetch_installer(installer_url)
    results["vessel_info"] = verify_vessel_info(installer_filename, os_name)
    results["decompress"] = decompress_installer(installer_filename, os_name)

    test_results[os_name] = results

  return test_results
def report_results(test_results):
  """
  Summarize per-OS test outcomes. Logs a success message when everything
  passed; otherwise logs the failures and emails a notification.
  """
  failures = ''

  # One line per OS that had at least one failing test.
  for os_name in test_results:
    failed_tests = [name for name in test_results[os_name]
                    if not test_results[os_name][name]]
    if failed_tests:
      failures += ' ' + os_name + ': ' + ''.join(name + ' ' for name in failed_tests) + '\n'

  if failures == '':
    integrationtestlib.log('All tests successful!')
  else:
    results = 'The following tests failed:\n\n' + failures
    integrationtestlib.log(results)
    integrationtestlib.notify(results, 'Custom Installer Builder test failure')
def report_results(test_results):
  """
  Summarize the per-OS test results. Logs a success message when every test
  passed; otherwise logs the failures and emails a notification.

  test_results - dict mapping os_name -> {test_name: bool}.
  """
  failures = ""

  # Build one line per OS listing the names of its failed tests.
  for os_name in test_results:
    tests_failed = ""
    for test_name in test_results[os_name]:
      if not test_results[os_name][test_name]:
        tests_failed += test_name + " "
    if tests_failed != "":
      failures += " " + os_name + ": " + tests_failed + "\n"

  if failures == "":
    integrationtestlib.log("All tests successful!")
  else:
    results = "The following tests failed:\n\n" + failures
    integrationtestlib.log(results)
    integrationtestlib.notify(results, "Custom Installer Builder test failure")
def decompress_installer(installer_filename, os_name):
  """
  Unpack the downloaded installer inside a fresh temporary directory.

  Returns True on success, False when there was no installer to unpack or
  the archive tool exited with an error.
  """
  if installer_filename is None:
    return False

  #integrationtestlib.log('Trying to decompress ' + installer_filename)

  # Extract inside a scratch directory so files from different runs and
  # platforms cannot collide.
  scratch_dir = tempfile.mkdtemp(dir=TEMP_DIR)
  os.chdir(scratch_dir)

  try:
    if os_name in ZIP_PLATFORMS:
      subprocess.check_call(['unzip', '-q', installer_filename])
    elif os_name in TGZ_PLATFORMS:
      subprocess.check_call(['tar', 'zxf', installer_filename])
  except subprocess.CalledProcessError:
    integrationtestlib.log('Installer ' + installer_filename + ' could not be decompressed.')
    return False

  return True
def decompress_installer(installer_filename, os_name):
  """
  Decompress the downloaded installer in a fresh temporary directory under
  TEMP_DIR. Returns True on success, False when there is no installer or
  the archive tool exits non-zero.
  """
  if installer_filename is None:
    return False

  # integrationtestlib.log('Trying to decompress ' + installer_filename)

  # Extract into a scratch directory so files from different platforms
  # don't collide; note this changes the process working directory.
  build_dir = tempfile.mkdtemp(dir=TEMP_DIR)
  os.chdir(build_dir)

  try:
    if os_name in ZIP_PLATFORMS:
      subprocess.check_call(["unzip", "-q", installer_filename])
    elif os_name in TGZ_PLATFORMS:
      subprocess.check_call(["tar", "zxf", installer_filename])
  except subprocess.CalledProcessError:
    integrationtestlib.log("Installer " + installer_filename + " could not be decompressed.")
    return False

  return True
def test_valid_input(test_function, return_type, function_name):
  """
  Run test_function and confirm it returns an instance of return_type.

  Returns (True, result) on success; (False, None) when the call raises or
  the returned object has the wrong type, logging an error in either case.
  """
  integrationtestlib.log(
      ('Verifying that the \'' + function_name +
       '\' function returns object of ' +
       'type \'' + return_type.__name__ + '\'...'))

  results = None
  try:
    results = test_function()
  except:
    log_error(function_name, 'Failed against valid input.')
    return False, None

  # Success path first; the type-mismatch report follows as a guard.
  if isinstance(results, return_type):
    return True, results

  log_error(function_name,
      ('Returned object of type \'' + type(results).__name__ +
       '\' rather than expected type \'' + return_type.__name__) + '\'.')
  return False, None
def build_installers():
  """
  Ask the Custom Installer Builder (via the XMLRPC_PROXY global) to build
  installers for a small two-vessel configuration.

  Returns a (success, build_results) tuple: (True, <result dict from the
  XML-RPC call>) on success, (False, None) when the remote build failed.
  """
  # Fix: removed the unused local 'return_dict' that was assigned and never
  # read.

  # Build data for complex installers.
  vessels = [
      {"percentage": 60, "owner": "alex", "users": ["bob"]},
      {"percentage": 20, "owner": "carl", "users": ["darren"]},
  ]
  user_data = {"alex": {"public_key": "12345 54321"}, "darren": {"public_key": "67890 09876"}}

  # integrationtestlib.log('Building installer for operating system \'' + os_name + '\'')

  try:
    build_results = XMLRPC_PROXY.build_installers(vessels, user_data)
  except:
    integrationtestlib.log("Failed to build installer")
    return False, None

  return True, build_results
def main():
  """
  Entry point: create the shared XML-RPC proxy, set up mail notification,
  then run the installer tests and report the outcome.
  """
  # Make the XML-RPC proxy accessible across the whole program.
  global XMLRPC_PROXY

  # Each test can reuse this proxy.
  XMLRPC_PROXY = xmlrpclib.ServerProxy(XMLRPC_PROXY_URL)

  # Setup the integration test library.
  success, explanation = send_gmail.init_gmail()
  if not success:
    integrationtestlib.log('Failed to execute init_gmail(): ' + explanation)
    sys.exit(1)

  # Add any extra error log recipients.
  integrationtestlib.notify_list.extend(NOTIFY_LIST)

  # The main event! Fix: previously the results of run_tests() were
  # discarded and report_results() was called without its required
  # argument, which raised a TypeError before anything was reported.
  report_results(run_tests())
def main(): """ <Purpose> Call check_nodes and then notify developers if result is unusual. <Exceptions> None <Side Effects> May send out a notification email. <Return> None """ # Setup the gmail lib for sending notification success, explanation_str = send_gmail.init_gmail() if not success: integrationtestlib.log(explanation_str) sys.exit(0) notification_subject = "test_lookup_node_states() failed" results = check_nodes() integrationtestlib.log("Lookup results: " + str(results)) # Check to see if any of the results is not normal, and # send notifications accordingly. message = "Too many nodes in state " if results['acceptdonation'] > max_acceptdonation_nodes: message += "acceptdonation: " + str(results['acceptdonation']) elif results['canonical'] > max_canonical_nodes: message += "canonical: " + str(results['canonical']) if results['twopercent'] < min_twopercent_nodes: message = "Too few nodes in state twopercent: " + str( results['twopercent']) message += "\nLookup results:\n" + str(results) print message integrationtestlib.notify(message, notification_subject)
def main(): # initialize the gmail module success,explanation_str = send_gmail.init_gmail() if not success: integrationtestlib.log(explanation_str) sys.exit(0) notify_str = '' integrationtestlib.log("Starting test_time_tcp.py...") # Whenever we fail to do tcp_time_updatetime, we add the exception string exception_string_list = [] # get the time 5 times and make sure they are reasonably close test_start = getruntime() times = [] # Keep a list of servers we connected to or tried to connect to for time. server_list = [] # Connect to 5 servers and retrieve the time. for i in range (5): try: connected_server = tcp_time_updatetime(12345) current_time = time_gettime() times.append(current_time) server_list.append(connected_server) integrationtestlib.log("Calling time_gettime(). Retrieved time: " + str(current_time)) except Exception,e: exception_string_list.append({'Server' : connected_server, 'Exception' : str(e), 'Traceback' : str(traceback.format_exc())}) pass
def monitor_processes(monitor_process_list, command_list, machine_name): """ <Purpose> Checks to make sure that the critical processes on the machine 'seattle' are still running <Exceptions> None <Arguments> monitor_process_list - a list of all the critical processes that should be checked to see if they are up and running. command_list - a list of all the commands required to find all the relevant processes <Return> None """ #string that holds the name of all the processes that are found to be running using the #ps commands that was passed in as argument processes_string="" integrationtestlib.log("Starting monitoring process on "+machine_name) #run a command on the linux machine to find all the relevant processes for command in command_list: try: relevant_processes, command_error = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE).communicate() except: integrationtestlib.handle_exception("Failed to run command: "+command) sys.exit(1) #make a string of all the processes processes_string = processes_string+relevant_processes print processes_string #keeps track to see if any processes are down critical_process_down=False error_message="WARNING: Critical processes down! Seattle developers please start the processes up as soon as possible\n" error_message=error_message+"Listing processes that are down:\n" #goes through the list of monitor_process_list to ensure that all processes are running for critical_process in monitor_process_list: integrationtestlib.log("Checking process: "+critical_process+".......") if not critical_process in processes_string: critical_process_down=True error_message = error_message+critical_process+" is down on "+machine_name+".cs.washington.edu\n" print "FAIL" else: print "PASS" error_message=error_message+"end of list of processes that are down.\n................................" 
if critical_process_down: integrationtestlib.notify(error_message) irc_seattlebot.send_msg(error_message) else: integrationtestlib.log("All critical processes on "+machine_name+" are up and running") print(".........................................................")
def monitor_processes(monitor_process_list, command_list, machine_name):
  """
  <Purpose>
    Checks to make sure that the critical processes on the machine 'seattle'
    are still running

  <Exceptions>
    None

  <Arguments>
    monitor_process_list - a list of all the critical processes that should
      be checked to see if they are up and running.
    command_list - a list of all the commands required to find all the
      relevant processes
    machine_name - host name (without domain) used in log and error messages.

  <Return>
    None
  """
  #string that holds the name of all the processes that are found to be running using the
  #ps commands that was passed in as argument
  processes_string=""

  integrationtestlib.log("Starting monitoring process on "+machine_name)

  #run a command on the linux machine to find all the relevant processes
  for command in command_list:
    try:
      relevant_processes, command_error = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE).communicate()
    except:
      integrationtestlib.handle_exception("Failed to run command: "+command)
      sys.exit(1)

    #make a string of all the processes
    processes_string = processes_string+relevant_processes

  print processes_string

  #keeps track to see if any processes are down
  critical_process_down=False
  error_message="WARNING: Critical processes down! Seattle developers please start the processes up as soon as possible\n"
  error_message=error_message+"Listing processes that are down:\n"

  #goes through the list of monitor_process_list to ensure that all processes are running
  # (membership test is a substring match against the combined ps output)
  for critical_process in monitor_process_list:
    integrationtestlib.log("Checking process: "+critical_process+".......")
    if not critical_process in processes_string:
      critical_process_down=True
      error_message = error_message+critical_process+" is down on "+machine_name+".poly.edu\n"
      print "FAIL"
    else:
      print "PASS"

  error_message=error_message+"end of list of processes that are down.\n................................"

  if critical_process_down:
    # Alert by email and on the IRC channel when anything is down.
    integrationtestlib.notify(error_message, "Critical process down!")
    irc_seattlebot.send_msg(error_message)
  else:
    integrationtestlib.log("All critical processes on "+machine_name+" are up and running")

  print(".........................................................")
def download_and_install():
  """
  <Purpose>
    Downloads and installs Seattle

  <Arguments>
    None.

  <Exceptions>
    None.

  <Side Effects>
    Downloads a .tgz file. Unpacks it and installs Seattle (this modifies
    the user's crontab).

  <Returns>
    None.
  """
  # Remove any stale copy of the installer before downloading a fresh one.
  # Fix: the existence check and the remove previously used different
  # filenames ("seattle_linux.tar.gz" vs "seattle_linux.tgz"), so the stale
  # tarball that is unpacked below was never the one deleted. Both now use
  # the name the tar command expects.
  if (os.path.isfile(prefix + "/seattle_linux.tar.gz")):
    os.remove(prefix + "/seattle_linux.tar.gz")

  integrationtestlib.log("downloading distro for seattle_install_tester...")
  os.system("wget --no-check-certificate " + seattle_linux_url)

  integrationtestlib.log("unpacking...")
  os.system("tar -xzvf " + prefix + "/seattle_linux.tar.gz")

  integrationtestlib.log("installing...")
  os.system("cd " + prefix + "/seattle/ && ./install.sh")

  return
def check_opendht_servers():
  """
  <Purpose>
    Checks to see how many servers are up for the opendht advertisement.
    If the number of servers is below min_num_servers then an email
    notification is sent to the seattle developers.
  <Argument>
    None
  <Exception>
    None
  <Side_Effects>
    None
  <Returns>
    None
  """
  notify_message = "There aren't enough opendht servers up and running currently."
  subject = "opendht_servercount test failed."

  try:
    # Retrieve the list of servers for opendht
    opendht_server_list = openDHTadvertise_get_proxy_list(maxnumberofattempts = 150)
  except:
    # An exception is raised if there are no servers up and running for opendht by
    # openDHTadvertise_get_proxy_list().
    integrationtestlib.handle_exception("There are no servers up for opendht!", subject)
    # Fix: without this return, execution fell through and the unbound
    # opendht_server_list raised a NameError below. (Matches the pattern
    # used by the nat-forwarder test, which returns after handle_exception.)
    return

  integrationtestlib.log("Retrieved the list of opendht servers up and running.")
  integrationtestlib.log("There are " + str(len(opendht_server_list)) + " servers up and running for opendht advertisement.")

  # Check to see if there are fewer than min_num_servers servers up and
  # running. If so, notify the seattle developers about it.
  if len(opendht_server_list) < min_num_servers:
    subject += " There are only " + str(len(opendht_server_list)) + " servers up and running for opendht advertisement."
    integrationtestlib.notify(notify_message, subject)
def download_and_install():
  """
  <Purpose>
    Downloads and installs Seattle

  <Arguments>
    None.

  <Exceptions>
    None.

  <Side Effects>
    Downloads a .tgz file. Unpacks it and installs Seattle (this modifies
    the user's crontab).

  <Returns>
    None.
  """
  tarball_path = prefix + "/seattle_linux.tgz"

  # Throw away any stale tarball left over from an earlier run.
  if os.path.isfile(tarball_path):
    os.remove(tarball_path)

  integrationtestlib.log("downloading distro for seattle_install_tester...")
  os.system("wget --no-check-certificate " + seattle_linux_url)

  integrationtestlib.log("unpacking...")
  os.system("tar -xzvf " + tarball_path)

  integrationtestlib.log("installing...")
  os.system("cd " + prefix + "/seattle/ && ./install.sh")

  return
def main(): """ <Purpose> Program's main. <Arguments> None. <Exceptions> All exceptions are caught. <Side Effects> None. <Returns> None. """ # setup the gmail user/password to use when sending email success,explanation_str = send_gmail.init_gmail() if not success: integrationtestlib.log(explanation_str) sys.exit(0) key = random.randint(4,2**30) value = random.randint(4,2**30) ttlval = 60 # put(key,value) with ttlval into the Centralized HT integrationtestlib.log("calling centralizedadvertise_announce(key: " + str(key) + ", val: " + str(value) + ", ttl: " + str(ttlval) + ")") try: centralizedadvertise_announce(key,value,ttlval) except: integrationtestlib.handle_exception("centralizedadvertise_announce() failed") sys.exit(0) # a 30 second timer to email the notify_list on slow lookups lookup_timedout_timer = threading.Timer(30, lookup_timedout) # start the lookup timer lookup_timedout_timer.start() # get(key) from the centralized HT integrationtestlib.log("calling centralizedadvertise_lookup(key: " + str(key) + ")") try: ret_value = centralizedadvertise_lookup(key) print ret_value #Check if the value being returned is the one we want if value not in ret_value: raise Exception("incorrect value returned") #Check in case random.randint() produces same key again. elif len(ret_value) > 1 : raise Exception("Multiple copies of same key") except: integrationtestlib.handle_exception("centralizedadvertise_lookup() failed", "centralizedputget monitor script failure") sys.exit(0) lookup_timedout_timer.cancel() lookup_done_event.set() return
def main():
  """
  Query five TCP time servers, logging each retrieved time and recording
  any per-server failure instead of aborting the whole test.
  """
  # initialize the gmail module
  success, explanation_str = send_gmail.init_gmail()
  if not success:
    integrationtestlib.log(explanation_str)
    sys.exit(0)

  notify_str = ""
  # Initialized up front so the except block below can reference it even if
  # the very first connection attempt raises.
  connected_server = ""

  integrationtestlib.log("Starting test_time_tcp.py...")

  # Whenever we fail to do tcp_time_updatetime, we add the exception string
  exception_string_list = []

  # get the time 5 times and make sure they are reasonably close
  test_start = getruntime()
  print test_start
  times = []

  # Keep a list of servers we connected to or tried to connect to for time.
  server_list = []

  # Connect to 5 servers and retrieve the time.
  for i in range(5):
    try:
      connected_server = tcp_time_updatetime(12345)
      current_time = time_gettime()
      times.append(current_time)
      server_list.append(connected_server)
      integrationtestlib.log("Calling time_gettime(). Retrieved time: " + str(current_time))
    except Exception, e:
      exception_string_list.append(
          {"Server": connected_server, "Exception": str(e), "Traceback": str(traceback.format_exc())}
      )
      pass