def monitor_processes(monitor_process_list, command_list, machine_name):
    """
    <Purpose>
      Checks that every critical process expected on the machine
      'machine_name' appears in the output of the given shell commands.
      A PASS/FAIL line is printed per process, and if any process is
      missing a notification is sent through integrationtestlib.notify()
      and irc_seattlebot.send_msg().

    <Arguments>
      monitor_process_list - a list of strings, one per critical process.
                             A process counts as running when its string
                             occurs anywhere in the combined command output
                             (plain substring match).
      command_list - a list of shell command strings (run with shell=True)
                     whose standard output lists the relevant processes.
      machine_name - short name of the machine being checked; used in the
                     log and notification messages.

    <Exceptions>
      SystemExit (status 1) is raised through sys.exit() if one of the
      commands cannot be run.

    <Side Effects>
      Runs every command in command_list in a shell, prints the combined
      output and the per-process results, and logs/notifies as described
      above.

    <Return>
      None
    """

    integrationtestlib.log("Starting monitoring process on "+machine_name)

    # Collect the standard output of every command; joined together it is
    # the text that gets searched for the critical process names.
    command_outputs = []
    for command in command_list:
        try:
            # Only stdout is piped, so the second value returned by
            # communicate() is always None and is not kept.
            process = subprocess.Popen(command, shell=True,
                                       stdout=subprocess.PIPE)
            command_outputs.append(process.communicate()[0])
        except Exception:
            # 'except Exception' instead of a bare except so that
            # KeyboardInterrupt/SystemExit are not reported as a failed
            # command.
            integrationtestlib.handle_exception("Failed to run command: "+command)
            sys.exit(1)

    processes_string = "".join(command_outputs)
    print(processes_string)

    error_message = "WARNING: Critical processes down! Seattle developers please start the processes up as soon as possible\n"
    error_message += "Listing processes that are down:\n"

    # keeps track of whether any process was found to be down
    critical_process_down = False

    for critical_process in monitor_process_list:
        integrationtestlib.log("Checking process: "+critical_process+".......")
        if critical_process not in processes_string:
            critical_process_down = True
            error_message += critical_process+" is down on "+machine_name+".cs.washington.edu\n"
            print("FAIL")
        else:
            print("PASS")

    error_message += "end of list of processes that are down.\n................................"

    if critical_process_down:
        integrationtestlib.notify(error_message)
        irc_seattlebot.send_msg(error_message)
    else:
        integrationtestlib.log("All critical processes on "+machine_name+" are up and running")

    print(".........................................................")
def monitor_processes(monitor_process_list, command_list, machine_name):
    """
    <Purpose>
      Checks that every critical process expected on the machine
      'machine_name' appears in the output of the given shell commands.
      A PASS/FAIL line is printed per process, and if any process is
      missing a notification with the subject "Critical process down!" is
      sent through integrationtestlib.notify(), and the same text is sent
      through irc_seattlebot.send_msg().

    <Arguments>
      monitor_process_list - a list of strings, one per critical process.
                             A process counts as running when its string
                             occurs anywhere in the combined command output
                             (plain substring match).
      command_list - a list of shell command strings (run with shell=True)
                     whose standard output lists the relevant processes.
      machine_name - short name of the machine being checked; used in the
                     log and notification messages.

    <Exceptions>
      SystemExit (status 1) is raised through sys.exit() if one of the
      commands cannot be run.

    <Side Effects>
      Runs every command in command_list in a shell, prints the combined
      output and the per-process results, and logs/notifies as described
      above.

    <Return>
      None
    """

    integrationtestlib.log("Starting monitoring process on "+machine_name)

    # Gather the standard output of each command; the pieces are joined
    # once at the end instead of growing a string inside the loop.
    output_chunks = []
    for command in command_list:
        try:
            # stderr is not piped, so only the stdout half of the
            # communicate() result carries data.
            finder = subprocess.Popen(command, shell=True,
                                      stdout=subprocess.PIPE)
            output_chunks.append(finder.communicate()[0])
        except Exception:
            # Narrower than a bare except: an interrupt or an exit request
            # must not be logged as "Failed to run command".
            integrationtestlib.handle_exception("Failed to run command: "+command)
            sys.exit(1)

    processes_string = "".join(output_chunks)
    print(processes_string)

    error_message = "WARNING: Critical processes down! Seattle developers please start the processes up as soon as possible\n"
    error_message += "Listing processes that are down:\n"

    # becomes True as soon as one critical process is missing
    critical_process_down = False

    for critical_process in monitor_process_list:
        integrationtestlib.log("Checking process: "+critical_process+".......")
        if critical_process not in processes_string:
            critical_process_down = True
            error_message += critical_process+" is down on "+machine_name+".poly.edu\n"
            print("FAIL")
        else:
            print("PASS")

    error_message += "end of list of processes that are down.\n................................"

    if critical_process_down:
        integrationtestlib.notify(error_message, "Critical process down!")
        irc_seattlebot.send_msg(error_message)
    else:
        integrationtestlib.log("All critical processes on "+machine_name+" are up and running")

    print(".........................................................")
def main():
    """
    <Purpose>
      Reads the usage percentage of the first '/dev/sda' entry reported by
      'df -h' and sends a notification when the disk is nearly full.

    <Arguments>
      None

    <Exceptions>
      SystemExit with status 0 if send_gmail.init_gmail() reports failure,
      and with status 1 if the df command produced no output to parse.
      ValueError may propagate from int() if the usage field is not a
      number followed by one trailing character.

    <Side Effects>
      Runs a shell pipeline through os.popen().
      Above 95 percent: sends a CRITICAL message through
      integrationtestlib.notify() and irc_seattlebot.send_msg().
      Above 90 percent: sends a WARNING message through
      integrationtestlib.notify() only.

    <Return>
      None
    """

    success, explanation_str = send_gmail.init_gmail()
    if not success:
        integrationtestlib.log(explanation_str)
        sys.exit(0)

    command = "df -h | grep '/dev/sda' | awk '{print $5}'"
    command_output_fd = os.popen(command)
    try:
        command_output = command_output_fd.read()
    finally:
        # Always release the pipe, even if reading from it fails.
        command_output_fd.close()

    # grep prints nothing when no '/dev/sda' entry exists; report that
    # explicitly instead of failing with an IndexError below.
    usage_fields = command_output.split()
    if not usage_fields:
        integrationtestlib.log("Unable to determine disk usage, no output from command: " + command)
        sys.exit(1)

    # Only the first reported entry is inspected; strip its trailing % sign.
    disk_use_percent = int(usage_fields[0][:-1])

    hostname = socket.gethostname() + ".cs.washington.edu"
    subject = "High disk usage"

    if disk_use_percent > 95:
        message = "CRITICAL: Very High Disk Usage on %s: %s percent used" % (
            hostname, disk_use_percent)
        integrationtestlib.notify(message, subject)
        irc_seattlebot.send_msg(message)
    elif disk_use_percent > 90:
        message = "WARNING: High disk usage on %s: %s percent used" % (
            hostname, disk_use_percent)
        integrationtestlib.notify(message, subject)
def main():
    """
    <Purpose>
      Reads the usage percentage and the free space of the '/dev/sda'
      entries reported by 'df -h', prints them, and sends a notification
      when the disk is nearly full.

    <Arguments>
      None

    <Exceptions>
      SystemExit with status 0 if send_gmail.init_gmail() reports failure,
      and with status 1 if the df usage command produced no output to
      parse. ValueError may propagate from int() if the usage field is not
      a number followed by one trailing character.

    <Side Effects>
      Runs two shell pipelines through os.popen().
      At 95 percent or more: logs a CRITICAL message and sends it through
      integrationtestlib.notify() and irc_seattlebot.send_msg().
      Above 90 percent: logs a WARNING message and sends it through
      integrationtestlib.notify() only.
      Always prints the current usage and free space.

    <Return>
      None
    """

    success, explanation_str = send_gmail.init_gmail()
    #integrationtestlib.notify_list=['*****@*****.**']
    if not success:
        integrationtestlib.log(explanation_str)
        sys.exit(0)

    command = "df -h | grep '/dev/sda' | awk '{print $5}'"
    command_output_fd = os.popen(command)
    try:
        usage_output = command_output_fd.read()
    finally:
        # Close the pipe even if reading from it fails.
        command_output_fd.close()

    # grep prints nothing when no '/dev/sda' entry exists; report that
    # explicitly instead of failing with an IndexError below.
    usage_fields = usage_output.split()
    if not usage_fields:
        integrationtestlib.log("Unable to determine disk usage, no output from command: " + command)
        sys.exit(1)

    # Only the first reported entry is inspected; strip its trailing % sign.
    disk_use_percent = int(usage_fields[0][:-1])

    disk_free_command = "df -h | grep '/dev/sda' | awk '{print $4}'"
    disk_free_fd = os.popen(disk_free_command)
    try:
        # Kept as the raw, unstripped command output because it is
        # inserted into the messages below exactly as read.
        free_space = disk_free_fd.read()
    finally:
        disk_free_fd.close()

    hostname = socket.gethostname() + ".poly.edu"
    subject = "High disk usage"

    if disk_use_percent >= 95:
        message = "CRITICAL: Very High Disk Usage on %s: %s percent used.\n" % (
            hostname, disk_use_percent)
        message += "Disk space free: %s" % free_space
        integrationtestlib.log(message)
        integrationtestlib.notify(message, subject)
        irc_seattlebot.send_msg(message)
    elif disk_use_percent > 90:
        message = "WARNING: High disk usage on %s: %s percent used.\n" % (
            hostname, disk_use_percent)
        message += "Disk space free: %s" % free_space
        integrationtestlib.log(message)
        integrationtestlib.notify(message, subject)

    print("Current disk usage: %s percent" % disk_use_percent)
    print("Free disk space: %s" % free_space)
def main():
    """
    <Purpose>
      Checks how full the '/dev/sda' disk is according to 'df -h', prints
      the usage percentage and the free space, and alerts the developers
      when usage crosses the warning or critical threshold.

    <Arguments>
      None

    <Exceptions>
      SystemExit with status 0 if send_gmail.init_gmail() reports failure,
      and with status 1 if the df usage command produced no output to
      parse. ValueError may propagate from int() if the usage field is not
      a number followed by one trailing character.

    <Side Effects>
      Runs two shell pipelines through os.popen().
      Usage >= 95 percent: the CRITICAL message is logged, passed to
      integrationtestlib.notify() and sent with irc_seattlebot.send_msg().
      Usage > 90 percent: the WARNING message is logged and passed to
      integrationtestlib.notify().
      The current usage and free space are printed in every case.

    <Return>
      None
    """

    success, explanation_str = send_gmail.init_gmail()
    #integrationtestlib.notify_list=['*****@*****.**']
    if not success:
        integrationtestlib.log(explanation_str)
        sys.exit(0)

    command = "df -h | grep '/dev/sda' | awk '{print $5}'"
    command_output_fd = os.popen(command)
    try:
        percent_output = command_output_fd.read()
    finally:
        # The pipe is released whether or not the read succeeded.
        command_output_fd.close()

    percent_tokens = percent_output.split()
    if not percent_tokens:
        # No '/dev/sda' line matched, so there is nothing to index into;
        # log the problem rather than dying with an IndexError.
        integrationtestlib.log("Unable to determine disk usage, no output from command: " + command)
        sys.exit(1)

    # First token only, with the trailing % sign removed.
    disk_use_percent = int(percent_tokens[0][:-1])

    disk_free_command = "df -h | grep '/dev/sda' | awk '{print $4}'"
    disk_free_fd = os.popen(disk_free_command)
    try:
        # Used verbatim (not stripped) in the messages and the final print.
        free_space = disk_free_fd.read()
    finally:
        disk_free_fd.close()

    hostname = socket.gethostname() + ".poly.edu"
    subject = "High disk usage"

    if disk_use_percent >= 95:
        message = "CRITICAL: Very High Disk Usage on %s: %s percent used.\n" % (
            hostname, disk_use_percent)
        message += "Disk space free: %s" % free_space
        integrationtestlib.log(message)
        integrationtestlib.notify(message, subject)
        irc_seattlebot.send_msg(message)
    elif disk_use_percent > 90:
        message = "WARNING: High disk usage on %s: %s percent used.\n" % (
            hostname, disk_use_percent)
        message += "Disk space free: %s" % free_space
        integrationtestlib.log(message)
        integrationtestlib.notify(message, subject)

    print("Current disk usage: %s percent" % disk_use_percent)
    print("Free disk space: %s" % free_space)
def main():
    """
    <Purpose>
      Pings every host in machine_list, using one ping thread per host,
      and reports the hosts whose ping did not succeed.

    <Exceptions>
      None. (The script exits with status 0 through sys.exit() when
      send_gmail.init_gmail() reports failure.)

    <Side Effects>
      Starts and joins one thread per host.
      If any ping failed, the collected failure text is sent through
      integrationtestlib.notify() and irc_seattlebot.send_msg().
      If every ping succeeded and the first command line argument is
      '-m', a success message is sent through irc_seattlebot.send_msg().
      Prints a timestamped completion line and a separator.

    <Returns>
      None.
    """

    # the gmail user/password must be set up before any email can be sent
    gmail_ready, gmail_explanation = send_gmail.init_gmail()
    if not gmail_ready:
        integrationtestlib.log(gmail_explanation)
        sys.exit(0)

    integrationtestlib.log("pinging critical machines")

    # every ping thread appends its (success, result text) pair here
    ping_results = []

    # start one thread per machine, remembering each so it can be joined
    ping_threads = []
    for host in machine_list:
        worker = ping(str(host), ping_results)
        ping_threads.append(worker)
        worker.start()

    # wait until every ping has finished
    for worker in ping_threads:
        worker.join()

    # keep only the result text of the pings that did not succeed
    failed_results = [result for (ok, result) in ping_results if not ok]

    if failed_results:
        # at least one machine is down: tell the admins and the irc channel
        error_message = "WARNING: Seattle machines are down! Seattle developers please check on the machines.\n"
        error_message += "Displaying ping result:\n"
        error_message += "".join(result + "\n" for result in failed_results)
        integrationtestlib.notify(error_message, "Seattle machines down!")
        irc_seattlebot.send_msg(error_message)
    elif len(sys.argv) >= 2 and sys.argv[1] == '-m':
        # success is only announced on irc when option -m was given
        irc_seattlebot.send_msg("The machines: " + str(machine_list) + " were pinged successfully")

    print(time.ctime() + " : Done pinging all machiens.")
    print("--------------------------------------------")
def main():
    """
    <Purpose>
      Pings all the machines in machine_list concurrently and raises an
      alert listing the ping results of the machines that failed.

    <Exceptions>
      None. (sys.exit(0) is called when send_gmail.init_gmail() reports
      failure.)

    <Side Effects>
      One ping thread per machine is started and then joined.
      On any failure the alert text goes to integrationtestlib.notify()
      and to irc_seattlebot.send_msg().
      On full success a message goes to irc_seattlebot.send_msg() only
      when the script was run with '-m' as its first argument.
      A timestamped "done" line and a separator line are printed.

    <Returns>
      None.
    """

    # email cannot be sent unless the gmail user/password setup succeeds
    setup_ok, setup_explanation = send_gmail.init_gmail()
    if not setup_ok:
        integrationtestlib.log(setup_explanation)
        sys.exit(0)

    integrationtestlib.log("pinging critical machines")

    # shared list the ping threads write their (success, text) results to
    results = []
    # threads that have been started and still need to be joined
    started_threads = []

    for machine in machine_list:
        pinger = ping(str(machine), results)
        started_threads.append(pinger)
        pinger.start()

    for pinger in started_threads:
        pinger.join()

    # one line per failed ping, each terminated by a newline
    failure_lines = []
    for (ping_ok, ping_text) in results:
        if ping_ok:
            continue
        failure_lines.append(ping_text+"\n")

    all_machines_running = len(failure_lines) == 0

    if not all_machines_running:
        # some machine did not answer: notify the admins and the irc
        report = "WARNING: Seattle machines are down! Seattle developers please check on the machines.\n"
        report = report + "Displaying ping result:\n" + "".join(failure_lines)
        integrationtestlib.notify(report, "Seattle machines down!")
        irc_seattlebot.send_msg(report)
    else:
        # everything answered; only mention it on irc if -m was requested
        wants_success_message = len(sys.argv) >= 2 and sys.argv[1] == '-m'
        if wants_success_message:
            irc_seattlebot.send_msg("The machines: "+str(machine_list)+" were pinged successfully")

    print(time.ctime() + " : Done pinging all machiens.")
    print("--------------------------------------------")