예제 #1
0
def monitor_processes(monitor_process_list, command_list, machine_name):
  """
  <Purpose>
    Checks that every critical process is still running by searching the
    combined output of the given shell commands for each process name.

  <Exceptions>
    None. If one of the commands cannot be run, the failure is passed to
    integrationtestlib.handle_exception() and the program exits with
    status 1.

  <Arguments>
    monitor_process_list - a list of all the critical processes (strings)
      that should be checked to see if they are up and running. A process
      counts as running when its string occurs anywhere in the command
      output.

    command_list - a list of all the shell commands (strings) whose output
      lists the relevant processes.

    machine_name - short name of the machine being checked. It is used in
      the log messages, and ".cs.washington.edu" is appended to it in the
      failure report.

  <Side Effects>
    Prints the collected command output and one PASS/FAIL line per process.
    When at least one process is missing, the report is sent through
    integrationtestlib.notify() and irc_seattlebot.send_msg().

  <Return>
    None
  """

  integrationtestlib.log("Starting monitoring process on "+machine_name)

  # Collect the output of every command; the pieces are joined once below
  # instead of growing a string inside the loop.
  command_outputs = []
  for command in command_list:
    try:
      # stderr is not piped, so the second value returned by communicate()
      # carries nothing and is discarded.
      relevant_processes, _ = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE).communicate()
    except Exception:
      # Exception rather than a bare except, so KeyboardInterrupt and
      # SystemExit are not reported as a failed command.
      integrationtestlib.handle_exception("Failed to run command: "+command)
      sys.exit(1)
    command_outputs.append(relevant_processes)

  processes_string = "".join(command_outputs)
  print(processes_string)

  # names of the critical processes that were not found in the output
  missing_processes = []
  for critical_process in monitor_process_list:
    integrationtestlib.log("Checking process: "+critical_process+".......")
    if critical_process not in processes_string:
      missing_processes.append(critical_process)
      print("FAIL")
    else:
      print("PASS")

  if missing_processes:
    error_message = "WARNING: Critical processes down! Seattle developers please start the processes up as soon as possible\n"
    error_message += "Listing processes that are down:\n"
    for critical_process in missing_processes:
      error_message += critical_process+" is down on "+machine_name+".cs.washington.edu\n"
    error_message += "end of list of processes that are down.\n................................"

    # NOTE(review): notify() is called without a subject here -- confirm the
    # integrationtestlib version in use accepts a single argument.
    integrationtestlib.notify(error_message)
    irc_seattlebot.send_msg(error_message)
  else:
    integrationtestlib.log("All critical processes on "+machine_name+" are up and running")

  print(".........................................................")
예제 #2
0
def _gather_process_listing(command_list):
  """
  Runs every shell command in command_list and returns their standard
  output concatenated in order. If a command cannot be run, the failure is
  passed to integrationtestlib.handle_exception() and the program exits
  with status 1.
  """
  collected_output = []
  for command in command_list:
    try:
      # Only stdout is piped, so the second value from communicate() is unused.
      command_output, _ = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE).communicate()
    except Exception:
      # Catch Exception instead of everything so that KeyboardInterrupt and
      # SystemExit still propagate unchanged.
      integrationtestlib.handle_exception("Failed to run command: "+command)
      sys.exit(1)
    collected_output.append(command_output)
  return "".join(collected_output)


def monitor_processes(monitor_process_list, command_list, machine_name):
  """
  <Purpose>
    Checks that the critical processes are still running by looking for
    each process name in the output of the given shell commands.

  <Exceptions>
    None. A command that cannot be run makes the program exit with status 1.

  <Arguments>
    monitor_process_list - a list of all the critical processes (strings)
      that should be checked to see if they are up and running. A process
      is considered up when its string occurs anywhere in the output.

    command_list - a list of all the shell commands (strings) required to
      find all the relevant processes.

    machine_name - short name of the machine being checked; it appears in
      the log messages and, with ".poly.edu" appended, in the failure
      report.

  <Side Effects>
    Prints the collected command output and a PASS/FAIL line per process.
    If any process is missing, sends the report via
    integrationtestlib.notify() and irc_seattlebot.send_msg().

  <Return>
    None
  """

  integrationtestlib.log("Starting monitoring process on "+machine_name)

  # everything the commands reported, as one searchable string
  processes_string = _gather_process_listing(command_list)
  print(processes_string)

  # keeps track to see if any processes are down
  critical_process_down = False
  error_message = "WARNING: Critical processes down! Seattle developers please start the processes up as soon as possible\n"
  error_message += "Listing processes that are down:\n"

  # goes through monitor_process_list to ensure that all processes are running
  for critical_process in monitor_process_list:
    integrationtestlib.log("Checking process: "+critical_process+".......")
    if critical_process not in processes_string:
      critical_process_down = True
      error_message += critical_process+" is down on "+machine_name+".poly.edu\n"
      print("FAIL")
    else:
      print("PASS")
  error_message += "end of list of processes that are down.\n................................"

  if critical_process_down:
    integrationtestlib.notify(error_message, "Critical process down!")
    irc_seattlebot.send_msg(error_message)
  else:
    integrationtestlib.log("All critical processes on "+machine_name+" are up and running")

  print(".........................................................")
예제 #3
0
def main():
  """
  <Purpose>
    Reads the usage percentage of the first '/dev/sda' filesystem listed by
    'df -h' and sends a notification when it is above 90 percent.

  <Exceptions>
    None. Exits with status 0 when send_gmail.init_gmail() reports failure
    and with status 1 when the df output cannot be parsed.

  <Side Effects>
    Above 90 percent: sends a warning through integrationtestlib.notify().
    Above 95 percent: sends a critical message through
    integrationtestlib.notify() and irc_seattlebot.send_msg().

  <Returns>
    None.
  """

  success,explanation_str = send_gmail.init_gmail()

  if not success:
    integrationtestlib.log(explanation_str)
    sys.exit(0)

  command = "df -h | grep '/dev/sda' | awk '{print $5}'"
  command_output_fd = os.popen(command)
  try:
    command_output = command_output_fd.read()
  finally:
    # Close the pipe even if read() fails so the descriptor is not leaked.
    command_output_fd.close()

  # The first whitespace-separated token looks like "42%"; drop the % sign.
  # grep may match nothing (no /dev/sda device), which leaves no token at all.
  try:
    disk_use_percent = int(command_output.split()[0][:-1])
  except (IndexError, ValueError):
    integrationtestlib.log("Unable to determine disk usage from the output of '" + command + "': " + repr(command_output))
    sys.exit(1)

  hostname = socket.gethostname() + ".cs.washington.edu"
  subject = "High disk usage"

  if disk_use_percent > 95:
    message = "CRITICAL: Very High Disk Usage on %s: %s percent used" % ( hostname, disk_use_percent)
    integrationtestlib.notify(message, subject)
    irc_seattlebot.send_msg(message)

  elif disk_use_percent > 90:
    message = "WARNING: High disk usage on %s: %s percent used" % ( hostname, disk_use_percent)
    integrationtestlib.notify(message, subject)
예제 #4
0
def main():
  """
  <Purpose>
    Reads the usage percentage and the free space of the '/dev/sda'
    filesystems listed by 'df -h', reports them, and sends a notification
    when usage is above 90 percent.

  <Exceptions>
    None. Exits with status 0 when send_gmail.init_gmail() reports failure
    and with status 1 when the usage percentage cannot be parsed.

  <Side Effects>
    Prints the current usage and free space.
    Above 90 percent: logs a warning and sends it through
    integrationtestlib.notify().
    At 95 percent or more: logs a critical message and sends it through
    integrationtestlib.notify() and irc_seattlebot.send_msg().

  <Returns>
    None.
  """

  success,explanation_str = send_gmail.init_gmail()
  if not success:
    integrationtestlib.log(explanation_str)
    sys.exit(0)

  command = "df -h | grep '/dev/sda' | awk '{print $5}'"
  command_output_fd = os.popen(command)
  try:
    command_output = command_output_fd.read()
  finally:
    # Close the pipe before parsing so a parse failure cannot leak it.
    command_output_fd.close()

  # The first whitespace-separated token looks like "42%"; drop the % sign.
  # grep may match nothing (no /dev/sda device), which leaves no token at all.
  try:
    disk_use_percent = int(command_output.split()[0][:-1])
  except (IndexError, ValueError):
    integrationtestlib.log("Unable to determine disk usage from the output of '" + command + "': " + repr(command_output))
    sys.exit(1)

  disk_free_command = "df -h | grep '/dev/sda' | awk '{print $4}'"
  disk_free_fd = os.popen(disk_free_command)
  try:
    # NOTE(review): this keeps the raw output (one line per matching
    # filesystem, trailing newline included) while the percentage above uses
    # only the first match -- confirm that is intended.
    free_space = disk_free_fd.read()
  finally:
    disk_free_fd.close()

  hostname = socket.gethostname() + ".poly.edu"
  subject = "High disk usage"

  if disk_use_percent >= 95:
    message = "CRITICAL: Very High Disk Usage on %s: %s percent used.\n" % ( hostname, disk_use_percent)
    message += "Disk space free: %s" % free_space
    integrationtestlib.log(message)
    integrationtestlib.notify(message, subject)
    irc_seattlebot.send_msg(message)

  elif disk_use_percent > 90:
    message = "WARNING: High disk usage on %s: %s percent used.\n" % ( hostname, disk_use_percent)
    message += "Disk space free: %s" % free_space
    integrationtestlib.log(message)
    integrationtestlib.notify(message, subject)

  print("Current disk usage: %s percent" % disk_use_percent)
  print("Free disk space: %s" % free_space)
예제 #5
0
def _read_command_output(command):
    """Run command through os.popen and return everything it wrote to stdout.

    The pipe is closed even when reading fails.
    """
    output_fd = os.popen(command)
    try:
        return output_fd.read()
    finally:
        output_fd.close()


def main():
    """Report disk usage of '/dev/sda' and notify when it is too high.

    Exits with status 0 when send_gmail.init_gmail() reports failure and
    with status 1 when the usage percentage cannot be parsed from the
    'df -h' output.

    Usage above 90 percent is logged and sent through
    integrationtestlib.notify(); usage of 95 percent or more is additionally
    sent through irc_seattlebot.send_msg(). The current usage and free space
    are always printed at the end.
    """
    success, explanation_str = send_gmail.init_gmail()
    if not success:
        integrationtestlib.log(explanation_str)
        sys.exit(0)

    command = "df -h | grep '/dev/sda' | awk '{print $5}'"
    usage_output = _read_command_output(command)

    # The first whitespace-separated token looks like "42%"; drop the % sign.
    # grep may match nothing (no /dev/sda device), leaving no token at all.
    try:
        disk_use_percent = int(usage_output.split()[0][:-1])
    except (IndexError, ValueError):
        integrationtestlib.log(
            "Unable to determine disk usage from the output of '" + command +
            "': " + repr(usage_output))
        sys.exit(1)

    disk_free_command = "df -h | grep '/dev/sda' | awk '{print $4}'"
    # NOTE(review): the raw output is kept (one line per matching filesystem,
    # trailing newline included) while the percentage above uses only the
    # first match -- confirm that is intended.
    free_space = _read_command_output(disk_free_command)

    hostname = socket.gethostname() + ".poly.edu"
    subject = "High disk usage"

    if disk_use_percent >= 95:
        message = "CRITICAL: Very High Disk Usage on %s: %s percent used.\n" % (
            hostname, disk_use_percent)
        message += "Disk space free: %s" % free_space
        integrationtestlib.log(message)
        integrationtestlib.notify(message, subject)
        irc_seattlebot.send_msg(message)

    elif disk_use_percent > 90:
        message = "WARNING: High disk usage on %s: %s percent used.\n" % (
            hostname, disk_use_percent)
        message += "Disk space free: %s" % free_space
        integrationtestlib.log(message)
        integrationtestlib.notify(message, subject)

    print("Current disk usage: %s percent" % disk_use_percent)
    print("Free disk space: %s" % free_space)
예제 #6
0
def main():
    """
    <Purpose>
      Start one ping worker per entry of machine_list, wait for all of
      them, and report the hosts whose ping did not succeed.

    <Exceptions>
      none

    <Side Effects>
      Exits with status 0 when send_gmail.init_gmail() reports failure.
      Failed pings are sent through integrationtestlib.notify() and
      irc_seattlebot.send_msg(). When every ping succeeded and the first
      command line argument is '-m', a success message is sent to irc.
      Prints a completion line with the current time.

    <Returns>
      None.
    """

    # the gmail user/password must be set up before any mail can be sent
    gmail_ready, gmail_status = send_gmail.init_gmail()
    if not gmail_ready:
        integrationtestlib.log(gmail_status)
        sys.exit(0)

    integrationtestlib.log("pinging critical machines")

    # every worker appends its (success, text) pair to this shared list
    collected_results = []

    # one worker per machine, started as soon as it is created
    workers = []
    for machine in machine_list:
        worker = ping(str(machine), collected_results)
        workers.append(worker)
        worker.start()

    # wait until every worker has finished
    for worker in workers:
        worker.join()

    # keep only the result text of the pings that failed
    failure_lines = []
    for (ok, result_text) in collected_results:
        if not ok:
            failure_lines.append(result_text + "\n")

    if failure_lines:
        # at least one machine is down: tell the admins and the irc channel
        report = "WARNING: Seattle machines are down! Seattle developers please check on the machines.\n"
        report += "Displaying ping result:\n"
        for failure_line in failure_lines:
            report += failure_line
        integrationtestlib.notify(report, "Seattle machines down!")
        irc_seattlebot.send_msg(report)
    elif len(sys.argv) >= 2 and sys.argv[1] == '-m':
        # everything is up; only announce it when option -m was given
        irc_seattlebot.send_msg("The machines: " + str(machine_list) +
                                " were pinged successfully")

    print(time.ctime() + " : Done pinging all machiens.")
    print("--------------------------------------------")
예제 #7
0
def main():
  """
  <Purpose>
    Pings every machine in machine_list, each in its own ping worker, and
    reports the machines that could not be reached.

  <Exceptions>
    none

  <Side Effects>
    Exits with status 0 if send_gmail.init_gmail() fails.
    On any failed ping the collected failure text is passed to
    integrationtestlib.notify() and irc_seattlebot.send_msg(). If all pings
    succeed and the script was started with -m as first argument, a success
    message is sent to irc instead.
    Prints a timestamped completion line.

  <Returns>
    None.
  """

  # gmail credentials are needed for the notification mails
  init_ok, init_message = send_gmail.init_gmail()
  if not init_ok:
    integrationtestlib.log(init_message)
    sys.exit(0)

  integrationtestlib.log("pinging critical machines")

  # shared list the ping workers put their (success, result) pairs into
  result_queue = []

  # the started workers, kept so they can be joined afterwards
  running_pings = []
  for host in machine_list:
    host_ping = ping(str(host), result_queue)
    running_pings.append(host_ping)
    host_ping.start()

  for host_ping in running_pings:
    host_ping.join()

  # the message is built up front; it is only sent when a ping failed
  machines_down = False
  error_message = (
    "WARNING: Seattle machines are down! Seattle developers please check on the machines.\n"
    "Displaying ping result:\n"
  )

  for (reachable, ping_output) in result_queue:
    if reachable:
      continue
    machines_down = True
    error_message = error_message + ping_output + "\n"

  if machines_down:
    # notify the admins and send the same text to the irc channel
    integrationtestlib.notify(error_message, "Seattle machines down!")
    irc_seattlebot.send_msg(error_message)
  else:
    # success is only announced on irc when option -m was used
    wants_success_message = len(sys.argv) >= 2 and sys.argv[1] == '-m'
    if wants_success_message:
      irc_seattlebot.send_msg("The machines: "+str(machine_list)+" were pinged successfully")

  print(time.ctime() + " : Done pinging all machiens.")
  print("--------------------------------------------")