def server_monitor():
  """
  <Purpose>
    This method runs on its own thread and is started from main().  It checks
    whether the webserver is running and restarts it if it is not.  The check
    repeats every 2 minutes.
     
  <Arguments>
    None.
    
  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    None. 
  """
  
  # check whether the server is running; this thread reschedules itself every 2 minutes
  if not webserver_is_running():
    # not running: restart it in a non-blocking way and send stdout/stderr to ~/webserver.log
    deploy_main.shellexec2('python deploy_server_final.py > ~/webserver.log 2>&1 < /dev/null&')
    #deploy_main.shellexec2('python deploy_server_final.py > /dev/null 2> /dev/null < /dev/null&')
    
  time.sleep(120)
  
  # let this thread die, and start a new one.
  thread.start_new_thread(server_monitor, ())
def check_ssh_agent():
  """
  <Purpose>
    Checks whether ssh-agent is running and starts it if it is not.
     
  <Arguments>
    None.
    
  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    None. 
  """
  # check whether ssh-agent is running for user nsr, and start it if not.
  # the script is intended to run as nsr, whose key has no passphrase.
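  # the pipeline below prints the command field ($8) of every process owned by
  # nsr whose ps line mentions ssh-agent, so any output containing 'ssh-agent'
  # means at least one agent is up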
  out, err, returncode = deploy_main.shellexec2("ps -ef | grep ssh-agent | awk '{ if ($1 == \"nsr\") print $8 }'")
  if out.find('ssh-agent') > -1:
    # good, at least one instance is running
    pass
  else:
    print "ssh-agent is not running"
    # not running.. let's boot it up
    deploy_main.shellexec2("eval `ssh-agent`; ssh-add ")
def get_uniq_machines(controller_file):
  """
  <Purpose>
    Find out how many machines we surveyed in total.  The relevant line in
    controller.log looks like:
    
    Jun 16 2009 01:56:07 | Setup:  Found 950 unique hosts to connect to.
    
  <Arguments>
    controller_file:
      path to the controller.log file
    
  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    A tuple (number of unique hosts as a string, human-readable summary string),
    or None if the count could not be parsed.
  """

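  # awk '/Found/ { print $8 }' pulls the host count (field 8, "950" in the
  # example above) out of the "Found ... unique hosts to connect to." line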
  out, err, retcode = deploy_main.shellexec2("awk '/Found/ { print $8 } ' "+controller_file)
  try:
    out = out.strip('\n\r ')
    return (str(int(out)), 'There were '+out+' unique hosts surveyed\n\n')
  except ValueError, ve:
    print 'Unexpected number of uniq hosts returned from shell.'
    print ve
def is_monitor_already_running():
  """
  <Purpose>
    Checks to see whether another monitor process (deploy_server_monitor.py) is already
    running.
     
  <Arguments>
    None.
    
  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    Boolean. True/False: is more than one monitor running?
  """
  
  # check to see whether another instance of this script is already running
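  # the pipeline prints 'nsr' once per matching deploy_server_monitor process;
  # `sort | uniq -c` collapses those lines, so the final awk leaves just the
  # number of monitor processes owned by nsr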
  out, err, retcode = deploy_main.shellexec2("ps -ef | grep deploy_server_monitor | grep -v grep "+\
    "| awk '{ if ($1 == \"nsr\") print $1 } ' | sort | uniq -c | awk ' { print $1 } '")
  if out:
    try:
      num_running = int(out)
      if num_running == 1:
        return False
      else:
        return True
    except Exception, e:
      # something went wrong..
      print 'Error in is_monitor_already_running'
      return True

  # no output from ps at all: assume no other monitor is running
  return False
def get_nodes_up(summary_file):
  """
  <Purpose>
    A cheap way of seeing how many nodes our tests actually ran on: sum up the
    "version" lines, of which there is one per host log.  This can be slightly
    inaccurate (off by a few nodes, e.g. if nodes upgraded mid-run).
    
  <Arguments>
    summary_file:
      path to the summary.log file (htmlsummary.log)
    
  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    Tuple of form (nodes_up, HumanString)
  """
  
  out, err, retcode = deploy_main.shellexec2('grep ^version '+summary_file+\
      ' | sort | uniq -c | awk \'{ print $1 }\'')
  # each line starts with a number, so convert to int and give it a try
  try:
    # this is how many computers are 'up'
    counter = 0
    for line in out.splitlines():
      counter += int(line)
  except ValueError, e:
    # ignore the occasional malformed line, we don't really care
    pass

  # return the tuple promised by the docstring (the wording of the summary
  # string is a best guess)
  return (counter, 'There were '+str(counter)+' nodes up\n\n')
def stop_deployment_scripts():
  """
  <Purpose>
    Stops all instances of the deployment scripts (deploy_main.py) if there 
    were multiple instances launched for some reason (although this should 
    never occur unless someone was launching them manually).
     
  <Arguments>
    None.
    
  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    None. 
  """
  while deploymentscript_is_running():
    deploy_main.shellexec2("ps -ef | grep deploy_main.py | grep -v grep | awk ' { print $2 } ' | xargs kill -9")
def stop_web_server():
  """
  <Purpose>
    Stops all instances of the webserver (in case, for some reason, there are
    multiple instances running).
     
  <Arguments>
    None.
    
  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    None. 
  """
  
  # if for some reason there are multiple processes running
  while webserver_is_running():
    deploy_main.shellexec2("ps -ef | grep deploy_server_final.py | grep -v grep | awk ' { print $2 } ' | xargs kill -9")
def script_monitor():
  """
  <Purpose>
    This method runs on its own thread.  It checks whether the scripts are
    done and, once they are, launches them again roughly every 90 mins.
    If the scripts are not done after 90 mins, the thread sleeps for 5 mins
    at a time before rechecking.
     
  <Arguments>
    None.
    
  <Exceptions>
    None.

  <Side Effects>
    See stop_ssh_scp().

  <Returns>
    None. 
  """
  
  # if the timeout is up, make sure that the last round of tests has finished
  while deploymentscript_is_running():
    # while it's still running, sleep 5 mins at a time until it's not done
    time.sleep(60 * 5)
  
  # kill all old, possibly hung ssh-processes
  # bug?: this'll close anyone's ssh-session who's connected as 
  # nsr@blackbox when scripts connect.
  stop_ssh_scp()
  #check_ssh_agent()
  # run the deployment scripts in a non-blocking way, discarding all output
  deploy_main.shellexec2('python deploy_main.py -c custom.py > /dev/null 2> /dev/null < /dev/null&')
  # sleep for 1.5 hrs; if the scripts aren't done by then, the next iteration
  # will stall 5 mins at a time
  time.sleep(60 * 90)
  
  thread.start_new_thread(script_monitor, ())
def deploymentscript_is_running():
  """
  <Purpose>
    Checks whether the deployment scripts (deploy_main.py) are running.
     
  <Arguments>
    None.
    
  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    Boolean. True/False: are the scripts running?
  """
  # True if running, false if not. checks via ps
  out, err, retcode = deploy_main.shellexec2('ps -ef | grep deploy_main.py | grep -v grep')
  # -1 if not running, otherwise it is
  return out.find('python deploy_main.py') > -1
def webserver_is_running():
  """
  <Purpose>
    Check to see if the webserver is running
     
  <Arguments>
    None.
    
  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    Boolean. True/False: is the webserver running?
  """
  # True if running, false if not. checks via ps
  out, err, retcode = deploy_main.shellexec2('ps -ef | grep deploy_server_final.py | grep -v grep')
  # if -1, then not running, otherwise it is
  return out.find('python deploy_server_final.py') > -1
def stop_ssh_scp():
  """
  <Purpose>
    Stops all possibly hung ssh/scp processes.
     
  <Arguments>
    None.
    
  <Exceptions>
    None.

  <Side Effects>
    Might close the user's ssh session.

  <Returns>
    None. 
  """

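  # each command below picks the PID column ($2) of ssh/scp processes owned by
  # nsr (matching the invocation patterns 'ssh -T', 'ssh -x' and 'scp -o') and
  # hands it to kill -9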
  deploy_main.shellexec2("ps -ef | grep 'ssh -T' | awk '{ if ($1 == \"nsr\") print $2 } ' | xargs kill -9")
  deploy_main.shellexec2("ps -ef | grep 'ssh -x' | awk '{ if ($1 == \"nsr\") print $2 } ' | xargs kill -9")
  deploy_main.shellexec2("ps -ef | grep 'scp -o' | awk '{ if ($1 == \"nsr\") print $2 } ' | xargs kill -9")
def build_summary():
  """
  <Purpose>
    This function collects all the important log files from the per-host
    subdirectories and writes them into a single summary file.

  <Arguments>
    None.

  <Exceptions>
    Error opening/creating the log file.

  <Side Effects>
    None.

  <Returns>
    None.
  """

  sep = '---------------------'
  uniq_fn, timestamp = deploy_html.generate_uniq_fn()
  
  # collect all log files into a summary file
  summary_fn = 'detailed.'+uniq_fn
  

  # directory structure is as follows (for the files we want)
  # ./deploy.logs/[remote_host]/deployrun.log
  # ./deploy.logs/[remote_host]/[remote host].deployrun.err.log
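  # the output goes under ./detailed_logs/: one combined 'detailed.<uniq_fn>'
  # summary file, plus a per-host file named after the timestamp in
  # ./detailed_logs/<remote_host>/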

  #try:
  # make sure that the dir exists
  if not os.path.isdir('./detailed_logs'):
    os.mkdir('./detailed_logs')
    
  summary_file_handle = open('./detailed_logs/'+summary_fn, 'w')
  
  # states map to #s
  node_states_counter = {}

  # number of states -> # of occurrences
  num_node_states = {}
  
  # has the following keys:
  # 'SU is running' -> how many computers have a software updater running
  # 'NM is running' -> how many computers have a node manager running
  # 'Only SU is running' -> how many computers have just the SU running
  # 'Only NM is running' -> how many computers have just the NM running
  # 'Both SU and NM are running' -> how many computers have both running
  # 'SU/NM are not running' -> how many computers have neither running
  su_nm_stats_header = ['SU/NM Info', 'Number of Nodes']
  su_nm_stats = {}
  su_nm_stats['SU is running'] = 0
  su_nm_stats['NM is running'] = 0
  su_nm_stats['Only SU is running'] = 0
  su_nm_stats['Only NM is running'] = 0
  su_nm_stats['SU/NM are not running'] = 0
  su_nm_stats['Both SU and NM are running'] = 0
  
  # will have version that map to # of currently installed
  node_version_dict = {}
  # This'll keep track of the # of not installed computers
  node_version_dict['Not Installed'] = 0
  # This'll keep track of the node ips/hostnames that are missing seattle
  node_version_dict['Not Installed Node Name'] = []
  
  # this dictionary will be used to build up our html page with all the node
  # information.  the keys are the nodenames; each maps to an array of values
  # that become the table row for that node.  we'll then use the deploy_html
  # lib to build the html tables and write them to the file.
  
  html_dict = {}
  
  # used as the headers for the table built up in html_dict
  html_dict_headers = ['Node Name', 'NodeManager Status', 
      'SoftwareUpdater Status', 'Node Version', 'Node Status', 'Details']
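  # the values appended to temp_array below follow the same order as these
  # headers (minus 'Node Name', which is the html_dict key); most entries are
  # (text, bgcolor) tuples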
  
  # the html FN that we'll be using
  
  # for every folder in the logs directory  
  for logfolder in os.listdir('./deploy.logs'):
    # each dir should have TWO files (at most); we only care about the
    # deployrun logs for our summary file
    # check that it's a directory.
    if os.path.isdir('./deploy.logs/'+logfolder):
      # it's a directory! good! 
      for logfile in os.listdir('./deploy.logs/'+logfolder):
        # now check each file until we find one named 'deployrun.log' (or the
        # per-host .deployrun.err.log)
        if os.path.isfile('./deploy.logs/'+logfolder+'/'+logfile):
          # It's a file.. is it the right name?
          errfn = logfolder+'.deployrun.err.log'
          
          if logfile == 'deployrun.log' or logfile == errfn:
            
            
            # Awesome it's the one we want!
            # the logfolder = the remote host (by ip or hostname)
            summary_file_handle.write('\nLog from '+logfolder)

            # make the HTML page. the logfolder is the nodename 
            #deploy_html.html_write('./deploy.logs/'+logfolder+'/'+logfile, logfolder, uniq_fn)
            
            logfile_name = './deploy.logs/'+logfolder+'/'+logfile
            logfile_handle = open(logfile_name, 'r')

            if not os.path.isdir('./detailed_logs/'+logfolder):
              os.mkdir('./detailed_logs/'+logfolder)
              
            detailed_handle = open('./detailed_logs/'+logfolder+'/'+timestamp, 'a')
            
            node_file_as_string = deploy_html.read_whole_file(logfile_handle)
            final_file_content = deploy_helper.summarize_all_blocks(node_file_as_string)
            
            # write to both the files
            summary_file_handle.write(final_file_content)
            detailed_handle.write(final_file_content)
            
                  
            # create a temp array that we'll use to build up the info, and 
            # then throw in to the html_dict
            temp_array = []            
            
            # now check if the node has seattle installed or not
            if deploy_stats.check_is_seattle_installed(node_file_as_string):
              
              # now we need the NM status
              NM_success_status, NM_desc_string, bgcolor  = deploy_stats.check_is_nm_running(node_file_as_string)
              if NM_success_status or NM_desc_string.lower().find('not') == -1:
                su_nm_stats['NM is running'] += 1

              temp_array.append((NM_desc_string, bgcolor))
              
              
              # next we need the SU status
              SU_success_status, SU_desc_string, bgcolor  = deploy_stats.check_is_su_running(node_file_as_string)
              # if it is running then increment the running counter by 1
              if SU_success_status or SU_desc_string.lower().find('not') == -1:
                su_nm_stats['SU is running'] += 1
              temp_array.append((SU_desc_string, bgcolor))
              
              # make sure to record the stats
              # checking for 'not' in the description is a hack for the
              # high-memory-usage case, which returns False
              if SU_desc_string.lower().find('not') == -1 or SU_success_status:
                if NM_desc_string.lower().find('not') == -1 or NM_success_status:
                  # su and nm are running
                  su_nm_stats['Both SU and NM are running'] += 1
                else:
                  # only su is running, nm is not
                  su_nm_stats['Only SU is running'] += 1
              else:
                if NM_desc_string.lower().find('not') == -1 or NM_success_status:
                  # only NM is running
                  su_nm_stats['Only NM is running'] += 1
                else:
                  # neither is running
                  su_nm_stats['SU/NM are not running'] += 1
              
              # now get the node version
              success_status, version_string, bgcolor = deploy_stats.get_node_version(node_file_as_string)
              temp_array.append((version_string, bgcolor))
              
              # keep track of how many of each version/output we have (including errors and upgrades)
              if version_string not in node_version_dict.keys():
                node_version_dict[version_string] = 1
              else:
                node_version_dict[version_string] += 1
              
              
              # and now the node state
              try:
                (success_status, (node_state_array, state_counter), html_color)  = deploy_stats.get_node_state(node_file_as_string)
              except Exception, e:
                (success_status, (node_state_array, state_counter), html_color) = (False, ([], 0), deploy_html.colors_map['Error'])
              
              
              
              # the following chunk of code keeps track of how many nodes have X states on them
              # has # of states | number
              if str(state_counter) in num_node_states.keys():
                # has the key, just get the value and increment by one
                num_node_states[str(state_counter)] = num_node_states[str(state_counter)] + 1
              else:
                # set it to one, and create the key
                num_node_states[str(state_counter)] = 1
              
              # this'll be the string we'll dump to the temp_array.
              
              node_state_success = ''
              for each_vessel in node_state_array:
                # tuple (SuccessState, vesselID, explanation_str)
                if each_vessel[0]:
                  # success!
                  node_state_success += str(each_vessel[2])+','
                  summary_file_handle.write('\nVessel state:\t'+str(each_vessel[1])+':'+str(each_vessel[2]))
                  detailed_handle.write('\nVessel state:\t'+str(each_vessel[1])+':'+str(each_vessel[2]))
                  
                  # This next chunk of code keeps track of what state each
                  # node's vessels are in and how many we have in each state
                  if str(each_vessel[2]) in node_states_counter.keys():
                    node_states_counter[str(each_vessel[2])] = node_states_counter[str(each_vessel[2])] + 1
                  else:
                    node_states_counter[str(each_vessel[2])] = 1
                  
                else:
                  summary_file_handle.write('\nVessel state:\t'+str(each_vessel[1])+':'+str(each_vessel[2]))
                  # don't write the detailed log if we fail.
                
                detailed_handle.write('\n')
                summary_file_handle.write('\n')
                
              if state_counter == 1:
                temp_array.append((node_state_success[0:-1], deploy_html.colors_map['Success']))
              else:
                if state_counter == 0:
                  if node_state_array:
                    # if the array isn't empty we have some msg to print, otherwise it's an error
                    temp_array.append((node_state_array[0], deploy_html.colors_map['Error']))
                  else:
                    temp_array.append(('Did not get vesseldict', deploy_html.colors_map['Error']))
                  # no keys on the node, print the human-friendly version (also could be an unknown key)
                  

                  #temp_array.append(('No node-state keys found', deploy_html.colors_map['Error']))
                  
                #else: # state_counter > 1:
                  #temp_array.append(('Multiple states on node!', deploy_html.colors_map['Error']))
              
              # end getting the node state here

            else: # no seattle installed!            
              temp_array = ['', '', '', ('Seattle is not installed', deploy_html.colors_map['Warning'])]
              node_version_dict['Not Installed'] = node_version_dict['Not Installed'] + 1
              # mark the node as not having seattle installed, we'll write a 
              # file that'll have all the missing seattle installs on the nodes
              # also, logfolder is the name of the node.
              node_version_dict['Not Installed Node Name'].append(logfolder)

            html_link = deploy_html.make_link_to_detailed(logfolder, uniq_fn)
            temp_array.append(html_link)            
            # add what we have to the html_dict
            html_dict[logfolder] = temp_array

            

                  
            if os.path.isfile('./deploy.logs/controller.log'):
              deploy_main.shellexec2('cp ./deploy.logs/controller.log ./detailed_logs/controller.'+timestamp)

            if os.path.isfile('./deploy.logs/deploy.err.log'):
              deploy_main.shellexec2('cp ./deploy.logs/deploy.err.log ./detailed_logs/deploy.err.'+timestamp)              
            
            logfile_handle.close()
            detailed_handle.close()

            summary_file_handle.write('\n'+sep+'\n')