def server_monitor():
  """
  <Purpose>
    Webserver watchdog.  Runs on its own thread (started from main()):
    every two minutes it checks whether the webserver is running and
    restarts it if it is not.

  <Arguments>
    None.

  <Exceptions>
    None.

  <Side Effects>
    May spawn a new webserver process (deploy_server_final.py) in the
    background.

  <Returns>
    None.  This function loops forever.
  """
  # Fix: loop in place instead of letting the thread die and respawning
  # itself with thread.start_new_thread every cycle -- the original
  # created a brand-new thread every two minutes for no benefit.
  while True:
    # check if server's running every several mins
    if not webserver_is_running():
      # not running, restart it in a non-blocking way, and fwd all
      # stdout/stderr to webserver.log
      deploy_main.shellexec2(
          'python deploy_server_final.py > ~/webserver.log 2>&1 < /dev/null&')
    # wait two minutes before the next check
    time.sleep(120)
def check_ssh_agent(): """ <Purpose> Checks to see if ssh-agent is running, if not it should start it. <Arguments> None. <Exceptions> None. <Side Effects> None. <Returns> None. """ # checks to see if ssh-agent is running, and if not, then it'll start it # at this point, as the script is intended to run on nsr, the key has no passphrase out, err, returncode = deploy_main.shellexec2( "ps -ef | grep ssh-agent | awk '{ if ($1 == \"nsr\") print $8 }'") if out.find('ssh-agent') > -1: # good, at least one instance is running pass else: print "ssh-agent is not running" # not running.. let's boot it up deploy_main.shellexec2("eval `ssh-agent`; ssh-add ")
def server_monitor():
  """
  <Purpose>
    Watchdog body for the webserver.  Intended to run on its own thread
    (kicked off from main()): if the webserver is down it relaunches it,
    then reschedules itself two minutes later on a fresh thread.

  <Arguments>
    None.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    None.
  """
  server_alive = webserver_is_running()
  if not server_alive:
    # relaunch in the background, sending all output to ~/webserver.log
    deploy_main.shellexec2(
        'python deploy_server_final.py > ~/webserver.log 2>&1 < /dev/null&')
  #deploy_main.shellexec2('python deploy_server_final.py > /dev/null 2> /dev/null < /dev/null&')
  # nap for two minutes, then hand off to a new thread and let this one die
  time.sleep(120)
  thread.start_new_thread(server_monitor, ())
def check_ssh_agent(): """ <Purpose> Checks to see if ssh-agent is running, if not it should start it. <Arguments> None. <Exceptions> None. <Side Effects> None. <Returns> None. """ # checks to see if ssh-agent is running, and if not, then it'll start it # at this point, as the script is intended to run on nsr, the key has no passphrase out, err, returncode = deploy_main.shellexec2("ps -ef | grep ssh-agent | awk '{ if ($1 == \"nsr\") print $8 }'") if out.find('ssh-agent') > -1: # good, at least one instance is running pass else: print "ssh-agent is not running" # not running.. let's boot it up deploy_main.shellexec2("eval `ssh-agent`; ssh-add ")
def is_monitor_already_running(): """ <Purpose> Checks to see whether another monitor process (deploy_server_monitor.py) is already running. <Arguments> None. <Exceptions> None. <Side Effects> None. <Returns> Boolean. True/False: is more than one monitor running? """ # check to see whether another instance of this script is already running out, err, retcode = deploy_main.shellexec2("ps -ef | grep deploy_server_monitor | grep -v grep "+\ "| awk '{ if ($1 == \"nsr\") print $1 } ' | sort | uniq -c | awk ' { print $1 } '") if out: try: num_running = int(out) if num_running == 1: return False else: return True except Exception, e: # something went wrong.. print 'Error in is_monitor_already_running' return True
def get_uniq_machines(controller_file): """ <Purpose> find out how many machines total we surveyed line looks like: Jun 16 2009 01:56:07 | Setup: Found 950 unique hosts to connect to. <Arguments> controller_file: path to the controller.log file <Exceptions> None. <Side Effects> None. <Returns> returns an (int, HumanString) """ out, err, retcode = deploy_main.shellexec2("awk '/Found/ { print $8 } ' " + controller_file) try: out = out.strip('\n\r ') return (str(int(out)), 'There were ' + out + ' unique hosts surveyed\n\n') except ValueError, ve: print 'Unexpected number of uniq hosts returned from shell.' print ve
def get_uniq_machines(controller_file): """ <Purpose> find out how many machines total we surveyed line looks like: Jun 16 2009 01:56:07 | Setup: Found 950 unique hosts to connect to. <Arguments> controller_file: path to the controller.log file <Exceptions> None. <Side Effects> None. <Returns> returns an (int, HumanString) """ out, err, retcode = deploy_main.shellexec2("awk '/Found/ { print $8 } ' "+controller_file) try: out = out.strip('\n\r ') return (str(int(out)), 'There were '+out+' unique hosts surveyed\n\n') except ValueError, ve: print 'Unexpected number of uniq hosts returned from shell.' print ve
def get_nodes_up(summary_file): """ <Purpose> Cheap way of seeing how many of the nodes our tests actually ran on.. sum up the "versions", which is a unique line per host-log. This can be slightly inaccurate (within several nodes, eg: if nodes upgraded?). <Arguments> summary_file: path to the summary.log file (htmlsummary.log) <Exceptions> None. <Side Effects> None. <Returns> Tuple of form (nodes_up, HumanString) """ # out, err, retcode = deploy_main.shellexec2('grep ^version '+summary_file+\ ' | sort | uniq -c | awk \'{ print $1 }\'') # each line starts with a number, so convert to int and give it a try try: # this is how many computers are 'up' counter = 0 for line in out.splitlines(): counter += int(line) except ValueError, e: # ignore it, we don't really care pass
def stop_deployment_scripts():
  """
  <Purpose>
    Kills every running instance of the deployment scripts
    (deploy_main.py).  Multiple instances should never exist unless
    someone launched them manually, but we loop until none are left.

  <Arguments>
    None.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    None.
  """
  # pipeline: find the script's pids (minus the grep itself) and kill -9 them
  kill_cmd = "ps -ef | grep deploy_main.py | grep -v grep | awk ' { print $2 } ' | xargs kill -9"
  while deploymentscript_is_running():
    deploy_main.shellexec2(kill_cmd)
def stop_ssh_scp():
  """
  <Purpose>
    Stops all possibly hung ssh/scp processes owned by 'nsr'.

  <Arguments>
    None.

  <Exceptions>
    None.

  <Side Effects>
    Might close the users ssh session.

  <Returns>
    None.
  """
  # each pattern matches one flavor of (possibly hung) transfer process;
  # kill -9 every matching pid owned by 'nsr'
  for pattern in ('ssh -T', 'ssh -x', 'scp -o'):
    deploy_main.shellexec2("ps -ef | grep '" + pattern +
        "' | awk '{ if ($1 == \"nsr\") print $2 } ' | xargs kill -9")
def stop_web_server():
  """
  <Purpose>
    Shuts down every instance of the webserver
    (deploy_server_final.py), looping in case more than one copy is
    somehow running.

  <Arguments>
    None.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    None.
  """
  # pids of every webserver process (minus the grep itself), fed to kill -9
  reap_cmd = "ps -ef | grep deploy_server_final.py | grep -v grep | awk ' { print $2 } ' | xargs kill -9"
  while webserver_is_running():
    deploy_main.shellexec2(reap_cmd)
def script_monitor():
  """
  <Purpose>
    Runs on its own thread.  Waits for the deployment scripts to finish,
    cleans up hung ssh/scp processes, relaunches the scripts, then sleeps
    roughly 90 minutes before repeating.  While a previous run is still
    in progress it polls every 5 minutes.

  <Arguments>
    None.

  <Exceptions>
    None.

  <Side Effects>
    See stop_ssh_scp().

  <Returns>
    None.  This function loops forever.
  """
  # Fix: loop in place instead of letting the thread die and recreating
  # itself with thread.start_new_thread every cycle -- the original
  # churned a brand-new thread every ~90 minutes for no benefit.
  while True:
    # if the timeout is up, make sure that the last round of tests has
    # finished; while it's still running, sleep 5 mins at a time
    while deploymentscript_is_running():
      time.sleep(60 * 5)

    # kill all old, possibly hung ssh-processes
    # bug?: this'll close anyone's ssh-session who's connected as
    # nsr@blackbox when scripts connect.
    stop_ssh_scp()

    #check_ssh_agent()

    # run in non-blocking way.
    deploy_main.shellexec2(
        'python deploy_main.py -c custom.py > /dev/null 2> /dev/null < /dev/null&')

    # sleep for 1.5 hrs; if the scripts aren't done by then, the poll
    # loop above will stall 5 mins at a time on the next pass
    time.sleep(60 * 90)
def stop_deployment_scripts():
  """
  <Purpose>
    Terminates all instances of the deployment scripts (deploy_main.py).
    This should be a no-op unless extra copies were started by hand, but
    the loop guarantees none survive.

  <Arguments>
    None.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    None.
  """
  # repeat until the process check comes back clean
  while deploymentscript_is_running():
    # hard-kill every deploy_main.py pid (grep -v drops the grep itself)
    deploy_main.shellexec2(
        "ps -ef | grep deploy_main.py | grep -v grep | awk ' { print $2 } ' | xargs kill -9")
def script_monitor():
  """
  <Purpose>
    Thread body that drives the deployment scripts on a ~90 minute cycle.
    If the previous round has not finished yet, it is polled every 5
    minutes before a new round is launched; afterwards the function hands
    off to a fresh thread and returns.

  <Arguments>
    None.

  <Exceptions>
    None.

  <Side Effects>
    See stop_ssh_scp().

  <Returns>
    None.
  """
  # wait out any still-running round of tests, five minutes at a time
  while deploymentscript_is_running():
    time.sleep(60 * 5)

  # reap old, possibly hung ssh processes before starting fresh ones
  # bug?: this can also close an interactive ssh session connected as
  # nsr@blackbox at the wrong moment.
  stop_ssh_scp()

  #check_ssh_agent()

  # relaunch the scripts detached from this process (non-blocking)
  deploy_main.shellexec2(
      'python deploy_main.py -c custom.py > /dev/null 2> /dev/null < /dev/null&')

  # nap for 1.5 hours, then replace ourselves with a brand-new thread
  time.sleep(60 * 90)
  thread.start_new_thread(script_monitor, ())
def stop_web_server():
  """
  <Purpose>
    Stops the webserver.  Loops so that even if several
    deploy_server_final.py processes exist, all of them get killed.

  <Arguments>
    None.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    None.
  """
  # keep sweeping until the webserver no longer appears in ps
  while webserver_is_running():
    # collect the pids (excluding the grep) and hard-kill them
    deploy_main.shellexec2(
        "ps -ef | grep deploy_server_final.py | grep -v grep | awk ' { print $2 } ' | xargs kill -9")
def deploymentscript_is_running(): """ <Purpose> IChecks to see if the deployment scripts (deploy_main.py) are running. <Arguments> None. <Exceptions> None. <Side Effects> None. <Returns> Boolean. True/False: are the scripts running """ # True if running, false if not. checks via ps out, err, retcode = deploy_main.shellexec2('ps -ef | grep deploy_main.py | grep -v grep') # -1 if not running, otherwise it is return out.find('python deploy_main.py') > -1
def webserver_is_running():
  """
  <Purpose>
    Reports whether the webserver (deploy_server_final.py) shows up in
    the process table.

  <Arguments>
    None.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    Boolean. True if the webserver is running, False otherwise.
  """
  # ask ps for the webserver; drop the grep process itself from the list
  ps_cmd = 'ps -ef | grep deploy_server_final.py | grep -v grep'
  listing, err, retcode = deploy_main.shellexec2(ps_cmd)
  # substring present => at least one matching process exists
  return 'python deploy_server_final.py' in listing
def webserver_is_running(): """ <Purpose> Check to see if the webserver is running <Arguments> None. <Exceptions> None. <Side Effects> None. <Returns> Boolean. True/False: is the webserver running? """ # True if running, false if not. checks via ps out, err, retcode = deploy_main.shellexec2( 'ps -ef | grep deploy_server_final.py | grep -v grep') # if -1, then not running, otherwise it is return out.find('python deploy_server_final.py') > -1
def deploymentscript_is_running():
  """
  <Purpose>
    Reports whether any deploy_main.py process is currently alive.

  <Arguments>
    None.

  <Exceptions>
    None.

  <Side Effects>
    None.

  <Returns>
    Boolean. True if the deployment scripts are running, False otherwise.
  """
  # grab the process listing for the script, minus the grep itself
  listing, err, retcode = deploy_main.shellexec2(
      'ps -ef | grep deploy_main.py | grep -v grep')
  # membership test: present in ps output means it is running
  return 'python deploy_main.py' in listing
def stop_ssh_scp():
  """
  <Purpose>
    Hard-kills every lingering ssh/scp transfer process owned by 'nsr'.

  <Arguments>
    None.

  <Exceptions>
    None.

  <Side Effects>
    Might close the users ssh session.

  <Returns>
    None.
  """
  # one kill pipeline per process flavor; %s is the grep pattern
  kill_template = "ps -ef | grep '%s' | awk '{ if ($1 == \"nsr\") print $2 } ' | xargs kill -9"
  for proc_pattern in ['ssh -T', 'ssh -x', 'scp -o']:
    deploy_main.shellexec2(kill_template % proc_pattern)
def build_summary():
  """
  <Purpose>
    This function collects all the important log files from the
    subdirectories and outputs them in a summary.log

  <Arguments>
    None.

  <Exceptions>
    Error opening/creating the log file.

  <Side Effects>
    None.

  <Returns>
    None.
  """
  # separator written between per-host log sections
  sep = '---------------------'

  uniq_fn, timestamp = deploy_html.generate_uniq_fn()

  # collect all log files into a summary file
  summary_fn = 'detailed.'+uniq_fn

  # directory structure is as follows (for the files we want)
  # ./deploy.logs/[remote_host]/deployrun.log
  # ./deploy.logs/[remote_host]/[remote host].deployrun.err.log

  #try:
  # make sure that the dir exists
  if not os.path.isdir('./detailed_logs'):
    os.mkdir('./detailed_logs')

  summary_file_handle = open('./detailed_logs/'+summary_fn, 'w')

  # states map to #s
  node_states_counter = {}

  # num of states -> to # of occurences
  num_node_states = {}

  # has the following keys:
  # SU is running -> how many computers have a SU running
  # NM is running -> how many computers have a NM running
  # SU -> how may computers have just SU running
  # NM -> how many comptuers have just NM running
  # Both SU and NM are running -> how many computers have SU and NM running
  # none -> how many computer have neither SU nor NM running
  su_nm_stats_header = ['SU/NM Info', 'Number of Nodes']
  su_nm_stats = {}
  su_nm_stats['SU is running'] = 0
  su_nm_stats['NM is running'] = 0
  su_nm_stats['Only SU is running'] = 0
  su_nm_stats['Only NM is running'] = 0
  su_nm_stats['SU/NM are not running'] = 0
  su_nm_stats['Both SU and NM are running'] = 0

  # will have version that map to # of currently installed
  node_version_dict = {}

  # This'll keep track of the # of not installed computers
  node_version_dict['Not Installed'] = 0

  # This'll keep track of the node ips/hostnames that have seattle missing
  node_version_dict['Not Installed Node Name'] = []

  # this dictionary will be used to build up our html page with all the node
  # information.  the keys to this dictionary are the nodenames, they map to
  # an array of values which are the values in the table for that node. then
  # we'll use the deploy_html lib to build up our html tables and write them
  # to the file.
  html_dict = {}

  # used as the headers for the table built up in html_dict
  html_dict_headers = ['Node Name', 'NodeManager Status', 'SoftwareUpdater Status', 'Node Version', 'Node Status', 'Details']

  # the html FN that we'll be using
  # for every folder in the logs directory
  for logfolder in os.listdir('./deploy.logs'):
    # each dir should have TWO files (at most), but we only care about one
    # for our summary file

    # check that it's a directory.
    if os.path.isdir('./deploy.logs/'+logfolder):
      # it's a directory! good!
      for logfile in os.listdir('./deploy.logs/'+logfolder):
        # now check that each file until we get a file by the name of
        # 'deployrun.log'
        if os.path.isfile('./deploy.logs/'+logfolder+'/'+logfile):
          # It's a file.. is it the right name?
          errfn = logfolder+'.deployrun.err.log'
          if logfile == 'deployrun.log' or logfile == errfn:
            # Awesome it's the one we want!
            # the logfolder = the remote host (by ip or hostname)
            summary_file_handle.write('\nLog from '+logfolder)

            # make the HTML page.  the logfolder is the nodename
            #deploy_html.html_write('./deploy.logs/'+logfolder+'/'+logfile, logfolder, uniq_fn)
            logfile_name = './deploy.logs/'+logfolder+'/'+logfile
            logfile_handle = open(logfile_name, 'r')

            # per-node detailed log directory mirrors the deploy.logs layout
            if not os.path.isdir('./detailed_logs/'+logfolder):
              os.mkdir('./detailed_logs/'+logfolder)

            detailed_handle = open('./detailed_logs/'+logfolder+'/'+timestamp, 'a')

            node_file_as_string = deploy_html.read_whole_file(logfile_handle)
            final_file_content = deploy_helper.summarize_all_blocks(node_file_as_string)

            # write to both the files
            summary_file_handle.write(final_file_content)
            detailed_handle.write(final_file_content)

            # create a temp array that we'll use to build up the info, and
            # then throw in to the html_dict
            temp_array = []

            # now check if the node has seattle installed or not
            if deploy_stats.check_is_seattle_installed(node_file_as_string):
              # now we need the NM status
              NM_success_status, NM_desc_string, bgcolor = deploy_stats.check_is_nm_running(node_file_as_string)
              if NM_success_status or NM_desc_string.lower().find('not') == -1:
                su_nm_stats['NM is running'] += 1
              temp_array.append((NM_desc_string, bgcolor))

              # next we need the SU status
              SU_success_status, SU_desc_string, bgcolor = deploy_stats.check_is_su_running(node_file_as_string)
              # if it is running then increment the running counter by 1
              if SU_success_status or SU_desc_string.lower().find('not') == -1:
                su_nm_stats['SU is running'] += 1
              temp_array.append((SU_desc_string, bgcolor))

              # make sure to record the stats
              # the not is a hack for the high mem usage which returns false
              if SU_desc_string.lower().find('not') == -1 or SU_success_status:
                if NM_desc_string.lower().find('not') == -1 or NM_success_status:
                  # su and nm are running
                  su_nm_stats['Both SU and NM are running'] += 1
                else:
                  # only su is running, nm is not
                  su_nm_stats['Only SU is running'] += 1
              else:
                if NM_desc_string.lower().find('not') == -1 or NM_success_status:
                  # only NM is running
                  su_nm_stats['Only NM is running'] += 1
                else:
                  # neither is running
                  su_nm_stats['SU/NM are not running'] += 1

              # now get the node version
              success_status, version_string, bgcolor = deploy_stats.get_node_version(node_file_as_string)
              temp_array.append((version_string, bgcolor))

              # keep track of how many of each version/output we have
              # (including errors and upgrades)
              if version_string not in node_version_dict.keys():
                node_version_dict[version_string] = 1
              else:
                node_version_dict[version_string] += 1

              # and now the node state
              try:
                (success_status, (node_state_array, state_counter), html_color) = deploy_stats.get_node_state(node_file_as_string)
              except Exception, e:
                (success_status, (node_state_array, state_counter), html_color) = (False, ([], 0), deploy_html.colors_map['Error'])

              # the following chunk of code keeps track of how many nodes
              # have X states on them
              # has # of states | number
              if str(state_counter) in num_node_states.keys():
                # has the key, just get the value and increment by one
                num_node_states[str(state_counter)] = num_node_states[str(state_counter)] + 1
              else:
                # set it to one, and create the key
                num_node_states[str(state_counter)] = 1

              # this'll be the string we'll dump to the temp_array.
              node_state_success = ''

              for each_vessel in node_state_array:
                # tuple (SuccessState, vesselID, explanation_str)
                if each_vessel[0]:
                  # success!
                  node_state_success += str(each_vessel[2])+','
                  summary_file_handle.write('\nVessel state:\t'+str(each_vessel[1])+':'+str(each_vessel[2]))
                  detailed_handle.write('\nVessel state:\t'+str(each_vessel[1])+':'+str(each_vessel[2]))

                  # This next chunk of code keeps track of what states each
                  # nodes are in and how many we have in that particular state
                  if str(each_vessel[2]) in node_states_counter.keys():
                    node_states_counter[str(each_vessel[2])] = node_states_counter[str(each_vessel[2])] + 1
                  else:
                    node_states_counter[str(each_vessel[2])] = 1
                else:
                  summary_file_handle.write('\nVessel state:\t'+str(each_vessel[1])+':'+str(each_vessel[2]))
                  # don't write the detailed log if we fail.

              detailed_handle.write('\n')
              summary_file_handle.write('\n')

              if state_counter == 1:
                # exactly one state on the node -- drop the trailing comma
                temp_array.append((node_state_success[0:-1], deploy_html.colors_map['Success']))
              else:
                if state_counter == 0:
                  if node_state_array:
                    # if the array isn't null we have some msg to print,
                    # otherwise it's an error
                    temp_array.append((node_state_array[0], deploy_html.colors_map['Error']))
                  else:
                    temp_array.append(('Did not get vesseldict', deploy_html.colors_map['Error']))
                    # no keys on the node, print the human-friendly version
                    # (also could be an unknown key)
                    #temp_array.append(('No node-state keys found', deploy_html.colors_map['Error']))
                #else: # state_counter > 1:
                  #temp_array.append(('Multiple states on node!', deploy_html.colors_map['Error']))
              # end getting the node state here
            else:
              # no seattle installed!
              temp_array = ['', '', '', ('Seattle is not installed', deploy_html.colors_map['Warning'])]
              node_version_dict['Not Installed'] = node_version_dict['Not Installed'] + 1

              # mark the node as not having seattle installed, we'll write a
              # file that'll have all the missing seattle installs on the nodes
              # also, logfolder is the name of the node.
              node_version_dict['Not Installed Node Name'].append(logfolder)

            html_link = deploy_html.make_link_to_detailed(logfolder, uniq_fn)
            temp_array.append(html_link)

            # add what we have to the html_dict
            html_dict[logfolder] = temp_array

            # NOTE(review): placement of the following cleanup was
            # reconstructed from whitespace-mangled source; it is kept with
            # the per-logfile handles it closes -- confirm against history.
            if os.path.isfile('./deploy.logs/controller.log'):
              deploy_main.shellexec2('cp ./deploy.logs/controller.log ./detailed_logs/controller.'+timestamp)

            if os.path.isfile('./deploy.logs/deploy.err.log'):
              deploy_main.shellexec2('cp ./deploy.logs/deploy.err.log ./detailed_logs/deploy.err.'+timestamp)

            logfile_handle.close()
            detailed_handle.close()

            summary_file_handle.write('\n'+sep+'\n')
def build_summary():
  """
  <Purpose>
    This function collects all the important log files from the
    subdirectories and outputs them in a summary.log

  <Arguments>
    None.

  <Exceptions>
    Error opening/creating the log file.

  <Side Effects>
    None.

  <Returns>
    None.
  """
  # separator written between per-host log sections
  sep = '---------------------'

  uniq_fn, timestamp = deploy_html.generate_uniq_fn()

  # collect all log files into a summary file
  summary_fn = 'detailed.' + uniq_fn

  # directory structure is as follows (for the files we want)
  # ./deploy.logs/[remote_host]/deployrun.log
  # ./deploy.logs/[remote_host]/[remote host].deployrun.err.log

  #try:
  # make sure that the dir exists
  if not os.path.isdir('./detailed_logs'):
    os.mkdir('./detailed_logs')

  summary_file_handle = open('./detailed_logs/' + summary_fn, 'w')

  # states map to #s
  node_states_counter = {}

  # num of states -> to # of occurences
  num_node_states = {}

  # has the following keys:
  # SU is running -> how many computers have a SU running
  # NM is running -> how many computers have a NM running
  # SU -> how may computers have just SU running
  # NM -> how many comptuers have just NM running
  # Both SU and NM are running -> how many computers have SU and NM running
  # none -> how many computer have neither SU nor NM running
  su_nm_stats_header = ['SU/NM Info', 'Number of Nodes']
  su_nm_stats = {}
  su_nm_stats['SU is running'] = 0
  su_nm_stats['NM is running'] = 0
  su_nm_stats['Only SU is running'] = 0
  su_nm_stats['Only NM is running'] = 0
  su_nm_stats['SU/NM are not running'] = 0
  su_nm_stats['Both SU and NM are running'] = 0

  # will have version that map to # of currently installed
  node_version_dict = {}

  # This'll keep track of the # of not installed computers
  node_version_dict['Not Installed'] = 0

  # This'll keep track of the node ips/hostnames that have seattle missing
  node_version_dict['Not Installed Node Name'] = []

  # this dictionary will be used to build up our html page with all the node
  # information.  the keys to this dictionary are the nodenames, they map to
  # an array of values which are the values in the table for that node. then
  # we'll use the deploy_html lib to build up our html tables and write them
  # to the file.
  html_dict = {}

  # used as the headers for the table built up in html_dict
  html_dict_headers = [
      'Node Name', 'NodeManager Status', 'SoftwareUpdater Status',
      'Node Version', 'Node Status', 'Details'
  ]

  # the html FN that we'll be using
  # for every folder in the logs directory
  for logfolder in os.listdir('./deploy.logs'):
    # each dir should have TWO files (at most), but we only care about one
    # for our summary file

    # check that it's a directory.
    if os.path.isdir('./deploy.logs/' + logfolder):
      # it's a directory! good!
      for logfile in os.listdir('./deploy.logs/' + logfolder):
        # now check that each file until we get a file by the name of
        # 'deployrun.log'
        if os.path.isfile('./deploy.logs/' + logfolder + '/' + logfile):
          # It's a file.. is it the right name?
          errfn = logfolder + '.deployrun.err.log'
          if logfile == 'deployrun.log' or logfile == errfn:
            # Awesome it's the one we want!
            # the logfolder = the remote host (by ip or hostname)
            summary_file_handle.write('\nLog from ' + logfolder)

            # make the HTML page.  the logfolder is the nodename
            #deploy_html.html_write('./deploy.logs/'+logfolder+'/'+logfile, logfolder, uniq_fn)
            logfile_name = './deploy.logs/' + logfolder + '/' + logfile
            logfile_handle = open(logfile_name, 'r')

            # per-node detailed log directory mirrors the deploy.logs layout
            if not os.path.isdir('./detailed_logs/' + logfolder):
              os.mkdir('./detailed_logs/' + logfolder)

            detailed_handle = open(
                './detailed_logs/' + logfolder + '/' + timestamp, 'a')

            node_file_as_string = deploy_html.read_whole_file(
                logfile_handle)
            final_file_content = deploy_helper.summarize_all_blocks(
                node_file_as_string)

            # write to both the files
            summary_file_handle.write(final_file_content)
            detailed_handle.write(final_file_content)

            # create a temp array that we'll use to build up the info, and
            # then throw in to the html_dict
            temp_array = []

            # now check if the node has seattle installed or not
            if deploy_stats.check_is_seattle_installed(node_file_as_string):
              # now we need the NM status
              NM_success_status, NM_desc_string, bgcolor = deploy_stats.check_is_nm_running(
                  node_file_as_string)
              if NM_success_status or NM_desc_string.lower().find('not') == -1:
                su_nm_stats['NM is running'] += 1
              temp_array.append((NM_desc_string, bgcolor))

              # next we need the SU status
              SU_success_status, SU_desc_string, bgcolor = deploy_stats.check_is_su_running(
                  node_file_as_string)
              # if it is running then increment the running counter by 1
              if SU_success_status or SU_desc_string.lower().find('not') == -1:
                su_nm_stats['SU is running'] += 1
              temp_array.append((SU_desc_string, bgcolor))

              # make sure to record the stats
              # the not is a hack for the high mem usage which returns false
              if SU_desc_string.lower().find('not') == -1 or SU_success_status:
                if NM_desc_string.lower().find('not') == -1 or NM_success_status:
                  # su and nm are running
                  su_nm_stats['Both SU and NM are running'] += 1
                else:
                  # only su is running, nm is not
                  su_nm_stats['Only SU is running'] += 1
              else:
                if NM_desc_string.lower().find('not') == -1 or NM_success_status:
                  # only NM is running
                  su_nm_stats['Only NM is running'] += 1
                else:
                  # neither is running
                  su_nm_stats['SU/NM are not running'] += 1

              # now get the node version
              success_status, version_string, bgcolor = deploy_stats.get_node_version(
                  node_file_as_string)
              temp_array.append((version_string, bgcolor))

              # keep track of how many of each version/output we have
              # (including errors and upgrades)
              if version_string not in node_version_dict.keys():
                node_version_dict[version_string] = 1
              else:
                node_version_dict[version_string] += 1

              # and now the node state
              try:
                (success_status, (node_state_array, state_counter),
                 html_color) = deploy_stats.get_node_state(node_file_as_string)
              except Exception, e:
                (success_status, (node_state_array, state_counter),
                 html_color) = (False, ([], 0), deploy_html.colors_map['Error'])

              # the following chunk of code keeps track of how many nodes
              # have X states on them
              # has # of states | number
              if str(state_counter) in num_node_states.keys():
                # has the key, just get the value and increment by one
                num_node_states[str(state_counter)] = num_node_states[str(state_counter)] + 1
              else:
                # set it to one, and create the key
                num_node_states[str(state_counter)] = 1

              # this'll be the string we'll dump to the temp_array.
              node_state_success = ''

              for each_vessel in node_state_array:
                # tuple (SuccessState, vesselID, explanation_str)
                if each_vessel[0]:
                  # success!
                  node_state_success += str(each_vessel[2]) + ','
                  summary_file_handle.write('\nVessel state:\t' +
                      str(each_vessel[1]) + ':' + str(each_vessel[2]))
                  detailed_handle.write('\nVessel state:\t' +
                      str(each_vessel[1]) + ':' + str(each_vessel[2]))

                  # This next chunk of code keeps track of what states each
                  # nodes are in and how many we have in that particular state
                  if str(each_vessel[2]) in node_states_counter.keys():
                    node_states_counter[str(each_vessel[2])] = node_states_counter[str(each_vessel[2])] + 1
                  else:
                    node_states_counter[str(each_vessel[2])] = 1
                else:
                  summary_file_handle.write('\nVessel state:\t' +
                      str(each_vessel[1]) + ':' + str(each_vessel[2]))
                  # don't write the detailed log if we fail.

              detailed_handle.write('\n')
              summary_file_handle.write('\n')

              if state_counter == 1:
                # exactly one state on the node -- drop the trailing comma
                temp_array.append((node_state_success[0:-1],
                    deploy_html.colors_map['Success']))
              else:
                if state_counter == 0:
                  if node_state_array:
                    # if the array isn't null we have some msg to print,
                    # otherwise it's an error
                    temp_array.append((node_state_array[0],
                        deploy_html.colors_map['Error']))
                  else:
                    temp_array.append(('Did not get vesseldict',
                        deploy_html.colors_map['Error']))
                    # no keys on the node, print the human-friendly version
                    # (also could be an unknown key)
                    #temp_array.append(('No node-state keys found', deploy_html.colors_map['Error']))
                #else: # state_counter > 1:
                  #temp_array.append(('Multiple states on node!', deploy_html.colors_map['Error']))
              # end getting the node state here
            else:
              # no seattle installed!
              temp_array = [
                  '', '', '',
                  ('Seattle is not installed', deploy_html.colors_map['Warning'])
              ]
              node_version_dict['Not Installed'] = node_version_dict['Not Installed'] + 1

              # mark the node as not having seattle installed, we'll write a
              # file that'll have all the missing seattle installs on the nodes
              # also, logfolder is the name of the node.
              node_version_dict['Not Installed Node Name'].append(logfolder)

            html_link = deploy_html.make_link_to_detailed(logfolder, uniq_fn)
            temp_array.append(html_link)

            # add what we have to the html_dict
            html_dict[logfolder] = temp_array

            # NOTE(review): placement of the following cleanup was
            # reconstructed from whitespace-mangled source; it is kept with
            # the per-logfile handles it closes -- confirm against history.
            if os.path.isfile('./deploy.logs/controller.log'):
              deploy_main.shellexec2(
                  'cp ./deploy.logs/controller.log ./detailed_logs/controller.' + timestamp)

            if os.path.isfile('./deploy.logs/deploy.err.log'):
              deploy_main.shellexec2(
                  'cp ./deploy.logs/deploy.err.log ./detailed_logs/deploy.err.' + timestamp)

            logfile_handle.close()
            detailed_handle.close()

            summary_file_handle.write('\n' + sep + '\n')