def stop(self):
    """
    Public method to stop daemon.

    Sends SIGTERM to the pid read from the pid file every half a second until
    the system reports that the process does not exist anymore, then removes
    the pid file if it is still there.

    Exits the interpreter with AGENT_ERR_FAILED_PID_FILE when the pid file
    cannot be removed, and with AGENT_ERR_FAILED_TERMINATE when signalling
    fails for any reason other than the process being gone.
    """

    import errno  #needed only here, to recognize the 'no such process' error

    if self.status(True) == False:  #make sure it is running
        sys.stdout.write('%s is not running\n' % self.daemon_name)
        return

    pid = self.get_pid()  #get pid from the file

    try:
        while 1:  #send SIGTERM to pid until we get an exception
            os.kill(pid, signal.SIGTERM)
            time.sleep(0.5)
    except OSError as err:
        #compare the error number instead of searching the message text;
        #the text is not a stable interface
        if err.errno == errno.ESRCH:  #system reported unknown process
            if os.path.exists(self.pidfile):
                try:
                    os.remove(self.pidfile)  #remove pid file
                except Exception as e:
                    sys.stderr.write(unicode(e) + '\n')
                    sys.exit(exit_status.AGENT_ERR_FAILED_PID_FILE)

            sys.stdout.write('%s stopped successfully\n' % self.daemon_name)
        else:
            sys.stderr.write(unicode(err) + '\n')
            sys.exit(exit_status.AGENT_ERR_FAILED_TERMINATE)
def run(self):
    """
    Method runs in the daemon.

    Starts the monitoring script, sets the process name, installs the
    termination hook, starts the crash dump sender when dumps are found and
    finally hands over to main.run, in update-only mode if a crash loop was detected.
    """

    try:
        #monit.sh restarts the agent if it is killed; it is started in its own process group
        monit_args = [exe_path + 'bin/monit.sh', unicode(os.getpid()), '%d' % self.monit_interval]
        monit_process = subprocess.Popen(monit_args, preexec_fn = os.setpgrp)
        self.monit_pid = monit_process.pid
    except Exception as e:
        #failing to start the monitoring script is logged and does not stop the agent
        _log.error('Failed to open monitoring script; %s' % unicode(e))

    self.set_procname('sealiond')  #process name for display purpose

    #index 0 tells whether a crash loop was detected, index 1 is the number of dumps
    crash_dump_details = self.get_crash_dump_details()

    #termination hook is called whenever agent shutdown disgracefully
    helper.terminatehook = self.termination_hook

    dump_count = crash_dump_details[1]

    if dump_count > 0:  #send the dumps from a separate thread
        _log.info('Found %d dumps' % dump_count)
        sender = ThreadEx(target = self.send_crash_dumps, name = 'CrashDumpSender')
        sender.start()

    if crash_dump_details[0] == True:  #crash loop detected
        _log.info('Crash loop detected; Starting agent in update-only mode')
        is_update_only_mode = True
    else:
        is_update_only_mode = False

    import main
    main.run(is_update_only_mode)  #start executing agent
def stop(self):
    """
    Public method to stop daemon.

    Sends SIGTERM to the pid read from the pid file every half a second until
    the system reports that the process does not exist anymore.

    Exits the interpreter with AGENT_ERR_FAILED_TERMINATE when signalling
    fails for any reason other than the process being gone.
    """

    import errno  #needed only here, to recognize the 'no such process' error

    if self.get_status() == False:  #make sure it is running
        sys.stdout.write('%s is not running\n' % self.daemon_name)
        return

    pid = self.get_pid()  #get pid from the file

    try:
        while 1:  #send SIGTERM to pid until we get an exception
            os.kill(pid, signal.SIGTERM)
            time.sleep(0.5)
    except OSError as e:
        #compare the error number instead of searching the message text;
        #the text is not a stable interface
        if e.errno == errno.ESRCH:  #system reported unknown process
            sys.stdout.write('%s stopped successfully\n' % self.daemon_name)
        else:
            sys.stderr.write(unicode(e) + '\n')
            sys.exit(exit_status.AGENT_ERR_FAILED_TERMINATE)
    except Exception as e:
        sys.stderr.write(unicode(e) + '\n')
        sys.exit(exit_status.AGENT_ERR_FAILED_TERMINATE)
def initialize(self):
    """
    Method to perform some tasks before daemonizing.
    The idea is to throw any error before daemonizing.

    Switches to the configured user when the process runs as someone else,
    creates the pid file, installs the exception hook and imports main.
    Any failure is written to stderr and ends the process with the matching exit status.
    """

    try:
        pw_entry = pwd.getpwnam(self.user_name)  #pwd db entry for the user name

        #switch user and group only when we are not that user already;
        #the set* calls raise an exception unless the current user is super user
        if pw_entry.pw_uid != os.getuid():
            os.setgroups([])  #leave any effective groups
            os.setgid(pw_entry.pw_gid)
            os.setuid(pw_entry.pw_uid)
            os.environ['HOME'] = '/'  #reset the environment
    except KeyError as e:
        message = 'Failed to find user %s; %s\n' % (self.user_name, unicode(e))
        sys.stderr.write(message)
        sys.exit(exit_status.AGENT_ERR_FAILED_FIND_USER)
    except Exception as e:
        message = 'Failed to change the group or user to %s; %s\n' % (self.user_name, unicode(e))
        sys.stderr.write(message)
        sys.exit(exit_status.AGENT_ERR_FAILED_CHANGE_GROUP_OR_USER)

    try:
        #create (or truncate) the pid file now, so that a failure shows up before daemonizing
        helper.Utils.get_safe_path(self.pidfile)

        with open(self.pidfile, 'w'):
            pass
    except Exception as e:
        sys.stderr.write(unicode(e) + '\n')
        sys.exit(exit_status.AGENT_ERR_FAILED_PID_FILE)

    #exception hook is what generates the crash dumps
    sys.excepthook = self.exception_hook

    #importing main here surfaces any import error before daemonizing
    import main
def run(self):
    """
    Method runs in the daemon.

    Launches monit.sh, renames the process, registers the termination hook,
    starts the CrashDumpSender thread if there are dumps and then runs the
    agent through main.run, passing whether it should start in update-only mode.
    """

    try:
        #monit.sh is there to restart the agent if it is killed
        monit = subprocess.Popen(
            [exe_path + 'bin/monit.sh', unicode(os.getpid()), '%d' % self.monit_interval],
            preexec_fn=os.setpgrp)
        self.monit_pid = monit.pid
    except Exception as e:
        _log.error('Failed to open monitoring script; %s' % unicode(e))

    #set process name for display purpose
    self.set_procname('sealiond')

    details = self.get_crash_dump_details()

    #set the termination hook called whenever agent shutdown disgracefully
    helper.terminatehook = self.termination_hook

    #details[1] holds the number of dumps found
    if details[1] > 0:
        _log.info('Found %d dumps' % details[1])
        ThreadEx(target=self.send_crash_dumps, name='CrashDumpSender').start()

    #details[0] is set when a crash loop is detected
    crash_loop_detected = details[0] == True

    if crash_loop_detected:
        _log.info('Crash loop detected; Starting agent in update-only mode')

    import main
    main.run(True if crash_loop_detected else False)  #start executing agent
def main(directory, command_interval):
    """
    Function to run every *.sh activity in a directory and extract its metrics

    Args:
        directory: directory containing the *.sh activities; the parsers for an
            activity are the *.py files in the folder named after it without the extension
        command_interval: value exported to the activities as COMMAND_INTERVAL
    """

    #install SIGQUIT handler so that the program can stop
    signal.signal(signal.SIGQUIT, sigquit_handler)

    #we are using log module and we want the output in stdout, hence the redirect
    sys.stderr = sys.stdout
    logging.basicConfig(level=logging.DEBUG, format='%(message)s')

    service.set_user()  #set the user and group for the process
    univ = universal.Universal()
    seperator = '\n'

    #export the environment variables
    config_env = univ.config.sealion.get_dict((['config', 'envVariables'], {}))
    os.environ.update(config_env['envVariables'])
    local_env = univ.config.sealion.get_dict(('env', {}))
    os.environ.update(local_env['env'])
    os.environ.update({'COMMAND_INTERVAL': unicode(command_interval)})

    try:
        working_dir = os.path.realpath(directory)
        os.chdir(working_dir)
        log.debug('SIGQUIT(Ctrl-\\) to exit; SIGINT(Ctrl-C) to abort current operation')
        log.debug('Working directory: %s' % os.path.realpath(directory))

        #loop through the content of the directory
        for activity in os.listdir('./'):
            try:
                #consider only *.sh files
                if not (activity[-3:] == '.sh' and os.path.isfile(activity)):
                    continue

                if seperator:
                    log.debug(seperator)

                output, status = execute(activity)  #execute and get the output

                if not output:
                    continue

                metric_dir = activity[:-3]  #activity name without the .sh extension
                metrics = {}

                #loop through the contents of the metric folder for the activity
                for metric in os.listdir(metric_dir):
                    try:
                        #consider only *.py files
                        if metric[-3:] != '.py':
                            continue

                        #read the parser code from the file
                        with open(metric_dir + '/' + metric) as f:
                            metrics[metric] = {'parser': f.read()}
                    except:
                        pass

                extract.extract_metrics(output, status, metrics, activity)  #extract metrics
                seperator = '%s\n' % ('_' * 50)
            except:
                #NOTE(review): bare except kept as is; presumably it also absorbs the
                #Ctrl-C that aborts the current activity - confirm before narrowing
                pass
    except Exception as e:
        log.error('Error: %s', unicode(e))
def set_user(default_user_name='sealion'):
    """
    Function to set the user and group for the process

    Args:
        default_user_name: the default user name to be used in case no users found in the config

    Returns:
        The user name for the process

    Exits the interpreter with AGENT_ERR_FAILED_FIND_USER if the user is not in
    the password database, or AGENT_ERR_FAILED_CHANGE_GROUP_OR_USER if changing
    the groups, group id or user id fails.
    """

    try:
        user_regex = universal.SealionConfig.schema['user'].get('regex')  #get the regex used for validation

        #read the user name from the config; 'with' closes the file even if parsing fails
        with open(exe_path + '/etc/config.json', 'r') as f:
            user_name = json.load(f)['user']

        #update the user name if it is valid
        if not user_regex or re.match(user_regex, user_name):
            default_user_name = user_name
    except Exception:
        pass  #best effort; fall back to the default user name on any failure

    try:
        user = pwd.getpwnam(default_user_name)  #get the pwd db entry for the user name

        #if it is not the wanted user, then we need to change user and group
        #if current user is not super user, trying to change the user/group id will raise exception
        if user.pw_uid != os.getuid():
            #find all the groups where the user is a member, other than the primary group
            groups = [
                group.gr_gid for group in grp.getgrall()
                if user.pw_name in group.gr_mem and user.pw_gid != group.gr_gid
            ]
            os.setgroups(groups)  #set the supplementary groups
            os.setgid(user.pw_gid)  #set group id
            os.setuid(user.pw_uid)  #set user id
    except KeyError as e:
        sys.stderr.write('Failed to find user %s; %s\n' % (default_user_name, unicode(e)))
        sys.exit(exit_status.AGENT_ERR_FAILED_FIND_USER)
    except Exception as e:
        sys.stderr.write('Failed to change the group or user to %s; %s\n' % (default_user_name, unicode(e)))
        sys.exit(exit_status.AGENT_ERR_FAILED_CHANGE_GROUP_OR_USER)

    return default_user_name
def extract_metrics(output, return_code, metrics, job):
    """
    Function to extract the metrics from the output given

    Args:
        output: command output to be parsed
        return_code: return code of the command
        metrics: the metrics to be extracted; each entry carries its parser code under 'parser'
        job: name of the job for which the output is produced, for logging purpose

    Returns:
        dict representing the metrics extracted; metrics whose parser failed are left out
    """

    #namespace in which the parser code runs; the text processing modules are injected too
    #NOTE(review): the parser code is run with exec; make sure it only ever comes from a trusted source
    parser_scope = {'__builtins__': globals()['__builtins__']}
    parser_scope.update(text_modules)

    allowed_type_names = ['int', 'float']  #valid types for the value extracted
    extracted = {}  #final metrics

    for metric_id in metrics:
        #inputs for the parser, and a clean slot for its result
        parser_scope.update({
            'command_output': output,
            'command_return_code': return_code,
            'metric_value': None
        })
        log_debug('Extracting metric %s from %s' % (metric_id, job))

        #alarm signals after the timeout, which in turn raises an exception
        if _timeout > 0:
            signal.alarm(_timeout)

        try:
            exec(metrics[metric_id]['parser'], parser_scope)
            value = parser_scope.get('metric_value')

            #compared by type name, so anything other than a plain int or float is rejected
            if type(value).__name__ not in allowed_type_names:
                raise TypeError('metric_value should be %s' % ' or '.join(allowed_type_names))

            extracted[metric_id] = value
            log_debug('Extracted metric_value %s for metric %s from %s' % (value, metric_id, job))
        except:
            log_error('Failed to extract metric %s from %s; %s' % (metric_id, job, unicode(sys.exc_info()[1])))

        if _timeout > 0:
            signal.alarm(0)  #reset the alarm

    return extracted
def send_crash_dumps(self):
    """
    Method to send all crash dumps to server.
    This method runs in a separate thread.

    Waits on the stop event first, then reports every file in the crash dump
    directory that is named like a dump of this agent. A dump is removed once
    its report is no longer being retried, that is when it could not be read or
    when the send did not fail for being not connected.
    Returns as soon as the stop event is set.
    """

    import api

    univ = Universal()  #get Universal

    #how much time the crash dump sender waits before it starts sending.
    #this is required not to affect crash loop detection, since crash loop detection
    #is done by checking the number of crash dumps generated in a span of time
    crash_dump_timeout = (self.crash_loop_count * self.monit_interval) + 10

    #get the agent version regex to differentiate dumps from any other file;
    #leading ^ and trailing $ are stripped so that it can be embedded in the file name pattern
    agent_version_regex = univ.config.agent.schema['agentVersion'].get('regex', '.*')
    agent_version_regex = re.sub(r'^\^?([^\$]+)\$?$', r'\g<1>', agent_version_regex)

    _log.debug('CrashDumpSender waiting for stop event for %d seconds' % crash_dump_timeout)
    univ.stop_event.wait(crash_dump_timeout)

    try:
        #compiled once for all files; kept inside try so that a bad regex cannot kill the thread
        dump_name_pattern = re.compile(r'^sealion-%s-[0-9]+\.dmp$' % agent_version_regex)

        for dump_name in os.listdir(self.crash_dump_path):  #loop through files in the crash dump directory
            file_name = self.crash_dump_path + dump_name

            #is this a valid crash dump filename
            if not os.path.isfile(file_name) or dump_name_pattern.match(dump_name) is None:
                continue

            report = None

            while 1:
                if univ.stop_event.is_set():  #do we need to stop now
                    _log.debug('CrashDumpSender received stop event')
                    return

                #read the report from the dump, or retry the report already read
                if report is None:
                    report = self.read_dump(file_name)

                #stop retrying when there is nothing to send or the send was not a connection failure
                if report is None or api.is_not_connected(api.unauth_session.send_crash_report(report)) == False:
                    break

                _log.debug('CrashDumpSender waiting for stop event for 10 seconds')
                univ.stop_event.wait(10)  #on failure, wait for some time

            try:
                os.remove(file_name)  #remove the dump as we are done with it
                _log.info('Removed dump %s' % file_name)
            except Exception as e:
                _log.error('Failed to remove dump %s; %s' % (file_name, unicode(e)))

            if univ.stop_event.is_set():  #do we need to stop now
                _log.debug('CrashDumpSender received stop event')
                return
    except Exception as e:
        #the sender is best effort; log the failure instead of silently dropping it
        _log.error('Failed to send crash dumps; %s' % unicode(e))
def set_procname(self, proc_name = None):
    """
    Method to set the process name to show in 'top' command output.

    Uses prctl from libc.so.6 through ctypes; any failure is logged and otherwise ignored.
    proc_name defaults to the daemon name when it is empty or not given.
    """

    proc_name = proc_name if proc_name else self.daemon_name

    try:
        from ctypes import cdll, byref, create_string_buffer

        libc = cdll.LoadLibrary('libc.so.6')

        #size the buffer from the encoded bytes; a non-ASCII name is longer in UTF-8
        #than its character count and would not fit otherwise
        name_bytes = proc_name.encode('utf-8')
        buff = create_string_buffer(len(name_bytes) + 1)  #one extra byte for the terminating NUL
        buff.value = name_bytes

        #15 is PR_SET_NAME; the kernel silently truncates names longer than 15 bytes
        libc.prctl(15, byref(buff), 0, 0, 0)
    except Exception as e:
        _log.error('Failed to set process name; %s' % unicode(e))
def initialize(self):
    """
    Method to perform some tasks before daemonizing.
    The idea is to throw any error before daemonizing.

    In order: change to the configured user if needed, create the pid file,
    set the exception hook and import main. Each failure is reported on stderr
    and terminates the process with its own exit status.
    """

    target_user = self.user_name

    try:
        entry = pwd.getpwnam(target_user)  #get the pwd db entry for the user name
        already_that_user = entry.pw_uid == os.getuid()

        #if it is not the wanted user, then we need to change user and group
        #if current user is not super user, trying to change the user/group id will raise exception
        if not already_that_user:
            os.setgroups([])  #leave any effective groups
            os.setgid(entry.pw_gid)  #set group id
            os.setuid(entry.pw_uid)  #set user id
            os.environ['HOME'] = '/'  #reset the environment
    except KeyError as e:
        sys.stderr.write('Failed to find user %s; %s\n' % (self.user_name, unicode(e)))
        sys.exit(exit_status.AGENT_ERR_FAILED_FIND_USER)
    except Exception as e:
        sys.stderr.write('Failed to change the group or user to %s; %s\n' % (self.user_name, unicode(e)))
        sys.exit(exit_status.AGENT_ERR_FAILED_CHANGE_GROUP_OR_USER)

    try:
        #try to create pid file
        helper.Utils.get_safe_path(self.pidfile)
        open(self.pidfile, 'w').close()
    except Exception as e:
        sys.stderr.write(unicode(e) + '\n')
        sys.exit(exit_status.AGENT_ERR_FAILED_PID_FILE)

    sys.excepthook = self.exception_hook  #set the exception hook so that we can generate crash dumps
    import main  #import main module so that we get any error before daemonizing
def set_procname(self, proc_name=None):
    """
    Method to set the process name to show in 'top' command output.

    The name is handed to prctl in libc.so.6 via ctypes. When proc_name is empty
    or missing the daemon name is used. Failures are logged, never raised.
    """

    proc_name = proc_name if proc_name else self.daemon_name

    try:
        from ctypes import cdll, byref, create_string_buffer

        libc = cdll.LoadLibrary('libc.so.6')

        #encode first and size the buffer from the bytes, not from the character count;
        #otherwise a name with non-ASCII characters does not fit in the buffer
        encoded_name = proc_name.encode('utf-8')
        buff = create_string_buffer(len(encoded_name) + 1)  #room for the terminating NUL
        buff.value = encoded_name

        #option 15 is PR_SET_NAME; names longer than 15 bytes are silently truncated by the kernel
        libc.prctl(15, byref(buff), 0, 0, 0)
    except Exception as e:
        _log.error('Failed to set process name; %s' % unicode(e))
def initialize(self):
    """
    Method to perform some tasks before daemonizing.
    The idea is to throw any error before daemonizing.

    Sets the user and group through set_user, creates the pid file, imports
    main and only then installs the exception hook. A pid file failure is
    written to stderr and exits with AGENT_ERR_FAILED_PID_FILE.
    """

    #set the user and group for the current process
    set_user()

    try:
        #try to create pid file
        helper.Utils.get_safe_path(self.pidfile)

        with open(self.pidfile, 'w'):
            pass  #opening for write is enough to create (or truncate) it
    except Exception as e:
        error_text = unicode(e) + '\n'
        sys.stderr.write(error_text)
        sys.exit(exit_status.AGENT_ERR_FAILED_PID_FILE)

    #import main module so that we get any error before daemonizing
    import main

    #set the exception hook so that we can generate crash dumps
    sys.excepthook = self.exception_hook
except Exception as e: log.error('Error: %s', unicode(e)) try: options, args = getopt.getopt(sys.argv[1:], 'i:h', ['interval=', 'version', 'help']) command_interval = 60 #default command interval for option, arg in options: if option == '--version': version_info.print_version() and sys.exit(0) elif option in ['-i', '--interval']: command_interval = int(arg) elif option in ['-h', '--help']: usage(True) and sys.exit(0) directory = args[-1].strip() if args else '' except getopt.GetoptError as e: sys.stderr.write(unicode(e).capitalize() + '\n') #missing option value usage() and sys.exit(1) except Exception as e: sys.stderr.write('Error: ' + unicode(e) + '\n') sys.exit(1) if not directory: #no directory specified sys.stderr.write('Please specify a directory to test\n') usage() and sys.exit(1) main(directory, command_interval)
import sys ERR_SUCCESS = 0 ERR_INCOMPATIBLE_PYTHON = 2 ERR_FAILED_DEPENDENCY = 3 #Python version check. SeaLion agent works only with Python version >= 2.6 if float('%d.%d' % (sys.version_info[0], sys.version_info[1])) < 2.6: sys.stderr.write('SeaLion agent requires python version 2.6 or above\n') sys.exit(ERR_INCOMPATIBLE_PYTHON) try: import os.path except Exception: e = sys.exc_info()[1] sys.stderr.write(unicode(e) + '\n') sys.exit(ERR_FAILED_DEPENDENCY) #add module lookup paths to sys.path so that import can find them #we are inserting at the begining of sys.path so that we can be sure that we are importing the right module exe_path = os.path.dirname(os.path.realpath(__file__)).rsplit('/', 1)[0] sys.path.insert(0, exe_path + '/lib/socketio_client') sys.path.insert(0, exe_path + '/lib/websocket_client') sys.path.insert(0, exe_path + '/lib') from constructs import unicode error = False #any errors #modules to be checked for #a module can provide alternative modules by enclosing them in a list modules = [
elif option in ['-f', '--file']: #environment variable description with open(arg) as f: env_vars_count += read_env_vars(f, env_vars) elif option == '--restart': restart_agent = True elif option == '--version': version_info.print_version() and sys.exit(0) elif option in ['-h', '--help']: usage(True) and sys.exit(0) if not env_vars_count: sys.stderr.write( 'Please specify the environment variables to configure\n') usage() and sys.exit(1) except getopt.GetoptError as e: sys.stderr.write(unicode(e).capitalize() + '\n') #missing option value usage() and sys.exit(1) except (KeyboardInterrupt, EOFError): sys.stdout.write('\n') sys.exit(0) except Exception as e: sys.stderr.write('Error: ' + unicode(e) + '\n') #any other exception sys.exit(1) try: #perform the action JSONfig.perform(filename=exe_path + '/etc/config.json', action='set', keys='env', value=json.dumps(env_vars), pretty_print=True)
elif arg == '-h': sys.stdout.write(usage) sys.exit(0) else: #anything else is considered as url to fetch url = arg.strip() if url: url = url if re.match('https?://.*', url) else 'http://' + url #default to http scheme if no scheme specified urls.append(url) i += 1 #next option except IndexError: sys.stderr.write('Error: %s requires an argument\n%s' % (sys.argv[i - 1], usage)) #missing option value sys.exit(1) except Exception as e: sys.stderr.write('Error: ' + unicode(e) + '\n') sys.exit(1) if len(urls) == 0: #no urls specified sys.stderr.write('Error: please specify atleast one URL\n%s' % usage) sys.exit(1) try: f = sys.stdout if not output_file else open(output_file, 'wb') #if no output file is specified, write to stdout for url in urls: #fetch all urls response = method(url, **kwargs) #retreive data chunkwise and write it to file for chunk in response.iter_content(chunk_size = 1024): if chunk:
env_vars_count += 1 elif option in ['-f', '--file']: #environment variable description with open(arg) as f: env_vars_count += read_env_vars(f, env_vars) elif option == '--restart': restart_agent = True elif option == '--version': version_info.print_version() and sys.exit(0) elif option in ['-h', '--help']: usage(True) and sys.exit(0) if not env_vars_count: sys.stderr.write('Please specify the environment variables to configure\n') usage() and sys.exit(1) except getopt.GetoptError as e: sys.stderr.write(unicode(e).capitalize() + '\n') #missing option value usage() and sys.exit(1) except (KeyboardInterrupt, EOFError): sys.stdout.write('\n'); sys.exit(0) except Exception as e: sys.stderr.write('Error: ' + unicode(e) + '\n') #any other exception sys.exit(1) try: #perform the action JSONfig.perform(filename = exe_path + '/etc/config.json', action = 'set', keys = 'env', value = json.dumps(env_vars), pretty_print = True) except KeyError as e: sys.stderr.write('Error: unknown key ' + unicode(e) + '\n') sys.exit(1) except Exception as e:
elif option in ['-a', '--action']: #operation to be performed; default to 'get' if arg in JSONfig.actions: action = arg #set the action else: sys.stderr.write('Uknown argument \'%s\' for %s\n' % (arg, option)) #unknown action usage() and sys.exit(1) elif option == '--version': version_info.print_version() and sys.exit(0) elif option in ['-h', '--help']: usage(True) and sys.exit(0) else: #anything else is considered as the file to read filename = arg.strip() filename = args[-1] if args else filename except getopt.GetoptError as e: sys.stderr.write(unicode(e).capitalize() + '\n') #missing option value usage() and sys.exit(1) except Exception as e: sys.stderr.write('Error: ' + unicode(e) + '\n') #any other exception sys.exit(1) if keys == None: #no keys specified for the operation sys.stderr.write('Please specify a key\n') usage() and sys.exit(1) if value == None and action not in ['get', 'delete']: #any action other than get and del requires a value to be set sys.stderr.write('Please specify a value to %s\n' % action) usage() and sys.exit(1) if not filename: #no file specified sys.stderr.write('Please specify a file to read/write\n')
def send_crash_dumps(self):
    """
    Method to send all crash dumps to server.
    This method runs in a seperate thread.

    After an initial wait on the stop event, every file in the crash dump
    directory matching the dump pattern is reported and then removed.
    The method returns whenever the stop event is found set.
    """

    import api

    univ = universal.Universal()  #get Universal

    #time the sender waits before it starts sending. crash loop detection works by
    #counting the dumps generated in a span of time, so sending must not interfere with it
    initial_wait = (self.crash_loop_count * self.crash_loop_timeout) + 10

    _log.debug('CrashDumpSender waiting for stop event for %d seconds' % initial_wait)
    univ.stop_event.wait(initial_wait)

    try:
        #loop though files in the crash dump directory
        for entry in os.listdir(self.crash_dump_path):
            file_name = self.crash_dump_path + entry

            #skip anything that is not a regular file
            if not os.path.isfile(file_name):
                continue

            #skip anything that is not named like a crash dump
            if re.match(self.crash_dump_pattern % self.agent_version_regex, entry) is None:
                continue

            report = None

            while 1:
                if univ.stop_event.is_set():  #do we need to stop now
                    _log.debug('CrashDumpSender received stop event')
                    return

                #read the report from the dump, unless we are retrying one already read
                if report is None:
                    report = self.read_dump(file_name)

                if report is None:  #nothing to send
                    break

                response = api.unauth_session.send_crash_report(report)  #send the dump

                if api.is_not_connected(response) == False:
                    break

                _log.debug('CrashDumpSender waiting for stop event for 10 seconds')
                univ.stop_event.wait(10)  #on failure, wait for some time

            try:
                os.remove(file_name)  #remove the dump as we sent it
                _log.info('Removed dump %s' % file_name)
            except Exception as e:
                _log.error('Failed to remove dump %s; %s' % (file_name, unicode(e)))

            if univ.stop_event.is_set():  #do we need to stop now
                _log.debug('CrashDumpSender received stop event')
                return
    except:
        pass
if arg in JSONfig.actions: action = arg #set the action else: sys.stderr.write('Uknown argument \'%s\' for %s\n' % (arg, option)) #unknown action usage() and sys.exit(1) elif option == '--version': version_info.print_version() and sys.exit(0) elif option in ['-h', '--help']: usage(True) and sys.exit(0) else: #anything else is considered as the file to read filename = arg.strip() filename = args[-1] if args else filename except getopt.GetoptError as e: sys.stderr.write(unicode(e).capitalize() + '\n') #missing option value usage() and sys.exit(1) except Exception as e: sys.stderr.write('Error: ' + unicode(e) + '\n') #any other exception sys.exit(1) if keys == None: #no keys specified for the operation sys.stderr.write('Please specify a key\n') usage() and sys.exit(1) if value == None and action not in [ 'get', 'delete' ]: #any action other than get and del requires a value to be set sys.stderr.write('Please specify a value to %s\n' % action) usage() and sys.exit(1)
else: #anything else is considered as url to fetch url = arg.strip() if url: url = url if re.match( 'https?://.*', url ) else 'http://' + url #default to http scheme if no scheme specified urls.append(url) i += 1 #next option except IndexError: sys.stderr.write('Error: %s requires an argument\n%s' % (sys.argv[i - 1], usage)) #missing option value sys.exit(1) except Exception as e: sys.stderr.write('Error: ' + unicode(e) + '\n') sys.exit(1) if len(urls) == 0: #no urls specified sys.stderr.write('Error: please specify atleast one URL\n%s' % usage) sys.exit(1) try: f = sys.stdout if not output_file else open( output_file, 'wb') #if no output file is specified, write to stdout for url in urls: #fetch all urls response = method(url, **kwargs) #retreive data chunkwise and write it to file for chunk in response.iter_content(chunk_size=1024):