def finish(self):
    """Wait for active projects to end, kill the rest, and mail a summary.

    The grace period is ``self._config._cleanup_time`` seconds when a daemon
    configuration is loaded, otherwise 300 seconds. Project activity is
    polled once per second. If any project is still active when the grace
    period runs out, ``kill()`` is called on every registered project.
    A summary is then sent through ``d_msg.email`` on a best-effort basis.
    """
    grace = 300
    if self._config:
        grace = self._config._cleanup_time
    self.info('Program exit: waiting %d [sec] for all projects to end...' % grace)

    # Poll before sleeping so an idle daemon is not delayed by a pointless
    # one-second nap, and count down with "> 0" so that a negative cleanup
    # time cannot turn into an endless wait.
    remaining = grace
    finished = not any(p.active() for p in self._project_v.values())
    while not finished and remaining > 0:
        time.sleep(1)
        remaining -= 1
        finished = not any(p.active() for p in self._project_v.values())

    # Decide on the observed state rather than on the leftover counter:
    # projects that stop during the very last second did end gracefully.
    if finished:
        message = 'All projects finished gracefully.'
        self.info(message)
    else:
        message = 'There are still un-finished projects... killing them now.'
        self.warning(message)
        for name in self._project_v:
            self.warning('killing %s' % name)
            self._project_v[name].kill()

    try:
        d_msg.email(self.__class__.__name__,
                    subject='Daemon ended!',
                    text='Daemon has ended. The following message was produced:\n%s' % message)
    except BaseException as be:
        # Kept as broad as the original: the mailer's exception hierarchy is
        # not visible from here. NOTE(review): narrow this once confirmed.
        # The handler itself must not raise, so neither ``self._info`` nor
        # ``be.v`` is assumed to exist.
        label = getattr(getattr(self, '_info', None), '_project',
                        self.__class__.__name__)
        self._logger.critical('Project %s error could not be reported via email!' % label)
        for line in str(getattr(be, 'v', be)).split('\n'):
            self._logger.error(line)
def log_daemon(self):
    """Record a daemon status snapshot and act on the server handler's verdict.

    The snapshot (number of active projects, uptime in whole seconds, the
    optional ``self._logger_func()`` result, and two configuration limits)
    is written into ``self._log`` and passed to ``self._api.log_daemon``.

    When a server handler is installed it is called once; it returns a
    ``(status, msg)`` pair. ``self._exit_routine`` is set to ``not status``
    and a non-empty ``msg`` is forwarded by email.
    """
    uptime = int(time.time() - self._creation_ts)
    proj_ctr = sum(1 for proj in self._project_v.values() if proj.active())

    hstore = dict()
    if self._logger_func:
        hstore = self._logger_func()

    self._log.log(proj_ctr=proj_ctr,
                  uptime=uptime,
                  log=hstore,
                  max_proj_ctr=self._config._max_proj_ctr,
                  lifetime=self._config._lifetime)
    self._api.log_daemon(self._log)

    if not self._server_handler:
        return

    status, msg = self._server_handler()

    # Store the verdict before mailing: a mail failure must not make the
    # daemon miss a shutdown request from the handler.
    self._exit_routine = not status

    if not msg:
        return

    if not status:
        subject = '[DAEMON SHUT DOWN] Message from Server Handler'
    else:
        subject = '[NOTICE] Message from Server Handler'
    try:
        d_msg.email(self.__class__.__name__, subject=subject, text=msg)
    except DSException:
        # Same best-effort handling the main loop applies to its own mails.
        self.critical('Report to daemon experts failed!')
def log_daemon(self):
    """Log the daemon's current status and process the server handler reply.

    Writes the active-project count, uptime (whole seconds since
    ``self._creation_ts``), the optional ``self._logger_func()`` result and
    the configured ``_max_proj_ctr`` / ``_lifetime`` into ``self._log``, then
    hands that record to ``self._api.log_daemon``.

    If ``self._server_handler`` is set, its ``(status, msg)`` reply decides
    ``self._exit_routine`` (``not status``); a non-empty ``msg`` is mailed.
    """
    uptime = int(time.time() - self._creation_ts)

    proj_ctr = 0
    for proj in self._project_v.values():
        if proj.active():
            proj_ctr += 1

    hstore = self._logger_func() if self._logger_func else dict()

    self._log.log(proj_ctr=proj_ctr,
                  uptime=uptime,
                  log=hstore,
                  max_proj_ctr=self._config._max_proj_ctr,
                  lifetime=self._config._lifetime)
    self._api.log_daemon(self._log)

    if self._server_handler:
        status, msg = self._server_handler()

        # The exit flag is set first so that a failing mail cannot swallow
        # a shutdown request issued by the handler.
        self._exit_routine = not status

        if msg:
            subject = '[NOTICE] Message from Server Handler'
            if not status:
                subject = '[DAEMON SHUT DOWN] Message from Server Handler'
            try:
                d_msg.email(self.__class__.__name__, subject=subject, text=msg)
            except DSException:
                # Mail is best-effort, as for the other reports in this class.
                self.critical('Report to daemon experts failed!')
def finish(self):
    """Give active projects a grace period, kill leftovers, then mail a report.

    Waits up to ``self._config._cleanup_time`` seconds (300 when no daemon
    configuration is loaded), checking once per second whether any project
    in ``self._project_v`` is still active. If the wait expires with active
    projects remaining, ``kill()`` is invoked on every registered project.
    The resulting message is mailed via ``d_msg.email``; a mail failure is
    logged and otherwise ignored.
    """
    def _all_done():
        # True when no registered project reports itself as active.
        return not any(proj.active() for proj in self._project_v.values())

    wait_sec = self._config._cleanup_time if self._config else 300
    self.info('Program exit: waiting %d [sec] for all projects to end...' % wait_sec)

    # Check first, sleep second: nothing to wait for means no delay. The
    # "> 0" guard keeps a negative cleanup time from waiting forever.
    done = _all_done()
    while not done and wait_sec > 0:
        time.sleep(1)
        wait_sec -= 1
        done = _all_done()

    # Report what was actually observed; the remaining time alone cannot
    # tell a last-second graceful finish from a timeout.
    if done:
        message = 'All projects finished gracefully.'
        self.info(message)
    else:
        message = 'There are still un-finished projects... killing them now.'
        self.warning(message)
        for name in self._project_v:
            self.warning('killing %s' % name)
            self._project_v[name].kill()

    try:
        d_msg.email(self.__class__.__name__,
                    subject='Daemon ended!',
                    text='Daemon has ended. The following message was produced:\n%s' % message)
    except BaseException as be:
        # Broad catch retained from the original (mailer exception types are
        # not visible here -- NOTE(review): confirm and narrow). The handler
        # avoids assuming ``self._info`` or ``be.v`` so it cannot fail itself.
        info = getattr(self, '_info', None)
        label = getattr(info, '_project', self.__class__.__name__)
        self._logger.critical(
            'Project %s error could not be reported via email!' % label)
        for line in str(getattr(be, 'v', be)).split('\n'):
            self._logger.error(line)
def routine(self):
    """Run the daemon's main loop until an exit is requested, then clean up.

    One iteration is executed per second while ``self._exit_routine`` is
    false. Each iteration:

    * connects to the database; on ``DBException`` the loop idles for 60
      iterations before retrying,
    * loads the daemon configuration the first time and mails a start notice,
    * leaves the loop once the configured ``_lifetime`` is exceeded,
    * every ``_update_time`` iterations reloads the configuration and the
      projects and logs the daemon status,
    * every ``_runsync_time`` iterations calls ``runsync_project`` for each
      enabled project,
    * starts enabled projects whose ``_period`` has elapsed, as long as fewer
      than ``_max_proj_ctr`` projects are active.

    After the loop ``finish()`` is called and ``_exit_routine`` is reset.
    """
    routine_ctr = 0
    routine_sleep = 0
    while routine_ctr >= 0 and not self._exit_routine:

        routine_ctr += 1
        time.sleep(1)

        now_str = time.strftime('%Y-%m-%d %H:%M:%S')
        now_ts = time.time()
        self.debug(now_str)

        # If sleep is set, do nothing and continue
        if routine_sleep:
            routine_sleep -= 1
            continue

        try:
            self._api.connect()
            if not self._config:
                self.load_daemon()
                try:
                    msg = 'Daemon has started @ %s\n' % self._config._server
                    msg += str(self._config)
                    d_msg.email(self.__class__.__name__,
                                subject='Daemon started!',
                                text=msg)
                except BaseException as be:
                    # Broad catch kept from the original. The handler must
                    # not raise, so ``self._info`` and ``be.v`` are optional.
                    label = getattr(getattr(self, '_info', None), '_project',
                                    self.__class__.__name__)
                    self._logger.critical(
                        'Project %s error could not be reported via email!' % label)
                    for line in str(getattr(be, 'v', be)).split('\n'):
                        self._logger.error(line)
        except DBException:
            self.error('Failed connection to DB @ %s ... Retry in 1 minute' % now_str)
            routine_sleep = 60
            # Guarded like the other mails in this loop: a mail failure while
            # the DB is down must not terminate the daemon.
            try:
                d_msg.email(self.__class__.__name__,
                            subject='Daemon Error',
                            text='Failed to establish DB connection @ %s' % now_str)
            except DSException:
                self.critical('Report to daemon experts failed!')
            continue

        # Exit if time exceeds the daemon lifetime
        if self._config and (now_ts - self._creation_ts) > self._config._lifetime:
            self.warning('Exceeded pre-defined lifetime. Exiting...')
            break

        if (routine_ctr - 1) % self._config._update_time == 0:

            self.info('Routine project update @ %s ' % now_str)
            if self._api.is_cursor_connected() is None:
                self._api.connect()
            #else:
            #    self._api.reconnect()
            if not self._api.is_cursor_connected():
                try:
                    d_msg.email(self.__class__.__name__,
                                subject='Daemon Error',
                                text='Failed to establish DB connection @ %s' % now_str)
                except DSException:
                    self.critical('Report to daemon experts failed!')
                continue

            # The configuration was already loaded above on the first pass
            if (routine_ctr - 1):
                self.load_daemon()
            if self._config._enable:
                self.load_projects()
            self.log_daemon()

        if not self._config._enable:
            continue

        if (routine_ctr - 1) % self._config._runsync_time == 0:
            self.info('Routine RunSync Start @ %s' % now_str)
            try:
                # items() works on both Python 2 and 3 (iteritems() is 2-only)
                for proj_name, proj_ptr in self._project_v.items():
                    if not proj_ptr._info._enable:
                        continue
                    self.runsync_project(proj_name)
            except Exception as runsync_error:
                msg = 'RunSync failed...\n'
                msg += str(runsync_error)
                self.error(msg)
                try:
                    d_msg.email(self.__class__.__name__,
                                subject='Daemon Error',
                                text='Failed to execute RunSynch @ %s' % now_str)
                except DSException:
                    self.critical('Report to daemon experts failed!')
            self.info('Routine RunSync Done @ %s' % now_str)

        if now_ts < self._next_exec_time:
            continue

        for proj in self.ordered_projects():

            if self._exit_routine:
                break

            proj_ptr = self._project_v[proj]
            if not proj_ptr._info._enable:
                continue

            active_ctr = 0
            for x in self._project_v:
                if self._project_v[x].active():
                    active_ctr += 1
            if active_ctr >= self._config._max_proj_ctr:
                self.debug('Max number of simultaneous project execution reached.')
                break

            last_ts = self._exe_time_v[proj]
            now_ts = time.time()

            # A project started earlier in this pass may have pushed the
            # next allowed execution time into the future.
            if now_ts < self._next_exec_time:
                continue

            if proj_ptr.active():
                continue

            # Collect the result of a process that has ended since last check
            if proj_ptr.process_exist():
                self._api.project_stopped(proj)
                (code, out, err) = proj_ptr.clear()
                self.info(' %s returned %s @ %s' % (proj, code, now_str))
                if out or err:
                    self.info(' %s stdout/stderr:\n %s\n%s' % (proj, out, err))

            if last_ts is None or last_ts < (now_ts - proj_ptr._info._period):
                try:
                    self.info('Execute %s @ %s' % (proj, now_str))
                    proj_ptr.execute()
                    self._api.project_running(proj)
                except DSException:
                    self.critical('Call expert and review project %s' % proj)
                    try:
                        d_msg.email(self.__class__.__name__,
                                    subject='Daemon Error',
                                    text='Failed to execute project \'%s\' @ %s' % (proj, now_str))
                    except DSException:
                        self.critical('Report to daemon experts failed!')
                # Bookkeeping is updated even after a failed launch
                self._exe_time_v[proj] = time.time()
                self._exe_ctr_v[proj] += 1
                self._next_exec_time = self._exe_time_v[proj] + proj_ptr._info._sleep

    self.finish()
    self._exit_routine = False
def load_projects(self):
    """Refresh ``self._project_v`` from the project list returned by the API.

    For every entry of ``self._api.list_all_projects()``:

    * entries bound to a server not contained in ``self._server`` are skipped,
    * entries whose local project is currently active are left untouched,
    * disabled entries update the local project only if it was enabled,
    * unknown enabled entries get a new ``proc_action``; known ones are
      re-enabled or updated when their info differs.

    Newly seen enabled projects get an execution time of ``None`` and an
    execution counter of 0. If anything changed and a daemon configuration
    is loaded, a summary is mailed; a mail failure is only logged.
    """
    header = 'Report @ proc_daemon::load_projects()\n'
    changes = []

    # Load new/updated projects
    for x in self._api.list_all_projects():

        proj_ptr = None
        if x._project in self._project_v:
            proj_ptr = self._project_v[x._project]

        if x._server and x._server not in self._server:
            self.debug('Skipping a project on irrelevant server: %s' % x._project)
            continue

        if proj_ptr and proj_ptr.active():
            self.info('Skipping update on project %s (still active)' % x._project)
            continue

        if not x._enable:
            if not proj_ptr:
                self.debug('Skipping a disabled project: %s' % x._project)
            elif proj_ptr._info._enable:
                self.info('Disabling project: %s' % x._project)
                changes.append('Disabling \'%s\'\n' % x._project)
                proj_ptr.set_info(x)
            continue

        if not proj_ptr:
            self.info('Enabling project %s' % x._project)
            changes.append('Enabling \'%s\'\n' % x._project)
            self._project_v[x._project] = proc_action(x, self._logger)
        elif not proj_ptr._info._enable:
            self.info('Re-enabling project %s' % x._project)
            changes.append('Re-enabling project \'%s\'\n' % x._project)
            proj_ptr.set_info(x)
        # "not ==" is kept on purpose: the info type may define only __eq__
        elif not x == proj_ptr._info:
            self.info('Updating project %s information' % x._project)
            changes.append('Updating project info for \'%s\'\n' % x._project)
            proj_ptr.set_info(x)

        if x._project not in self._exe_time_v:
            self._exe_time_v[x._project] = None
            self._exe_ctr_v[x._project] = 0

    if changes and self._config:
        try:
            d_msg.email(self.__class__.__name__,
                        subject='Project Info Update Report',
                        text=header + ''.join(changes))
        except BaseException as be:
            # Broad catch kept from the original. ``be.v`` is optional so
            # that the handler cannot fail on an exception lacking it.
            self._logger.critical('Project update could not be reported via email!')
            for line in str(getattr(be, 'v', be)).split('\n'):
                self._logger.error(line)
def dummy_logger():
    """Collect host resource figures and return them as a dict.

    Keys are only present when the corresponding path exists:
    ``DISK_USAGE_HOME`` / ``DISK_USAGE_DATA`` (from ``getDISKusage``),
    ``RAM_PERCENT`` (from ``getRAMusage``) and ``CPU_PERCENT`` (from
    ``getCPUusage``). A disk figure above 0.9 additionally triggers an
    email. The ``pub_watch`` timer named 'dummy_logger' is started before
    returning.
    """
    report = {}

    # Disk occupancy, checked in a fixed order: /home first, then /data
    disk_checks = (('/home', 'DISK_USAGE_HOME'), ('/data', 'DISK_USAGE_DATA'))
    for mount, key in disk_checks:
        if not os.path.isdir(mount):
            # Path is not a directory on this host: nothing to report
            continue
        usage = getDISKusage(mount)
        report[key] = usage
        if usage > 0.9:
            # send email...
            d_msg.email('proc_daemon', 'dummy_logger',
                        "disk usage in %s above 90-percent..." % mount)

    # Memory figure, when /proc/meminfo is available
    meminfo = '/proc/meminfo'
    if os.path.isfile(meminfo):
        report['RAM_PERCENT'] = getRAMusage(meminfo)

    # CPU figure, when /proc/stat is available
    cpustat = '/proc/stat'
    if os.path.isfile(cpustat):
        report['CPU_PERCENT'] = getCPUusage(cpustat)

    pub_watch.start('dummy_logger')
    return report
def routine(self):
    """Drive the daemon: poll once per second until told to exit, then finish.

    While ``self._exit_routine`` is false, each one-second iteration

    * connects to the database (a ``DBException`` pauses work for 60
      iterations),
    * loads the daemon configuration on first use and mails a start notice,
    * stops once the running time exceeds the configured ``_lifetime``,
    * reloads configuration and projects and logs the daemon status every
      ``_update_time`` iterations,
    * calls ``runsync_project`` for enabled projects every ``_runsync_time``
      iterations,
    * executes enabled, inactive projects whose ``_period`` has elapsed while
      fewer than ``_max_proj_ctr`` projects are active.

    Finally ``finish()`` is called and ``_exit_routine`` is set back to False.
    """
    routine_ctr = 0
    routine_sleep = 0
    while routine_ctr >= 0 and not self._exit_routine:

        routine_ctr += 1
        time.sleep(1)

        now_str = time.strftime('%Y-%m-%d %H:%M:%S')
        now_ts = time.time()
        self.debug(now_str)

        # If sleep is set, do nothing and continue
        if routine_sleep:
            routine_sleep -= 1
            continue

        try:
            self._api.connect()
            if not self._config:
                self.load_daemon()
                try:
                    msg = 'Daemon has started @ %s\n' % self._config._server
                    msg += str(self._config)
                    d_msg.email(self.__class__.__name__,
                                subject='Daemon started!',
                                text=msg)
                except BaseException as be:
                    # Catch width unchanged from the original. Neither
                    # ``self._info`` nor ``be.v`` is assumed to exist, so the
                    # handler cannot raise on its own.
                    info = getattr(self, '_info', None)
                    label = getattr(info, '_project', self.__class__.__name__)
                    self._logger.critical(
                        'Project %s error could not be reported via email!' % label)
                    for line in str(getattr(be, 'v', be)).split('\n'):
                        self._logger.error(line)
        except DBException:
            self.error('Failed connection to DB @ %s ... Retry in 1 minute' % now_str)
            routine_sleep = 60
            # Mail is best-effort here too, matching the RunSync and execute
            # error paths below; otherwise a mail failure would end the loop.
            try:
                d_msg.email(self.__class__.__name__,
                            subject='Daemon Error',
                            text='Failed to establish DB connection @ %s' % now_str)
            except DSException:
                self.critical('Report to daemon experts failed!')
            continue

        # Exit if time exceeds the daemon lifetime
        if self._config and (now_ts - self._creation_ts) > self._config._lifetime:
            self.warning('Exceeded pre-defined lifetime. Exiting...')
            break

        if (routine_ctr - 1) % self._config._update_time == 0:

            self.info('Routine project update @ %s ' % now_str)
            if self._api.is_cursor_connected() is None:
                self._api.connect()
            #else:
            #    self._api.reconnect()
            if not self._api.is_cursor_connected():
                try:
                    d_msg.email(self.__class__.__name__,
                                subject='Daemon Error',
                                text='Failed to establish DB connection @ %s' % now_str)
                except DSException:
                    self.critical('Report to daemon experts failed!')
                continue

            # Skip the reload on the first pass: it was loaded just above
            if (routine_ctr - 1):
                self.load_daemon()
            if self._config._enable:
                self.load_projects()
            self.log_daemon()

        if not self._config._enable:
            continue

        if (routine_ctr - 1) % self._config._runsync_time == 0:
            self.info('Routine RunSync Start @ %s' % now_str)
            try:
                # items() instead of the Python-2-only iteritems()
                for proj_name, proj_ptr in self._project_v.items():
                    if not proj_ptr._info._enable:
                        continue
                    self.runsync_project(proj_name)
            except Exception as runsync_error:
                msg = 'RunSync failed...\n'
                msg += str(runsync_error)
                self.error(msg)
                try:
                    d_msg.email(self.__class__.__name__,
                                subject='Daemon Error',
                                text='Failed to execute RunSynch @ %s' % now_str)
                except DSException:
                    self.critical('Report to daemon experts failed!')
            self.info('Routine RunSync Done @ %s' % now_str)

        if now_ts < self._next_exec_time:
            continue

        for proj in self.ordered_projects():

            if self._exit_routine:
                break

            proj_ptr = self._project_v[proj]
            if not proj_ptr._info._enable:
                continue

            active_ctr = 0
            for x in self._project_v:
                if self._project_v[x].active():
                    active_ctr += 1
            if active_ctr >= self._config._max_proj_ctr:
                self.debug('Max number of simultaneous project execution reached.')
                break

            last_ts = self._exe_time_v[proj]
            now_ts = time.time()

            # An execution earlier in this pass may have moved the next
            # allowed execution time forward.
            if now_ts < self._next_exec_time:
                continue

            if not proj_ptr.active():
                # Harvest the outcome of a process that is no longer running
                if proj_ptr.process_exist():
                    self._api.project_stopped(proj)
                    (code, out, err) = proj_ptr.clear()
                    self.info(' %s returned %s @ %s' % (proj, code, now_str))
                    if out or err:
                        self.info(' %s stdout/stderr:\n %s\n%s' % (proj, out, err))

                if (last_ts is None or
                        last_ts < (now_ts - proj_ptr._info._period)):
                    try:
                        self.info('Execute %s @ %s' % (proj, now_str))
                        proj_ptr.execute()
                        self._api.project_running(proj)
                    except DSException:
                        self.critical('Call expert and review project %s' % proj)
                        try:
                            d_msg.email(self.__class__.__name__,
                                        subject='Daemon Error',
                                        text='Failed to execute project \'%s\' @ %s' % (proj, now_str))
                        except DSException:
                            self.critical('Report to daemon experts failed!')
                    # Timing and counters advance even when the launch failed
                    self._exe_time_v[proj] = time.time()
                    self._exe_ctr_v[proj] += 1
                    self._next_exec_time = self._exe_time_v[proj] + proj_ptr._info._sleep

    self.finish()
    self._exit_routine = False
def load_projects(self):
    """Bring the local project table in line with the API's project list.

    Each entry from ``self._api.list_all_projects()`` is handled as follows:
    entries for a server outside ``self._server`` and entries whose local
    project is active are skipped; a disabled entry is applied to a local
    project that is still enabled; an enabled entry creates a new
    ``proc_action``, re-enables a disabled local project, or updates a local
    project whose info differs. Enabled entries seen for the first time get
    ``_exe_time_v`` set to ``None`` and ``_exe_ctr_v`` set to 0.

    All changes are collected into one report that is mailed when a daemon
    configuration is loaded. Mail failures are logged, never raised.
    """
    msg = ''

    def _note(text):
        # Build the report text; the header is added once, before line one.
        prefix = '' if msg else 'Report @ proc_daemon::load_projects()\n'
        return msg + prefix + text

    # Load new/updated projects
    for x in self._api.list_all_projects():

        name = x._project
        proj_ptr = None
        if name in self._project_v:
            proj_ptr = self._project_v[name]

        if x._server and x._server not in self._server:
            self.debug('Skipping a project on irrelevant server: %s' % name)
            continue

        if proj_ptr and proj_ptr.active():
            self.info('Skipping update on project %s (still active)' % name)
            continue

        if not x._enable:
            if not proj_ptr:
                self.debug('Skipping a disabled project: %s' % name)
            elif proj_ptr._info._enable:
                self.info('Disabling project: %s' % name)
                msg = _note('Disabling \'%s\'\n' % name)
                proj_ptr.set_info(x)
            continue

        if not proj_ptr:
            self.info('Enabling project %s' % name)
            msg = _note('Enabling \'%s\'\n' % name)
            self._project_v[name] = proc_action(x, self._logger)
        elif not proj_ptr._info._enable:
            self.info('Re-enabling project %s' % name)
            msg = _note('Re-enabling project \'%s\'\n' % name)
            proj_ptr.set_info(x)
        # Written as "not ==" because only __eq__ may be defined on the info
        elif not x == proj_ptr._info:
            self.info('Updating project %s information' % name)
            msg = _note('Updating project info for \'%s\'\n' % name)
            proj_ptr.set_info(x)

        if name not in self._exe_time_v:
            self._exe_time_v[name] = None
            self._exe_ctr_v[name] = 0

    if msg and self._config:
        try:
            d_msg.email(self.__class__.__name__,
                        subject='Project Info Update Report',
                        text=msg)
        except BaseException as be:
            # Catch width unchanged from the original; ``be.v`` is treated
            # as optional so the handler cannot fail on other exceptions.
            self._logger.critical('Project update could not be reported via email!')
            for line in str(getattr(be, 'v', be)).split('\n'):
                self._logger.error(line)
def dummy_logger():
    """Return a dict of host resource readings for the daemon log.

    Possible keys: ``DISK_USAGE_HOME`` and ``DISK_USAGE_DATA`` (values from
    ``getDISKusage`` for '/home' and '/data'), ``RAM_PERCENT`` (from
    ``getRAMusage``) and ``CPU_PERCENT`` (from ``getCPUusage``). A key is
    omitted when its path does not exist. Disk readings above 0.9 are also
    reported by email. Starts the 'dummy_logger' ``pub_watch`` timer before
    returning.
    """
    readings = {}

    def _check_disk(directory, key):
        # Store the usage of one directory and mail a warning above 90%.
        if not os.path.isdir(directory):
            return
        used = getDISKusage(directory)
        readings[key] = used
        if used > 0.9:
            # send email...
            warning = "disk usage in %s above 90-percent..." % directory
            d_msg.email('proc_daemon', 'dummy_logger', warning)

    # check disk usage:
    _check_disk('/home', 'DISK_USAGE_HOME')
    _check_disk('/data', 'DISK_USAGE_DATA')

    # memory reading needs /proc/meminfo
    mem_file = '/proc/meminfo'
    if os.path.isfile(mem_file):
        readings['RAM_PERCENT'] = getRAMusage(mem_file)

    # cpu reading needs /proc/stat
    stat_file = '/proc/stat'
    if os.path.isfile(stat_file):
        readings['CPU_PERCENT'] = getCPUusage(stat_file)

    pub_watch.start('dummy_logger')
    return readings