def _process_event(self, id_, event):
    """Processes the given event, updating the instance data structures
    accordingly.

    Updates the job's current status (respecting status precedence),
    appends non-trivial statuses to the bounded history list, and
    maintains the running flag and timeout entries."""
    datetime_ = event['datetime']

    if event['status'] is not None:
        status = event['status']
        prevstatus = self.status[id_]['status']

        # Avoid overwriting a status with a less important one.
        if status == CrabStatus.CLEARED:
            # CLEARED always takes effect.
            self.status[id_]['status'] = status
        elif CrabStatus.is_trivial(status):
            # Trivial statuses only replace "no status yet" or an OK status.
            if prevstatus is None or CrabStatus.is_ok(prevstatus):
                self.status[id_]['status'] = status
        elif CrabStatus.is_warning(status):
            # Warnings replace anything except an existing error.
            if prevstatus is None or not CrabStatus.is_error(prevstatus):
                self.status[id_]['status'] = status

        # Always set success / failure status (the remaining options).
        else:
            self.status[id_]['status'] = status

        if not CrabStatus.is_trivial(status):
            # Record the status in the history, dropping the oldest
            # entry to keep at most HISTORY_COUNT items.
            history = self.status[id_]['history']
            if len(history) >= HISTORY_COUNT:
                del history[0]
            history.append(status)

    # Handle ALREADYRUNNING as a 'start' type event, so that
    # the MISSED alarm is not raised and the timeout period
    # is extended.
    if (event['type'] == CrabEvent.START
            or event['status'] == CrabStatus.ALREADYRUNNING):
        self.status[id_]['running'] = True
        if not self.passive:
            # Reset the job's timeout clock and clear any pending
            # late / missed alarms now that it has started.
            self.last_start[id_] = datetime_
            self.timeout[id_] = datetime_ + self.config[id_]['timeout']
            if id_ in self.late_timeout:
                del self.late_timeout[id_]
            if id_ in self.miss_timeout:
                del self.miss_timeout[id_]

    elif (event['type'] == CrabEvent.FINISH
            or event['status'] == CrabStatus.TIMEOUT):
        self.status[id_]['running'] = False
        if not self.passive:
            # The job is no longer running: cancel its timeout.
            if id_ in self.timeout:
                del self.timeout[id_]
def event_to_rssitem(self, event):
    """Function converting an event (Python dict) to an RSSItem object."""
    # Title reads: "STATUS: user @ host[: command]".
    status_name = CrabStatus.get_name(event['status'])
    title = '{0}: {1} @ {2}'.format(
        status_name, event['user'], event['host'])
    if event['command'] is not None:
        title = title + ': ' + event['command']

    # Link to the job page, or directly to the output when available.
    link = self.base + '/job/' + str(event['id'])
    if event['finishid'] is not None:
        link = link + '/output/' + str(event['finishid'])

    # Assemble captured output, separating stderr from stdout.
    chunks = []
    if event['stdout']:
        chunks.append(event['stdout'])
    if event['stderr']:
        if event['stdout']:
            chunks.append('\n\nStandard Error:\n\n')
        chunks.append(event['stderr'])
    output = ''.join(chunks)

    date = event['datetime']

    # GUID combines host, job, timestamp and status for uniqueness.
    guid_fields = [
        'crab',
        self.fqdn,
        str(event['id']),
        str(calendar.timegm(date.timetuple())),
        str(event['status']),
    ]

    extra = {}
    if output:
        extra['description'] = '<pre>' + output + '</pre>'

    return RSSItem(title=title, link=link, pubDate=date,
                   guid=Guid(':'.join(guid_fields), isPermaLink=False),
                   **extra)
def event_to_rssitem(self, event):
    """Function converting an event (Python dict) to an RSSItem object."""
    # Title reads: "STATUS: user @ host[: command]".
    parts = [CrabStatus.get_name(event['status']), ': ',
             event['user'], ' @ ', event['host']]
    if event['command'] is not None:
        parts.extend([': ', event['command']])
    title = ''.join(parts)

    # Link to the job page, or directly to the output when available.
    link = self.base + '/job/' + str(event['id'])
    if event['finishid'] is not None:
        link += '/output/' + str(event['finishid'])

    # Assemble captured output, separating stderr from stdout.
    stdout = event['stdout']
    stderr = event['stderr']
    output = ''
    if stdout:
        output = stdout
    if stderr:
        if stdout:
            output += '\n\nStandard Error:\n\n'
        output += stderr

    # Convert the stored timestamp via the store's parser.
    date = self.store.parse_datetime(event['datetime'])

    # GUID combines host, job, timestamp and status for uniqueness.
    guid = ':'.join(['crab', self.fqdn, str(event['id']),
                     str(calendar.timegm(date.timetuple())),
                     str(event['status'])])

    kwargs = {}
    if output != '':
        kwargs['description'] = '<pre>' + output + '</pre>'

    return RSSItem(title=title, link=link, pubDate=date,
                   guid=Guid(guid, isPermaLink=False), **kwargs)
def check_status_patterns(status, config, output):
    """Function to update a job status based on the patterns.

    Compares the given output with the patterns in the job
    configuration, and returns the updated status."""
    # A special status which doesn't indicate job completion:
    # do not attempt to apply the patterns at all.
    if status == CrabStatus.ALREADYRUNNING:
        return status

    # An existing error status is never downgraded.
    if CrabStatus.is_error(status):
        return status

    pattern_fail = config['fail_pattern']
    if pattern_fail is not None and re.search(pattern_fail, output):
        return CrabStatus.FAIL

    # An existing warning status is never downgraded either.
    if CrabStatus.is_warning(status):
        return status

    pattern_warn = config['warning_pattern']
    if pattern_warn is not None and re.search(pattern_warn, output):
        return CrabStatus.WARNING

    pattern_ok = config['success_pattern']
    if pattern_ok is not None:
        if re.search(pattern_ok, output):
            return CrabStatus.SUCCESS
        # A success pattern was defined but did not match.  If a fail
        # pattern also existed (and didn't match), the outcome is
        # UNKNOWN; otherwise the absence of an explicit success match
        # is treated as a failure.
        if pattern_fail is not None:
            return CrabStatus.UNKNOWN
        return CrabStatus.FAIL

    # Otherwise return the original status.  If there was a failure
    # pattern, then we already know we didn't match it.
    return status
def __call__(self, events, skip_ok=False, skip_warning=False,
             skip_error=False, skip_trivial=True, skip_start=False,
             squash_start=False):
    """Performs filtering, and returns the altered event list.

    Also counts error and warning events seen, storing the totals
    in self.errors and self.warnings.

    With squash_start set, each FINISH event absorbs its matching
    START event: the START is dropped from the output and the FINISH
    gains a 'duration' entry."""
    output = []
    squash = set()
    self.errors = 0
    self.warnings = 0

    for (i, e) in enumerate(events):
        # Skip events already absorbed by a later FINISH event.
        if i in squash:
            continue

        # Copy so that the caller's event dicts are not modified.
        e = e.copy()

        if e['type'] == CrabEvent.START:
            if skip_start:
                continue
        else:
            if (skip_trivial and CrabStatus.is_trivial(e['status'])
                    or skip_ok and CrabStatus.is_ok(e['status'])
                    or skip_warning and CrabStatus.is_warning(e['status'])
                    or skip_error and CrabStatus.is_error(e['status'])):
                continue

            if CrabStatus.is_error(e['status']):
                self.errors += 1
            if CrabStatus.is_warning(e['status']):
                self.warnings += 1

        if squash_start and e['type'] == CrabEvent.FINISH:
            # Locate the matching START further down the list and
            # compute the elapsed time between the two events.
            start = _find_previous_start(events, i)
            if start is not None:
                squash.add(start)
                delta = (
                    self.store.parse_datetime(e['datetime']) -
                    self.store.parse_datetime(events[start]['datetime']))
                e['duration'] = str(delta)

        e['datetime'] = self.in_timezone(e['datetime'])
        output.append(e)

    return output
def _find_previous_start(events, i):
    """Looks in the event list, past position i, for the previous start.

    Skips over alarms and other trivial events.  Returns the index of
    the START event, or None if a non-trivial intervening event (or the
    end of the list) is reached first."""
    for position in range(i + 1, len(events)):
        candidate = events[position]
        if candidate['type'] == CrabEvent.START:
            return position
        # A non-alarm, non-trivial event in between means the FINISH
        # does not pair with any earlier START.
        if (candidate['type'] != CrabEvent.ALARM
                and not CrabStatus.is_trivial(candidate['status'])):
            return None
    return None
def __call__(self, events, skip_ok=False, skip_warning=False,
             skip_error=False, skip_trivial=True, skip_start=False,
             squash_start=False):
    """Performs filtering, and returns the altered event list.

    Error and warning event counts are recorded in self.errors and
    self.warnings.  With squash_start set, each FINISH event hides
    its matching START event and gains a 'duration' entry."""
    filtered = []
    squashed = set()
    self.errors = 0
    self.warnings = 0

    for (index, entry) in enumerate(events):
        # Entries absorbed by an earlier-seen FINISH are dropped.
        if index in squashed:
            continue

        # Work on a copy so the caller's dicts stay untouched.
        entry = entry.copy()

        status = entry['status']
        if entry['type'] == CrabEvent.START:
            if skip_start:
                continue
        else:
            if ((skip_trivial and CrabStatus.is_trivial(status))
                    or (skip_ok and CrabStatus.is_ok(status))
                    or (skip_warning and CrabStatus.is_warning(status))
                    or (skip_error and CrabStatus.is_error(status))):
                continue

            if CrabStatus.is_error(status):
                self.errors += 1
            if CrabStatus.is_warning(status):
                self.warnings += 1

        if squash_start and entry['type'] == CrabEvent.FINISH:
            # Pair this FINISH with its START and attach the elapsed time.
            start_index = _find_previous_start(events, index)
            if start_index is not None:
                squashed.add(start_index)
                finish_time = self.store.parse_datetime(entry['datetime'])
                start_time = self.store.parse_datetime(
                    events[start_index]['datetime'])
                entry['duration'] = str(finish_time - start_time)

        entry['datetime'] = self.in_timezone(entry['datetime'])
        filtered.append(entry)

    return filtered
def run(self):
    """Monitor thread main run function.

    When the thread is started, this function will run.  It begins
    by fetching a list of jobs and using them to populate its
    data structures.

    When this is complete, the Event status_ready is fired.

    It then goes into a loop, and every few seconds it checks
    for new events, processing any which are found.  The new_event
    Condition is fired if there were any new events.

    We call _check_minute from CrabMinutely to check whether the
    minute has changed since the last time round the loop."""

    jobs = self.store.get_jobs()

    for job in jobs:
        id_ = job['id']
        try:
            self._initialize_job(id_, load_events=True)
        except JobDeleted:
            logger.warning('Warning: job {} has vanished'.format(id_))

    self.status_ready.set()

    while True:
        time.sleep(5)
        datetime_ = datetime.now(pytz.UTC)

        # Retrieve events.  Trap exceptions in case of database
        # disconnection.
        events = []
        try:
            events = self.store.get_events_since(self.max_startid,
                                                 self.max_alarmid,
                                                 self.max_finishid)
        except Exception as e:
            logger.exception('Error: monitor exception getting events')

        for event in events:
            id_ = event['jobid']
            self._update_max_id_values(event)

            try:
                if id_ not in self.status:
                    self._initialize_job(id_)

                self._process_event(id_, event)
                self._compute_reliability(id_)

            # If the monitor is loaded when a job has just been
            # deleted, then it may have events more recent
            # than those of the events that still exist.
            except JobDeleted:
                pass

            # Also trap other exceptions, in case a database disconnection
            # causes a failure from _initialize_job.  Do this separately,
            # inside the events loop so that we keep the max_id_values
            # up to date with the other events.
            except Exception as e:
                logger.exception('Error: monitor exception handling event')

        # Recount the number of jobs currently in warning / error state.
        self.num_error = 0
        self.num_warning = 0

        for id_ in self.status:
            jobstatus = self.status[id_]['status']
            if (jobstatus is None or CrabStatus.is_ok(jobstatus)):
                pass
            elif (CrabStatus.is_warning(jobstatus)):
                self.num_warning += 1
            else:
                self.num_error += 1

        if events:
            with self.new_event:
                self.new_event.notify_all()

        # Allow superclass CrabMinutely to call our run_minutely
        # method as required.  Note: the call back to run_minutely
        # is protected by a try-except block in the superclass.
        self._check_minute()

        # Check status of timeouts - need to get a list of keys
        # so that we can delete from the dict while iterating.
        # Note: _write_alarm uses a try-except block for CrabErrors.
        for id_ in list(self.late_timeout.keys()):
            if self.late_timeout[id_] < datetime_:
                self._write_alarm(id_, CrabStatus.LATE)
                del self.late_timeout[id_]

        for id_ in list(self.miss_timeout.keys()):
            if self.miss_timeout[id_] < datetime_:
                self._write_alarm(id_, CrabStatus.MISSED)
                del self.miss_timeout[id_]

        for id_ in list(self.timeout.keys()):
            if self.timeout[id_] < datetime_:
                self._write_alarm(id_, CrabStatus.TIMEOUT)
                del self.timeout[id_]
def run(self):
    """Monitor thread main run function.

    When the thread is started, this function will run.  It begins
    by fetching a list of jobs and using them to populate its
    data structures.

    When this is complete, the Event status_ready is fired.

    It then goes into a loop, and every few seconds it checks
    for new events, processing any which are found.  The new_event
    Condition is fired if there were any new events.

    We call _check_minute from CrabMinutely to check whether the
    minute has changed since the last time round the loop."""

    jobs = self.store.get_jobs()

    for job in jobs:
        id_ = job['id']
        try:
            self._initialize_job(id_, load_events=True)
        except JobDeleted:
            print('Warning: job', id_, 'has vanished')

    self.status_ready.set()

    while True:
        time.sleep(5)
        datetime_ = datetime.datetime.now(pytz.UTC)

        events = self.store.get_events_since(self.max_startid,
                                             self.max_alarmid,
                                             self.max_finishid)

        for event in events:
            id_ = event['jobid']
            self._update_max_id_values(event)

            try:
                if id_ not in self.status:
                    self._initialize_job(id_)

                self._process_event(id_, event)
                self._compute_reliability(id_)

            # If the monitor is loaded when a job has just been
            # deleted, then it may have events more recent
            # than those of the events that still exist.
            except JobDeleted:
                pass

        # Recount the number of jobs currently in warning / error state.
        self.num_error = 0
        self.num_warning = 0

        for id_ in self.status:
            jobstatus = self.status[id_]['status']
            if (jobstatus is None or CrabStatus.is_ok(jobstatus)):
                pass
            elif (CrabStatus.is_warning(jobstatus)):
                self.num_warning += 1
            else:
                self.num_error += 1

        if events:
            with self.new_event:
                self.new_event.notify_all()

        # Allow superclass CrabMinutely to call our run_minutely
        # method as required.
        self._check_minute()

        # Check status of timeouts - need to get a list of keys
        # so that we can delete from the dict while iterating.
        for id_ in list(self.miss_timeout.keys()):
            if self.miss_timeout[id_] < datetime_:
                self._write_alarm(id_, CrabStatus.MISSED)
                del self.miss_timeout[id_]

        for id_ in list(self.timeout.keys()):
            if self.timeout[id_] < datetime_:
                self._write_alarm(id_, CrabStatus.TIMEOUT)
                del self.timeout[id_]
def _event_line(event):
    """Format a one-line summary of an event: type, status and timestamp,
    with the first two columns padded to ten characters."""
    type_name = CrabEvent.get_name(event['type'])
    status_name = CrabStatus.get_name(event['status'])
    return '{0:10} {1:10} {2}'.format(
        type_name, status_name, event['datetime'])
def run(self):
    """Monitor thread main run function.

    When the thread is started, this function will run.  It begins
    by fetching a list of jobs and using them to populate its
    data structures.

    When this is complete, the Event status_ready is fired.

    It then goes into a loop, and every few seconds it checks
    for new events, processing any which are found.  The new_event
    Condition is fired if there were any new events.

    We call _check_minute from CrabMinutely to check whether the
    minute has changed since the last time round the loop."""

    jobs = self.store.get_jobs()

    for job in jobs:
        id_ = job['id']
        try:
            self._initialize_job(id_, load_events=True)
        except JobDeleted:
            logger.warning('Warning: job {} has vanished'.format(id_))

    self.status_ready.set()

    while True:
        time.sleep(5)
        datetime_ = datetime.now(pytz.UTC)

        # Retrieve events.  Trap exceptions in case of database
        # disconnection.
        events = []
        try:
            events = self.store.get_events_since(
                self.max_startid, self.max_alarmid, self.max_finishid)
        except Exception as e:
            logger.exception('Error: monitor exception getting events')

        for event in events:
            id_ = event['jobid']
            self._update_max_id_values(event)

            try:
                if id_ not in self.status:
                    self._initialize_job(id_)

                self._process_event(id_, event)
                self._compute_reliability(id_)

            # If the monitor is loaded when a job has just been
            # deleted, then it may have events more recent
            # than those of the events that still exist.
            except JobDeleted:
                pass

            # Also trap other exceptions, in case a database disconnection
            # causes a failure from _initialize_job.  Do this separately,
            # inside the events loop so that we keep the max_id_values
            # up to date with the other events.
            except Exception as e:
                logger.exception('Error: monitor exception handling event')

        # Recount the number of jobs currently in warning / error state.
        self.num_error = 0
        self.num_warning = 0

        for id_ in self.status:
            jobstatus = self.status[id_]['status']
            if (jobstatus is None or CrabStatus.is_ok(jobstatus)):
                pass
            elif (CrabStatus.is_warning(jobstatus)):
                self.num_warning += 1
            else:
                self.num_error += 1

        if events:
            with self.new_event:
                self.new_event.notify_all()

        # Allow superclass CrabMinutely to call our run_minutely
        # method as required.  Note: the call back to run_minutely
        # is protected by a try-except block in the superclass.
        self._check_minute()

        # Check status of timeouts - need to get a list of keys
        # so that we can delete from the dict while iterating.
        # Note: _write_alarm uses a try-except block for CrabErrors.
        for id_ in list(self.late_timeout.keys()):
            if self.late_timeout[id_] < datetime_:
                self._write_alarm(id_, CrabStatus.LATE)
                del self.late_timeout[id_]

        for id_ in list(self.miss_timeout.keys()):
            if self.miss_timeout[id_] < datetime_:
                self._write_alarm(id_, CrabStatus.MISSED)
                del self.miss_timeout[id_]

        for id_ in list(self.timeout.keys()):
            if self.timeout[id_] < datetime_:
                self._write_alarm(id_, CrabStatus.TIMEOUT)
                del self.timeout[id_]
def run(self):
    """Monitor thread main run function.

    When the thread is started, this function will run.  It begins
    by fetching a list of jobs and using them to populate its
    data structures.

    When this is complete, the Event status_ready is fired.

    It then goes into a loop, and every few seconds it checks
    for new events, processing any which are found.  The new_event
    Condition is fired if there were any new events.

    We call _check_minute from CrabMinutely to check whether the
    minute has changed since the last time round the loop."""

    jobs = self.store.get_jobs()

    for job in jobs:
        id_ = job['id']
        try:
            self._initialize_job(id_, load_events=True)
        except JobDeleted:
            print('Warning: job', id_, 'has vanished')

    self.status_ready.set()

    while True:
        time.sleep(5)
        datetime_ = datetime.datetime.now(pytz.UTC)

        events = self.store.get_events_since(self.max_startid,
                                             self.max_alarmid,
                                             self.max_finishid)

        for event in events:
            id_ = event['jobid']
            self._update_max_id_values(event)

            try:
                if id_ not in self.status:
                    self._initialize_job(id_)

                self._process_event(id_, event)
                self._compute_reliability(id_)

            # If the monitor is loaded when a job has just been
            # deleted, then it may have events more recent
            # than those of the events that still exist.
            except JobDeleted:
                pass

        # Recount the number of jobs currently in warning / error state.
        # (Stray statement-terminating semicolons removed here: they are
        # redundant in Python and inconsistent with the rest of the file.)
        self.num_error = 0
        self.num_warning = 0

        for id_ in self.status:
            jobstatus = self.status[id_]['status']
            if (jobstatus is None or CrabStatus.is_ok(jobstatus)):
                pass
            elif (CrabStatus.is_warning(jobstatus)):
                self.num_warning += 1
            else:
                self.num_error += 1

        if events:
            with self.new_event:
                self.new_event.notify_all()

        # Allow superclass CrabMinutely to call our run_minutely
        # method as required.
        self._check_minute()

        # Check status of timeouts - need to get a list of keys
        # so that we can delete from the dict while iterating.
        for id_ in list(self.miss_timeout.keys()):
            if self.miss_timeout[id_] < datetime_:
                self._write_alarm(id_, CrabStatus.MISSED)
                del self.miss_timeout[id_]

        for id_ in list(self.timeout.keys()):
            if self.timeout[id_] < datetime_:
                self._write_alarm(id_, CrabStatus.TIMEOUT)
                del self.timeout[id_]