Ejemplo n.º 1
0
    def _process_event(self, id_, event):
        """Apply a single event to the stored state for job id_.

        The job's status and status history are updated first (if the
        event carries a status), followed by the 'running' flag and
        the timeout tables, depending on whether the event counts as
        a start or as a finish."""

        when = event['datetime']
        new_status = event['status']

        if new_status is not None:
            job = self.status[id_]
            previous = job['status']

            # Decide whether the new status may replace the stored one,
            # so that a status is not overwritten by a less important
            # one.  CLEARED and success / failure statuses (the final
            # case) are always stored.
            if new_status == CrabStatus.CLEARED:
                replace = True
            elif CrabStatus.is_trivial(new_status):
                replace = previous is None or CrabStatus.is_ok(previous)
            elif CrabStatus.is_warning(new_status):
                replace = (previous is None
                           or not CrabStatus.is_error(previous))
            else:
                replace = True

            if replace:
                job['status'] = new_status

            # Trivial statuses are left out of the history.  The oldest
            # entry is dropped once HISTORY_COUNT entries are held.
            if not CrabStatus.is_trivial(new_status):
                history = job['history']
                if len(history) >= HISTORY_COUNT:
                    del history[0]
                history.append(new_status)

        # ALREADYRUNNING is handled as a 'start' type event, so that
        # the MISSED alarm is not raised and the timeout period
        # is extended.
        event_type = event['type']

        if (event_type == CrabEvent.START
                or new_status == CrabStatus.ALREADYRUNNING):
            self.status[id_]['running'] = True
            if self.passive:
                return

            self.last_start[id_] = when
            self.timeout[id_] = when + self.config[id_]['timeout']

            # The job has started, so any pending late / missed
            # deadlines no longer apply.
            self.late_timeout.pop(id_, None)
            self.miss_timeout.pop(id_, None)

        elif (event_type == CrabEvent.FINISH
                or new_status == CrabStatus.TIMEOUT):
            self.status[id_]['running'] = False
            if not self.passive:
                self.timeout.pop(id_, None)
Ejemplo n.º 2
0
    def _process_event(self, id_, event):
        """Apply a single event to the stored state for job id_.

        The job's status and status history are updated first (if the
        event carries a status), followed by the 'running' flag and
        the timeout tables, depending on whether the event counts as
        a start or as a finish."""

        when = event['datetime']
        new_status = event['status']

        if new_status is not None:
            job = self.status[id_]
            previous = job['status']

            # Work out whether the stored status should be replaced:
            # a status must not be overwritten by a less important
            # one.  CLEARED and success / failure statuses (the
            # remaining options) always replace it.
            if new_status == CrabStatus.CLEARED:
                replace = True
            elif CrabStatus.is_trivial(new_status):
                replace = previous is None or CrabStatus.is_ok(previous)
            elif CrabStatus.is_warning(new_status):
                replace = (previous is None or
                           not CrabStatus.is_error(previous))
            else:
                replace = True

            if replace:
                job['status'] = new_status

            # Only non-trivial statuses are recorded in the history,
            # dropping the oldest entry once HISTORY_COUNT are held.
            if not CrabStatus.is_trivial(new_status):
                history = job['history']
                if len(history) >= HISTORY_COUNT:
                    del history[0]
                history.append(new_status)

        # ALREADYRUNNING is handled as a 'start' type event, so that
        # the MISSED alarm is not raised and the timeout period
        # is extended.
        event_type = event['type']

        if (event_type == CrabEvent.START or
                new_status == CrabStatus.ALREADYRUNNING):
            self.status[id_]['running'] = True
            if self.passive:
                return

            self.last_start[id_] = when
            self.timeout[id_] = when + self.config[id_]['timeout']

            # Discard any pending late / missed deadlines for this job.
            self.late_timeout.pop(id_, None)
            self.miss_timeout.pop(id_, None)

        elif (event_type == CrabEvent.FINISH or
                new_status == CrabStatus.TIMEOUT):
            self.status[id_]['running'] = False
            if not self.passive:
                self.timeout.pop(id_, None)
Ejemplo n.º 3
0
    def event_to_rssitem(self, event):
        """Convert an event (Python dict) to an RSSItem object.

        The title is built from the status name, user and host, plus
        the command if the event has one.  The link points to the job
        page, or to a specific output page if the event has a finish
        ID.  Any stdout / stderr text in the event becomes the item
        description, wrapped in a <pre> element.

        The output text is escaped before being placed inside the
        description markup, so that characters such as '<' and '&'
        in a job's output are kept as literal text instead of
        becoming part of the HTML."""

        # Local import: the block only needs this one helper and the
        # module's own import list is not changed.
        from xml.sax.saxutils import escape

        title = (CrabStatus.get_name(event['status']) + ': ' +
                 event['user'] + ' @ ' + event['host'])
        if event['command'] is not None:
            title += ': ' + event['command']

        link = self.base + '/job/' + str(event['id'])
        if event['finishid'] is not None:
            link += '/output/' + str(event['finishid'])

        # Combine stdout and stderr, with a heading between them
        # only when both are present.
        output = ''
        if event['stdout']:
            output += event['stdout']
        if event['stderr']:
            if event['stdout']:
                output += '\n\nStandard Error:\n\n'
            output += event['stderr']

        date = event['datetime']

        # The GUID is made from the server name, job ID, event time
        # (as a UTC timestamp) and status.
        guid = ':'.join([
            'crab', self.fqdn, str(event['id']),
            str(calendar.timegm(date.timetuple())), str(event['status'])])

        # Only give the item a description if there was some output.
        info = {}

        if output != '':
            info['description'] = '<pre>' + escape(output) + '</pre>'

        return RSSItem(title=title,
                       link=link,
                       pubDate=date,
                       guid=Guid(guid, isPermaLink=False),
                       **info)
Ejemplo n.º 4
0
    def event_to_rssitem(self, event):
        """Convert an event (Python dict) to an RSSItem object.

        The title is built from the status name, user and host, plus
        the command if the event has one.  The link points to the job
        page, or to a specific output page if the event has a finish
        ID.  Any stdout / stderr text in the event becomes the item
        description, wrapped in a <pre> element.

        The output text is escaped before being placed inside the
        description markup, so that characters such as '<' and '&'
        in a job's output are kept as literal text instead of
        becoming part of the HTML."""

        # Local import: the block only needs this one helper and the
        # module's own import list is not changed.
        from xml.sax.saxutils import escape

        title = (CrabStatus.get_name(event['status']) + ': ' +
                 event['user'] + ' @ ' + event['host'])
        if event['command'] is not None:
            title += ': ' + event['command']

        link = self.base + '/job/' + str(event['id'])
        if event['finishid'] is not None:
            link += '/output/' + str(event['finishid'])

        # Combine stdout and stderr, with a heading between them
        # only when both are present.
        output = ''
        if event['stdout']:
            output += event['stdout']
        if event['stderr']:
            if event['stdout']:
                output += '\n\nStandard Error:\n\n'
            output += event['stderr']

        # The event's datetime value is converted by the store's parser.
        date = self.store.parse_datetime(event['datetime'])

        # The GUID is made from the server name, job ID, event time
        # (as a UTC timestamp) and status.
        guid = ':'.join([
            'crab', self.fqdn, str(event['id']),
            str(calendar.timegm(date.timetuple())), str(event['status'])])

        # Only give the item a description if there was some output.
        info = {}

        if output != '':
            info['description'] = '<pre>' + escape(output) + '</pre>'

        return RSSItem(title=title,
                       link=link,
                       pubDate=date,
                       guid=Guid(guid, isPermaLink=False),
                       **info)
Ejemplo n.º 5
0
def check_status_patterns(status, config, output):
    """Determine a job's status from its output and configured patterns.

    The fail, warning and success patterns from the job configuration
    are tested against the given output, in that order, and the
    resulting status is returned.  This may be the status which was
    passed in, if nothing overrides it."""

    def matched(pattern):
        # A pattern which is not defined never matches.
        return pattern is not None and re.search(pattern, output)

    # ALREADYRUNNING is a special status which doesn't indicate job
    # completion, so the patterns are not examined.  A status which
    # is already an error is also returned as it is.
    if status == CrabStatus.ALREADYRUNNING or CrabStatus.is_error(status):
        return status

    fail_pattern = config['fail_pattern']
    if matched(fail_pattern):
        return CrabStatus.FAIL

    # A warning status can only be changed by the fail pattern.
    if CrabStatus.is_warning(status):
        return status

    warning_pattern = config['warning_pattern']
    if matched(warning_pattern):
        return CrabStatus.WARNING

    success_pattern = config['success_pattern']
    if matched(success_pattern):
        return CrabStatus.SUCCESS

    # Nothing matched.  Without a success pattern the original status
    # stands: if there was a fail pattern, we already know that it
    # did not match.
    if success_pattern is None:
        return status

    # There was a success pattern which did not match.  With no fail
    # pattern, the lack of an explicit success match is treated as
    # a failure.
    if fail_pattern is None:
        return CrabStatus.FAIL

    # Both success and fail patterns were defined but neither of
    # them matched, so the status is UNKNOWN.
    return CrabStatus.UNKNOWN
Ejemplo n.º 6
0
def check_status_patterns(status, config, output):
    """Determine a job's status from its output and configured patterns.

    The fail, warning and success patterns from the job configuration
    are tested against the given output, in that order, and the
    resulting status is returned.  This may be the status which was
    passed in, if nothing overrides it."""

    def matched(pattern):
        # A pattern which is not defined never matches.
        return pattern is not None and re.search(pattern, output)

    # ALREADYRUNNING is a special status which doesn't indicate job
    # completion, so the patterns are not examined.  A status which
    # is already an error is also returned as it is.
    if status == CrabStatus.ALREADYRUNNING or CrabStatus.is_error(status):
        return status

    fail_pattern = config['fail_pattern']
    if matched(fail_pattern):
        return CrabStatus.FAIL

    # A warning status can only be changed by the fail pattern.
    if CrabStatus.is_warning(status):
        return status

    warning_pattern = config['warning_pattern']
    if matched(warning_pattern):
        return CrabStatus.WARNING

    success_pattern = config['success_pattern']
    if matched(success_pattern):
        return CrabStatus.SUCCESS

    # Nothing matched.  Without a success pattern the original status
    # stands: if there was a fail pattern, we already know that it
    # did not match.
    if success_pattern is None:
        return status

    # There was a success pattern which did not match.  With no fail
    # pattern, the lack of an explicit success match is treated as
    # a failure.
    if fail_pattern is None:
        return CrabStatus.FAIL

    # Both success and fail patterns were defined but neither of
    # them matched, so the status is UNKNOWN.
    return CrabStatus.UNKNOWN
Ejemplo n.º 7
0
    def __call__(self, events, skip_ok=False, skip_warning=False,
                 skip_error=False, skip_trivial=True, skip_start=False,
                 squash_start=False):
        """Filter the given events, returning a new list of event copies.

        Events are left out according to the skip_* options.  With
        squash_start, the start event found for a finish event is
        left out too, and the finish event is given a 'duration'
        entry.  The numbers of error and warning events which pass
        the skip options are stored in self.errors and
        self.warnings."""

        filtered = []
        squashed = set()
        self.errors = 0
        self.warnings = 0

        for (index, original) in enumerate(events):
            # This event was already merged into a finish event.
            if index in squashed:
                continue

            # Work on a copy so that the caller's events are not altered.
            entry = original.copy()

            if entry['type'] == CrabEvent.START:
                if skip_start:
                    continue
            else:
                status = entry['status']

                if skip_trivial and CrabStatus.is_trivial(status):
                    continue
                if skip_ok and CrabStatus.is_ok(status):
                    continue
                if skip_warning and CrabStatus.is_warning(status):
                    continue
                if skip_error and CrabStatus.is_error(status):
                    continue

                if CrabStatus.is_error(status):
                    self.errors += 1
                if CrabStatus.is_warning(status):
                    self.warnings += 1

            if squash_start and entry['type'] == CrabEvent.FINISH:
                start = _find_previous_start(events, index)
                if start is not None:
                    squashed.add(start)
                    finished = self.store.parse_datetime(entry['datetime'])
                    started = self.store.parse_datetime(
                        events[start]['datetime'])
                    entry['duration'] = str(finished - started)

            entry['datetime'] = self.in_timezone(entry['datetime'])

            filtered.append(entry)

        return filtered
Ejemplo n.º 8
0
def _find_previous_start(events, i):
    """Search the event list beyond position i for the previous start.

    Alarms and other trivial events are passed over.  Returns the
    index of the start event, or None if some other event is found
    first or the end of the list is reached."""

    for position in range(i + 1, len(events)):
        candidate = events[position]

        if candidate['type'] == CrabEvent.START:
            return position

        # Anything other than an alarm or a trivial event ends the search.
        is_alarm = candidate['type'] == CrabEvent.ALARM
        if not (is_alarm or CrabStatus.is_trivial(candidate['status'])):
            return None

    return None
Ejemplo n.º 9
0
def _find_previous_start(events, i):
    """Search the event list beyond position i for the previous start.

    Alarms and other trivial events are passed over.  Returns the
    index of the start event, or None if some other event is found
    first or the end of the list is reached."""

    for position in range(i + 1, len(events)):
        candidate = events[position]

        if candidate['type'] == CrabEvent.START:
            return position

        # Anything other than an alarm or a trivial event ends the search.
        is_alarm = candidate['type'] == CrabEvent.ALARM
        if not (is_alarm or CrabStatus.is_trivial(candidate['status'])):
            return None

    return None
Ejemplo n.º 10
0
    def __call__(self, events, skip_ok=False, skip_warning=False,
                 skip_error=False, skip_trivial=True, skip_start=False,
                 squash_start=False):
        """Filter the given events, returning a new list of event copies.

        Events are left out according to the skip_* options.  With
        squash_start, the start event found for a finish event is
        left out too, and the finish event is given a 'duration'
        entry.  The numbers of error and warning events which pass
        the skip options are stored in self.errors and
        self.warnings."""

        filtered = []
        squashed = set()
        self.errors = 0
        self.warnings = 0

        for (index, original) in enumerate(events):
            # This event was already merged into a finish event.
            if index in squashed:
                continue

            # Work on a copy so that the caller's events are not altered.
            entry = original.copy()

            if entry['type'] == CrabEvent.START:
                if skip_start:
                    continue
            else:
                status = entry['status']

                if skip_trivial and CrabStatus.is_trivial(status):
                    continue
                if skip_ok and CrabStatus.is_ok(status):
                    continue
                if skip_warning and CrabStatus.is_warning(status):
                    continue
                if skip_error and CrabStatus.is_error(status):
                    continue

                if CrabStatus.is_error(status):
                    self.errors += 1
                if CrabStatus.is_warning(status):
                    self.warnings += 1

            if squash_start and entry['type'] == CrabEvent.FINISH:
                start = _find_previous_start(events, index)
                if start is not None:
                    squashed.add(start)
                    finished = self.store.parse_datetime(entry['datetime'])
                    started = self.store.parse_datetime(
                        events[start]['datetime'])
                    entry['duration'] = str(finished - started)

            entry['datetime'] = self.in_timezone(entry['datetime'])

            filtered.append(entry)

        return filtered
Ejemplo n.º 11
0
    def run(self):
        """Monitor thread main run function.

        When the thread is started, this function will run.  It begins
        by fetching a list of jobs and using them to populate its
        data structures.  When this is complete, the Event status_ready
        is fired.

        It then goes into a loop, and every few seconds it checks
        for new events, processing any which are found.  The new_event
        Condition is fired if there were any new events.  On each
        pass the error and warning job counts are recomputed and any
        expired late / missed / timeout deadlines are turned into
        alarms.

        We call _check_minute from CrabMinutely to check whether the
        minute has changed since the last time round the loop.

        This function does not return."""

        for job in self.store.get_jobs():
            id_ = job['id']
            try:
                self._initialize_job(id_, load_events=True)

            except JobDeleted:
                logger.warning('Warning: job %s has vanished', id_)

        self.status_ready.set()

        while True:
            time.sleep(5)
            datetime_ = datetime.now(pytz.UTC)

            # Retrieve events.  Trap exceptions in case of database
            # disconnection, carrying on with an empty list.
            events = []
            try:
                events = self.store.get_events_since(
                    self.max_startid, self.max_alarmid, self.max_finishid)
            except Exception:
                logger.exception('Error: monitor exception getting events')

            for event in events:
                id_ = event['jobid']
                self._update_max_id_values(event)

                try:
                    if id_ not in self.status:
                        self._initialize_job(id_)

                    self._process_event(id_, event)
                    self._compute_reliability(id_)

                # If the monitor is loaded when a job has just been
                # deleted, then it may have events more recent
                # than those of the events that still exist.
                except JobDeleted:
                    pass

                # Also trap other exceptions, in case a database disconnection
                # causes a failure from _initialize_job.  Do this separately,
                # inside the events loop so that we keep the max_id_values
                # up to date with the other events.
                except Exception:
                    logger.exception('Error: monitor exception handling event')

            # Count the jobs in warning and error states.  The totals
            # are built up in local variables and stored in one step,
            # so that the attributes never hold a partial count while
            # this loop is running.
            # NOTE(review): presumably these attributes are read from
            # other threads -- confirm against the callers.
            num_error = 0
            num_warning = 0
            for job in self.status.values():
                jobstatus = job['status']
                if jobstatus is None or CrabStatus.is_ok(jobstatus):
                    continue
                if CrabStatus.is_warning(jobstatus):
                    num_warning += 1
                else:
                    num_error += 1
            self.num_error = num_error
            self.num_warning = num_warning

            if events:
                with self.new_event:
                    self.new_event.notify_all()

            # Allow superclass CrabMinutely to call our run_minutely
            # method as required.  Note: the call back to run_minutely
            # is protected by a try-except block in the superclass.
            self._check_minute()

            # Check status of timeouts - need to get a list of keys
            # so that we can delete from the dict while iterating.
            # Note: _write_alarm uses a try-except block for CrabErrors.
            for id_ in list(self.late_timeout.keys()):
                if self.late_timeout[id_] < datetime_:
                    self._write_alarm(id_, CrabStatus.LATE)
                    del self.late_timeout[id_]

            for id_ in list(self.miss_timeout.keys()):
                if self.miss_timeout[id_] < datetime_:
                    self._write_alarm(id_, CrabStatus.MISSED)
                    del self.miss_timeout[id_]

            for id_ in list(self.timeout.keys()):
                if self.timeout[id_] < datetime_:
                    self._write_alarm(id_, CrabStatus.TIMEOUT)
                    del self.timeout[id_]
Ejemplo n.º 12
0
    def run(self):
        """Monitor thread main run function.

        When the thread is started, this function will run.  It begins
        by fetching a list of jobs and using them to populate its
        data structures.  When this is complete, the Event status_ready
        is fired.

        It then goes into a loop, and every few seconds it checks
        for new events, processing any which are found.  The new_event
        Condition is fired if there were any new events.  On each
        pass the error and warning job counts are recomputed and any
        expired missed / timeout deadlines are turned into alarms.

        We call _check_minute from CrabMinutely to check whether the
        minute has changed since the last time round the loop.

        This function does not return."""

        for job in self.store.get_jobs():
            id_ = job['id']
            try:
                self._initialize_job(id_, load_events=True)

            except JobDeleted:
                print('Warning: job', id_, 'has vanished')

        self.status_ready.set()

        while True:
            time.sleep(5)
            datetime_ = datetime.datetime.now(pytz.UTC)

            events = self.store.get_events_since(
                self.max_startid, self.max_alarmid, self.max_finishid)

            for event in events:
                id_ = event['jobid']
                self._update_max_id_values(event)

                try:
                    if id_ not in self.status:
                        self._initialize_job(id_)

                    self._process_event(id_, event)
                    self._compute_reliability(id_)

                # If the monitor is loaded when a job has just been
                # deleted, then it may have events more recent
                # than those of the events that still exist.
                except JobDeleted:
                    pass

            # Count the jobs in warning and error states.  The totals
            # are built up in local variables and stored in one step,
            # so that the attributes never hold a partial count while
            # this loop is running.
            # NOTE(review): presumably these attributes are read from
            # other threads -- confirm against the callers.
            num_error = 0
            num_warning = 0
            for job in self.status.values():
                jobstatus = job['status']
                if jobstatus is None or CrabStatus.is_ok(jobstatus):
                    continue
                if CrabStatus.is_warning(jobstatus):
                    num_warning += 1
                else:
                    num_error += 1
            self.num_error = num_error
            self.num_warning = num_warning

            if events:
                with self.new_event:
                    self.new_event.notify_all()

            # Allow superclass CrabMinutely to call our run_minutely
            # method as required.
            self._check_minute()

            # Check status of timeouts - need to get a list of keys
            # so that we can delete from the dict while iterating.

            for id_ in list(self.miss_timeout.keys()):
                if self.miss_timeout[id_] < datetime_:
                    self._write_alarm(id_, CrabStatus.MISSED)
                    del self.miss_timeout[id_]

            for id_ in list(self.timeout.keys()):
                if self.timeout[id_] < datetime_:
                    self._write_alarm(id_, CrabStatus.TIMEOUT)
                    del self.timeout[id_]
Ejemplo n.º 13
0
Archivo: text.py Proyecto: somabc/crab
def _event_line(event):
    """Format an event as a line of text.

    The line consists of the event type name and the status name,
    each padded to a width of at least 10, followed by the event's
    'datetime' value."""

    type_name = CrabEvent.get_name(event['type'])
    status_name = CrabStatus.get_name(event['status'])

    return '{0:10} {1:10} {2}'.format(
        type_name, status_name, event['datetime'])
Ejemplo n.º 14
0
    def run(self):
        """Monitor thread main run function.

        When the thread is started, this function will run.  It begins
        by fetching a list of jobs and using them to populate its
        data structures.  When this is complete, the Event status_ready
        is fired.

        It then goes into a loop, and every few seconds it checks
        for new events, processing any which are found.  The new_event
        Condition is fired if there were any new events.  On each
        pass the error and warning job counts are recomputed and any
        expired late / missed / timeout deadlines are turned into
        alarms.

        We call _check_minute from CrabMinutely to check whether the
        minute has changed since the last time round the loop.

        This function does not return."""

        for job in self.store.get_jobs():
            id_ = job['id']
            try:
                self._initialize_job(id_, load_events=True)

            except JobDeleted:
                logger.warning('Warning: job %s has vanished', id_)

        self.status_ready.set()

        while True:
            time.sleep(5)
            datetime_ = datetime.now(pytz.UTC)

            # Retrieve events.  Trap exceptions in case of database
            # disconnection, carrying on with an empty list.
            events = []
            try:
                events = self.store.get_events_since(
                    self.max_startid, self.max_alarmid, self.max_finishid)
            except Exception:
                logger.exception('Error: monitor exception getting events')

            for event in events:
                id_ = event['jobid']
                self._update_max_id_values(event)

                try:
                    if id_ not in self.status:
                        self._initialize_job(id_)

                    self._process_event(id_, event)
                    self._compute_reliability(id_)

                # If the monitor is loaded when a job has just been
                # deleted, then it may have events more recent
                # than those of the events that still exist.
                except JobDeleted:
                    pass

                # Also trap other exceptions, in case a database disconnection
                # causes a failure from _initialize_job.  Do this separately,
                # inside the events loop so that we keep the max_id_values
                # up to date with the other events.
                except Exception:
                    logger.exception('Error: monitor exception handling event')

            # Count the jobs in warning and error states.  The totals
            # are built up in local variables and stored in one step,
            # so that the attributes never hold a partial count while
            # this loop is running.
            # NOTE(review): presumably these attributes are read from
            # other threads -- confirm against the callers.
            num_error = 0
            num_warning = 0
            for job in self.status.values():
                jobstatus = job['status']
                if jobstatus is None or CrabStatus.is_ok(jobstatus):
                    continue
                if CrabStatus.is_warning(jobstatus):
                    num_warning += 1
                else:
                    num_error += 1
            self.num_error = num_error
            self.num_warning = num_warning

            if events:
                with self.new_event:
                    self.new_event.notify_all()

            # Allow superclass CrabMinutely to call our run_minutely
            # method as required.  Note: the call back to run_minutely
            # is protected by a try-except block in the superclass.
            self._check_minute()

            # Check status of timeouts - need to get a list of keys
            # so that we can delete from the dict while iterating.
            # Note: _write_alarm uses a try-except block for CrabErrors.
            for id_ in list(self.late_timeout.keys()):
                if self.late_timeout[id_] < datetime_:
                    self._write_alarm(id_, CrabStatus.LATE)
                    del self.late_timeout[id_]

            for id_ in list(self.miss_timeout.keys()):
                if self.miss_timeout[id_] < datetime_:
                    self._write_alarm(id_, CrabStatus.MISSED)
                    del self.miss_timeout[id_]

            for id_ in list(self.timeout.keys()):
                if self.timeout[id_] < datetime_:
                    self._write_alarm(id_, CrabStatus.TIMEOUT)
                    del self.timeout[id_]
Ejemplo n.º 15
0
def _event_line(event):
    """Format an event as a line of text.

    The line consists of the event type name and the status name,
    each padded to a width of at least 10, followed by the event's
    'datetime' value."""

    type_name = CrabEvent.get_name(event['type'])
    status_name = CrabStatus.get_name(event['status'])

    return '{0:10} {1:10} {2}'.format(
        type_name, status_name, event['datetime'])
Ejemplo n.º 16
0
    def run(self):
        """Monitor thread main run function.

        When the thread is started, this function will run.  It begins
        by fetching a list of jobs and using them to populate its
        data structures.  When this is complete, the Event status_ready
        is fired.

        It then goes into a loop, and every few seconds it checks
        for new events, processing any which are found.  The new_event
        Condition is fired if there were any new events.  On each
        pass the error and warning job counts are recomputed and any
        expired missed / timeout deadlines are turned into alarms.

        We call _check_minute from CrabMinutely to check whether the
        minute has changed since the last time round the loop.

        This function does not return."""

        for job in self.store.get_jobs():
            id_ = job['id']
            try:
                self._initialize_job(id_, load_events=True)

            except JobDeleted:
                print('Warning: job', id_, 'has vanished')

        self.status_ready.set()

        while True:
            time.sleep(5)
            datetime_ = datetime.datetime.now(pytz.UTC)

            events = self.store.get_events_since(
                self.max_startid, self.max_alarmid, self.max_finishid)

            for event in events:
                id_ = event['jobid']
                self._update_max_id_values(event)

                try:
                    if id_ not in self.status:
                        self._initialize_job(id_)

                    self._process_event(id_, event)
                    self._compute_reliability(id_)

                # If the monitor is loaded when a job has just been
                # deleted, then it may have events more recent
                # than those of the events that still exist.
                except JobDeleted:
                    pass

            # Count the jobs in warning and error states.  The totals
            # are built up in local variables and stored in one step,
            # so that the attributes never hold a partial count while
            # this loop is running.
            # NOTE(review): presumably these attributes are read from
            # other threads -- confirm against the callers.
            num_error = 0
            num_warning = 0
            for job in self.status.values():
                jobstatus = job['status']
                if jobstatus is None or CrabStatus.is_ok(jobstatus):
                    continue
                if CrabStatus.is_warning(jobstatus):
                    num_warning += 1
                else:
                    num_error += 1
            self.num_error = num_error
            self.num_warning = num_warning

            if events:
                with self.new_event:
                    self.new_event.notify_all()

            # Allow superclass CrabMinutely to call our run_minutely
            # method as required.
            self._check_minute()

            # Check status of timeouts - need to get a list of keys
            # so that we can delete from the dict while iterating.

            for id_ in list(self.miss_timeout.keys()):
                if self.miss_timeout[id_] < datetime_:
                    self._write_alarm(id_, CrabStatus.MISSED)
                    del self.miss_timeout[id_]

            for id_ in list(self.timeout.keys()):
                if self.timeout[id_] < datetime_:
                    self._write_alarm(id_, CrabStatus.TIMEOUT)
                    del self.timeout[id_]