Esempio n. 1
0
 def _connectedEb(self, f):
     """
     Errback for a failed connection attempt to the manager.

     A connection failure is reported through util.unknown and a refused
     connection through util.critical; in both cases the failure is swapped
     for one wrapping the matching Nagios exception.  The resulting failure
     is then handed to the errback of self.managerDeferred.

     @param f: the failure describing why connecting went wrong.
     """
     if f.check(errors.ConnectionFailedError):
         # switch the failure and return an UNKNOWN status
         msg = "Unable to connect to manager."
         f = failure.Failure(util.NagiosUnknown(msg))
         util.unknown(msg)
     elif f.check(errors.ConnectionRefusedError):
         # elif, so that only the original failure is classified and never
         # the replacement failure built in the branch above
         # switch the failure and return a CRITICAL status
         msg = "Manager refused connection."
         f = failure.Failure(util.NagiosCritical(msg))
         util.critical(msg)
     # all other failures get forwarded to the managerDeferred errback as-is
     self.managerDeferred.errback(f)
Esempio n. 2
0
 def _connectedEb(self, f):
     """
     Handle a failure to connect to the manager.

     ConnectionFailedError is turned into an UNKNOWN result and
     ConnectionRefusedError into a CRITICAL result: the status is reported
     through util and the failure is replaced by one carrying the matching
     Nagios exception.  Whatever failure remains is passed on to
     self.managerDeferred.errback.

     @param f: the failure raised by the connection attempt.
     """
     if f.check(errors.ConnectionFailedError):
         # switch the failure and return an UNKNOWN status
         msg = "Unable to connect to manager."
         f = failure.Failure(util.NagiosUnknown(msg))
         util.unknown(msg)
     elif f.check(errors.ConnectionRefusedError):
         # the branches are alternatives: without elif this test would run
         # against the failure that was just replaced above
         # switch the failure and return a CRITICAL status
         msg = "Manager refused connection."
         f = failure.Failure(util.NagiosCritical(msg))
         util.critical(msg)
     # all other failures get forwarded to the managerDeferred errback as-is
     self.managerDeferred.errback(f)
Esempio n. 3
0
    def do(self, args):
        """
        Check whether a worker runs more than one job for the same component.

        Looks at all processes returned by getProcesses(prefix='flumotion')
        whose command starts with 'flumotion-job' and groups their pids by
        (parent pid, component name).

        @param args: not used by this check.

        @returns: the result of util.ok when no group has more than one
                  pid, otherwise the result of util.critical listing every
                  offending group.
        """
        processes = getProcesses(prefix='flumotion')

        # convert to a dict of (worker pid, component name) -> component pids
        components = {}
        for p in processes.values():
            if not p.cmd.startswith('flumotion-job'):
                continue

            # ignore workerPid 1, which is init - see orphaned for that
            if p.ppid == 1:
                continue

            # setdefault does the lookup on the dict itself instead of
            # scanning a freshly built list of keys for every process
            pids = components.setdefault((p.ppid, p.component), [])
            pids.append(str(p.pid))

        # keep only the tuples with more than one component running
        which = [(t, pids) for t, pids in components.items() if len(pids) > 1]
        if not which:
            return util.ok('No multiple component jobs running.')

        l = ['worker %d: component %s (%s)' %
             (workerPid, component, ", ".join(pids))
             for (workerPid, component), pids in which]

        return util.critical('%d multiple component job(s) running (%s).' %
                             (len(which), ", ".join(l)))
Esempio n. 4
0
 def _detect_flipflops(self, component_state):
     """
     Run a FlipFlopDetector on component_state and report its outcome.

     The detector is configured from self.timeout, self.flipflops,
     self.mood_a and self.mood_b.

     @param component_state: the state object handed to the detector.

     @returns: the result of addCallbacks on whatever the detector's wait()
               returns (presumably a Twisted deferred - confirm); success
               goes to util.ok, a failure's error message to util.critical.
     """
     detector = FlipFlopDetector(self.timeout, self.flipflops, self.mood_a,
                                 self.mood_b, component_state)
     detector.start()

     def reportCritical(reason):
         # only the error message of the failure is reported
         return util.critical(reason.getErrorMessage())

     waiting = detector.wait()
     return waiting.addCallbacks(util.ok, reportCritical)
Esempio n. 5
0
    def do(self, args):
        """
        Report components that have more than one job under the same worker.

        Every process from getProcesses(prefix='flumotion') whose command
        starts with 'flumotion-job' is grouped by (parent pid, component
        name); jobs whose parent pid is 1 are skipped.

        @param args: not used by this check.

        @returns: the result of util.ok if every group holds a single pid,
                  otherwise the result of util.critical describing the
                  groups with several pids.
        """
        processes = getProcesses(prefix='flumotion')

        # convert to a dict of (worker pid, component name) -> component pids
        components = {}
        for p in processes.values():
            if not p.cmd.startswith('flumotion-job'):
                continue

            # ignore workerPid 1, which is init - see orphaned for that
            if p.ppid == 1:
                continue

            key = (p.ppid, p.component)
            # test membership on the dict directly; going through .keys()
            # builds and scans a list on Python 2
            if key not in components:
                components[key] = []

            components[key].append(str(p.pid))

        # keep only the tuples with more than one component running
        which = [(key, pids) for key, pids in components.items()
                 if len(pids) > 1]
        if not which:
            return util.ok('No multiple component jobs running.')

        l = []
        for (workerPid, component), pids in which:
            l.append('worker %d: component %s (%s)' % (
                workerPid, component, ", ".join(pids)))

        return util.critical('%d multiple component job(s) running (%s).' % (
            len(which), ", ".join(l)))
Esempio n. 6
0
 def _detect_flipflops(self, component_state):
     """
     Start a FlipFlopDetector for component_state and translate its result.

     @param component_state: the state object the detector works on.

     @returns: what addCallbacks returns on the object obtained from the
               detector's wait(); util.ok is the callback and the errback
               feeds the failure's error message to util.critical.
     """
     def onFailure(reason):
         return util.critical(reason.getErrorMessage())

     flipFlopDetector = FlipFlopDetector(
         self.timeout, self.flipflops, self.mood_a, self.mood_b,
         component_state)
     flipFlopDetector.start()
     return flipFlopDetector.wait().addCallbacks(util.ok, onFailure)
Esempio n. 7
0
    def do(self, args):
        """
        Check that no worker service is running more than once.

        @param args: not used by this check.

        @returns: the result of util.ok when getMultiple finds nothing,
                  otherwise the result of util.critical naming what it
                  found.
        """
        # NOTE(review): 'flumotion-worke' looks truncated; presumably this is
        # deliberate to match shortened process names - confirm before
        # "fixing" it
        duplicates = getMultiple('flumotion-worke')
        if not duplicates:
            return util.ok('no worker services running more than once')

        summary = '%d worker service(s) running more than once (%s)' % (
            len(duplicates), ", ".join(duplicates))
        return util.critical(summary)
Esempio n. 8
0
    def do(self, args):
        """
        Report worker services that are running more than once.

        @param args: not used by this check.

        @returns: the result of util.critical listing the entries returned
                  by getMultiple, or the result of util.ok when there are
                  none.
        """
        # NOTE(review): the shortened name 'flumotion-worke' is presumably
        # intentional (truncated process names) - verify against getMultiple
        multiple = getMultiple('flumotion-worke')
        count = len(multiple) if multiple else 0
        if count == 0:
            return util.ok('no worker services running more than once')

        names = ", ".join(multiple)
        return util.critical(
            '%d worker service(s) running more than once (%s)' %
            (count, names))
Esempio n. 9
0
    def do(self, args):
        """
        Check for job processes whose parent pid is 1.

        @param args: not used by this check.

        @returns: the result of util.ok when no such process exists,
                  otherwise the result of util.critical listing their pids.
        """
        # collect the pids of all flumotion-job processes with parent pid 1
        orphaned = []
        for pid, process in getProcesses(prefix='flumotion-job').items():
            if process.ppid == 1:
                orphaned.append(str(pid))

        if orphaned:
            return util.critical(
                '%d orphaned job process(es) running (%s).' % (
                    len(orphaned), ", ".join(orphaned)))

        return util.ok('No orphaned job processes running.')
Esempio n. 10
0
    def do(self, args):
        """
        Report flumotion-job processes that have pid 1 as their parent.

        @param args: not used by this check.

        @returns: the result of util.critical with the count and pids of
                  the orphaned jobs, or the result of util.ok if there are
                  none.
        """
        jobs = getProcesses(prefix='flumotion-job')

        # a job counts as orphaned when its parent pid is 1
        orphanPids = [str(pid) for pid, job in jobs.items() if job.ppid == 1]
        if len(orphanPids) == 0:
            return util.ok('No orphaned job processes running.')

        listing = ", ".join(orphanPids)
        return util.critical('%d orphaned job process(es) running (%s).' %
                             (len(orphanPids), listing))
Esempio n. 11
0
    def do(self, args):
        """
        Check how old the last log line containing self.options.string is.

        The last matching line of the log file is looked up with grep and
        tail, its timestamp is parsed, and the age in seconds is compared
        with self.options.critical and self.options.warning.

        @param args: list that must hold exactly one log file name.

        @returns: the result of util.unknown when the arguments are wrong,
                  no matching line exists, or the line cannot be parsed as
                  a log line; otherwise the result of util.critical,
                  util.warning or util.ok depending on the age.
        """
        # imported locally so the block brings its own dependency; quote
        # lives in shlex on Python 3 and in pipes on Python 2
        try:
            from shlex import quote
        except ImportError:
            from pipes import quote

        if not args:
            return util.unknown('Please specify a log file to check.')
        if len(args) > 1:
            return util.unknown('Please specify only one log file to check.')

        # quote both values so that quotes, spaces or shell metacharacters
        # in the search string or file name cannot break or alter the command
        pattern = '%s' % (self.options.string, )
        command = "grep %s %s | tail -n 1" % (quote(pattern), quote(args[0]))
        self.debug('executing %s' % command)
        output = commands.getoutput(command)
        self.debug('output: %s' % output)

        if not output:
            return util.unknown('Could not find string %s in log file' %
                self.options.string)

        level = output[:5].strip()
        if level not in ['ERROR', 'WARN', 'INFO', 'DEBUG', 'LOG']:
            return util.unknown("Last line is not a log line: '%s'" % output)

        # matches flumotion.extern.log.log
        # level   pid     object   cat      time
        # 5 + 1 + 7 + 1 + 32 + 1 + 17 + 1 + 15 == 80
        position = 5 + 1 + 7 + 1 + 32 + 1 + 17 + 1

        # log timestrings are currently in local time, which might be a mistake
        timestring = output[position:position + 15]
        try:
            timetuple = time.strptime(timestring, "%b %d %H:%M:%S")
        except ValueError:
            # a short or differently formatted line must give a check
            # result, not a traceback
            return util.unknown(
                "Could not parse timestamp '%s' in last log line" %
                timestring)
        now = time.time()
        nowtuple = time.localtime(now)

        # since the year does not get logged, assume the log line is from this
        # year, or last year if the delta becomes negative
        timelist = list(timetuple)
        timelist[0] = nowtuple[0]
        if time.mktime(tuple(timelist)) > time.mktime(nowtuple):
            self.debug('timestamp is past now, so assume it is from last year')
            timelist[0] = nowtuple[0] - 1

        # mktime also works in local time, which hopefully matches the log's
        # local time
        timestamp = time.mktime(tuple(timelist))
        delta = now - int(timestamp)

        # only mention the search string when one was given
        if self.options.string:
            which = " with '%s'" % self.options.string
        else:
            which = ''
        msg = 'Last log line%s is %s old.' % (
            which, formatting.formatTime(delta, fractional=2))
        if delta > int(self.options.critical):
            return util.critical(msg)
        elif delta > int(self.options.warning):
            return util.warning(msg)
        else:
            return util.ok(msg)
Esempio n. 12
0
    def do(self, args):
        """
        Report the age of the last log line that contains
        self.options.string.

        grep and tail pick the last matching line of the given log file; the
        timestamp in that line is parsed and its age in seconds is compared
        against self.options.critical and self.options.warning.

        @param args: list that must contain exactly one log file name.

        @returns: the result of util.unknown for bad arguments, a missing
                  match or an unparsable line; otherwise the result of
                  util.critical, util.warning or util.ok for the age found.
        """
        # local import keeps the block self-contained; Python 3 has quote in
        # shlex, Python 2 in pipes
        try:
            from shlex import quote
        except ImportError:
            from pipes import quote

        if not args:
            return util.unknown('Please specify a log file to check.')
        if len(args) > 1:
            return util.unknown('Please specify only one log file to check.')

        # shell-quote the search string and the file name: the command runs
        # through a shell, so unquoted quotes, blanks or metacharacters would
        # break or change it
        command = "grep %s %s | tail -n 1" % (
            quote('%s' % (self.options.string, )), quote(args[0]))
        self.debug('executing %s' % command)
        output = commands.getoutput(command)
        self.debug('output: %s' % output)

        if not output:
            return util.unknown('Could not find string %s in log file' %
                                self.options.string)

        level = output[:5].strip()
        if level not in ['ERROR', 'WARN', 'INFO', 'DEBUG', 'LOG']:
            return util.unknown("Last line is not a log line: '%s'" % output)

        # matches flumotion.extern.log.log
        # level   pid     object   cat      time
        # 5 + 1 + 7 + 1 + 32 + 1 + 17 + 1 + 15 == 80
        position = 5 + 1 + 7 + 1 + 32 + 1 + 17 + 1

        # log timestrings are currently in local time, which might be a mistake
        timestring = output[position:position + 15]
        try:
            timetuple = time.strptime(timestring, "%b %d %H:%M:%S")
        except ValueError:
            # strptime rejects truncated or differently formatted lines;
            # report that as a check result
            return util.unknown(
                "Could not parse timestamp '%s' in last log line" %
                timestring)
        now = time.time()
        nowtuple = time.localtime(now)

        # since the year does not get logged, assume the log line is from this
        # year, or last year if the delta becomes negative
        timelist = list(timetuple)
        timelist[0] = nowtuple[0]
        if time.mktime(tuple(timelist)) > time.mktime(nowtuple):
            self.debug('timestamp is past now, so assume it is from last year')
            timelist[0] = nowtuple[0] - 1

        # mktime also works in local time, which hopefully matches the log's
        # local time
        timestamp = time.mktime(tuple(timelist))
        delta = now - int(timestamp)

        # the search string is only mentioned when one was given
        searched = (" with '%s'" % self.options.string
                    if self.options.string else '')
        msg = 'Last log line%s is %s old.' % (
            searched, formatting.formatTime(delta, fractional=2))
        if delta > int(self.options.critical):
            return util.critical(msg)
        elif delta > int(self.options.warning):
            return util.warning(msg)
        else:
            return util.ok(msg)
Esempio n. 13
0
        def gotPlanetStateCb(result):
            """
            Map the mood of the component named self._component to a Nagios
            result.

            @param result: the value passed to util.findComponent to look the
                           component up (presumably the planet state -
                           confirm against the caller).

            @returns: the result of util.unknown when the component is not
                      found, otherwise of util.critical, util.warning or
                      util.ok depending on whether the mood name is in
                      self._critical, self._warning or neither.
            """
            self.debug("gotPlanetStateCb")
            component = util.findComponent(result, self._component)
            if not component:
                return util.unknown(
                    "Could not find component %s" % self._component)

            moodName = planet.moods.get(component.get("mood")).name
            message = "Component %s is %s" % (self._component, moodName)

            # critical moods take precedence over warning moods
            if moodName in self._critical:
                report = util.critical
            elif moodName in self._warning:
                report = util.warning
            else:
                report = util.ok
            return report(message)
Esempio n. 14
0
        def gotPlanetStateCb(result):
            """
            Turn the mood of the component self._component into a Nagios
            result.

            @param result: handed to util.findComponent to locate the
                           component (presumably the planet state - confirm).

            @returns: the result of util.unknown if no component is found;
                      otherwise the result of util.critical when the mood
                      name is in self._critical, of util.warning when it is
                      in self._warning, and of util.ok in all other cases.
            """
            self.debug('gotPlanetStateCb')
            found = util.findComponent(result, self._component)
            if not found:
                message = 'Could not find component %s' % self._component
                return util.unknown(message)

            mood = planet.moods.get(found.get('mood'))
            moodName = mood.name
            status = 'Component %s is %s' % (self._component, moodName)

            # the critical list is consulted before the warning list
            if moodName in self._critical:
                return util.critical(status)
            if moodName in self._warning:
                return util.warning(status)
            return util.ok(status)
Esempio n. 15
0
    def do(self, args):
        """
        Compare the vsize of the processes matching self.prefix with the
        warning and critical sizes from the options.

        @param args: not used by this check.

        @returns: the result of util.ok when no process is found or none
                  reaches the warning size; otherwise the result of
                  util.critical or util.warning, with the number of
                  processes at or above that level and the pid and size of
                  the biggest one.
        """
        processes = getProcesses(prefix=self.prefix)
        if not processes:
            # NOTE(review): this message says "job" while the other messages
            # use self.process_type - confirm whether that is intended
            return util.ok('No job processes running.')

        # (pid, vsize) pairs ordered by vsize, biggest first; sorting
        # ascending and then reversing keeps the original order among ties
        sizes = sorted(
            [(process.pid, process.vsize) for process in processes.values()],
            key=lambda pair: pair[1])
        sizes.reverse()

        # check the one with the mostest
        pid, vsize = sizes[0]

        warning = parseSize(self.options.warning)
        critical = parseSize(self.options.critical)
        biggest = formatting.formatStorage(vsize)

        if vsize >= critical:
            # count number of critical jobs
            count = len([pair for pair in sizes if pair[1] >= critical])
            return util.critical(
                '%d %s(s) above critical level - highest is %d at %s' %
                (count, self.process_type, pid, biggest))

        if vsize >= warning:
            # count number of warning jobs
            count = len([pair for pair in sizes if pair[1] >= warning])
            return util.warning(
                '%d %s(s) above warning level - highest is %d at %s' %
                (count, self.process_type, pid, biggest))

        return util.ok(
            'No %s processes above warning level '
            '(highest is %d at %s)' %
            (self.process_type, pid, biggest))
Esempio n. 16
0
    def do(self, args):
        """
        Check the vsize of all processes matching self.prefix against the
        configured warning and critical sizes.

        @param args: not used by this check.

        @returns: the result of util.ok if there are no processes or all of
                  them stay below the warning size; otherwise the result of
                  util.critical or util.warning, naming how many processes
                  reach that level plus the pid and size of the largest.
        """
        processes = getProcesses(prefix=self.prefix)
        if not processes:
            # NOTE(review): "job" is hard-coded here although the other
            # messages use self.process_type - verify this is wanted
            return util.ok('No job processes running.')

        # build (pid, vsize) tuples and order them by vsize, largest first;
        # the ascending sort followed by reverse() is kept on purpose so
        # that equal sizes end up in the same order as before
        byVsize = []
        for candidate in processes.values():
            byVsize.append((candidate.pid, candidate.vsize))
        byVsize.sort(key=lambda entry: entry[1])
        byVsize.reverse()

        # check the one with the mostest
        pid, vsize = byVsize[0]

        warning = parseSize(self.options.warning)
        critical = parseSize(self.options.critical)

        if vsize >= critical:
            # count number of critical jobs
            above = [entry for entry in byVsize if entry[1] >= critical]
            text = '%d %s(s) above critical level - highest is %d at %s' % (
                len(above), self.process_type, pid,
                formatting.formatStorage(vsize))
            return util.critical(text)

        if vsize >= warning:
            # count number of warning jobs
            above = [entry for entry in byVsize if entry[1] >= warning]
            text = '%d %s(s) above warning level - highest is %d at %s' % (
                len(above), self.process_type, pid,
                formatting.formatStorage(vsize))
            return util.warning(text)

        text = ('No %s processes above warning level '
                '(highest is %d at %s)' % (
                    self.process_type, pid, formatting.formatStorage(vsize)))
        return util.ok(text)
Esempio n. 17
0
 def failure(result):
     """
     Report result through util.critical and stop the reactor.

     @param result: the value to include in the error message.
     """
     message = 'Error: %s' % result
     util.critical(message)
     reactor.stop()
Esempio n. 18
0
 def failure(result):
     """
     Pass an error message built from result to util.critical, then stop
     the reactor.

     @param result: what went wrong; formatted into the message with %s.
     """
     text = 'Error: %s' % (result, ) if isinstance(result, tuple) \
         else 'Error: %s' % result
     util.critical(text)
     reactor.stop()
Esempio n. 19
0
 def noauthenticate(result):
     """
     Report result through util.critical and stop the reactor.

     @param result: the value to include in the error message.
     """
     message = 'Error: %s' % result
     util.critical(message)
     reactor.stop()
Esempio n. 20
0
 def critical(self, message):
     """
     Report message through util.critical, prefixed with self._url and
     followed by the location in self._tmpfile.

     @param message: the text describing the problem.

     @returns: the result of util.critical.
     """
     details = (self._url, message, self._tmpfile)
     return util.critical('%s: %s [dump at %s]' % details)
Esempio n. 21
0
 def _connectedEb(self, failure):
     """
     Errback for a failed connection attempt to the manager.

     Reports a connection failure through util.unknown and a refused
     connection through util.critical, then passes the unchanged failure on
     to the errback of self.managerDeferred.

     @param failure: the failure describing why connecting went wrong.
     """
     # each entry is checked on its own, in this order
     reports = (
         (errors.ConnectionFailedError, util.unknown,
          "Unable to connect to manager."),
         (errors.ConnectionRefusedError, util.critical,
          "Manager refused connection."),
     )
     for errorType, report, text in reports:
         if failure.check(errorType):
             report(text)
     self.managerDeferred.errback(failure)
Esempio n. 22
0
 def noauthenticate(result):
     """
     Hand an error message built from result to util.critical and stop the
     reactor afterwards.

     @param result: formatted into the message with %s.
     """
     errorText = 'Error: %s' % result
     util.critical(errorText)
     reactor.stop()
Esempio n. 23
0
 def critical(self, message):
     """
     Wrap message with self._url and the dump location self._tmpfile and
     report it through util.critical.

     @param message: the text describing the problem.

     @returns: the result of util.critical.
     """
     text = '%s: %s [dump at %s]' % (self._url, message, self._tmpfile)
     return util.critical(text)