Example #1
0
    def do(self, args):
        """
        Report components that have more than one job process running.

        Job processes (command starting with 'flumotion-job') are grouped by
        (parent pid, component name); every group that holds more than one
        pid is reported.

        args is accepted for the command interface but not used here.

        Returns the result of util.ok when no group has more than one pid,
        otherwise the result of util.critical listing each offending group.
        """
        # (worker pid, component name) -> list of job pids, as strings
        jobs = {}
        for process in getProcesses(prefix='flumotion').values():
            isJob = process.cmd.startswith('flumotion-job')
            # a parent pid of 1 is init, not a worker - see orphaned for that
            if not isJob or process.ppid == 1:
                continue

            key = (process.ppid, process.component)
            # setdefault does the "create the list on first sight" step in a
            # single dict lookup instead of scanning components.keys()
            jobs.setdefault(key, []).append(str(process.pid))

        # keep only the groups where the same component runs more than once
        duplicated = [(key, pids) for key, pids in jobs.items()
                      if len(pids) > 1]
        if not duplicated:
            return util.ok('No multiple component jobs running.')

        details = ['worker %d: component %s (%s)' % (
                       workerPid, component, ", ".join(pids))
                   for (workerPid, component), pids in duplicated]

        return util.critical('%d multiple component job(s) running (%s).' % (
            len(duplicated), ", ".join(details)))
Example #2
0
    def do(self, args):
        """
        Report components that have more than one job process running.

        Job processes (command starting with 'flumotion-job') are grouped by
        (parent pid, component name); every group that holds more than one
        pid is reported.

        args is accepted for the command interface but not used here.

        Returns the result of util.ok when no group has more than one pid,
        otherwise the result of util.critical listing each offending group.
        """
        # (worker pid, component name) -> list of job pids, as strings
        pidsByJob = {}
        for proc in getProcesses(prefix='flumotion').values():
            if not proc.cmd.startswith('flumotion-job'):
                continue

            # a parent pid of 1 is init, not a worker - see orphaned for that
            if proc.ppid == 1:
                continue

            # setdefault creates the list on first sight of the key, without
            # the separate membership scan over components.keys()
            pidsByJob.setdefault(
                (proc.ppid, proc.component), []).append(str(proc.pid))

        # keep only the groups where the same component runs more than once
        multiples = [(key, pids) for key, pids in pidsByJob.items()
                     if len(pids) > 1]
        if not multiples:
            return util.ok('No multiple component jobs running.')

        lines = []
        for (workerPid, component), pids in multiples:
            lines.append('worker %d: component %s (%s)' %
                         (workerPid, component, ", ".join(pids)))

        return util.critical('%d multiple component job(s) running (%s).' %
                             (len(multiples), ", ".join(lines)))
Example #3
0
    def do(self, args):
        """
        Check whether any worker service is running more than once.

        args is accepted for the command interface but not used here.

        Returns the result of util.ok when getMultiple reports nothing,
        otherwise the result of util.critical naming what it reported.
        """
        # NOTE(review): 'flumotion-worke' is presumably a deliberately
        # truncated process name - confirm against getMultiple before
        # "correcting" the spelling.
        duplicates = getMultiple('flumotion-worke')
        if not duplicates:
            return util.ok('no worker services running more than once')

        count = len(duplicates)
        names = ", ".join(duplicates)
        return util.critical(
            '%d worker service(s) running more than once (%s)' % (
                count, names))
Example #4
0
    def do(self, args):
        """
        Check whether any worker service is running more than once.

        args is accepted for the command interface but not used here.

        Returns the result of util.ok when getMultiple reports nothing,
        otherwise the result of util.critical naming what it reported.
        """
        # NOTE(review): 'flumotion-worke' is presumably a deliberately
        # truncated process name - confirm against getMultiple before
        # "correcting" the spelling.
        repeated = getMultiple('flumotion-worke')
        if not repeated:
            return util.ok('no worker services running more than once')

        message = '%d worker service(s) running more than once (%s)' % (
            len(repeated), ", ".join(repeated))
        return util.critical(message)
Example #5
0
    def do(self, args):
        """
        Report job processes whose parent pid is 1.

        args is accepted for the command interface but not used here.

        Returns the result of util.ok when no such process exists, otherwise
        the result of util.critical listing the pids.
        """
        # collect the pids, as strings, of every job process parented by
        # pid 1
        orphaned = []
        for pid, process in getProcesses(prefix='flumotion-job').items():
            if process.ppid == 1:
                orphaned.append(str(pid))

        if orphaned:
            return util.critical(
                '%d orphaned job process(es) running (%s).' % (
                    len(orphaned), ", ".join(orphaned)))

        return util.ok('No orphaned job processes running.')
Example #6
0
    def do(self, args):
        """
        Report job processes whose parent pid is 1.

        args is accepted for the command interface but not used here.

        Returns the result of util.ok when no such process exists, otherwise
        the result of util.critical listing the pids.
        """
        jobs = getProcesses(prefix='flumotion-job')

        # pids, as strings, of every job process parented by pid 1
        orphans = []
        for pid, job in jobs.items():
            if job.ppid == 1:
                orphans.append(str(pid))

        if not orphans:
            return util.ok('No orphaned job processes running.')

        summary = '%d orphaned job process(es) running (%s).' % (
            len(orphans), ", ".join(orphans))
        return util.critical(summary)
Example #7
0
    def do(self, args):
        """
        Check the age of the last log line matching self.options.string.

        args must hold exactly one log file name.  The last matching line is
        found with grep/tail, its timestamp is read from a fixed column, and
        the age is compared against self.options.critical and
        self.options.warning (both converted with int()).

        Returns the result of util.unknown, util.critical, util.warning or
        util.ok, depending on what was found.
        """
        if not args:
            return util.unknown('Please specify a log file to check.')
        if len(args) > 1:
            return util.unknown('Please specify only one log file to check.')

        needle = self.options.string

        # NOTE(review): needle and the file name are interpolated into a
        # shell command as-is; a quote or space in either changes the command.
        command = "grep '%s' %s | tail -n 1" % (needle, " ".join(args))
        self.debug('executing %s' % command)
        output = commands.getoutput(command)
        self.debug('output: %s' % output)

        if not output:
            return util.unknown(
                'Could not find string %s in log file' % needle)

        if output[:5].strip() not in ('ERROR', 'WARN', 'INFO', 'DEBUG',
                                      'LOG'):
            return util.unknown("Last line is not a log line: '%s'" % output)

        # matches flumotion.extern.log.log: the columns level (5), pid (7),
        # object (32) and cat (17) are each followed by one separator, and
        # the 15 character time column comes after them
        start = (5 + 1) + (7 + 1) + (32 + 1) + (17 + 1)

        # log timestrings are currently in local time, which might be a
        # mistake
        parsed = time.strptime(output[start:start + 15], "%b %d %H:%M:%S")
        now = time.time()
        nowtuple = time.localtime(now)

        # the year is not logged: assume this year, or last year if that
        # would put the log line in the future
        fields = list(parsed)
        fields[0] = nowtuple[0]
        # mktime also works in local time, which hopefully matches the log's
        # local time
        timestamp = time.mktime(tuple(fields))
        if timestamp > time.mktime(nowtuple):
            self.debug('timestamp is past now, so assume it is from last year')
            fields[0] = nowtuple[0] - 1
            timestamp = time.mktime(tuple(fields))

        delta = now - int(timestamp)

        # only mention the search string when one was given
        if needle:
            suffix = " with '%s'" % needle
        else:
            suffix = ''
        msg = 'Last log line%s is %s old.' % (
            suffix, formatting.formatTime(delta, fractional=2))

        if delta > int(self.options.critical):
            return util.critical(msg)
        if delta > int(self.options.warning):
            return util.warning(msg)
        return util.ok(msg)
Example #8
0
    def do(self, args):
        """
        Check the age of the last log line matching self.options.string.

        args must hold exactly one log file name.  The last matching line is
        found with grep/tail, its timestamp is read from a fixed column, and
        the age is compared against self.options.critical and
        self.options.warning (both converted with int()).

        Returns the result of util.unknown, util.critical, util.warning or
        util.ok, depending on what was found.
        """
        if not args:
            return util.unknown('Please specify a log file to check.')
        if len(args) > 1:
            return util.unknown('Please specify only one log file to check.')

        pattern = self.options.string

        # NOTE(review): pattern and the file name are interpolated into a
        # shell command as-is; a quote or space in either changes the command.
        command = "grep '%s' %s | tail -n 1" % (pattern, " ".join(args))
        self.debug('executing %s' % command)
        output = commands.getoutput(command)
        self.debug('output: %s' % output)

        if not output:
            return util.unknown(
                'Could not find string %s in log file' % pattern)

        knownLevels = ('ERROR', 'WARN', 'INFO', 'DEBUG', 'LOG')
        if output[:5].strip() not in knownLevels:
            return util.unknown("Last line is not a log line: '%s'" % output)

        # matches flumotion.extern.log.log: the columns level (5), pid (7),
        # object (32) and cat (17) are each followed by one separator, and
        # the 15 character time column comes after them
        begin = (5 + 1) + (7 + 1) + (32 + 1) + (17 + 1)
        end = begin + 15

        # log timestrings are currently in local time, which might be a
        # mistake
        logtuple = time.strptime(output[begin:end], "%b %d %H:%M:%S")
        now = time.time()
        nowtuple = time.localtime(now)

        # the year is not logged: assume this year, or last year if that
        # would put the log line in the future
        guess = list(logtuple)
        guess[0] = nowtuple[0]
        # mktime also works in local time, which hopefully matches the log's
        # local time
        timestamp = time.mktime(tuple(guess))
        if timestamp > time.mktime(nowtuple):
            self.debug('timestamp is past now, so assume it is from last year')
            guess[0] = nowtuple[0] - 1
            timestamp = time.mktime(tuple(guess))

        delta = now - int(timestamp)

        # only mention the search string when one was given
        if pattern:
            which = " with '%s'" % pattern
        else:
            which = ''
        msg = 'Last log line%s is %s old.' % (
            which, formatting.formatTime(delta, fractional=2))

        if delta > int(self.options.critical):
            return util.critical(msg)
        if delta > int(self.options.warning):
            return util.warning(msg)
        return util.ok(msg)
Example #9
0
    def do(self, args):
        """
        Compare the vsize of processes matching self.prefix to thresholds.

        The thresholds come from self.options.warning and
        self.options.critical, each converted with parseSize.  The process
        with the largest vsize decides the result; the message also counts
        how many processes reach the same threshold.

        args is accepted for the command interface but not used here.

        Returns the result of util.ok, util.warning or util.critical.
        """
        processes = getProcesses(prefix=self.prefix)
        if not processes:
            return util.ok('No job processes running.')

        # (pid, vsize) pairs, ordered on vsize with the largest first
        sizes = [(process.pid, process.vsize)
                 for process in processes.values()]
        sizes.sort(key=lambda pair: pair[1])
        sizes.reverse()

        # the first entry is the one with the mostest
        pid, vsize = sizes[0]

        warning = parseSize(self.options.warning)
        critical = parseSize(self.options.critical)

        if vsize >= critical:
            # count number of critical jobs
            count = len([pair for pair in sizes if pair[1] >= critical])
            return util.critical(
                '%d %s(s) above critical level - highest is %d at %s' % (
                    count, self.process_type, pid,
                    formatting.formatStorage(vsize)))

        if vsize >= warning:
            # count number of warning jobs
            count = len([pair for pair in sizes if pair[1] >= warning])
            return util.warning(
                '%d %s(s) above warning level - highest is %d at %s' % (
                    count, self.process_type, pid,
                    formatting.formatStorage(vsize)))

        return util.ok(
            'No %s processes above warning level (highest is %d at %s)' % (
                self.process_type, pid, formatting.formatStorage(vsize)))
Example #10
0
    def do(self, args):
        """
        Compare the vsize of processes matching self.prefix to thresholds.

        The thresholds come from self.options.warning and
        self.options.critical, each converted with parseSize.  The process
        with the largest vsize decides the result; the message also counts
        how many processes reach the same threshold.

        args is accepted for the command interface but not used here.

        Returns the result of util.ok, util.warning or util.critical.
        """
        found = getProcesses(prefix=self.prefix)
        if not found:
            return util.ok('No job processes running.')

        # (pid, vsize) pairs, ordered on vsize with the largest first
        ranked = [(proc.pid, proc.vsize) for proc in found.values()]
        ranked.sort(key=lambda entry: entry[1])
        ranked.reverse()

        # the first entry is the one with the mostest
        pid, vsize = ranked[0]

        warning = parseSize(self.options.warning)
        critical = parseSize(self.options.critical)

        if vsize >= critical:
            # count number of critical jobs
            above = [entry for entry in ranked if entry[1] >= critical]
            return util.critical(
                '%d %s(s) above critical level - highest is %d at %s' %
                (len(above), self.process_type, pid,
                 formatting.formatStorage(vsize)))

        if vsize >= warning:
            # count number of warning jobs
            above = [entry for entry in ranked if entry[1] >= warning]
            return util.warning(
                '%d %s(s) above warning level - highest is %d at %s' %
                (len(above), self.process_type, pid,
                 formatting.formatStorage(vsize)))

        return util.ok(
            'No %s processes above warning level (highest is %d at %s)' %
            (self.process_type, pid, formatting.formatStorage(vsize)))
Example #11
0
        def gotPlanetStateCb(result):
            """
            Turn the mood of self._component into a check result.

            The component is looked up in result with util.findComponent.
            Its mood name is checked against self._critical first, then
            against self._warning.

            Returns the result of util.unknown when the component is not
            found, otherwise of util.critical, util.warning or util.ok.
            """
            self.debug("gotPlanetStateCb")
            component = util.findComponent(result, self._component)
            if not component:
                return util.unknown(
                    "Could not find component %s" % self._component)

            # translate the mood value stored on the component into its name
            moodName = planet.moods.get(component.get("mood")).name

            # every outcome below reports the same text
            status = "Component %s is %s" % (self._component, moodName)

            if moodName in self._critical:
                return util.critical(status)
            if moodName in self._warning:
                return util.warning(status)
            return util.ok(status)
Example #12
0
        def gotPlanetStateCb(result):
            """
            Turn the mood of self._component into a check result.

            The component is looked up in result with util.findComponent.
            Its mood name is checked against self._critical first, then
            against self._warning.

            Returns the result of util.unknown when the component is not
            found, otherwise of util.critical, util.warning or util.ok.
            """
            self.debug('gotPlanetStateCb')
            found = util.findComponent(result, self._component)
            if not found:
                return util.unknown(
                    'Could not find component %s' % self._component)

            # translate the mood value stored on the component into its name
            mood = planet.moods.get(found.get('mood'))
            moodName = mood.name

            # every outcome below reports the same text
            report = 'Component %s is %s' % (self._component, moodName)

            if moodName in self._critical:
                return util.critical(report)
            if moodName in self._warning:
                return util.warning(report)
            return util.ok(report)
Example #13
0
 def ok(self, message):
     """
     Remove the temporary file and report success for self._url.

     Returns the result of util.ok for message prefixed with the url.
     """
     # the tempfile holding the stream is only removed when all goes ok
     os.remove(self._tmpfile)
     text = '%s: %s' % (self._url, message)
     return util.ok(text)
Example #14
0
 def ok(self, message):
     """
     Remove the temporary file and report success for self._url.

     Returns the result of util.ok for message prefixed with the url.
     """
     # the tempfile holding the stream is only removed when all goes ok
     os.remove(self._tmpfile)
     prefixed = '%s: %s' % (self._url, message)
     return util.ok(prefixed)