Esempio n. 1
0
    def prologue(self):
        """Run the start-of-script checks selected by the parsed options.

        See _merge_options for the format of the options.

        Steps, in order:

        - create the SimpleNagios reporter from the nagios_* options and keep it
          in self.nagios_reporter; nagios_report is handed over as
          _report_and_exit (per the original notes the reporter then prints the
          report and bails; that logic lives in SimpleNagios).
        - if ha is set and proceed_on_ha_service() says this is not the right
          host, record an OK nagios message and stop the script.
        - unless disable_locking or dry_run is set, take a lock on
          locking_filename. If the lock fails, lock_or_bork() is in charge of
          borking and of setting the nagios exit accordingly.
        """

        opts = self.options

        self.nagios_reporter = SimpleNagios(
            _cache=opts.nagios_check_filename,
            _report_and_exit=opts.nagios_report,
            _threshold=opts.nagios_check_interval_threshold,
            _cache_user=opts.nagios_user,
        )

        # check for HA host
        if opts.ha and not proceed_on_ha_service(opts.ha):
            self.log.warning("Not running on the target host %s in the HA setup. Stopping.", opts.ha)
            self.nagios_reporter.ok("Not running on the HA master.")
            # The NAGIOS_EXIT_* constants are indexed with [0] elsewhere to get
            # the numeric exit code. Passing the whole sequence to sys.exit()
            # would print it on stderr and terminate with status 1.
            sys.exit(NAGIOS_EXIT_OK[0])

        if not opts.disable_locking and not opts.dry_run:
            # the lock threshold is twice the nagios check interval threshold
            self.lockfile = TimestampedPidLockfile(
                opts.locking_filename,
                threshold=opts.nagios_check_interval_threshold * 2)
            lock_or_bork(self.lockfile, self.nagios_reporter)

        self.log.info("%s has started", _script_name(sys.argv[0]))
Esempio n. 2
0
    def test_cache(self):
        """Test the caching.

        A warning is written through a cache-backed SimpleNagios; a
        NagiosReporter on the same file must then print that warning, exit with
        the warning code, and the cache file must not be readable by others.
        """
        (handle, filename) = tempfile.mkstemp()
        # Only the unique name is needed. Close the descriptor straight away so
        # it cannot leak when one of the calls below raises.
        os.close(handle)
        os.unlink(filename)

        n = SimpleNagios(_cache=filename, _cache_user=self.nagios_user)
        message = "mywarning"
        n.warning(message)

        # drop whatever was captured so far, only the report below matters
        self.buffo.seek(0)
        self.buffo.truncate(0)

        raised_exception = None
        try:
            reporter_test = NagiosReporter('test_cache', filename, -1,
                                           self.nagios_user)
            reporter_test.report_and_exit()
        except SystemExit as err:
            raised_exception = err
        bo = self.buffo.getvalue().rstrip()

        self.assertEqual(bo, "WARNING %s" % message)
        # fail with a clear message instead of an AttributeError on None.code
        self.assertTrue(raised_exception is not None, "report_and_exit() did not raise SystemExit")
        self.assertEqual(raised_exception.code, NAGIOS_EXIT_WARNING[0])

        statres = os.stat(filename)

        self.assertFalse(statres.st_mode & stat.S_IROTH)
Esempio n. 3
0
    def test_eval(self):
        """Run SimpleNagios._eval() on two values, one of which has a critical level."""

        # foo comes with a foo_critical entry, bar stands alone
        levels = {
            'foo': 100,
            'foo_critical': 90,
            'bar': 20,
        }

        checker = SimpleNagios(**levels)
        checker._eval()
Esempio n. 4
0
    def test_eval(self):
        """Evaluate the warning/critical level for a small set of values.

        Nothing is asserted; the test only requires _eval() to run through.
        """

        SimpleNagios(foo=100, foo_critical=90, bar=20)._eval()
Esempio n. 5
0
    def prologue(self):
        """Checks the options given for settings and takes appropriate action.

        See _merge_options for the format.

        - always creates a SimpleNagios instance (stored in self.nagios_reporter);
          nagios_report is passed to it as _report_and_exit.
        - if ha is set, checks if running on the correct host (proceed_on_ha_service);
          if not, sets an OK nagios message and exits.
        - unless disable_locking or dry_run is set, takes a lock on locking_filename.
          If the lock fails, lock_or_bork is responsible for the nagios exit.
        """

        # NOTE: per the original comment the reporter bails here if a nagios report is requested
        self.nagios_reporter = SimpleNagios(_cache=self.options.nagios_check_filename,
                                            _report_and_exit=self.options.nagios_report,
                                            _threshold=self.options.nagios_check_interval_threshold,
                                            _cache_user=self.options.nagios_user,
                                            )

        # check for HA host
        ha_target = self.options.ha
        if ha_target and not proceed_on_ha_service(ha_target):
            self.log.warning("Not running on the target host %s in the HA setup. Stopping.", ha_target)
            self.nagios_reporter.ok("Not running on the HA master.")
            # NAGIOS_EXIT_OK is a sequence whose first item is the numeric exit
            # code; sys.exit() on the sequence itself would print it and exit 1.
            sys.exit(NAGIOS_EXIT_OK[0])

        locking_wanted = not (self.options.disable_locking or self.options.dry_run)
        if locking_wanted:
            # lock threshold: twice the nagios check interval threshold
            self.lockfile = TimestampedPidLockfile(self.options.locking_filename,
                                                   threshold=self.options.nagios_check_interval_threshold * 2)
            lock_or_bork(self.lockfile, self.nagios_reporter)

        self.log.info("%s has started", _script_name(sys.argv[0]))
Esempio n. 6
0
    def test_cache(self):
        """Test the caching.

        Writes a warning through a cache-backed SimpleNagios and checks that a
        NagiosReporter on the same cache file reports it and exits.
        """
        (handle, filename) = tempfile.mkstemp()
        os.unlink(filename)

        n = SimpleNagios(_cache=filename, _cache_user=self.nagios_user)
        message = "mywarning"
        n.warning(message)
        os.close(handle)

        # only the output of the report below is of interest
        self.buffo.seek(0)
        self.buffo.truncate(0)

        # "except SystemExit, e" is a syntax error on Python 3; the "as" form
        # works on both Python 2.6+ and Python 3.
        raised_exception = None
        try:
            reporter_test = NagiosReporter('test_cache', filename, -1, self.nagios_user)
            reporter_test.report_and_exit()
        except SystemExit as err:
            raised_exception = err

        bo = self.buffo.getvalue().rstrip()

        # the exit used to be swallowed without checking anything
        self.assertTrue(raised_exception is not None, "report_and_exit() did not raise SystemExit")
        self.assertEqual(bo, "WARNING %s" % message)
Esempio n. 7
0
    def test_world_readable(self):
        """Check that a cache created with _world_readable=True is readable by others."""
        fd, cache_path = tempfile.mkstemp()
        # keep only the unique name; presumably SimpleNagios has to create the file itself
        os.unlink(cache_path)

        nagios = SimpleNagios(_cache=cache_path, _cache_user=self.nagios_user, _world_readable=True)
        nagios.ok("test")
        os.close(fd)

        try:
            reporter = NagiosReporter('test_cache', cache_path, -1, self.nagios_user)
            reporter.report_and_exit()
        except SystemExit:
            # an exit from the reporter is tolerated, only the file mode is under test
            pass

        mode = os.stat(cache_path).st_mode
        self.assertTrue(mode & stat.S_IROTH)
Esempio n. 8
0
    def _basic_test_single_instance(self, kwargs, message, nagios_exit):
        """Basic test: instantiate SimpleNagios and check what it prints and how it exits.

        :param kwargs: keyword arguments for the SimpleNagios constructor
        :param message: expected captured output (compared after stripping trailing whitespace)
        :param nagios_exit: expected nagios exit; its first item is the numeric exit code
        """

        # start from an empty capture buffer
        self.buffo.seek(0)
        self.buffo.truncate(0)

        # "except SystemExit, e" does not parse on Python 3; "as" works on 2.6+ and 3
        raised_exception = None
        try:
            SimpleNagios(**kwargs)
        except SystemExit as err:
            raised_exception = err

        bo = self.buffo.getvalue().rstrip()

        # message and nagios_exit used to be accepted but never checked
        self.assertEqual(bo, message)
        self.assertTrue(raised_exception is not None, "SimpleNagios did not raise SystemExit")
        self.assertEqual(raised_exception.code, nagios_exit[0])
Esempio n. 9
0
    def test_cache(self):
        """Test the caching.

        A warning stored via a cache-backed SimpleNagios must be reported by a
        NagiosReporter reading the same cache file, which then exits.
        """
        (handle, filename) = tempfile.mkstemp()
        os.unlink(filename)

        n = SimpleNagios(_cache=filename, _cache_user=self.nagios_user)
        message = "mywarning"
        n.warning(message)
        os.close(handle)

        # reset the capture buffer so it only holds the report
        self.buffo.seek(0)
        self.buffo.truncate(0)

        # Python 3 rejects "except SystemExit, e"; use the portable "as" form
        raised_exception = None
        try:
            reporter_test = NagiosReporter('test_cache', filename, -1,
                                           self.nagios_user)
            reporter_test.report_and_exit()
        except SystemExit as err:
            raised_exception = err

        bo = self.buffo.getvalue().rstrip()

        # previously the exit was swallowed and nothing was verified
        self.assertTrue(raised_exception is not None, "report_and_exit() did not raise SystemExit")
        self.assertEqual(bo, "WARNING %s" % message)
Esempio n. 10
0
    def test_world_readable(self):
        """World readable cache: the S_IROTH bit must be set on the cache file."""
        tmp_handle, tmp_name = tempfile.mkstemp()
        os.unlink(tmp_name)

        cache_settings = {
            '_cache': tmp_name,
            '_cache_user': self.nagios_user,
            '_world_readable': True,
        }
        SimpleNagios(**cache_settings).ok("test")
        os.close(tmp_handle)

        try:
            NagiosReporter('test_cache', tmp_name, -1, self.nagios_user).report_and_exit()
        except SystemExit:
            # the reporter may exit; that is not what this test looks at
            pass

        others_can_read = os.stat(tmp_name).st_mode & stat.S_IROTH
        self.assertTrue(others_can_read)
Esempio n. 11
0
def main():
    """Main function.

    Parses the options, and then either

    - records an OK nagios state when the ha option is set and
      proceed_on_ha_service() rejects this host, or
    - reads master/showq_path/mjobctl_path for every requested host from the
      config file, calls process_hold() on them and hands the resulting stats
      (extended with RELEASEJOB_LIMITS and a message) to the nagios reporter.
    """
    options = {
        'nagios_check_filename': ('filename of where the nagios check data is stored', str, 'store', NAGIOS_CHECK_FILENAME),
        'nagios_check_interval_threshold': ('threshold of nagios checks timing out', None, 'store', NAGIOS_CHECK_INTERVAL_THRESHOLD),
        'hosts': ('the hosts/clusters that should be contacted for job information', None, 'extend', []),
        'location': ('the location for storing the pickle file: gengar, muk', str, 'store', 'gengar'),
        'ha': ('high-availability master IP address', None, 'store', None),
        'dry-run': ('do not make any updates whatsoever', None, 'store_true', False),
    }

    opts = simple_option(options)

    # Use the nagios_check_filename option rather than the bare constant;
    # its default is NAGIOS_CHECK_FILENAME, so default runs are unchanged.
    nag = SimpleNagios(_cache=opts.options.nagios_check_filename)

    if opts.options.ha and not proceed_on_ha_service(opts.options.ha):
        _log.info("Not running on the target host in the HA setup. Stopping.")
        nag.ok("Not running on the HA master.")
    else:
        # parse config file
        config = opts.configfile_parser
        clusters = {}
        for host in opts.options.hosts:
            clusters[host] = {
                'master': config.get(host, "master"),
                'spath': config.get(host, "showq_path"),
                'mpath': config.get(host, "mjobctl_path"),
            }

        # process the new and previous data
        released_jobids, stats = process_hold(clusters, dry_run=opts.options.dry_run)

        # nagios state
        stats.update(RELEASEJOB_LIMITS)
        stats['message'] = "released %s jobs in hold" % len(released_jobids)
        nag._eval_and_exit(**stats)

    # _final_state is read as ((exit code, exit label), message)
    _log.info("Cached nagios state: %s %s", nag._final_state[0][1], nag._final_state[1])
Esempio n. 12
0
    def _basic_test_single_instance_and_exit(self, fn, msg, message,
                                             nagios_exit):
        """Basic test: call one SimpleNagios method and check its output and exit code.

        :param fn: name of the SimpleNagios method to call
        :param msg: argument passed to that method
        :param message: expected captured output (compared after stripping trailing whitespace)
        :param nagios_exit: expected nagios exit; its first item is the numeric exit code
        """

        # start from an empty capture buffer
        self.buffo.seek(0)
        self.buffo.truncate(0)

        n = SimpleNagios()
        f = getattr(n, fn)

        # "except SystemExit, e" does not parse on Python 3; "as" works on 2.6+ and 3
        raised_exception = None
        try:
            f(msg)
        except SystemExit as err:
            raised_exception = err

        bo = self.buffo.getvalue().rstrip()

        # message and nagios_exit used to be accepted but never checked
        self.assertEqual(bo, message)
        self.assertTrue(raised_exception is not None, "%s() did not raise SystemExit" % fn)
        self.assertEqual(raised_exception.code, nagios_exit[0])
Esempio n. 13
0
    def _basic_test_single_instance(self, kwargs, message, nagios_exit):
        """Instantiate SimpleNagios with kwargs; compare the output and the exit code.

        The captured output (trailing whitespace stripped) has to equal message
        and the SystemExit code has to equal nagios_exit[0].
        """

        # wipe the capture buffer
        capture = self.buffo
        capture.seek(0)
        capture.truncate(0)

        caught = None
        try:
            SimpleNagios(**kwargs)
        except SystemExit as exit_request:
            caught = exit_request

        printed = capture.getvalue().rstrip()
        self.assertEqual(printed, message)

        self.assertEqual(caught.code, nagios_exit[0])
Esempio n. 14
0
    def _basic_test_single_instance_and_exit(self, fn, msg, message,
                                             nagios_exit):
        """Call the SimpleNagios method named fn with msg; compare output and exit code.

        The captured output (trailing whitespace stripped) has to equal message
        and the SystemExit code has to equal nagios_exit[0].
        """

        # wipe the capture buffer
        capture = self.buffo
        capture.seek(0)
        capture.truncate(0)

        method = getattr(SimpleNagios(), fn)

        caught = None
        try:
            method(msg)
        except SystemExit as exit_request:
            caught = exit_request

        printed = capture.getvalue().rstrip()
        self.assertEqual(printed, message)

        self.assertEqual(caught.code, nagios_exit[0])
Esempio n. 15
0
class ExtendedSimpleOption(SimpleOption):
    """
    Extends the SimpleOption class to allow other checks to occur at script prologue and epilogue.

    - nagios reporting
    - checking if running on the designated HA host
    - locking on a file

    The prologue should be called at the start of the script; the epilogue
    (or one of ok/warning/critical) at the end.
    """

    def __init__(self, options, run_prologue=True, excepthook=None, **kwargs):
        """Initialise.

        :param options: script options; passed through _merge_options before
                        being handed to SimpleOption.__init__
        :param run_prologue: if True (default), we immediately execute the prologue
        :param excepthook: callable to install as sys.excepthook; when not given,
                           critical_exception_handler is installed
        :param kwargs: passed on to SimpleOption.__init__

        Note that if taking a lock is requested (default), and the lock cannot be
        acquired for some reason, the program will exit.
        """

        options_ = _merge_options(options)
        super(ExtendedSimpleOption, self).__init__(options_, **kwargs)

        self.nagios_reporter = None
        self.lockfile = None

        # NOTE(review): the prologue runs before the excepthook is installed and
        # before self.log is set below, so it presumably relies on a logger that
        # SimpleOption.__init__ provides -- confirm.
        if run_prologue:
            self.prologue()

        sys.excepthook = excepthook or self.critical_exception_handler

        self.log = fancylogger.getLogger()

    def prologue(self):
        """Checks the options given for settings and takes appropriate action.

        See _merge_options for the format.

        - always creates a SimpleNagios instance (stored in self.nagios_reporter);
          nagios_report is passed to it as _report_and_exit.
        - if ha is set, checks if running on the correct host (proceed_on_ha_service);
          if not, sets an OK nagios message and exits.
        - unless disable_locking or dry_run is set, takes a lock on locking_filename.
          If the lock fails, lock_or_bork is responsible for the nagios exit.
        """

        # NOTE: per the original comment the reporter bails here if a nagios report is requested
        self.nagios_reporter = SimpleNagios(_cache=self.options.nagios_check_filename,
                                            _report_and_exit=self.options.nagios_report,
                                            _threshold=self.options.nagios_check_interval_threshold,
                                            _cache_user=self.options.nagios_user,
                                            )

        # check for HA host
        if self.options.ha and not proceed_on_ha_service(self.options.ha):
            self.log.warning("Not running on the target host %s in the HA setup. Stopping.", self.options.ha)
            self.nagios_reporter.ok("Not running on the HA master.")
            # NAGIOS_EXIT_OK is a sequence whose first item is the numeric exit
            # code; sys.exit() on the sequence itself would print it and exit 1.
            sys.exit(NAGIOS_EXIT_OK[0])

        if not self.options.disable_locking and not self.options.dry_run:
            # lock threshold: twice the nagios check interval threshold
            self.lockfile = TimestampedPidLockfile(self.options.locking_filename,
                                                   threshold=self.options.nagios_check_interval_threshold * 2)
            lock_or_bork(self.lockfile, self.nagios_reporter)

        self.log.info("%s has started", _script_name(sys.argv[0]))

    def _epilogue(self):
        """Release the lock, under the same conditions the prologue uses to take it."""
        if not self.options.disable_locking and not self.options.dry_run:
            release_or_bork(self.lockfile, self.nagios_reporter)

    def epilogue(self, nagios_message, nagios_thresholds=None):
        """Run at the end of a script, quitting gracefully if possible.

        :param nagios_message: message handed to the nagios reporter
        :param nagios_thresholds: optional dict of keyword arguments for the
                                  reporter's _eval_and_exit; it is not modified
        """
        # work on a copy so the 'message' entry does not leak into the caller's dict
        thresholds = dict(nagios_thresholds) if nagios_thresholds else {}

        self._epilogue()

        thresholds['message'] = nagios_message
        self.nagios_reporter._eval_and_exit(**thresholds)
        self.log.info("%s has finished", _script_name(sys.argv[0]))  # may not be reached

    def ok(self, nagios_message):
        """Run at the end of a script and force an OK exit."""
        self._epilogue()
        self.nagios_reporter.ok(nagios_message)

    def warning(self, nagios_message):
        """Run at the end of a script and force a Warning exit."""
        self._epilogue()
        self.nagios_reporter.warning(nagios_message)

    def critical(self, nagios_message):
        """Run at the end of a script and force a Critical exit."""
        self._epilogue()
        self.nagios_reporter.critical(nagios_message)

    def critical_exception_handler(self, tp, value, traceback):
        """
        Run at the end of a script and force a Critical exit.

        This function is meant to be used as sys.excepthook

        :param tp: exception type
        :param value: exception instance
        :param traceback: traceback object
        """
        self.log.exception("unhandled exception detected: %s - %s", tp, value)
        self.log.debug("traceback %s", traceback)
        message = "Script failure: %s - %s" % (tp, value)
        # sys.exc_clear() only exists on Python 2. Calling it unconditionally
        # on Python 3 raises AttributeError before critical() is reached.
        exc_clear = getattr(sys, 'exc_clear', None)
        if exc_clear is not None:
            exc_clear()
        self.critical(message)
Esempio n. 16
0
class ExtendedSimpleOption(SimpleOption):
    """
    Extends the SimpleOption class to allow other checks to occur at script prologue and epilogue.

    - nagios reporting
    - checking if running on the designated HA host
    - locking on a file

    The prologue should be called at the start of the script; the epilogue
    (or one of ok/warning/critical) at the end.
    """
    def __init__(self, options, run_prologue=True, excepthook=None, **kwargs):
        """Initialise.

        :param options: script options; passed through _merge_options before
                        being handed to SimpleOption.__init__
        :param run_prologue: if True (default), we immediately execute the prologue
        :param excepthook: callable to install as sys.excepthook; when not given,
                           critical_exception_handler is installed
        :param kwargs: passed on to SimpleOption.__init__

        Note that if taking a lock is requested (default), and the lock cannot be
        acquired for some reason, the program will exit.
        """

        options_ = _merge_options(options)
        super(ExtendedSimpleOption, self).__init__(options_, **kwargs)

        self.nagios_reporter = None
        self.lockfile = None

        # NOTE(review): the prologue runs before the excepthook is installed and
        # before self.log is set below, so it presumably relies on a logger that
        # SimpleOption.__init__ provides -- confirm.
        if run_prologue:
            self.prologue()

        if excepthook:
            sys.excepthook = excepthook
        else:
            sys.excepthook = self.critical_exception_handler

        self.log = fancylogger.getLogger()

    def prologue(self):
        """Checks the options given for settings and takes appropriate action.

        See _merge_options for the format.

        - always creates a SimpleNagios instance (stored in self.nagios_reporter);
          nagios_report is passed to it as _report_and_exit.
        - if ha is set, checks if running on the correct host (proceed_on_ha_service);
          if not, sets an OK nagios message and exits.
        - unless disable_locking or dry_run is set, takes a lock on locking_filename.
          If the lock fails, lock_or_bork is responsible for the nagios exit.
        """

        opts = self.options

        # NOTE: per the original comment the reporter bails here if a nagios report is requested
        self.nagios_reporter = SimpleNagios(
            _cache=opts.nagios_check_filename,
            _report_and_exit=opts.nagios_report,
            _threshold=opts.nagios_check_interval_threshold,
            _cache_user=opts.nagios_user,
        )

        # check for HA host
        if opts.ha and not proceed_on_ha_service(opts.ha):
            self.log.warning(
                "Not running on the target host %s in the HA setup. Stopping.",
                opts.ha)
            self.nagios_reporter.ok("Not running on the HA master.")
            # NAGIOS_EXIT_OK is a sequence whose first item is the numeric exit
            # code; sys.exit() on the sequence itself would print it and exit 1.
            sys.exit(NAGIOS_EXIT_OK[0])

        if not opts.disable_locking and not opts.dry_run:
            # lock threshold: twice the nagios check interval threshold
            self.lockfile = TimestampedPidLockfile(
                opts.locking_filename,
                threshold=opts.nagios_check_interval_threshold * 2)
            lock_or_bork(self.lockfile, self.nagios_reporter)

        self.log.info("%s has started", _script_name(sys.argv[0]))

    def _epilogue(self):
        """Release the lock, under the same conditions the prologue uses to take it."""
        if not self.options.disable_locking and not self.options.dry_run:
            release_or_bork(self.lockfile, self.nagios_reporter)

    def epilogue(self, nagios_message, nagios_thresholds=None):
        """Run at the end of a script, quitting gracefully if possible.

        :param nagios_message: message handed to the nagios reporter
        :param nagios_thresholds: optional dict of keyword arguments for the
                                  reporter's _eval_and_exit; it is not modified
        """
        # work on a copy so the 'message' entry does not leak into the caller's dict
        thresholds = dict(nagios_thresholds) if nagios_thresholds else {}

        self._epilogue()

        thresholds['message'] = nagios_message
        self.nagios_reporter._eval_and_exit(**thresholds)
        self.log.info("%s has finished",
                      _script_name(sys.argv[0]))  # may not be reached

    def ok(self, nagios_message):
        """Run at the end of a script and force an OK exit."""
        self._epilogue()
        self.nagios_reporter.ok(nagios_message)

    def warning(self, nagios_message):
        """Run at the end of a script and force a Warning exit."""
        self._epilogue()
        self.nagios_reporter.warning(nagios_message)

    def critical(self, nagios_message):
        """Run at the end of a script and force a Critical exit."""
        self._epilogue()
        self.nagios_reporter.critical(nagios_message)

    def critical_exception_handler(self, tp, value, traceback):
        """
        Run at the end of a script and force a Critical exit.

        This function is meant to be used as sys.excepthook

        :param tp: exception type
        :param value: exception instance
        :param traceback: traceback object
        """
        self.log.exception("unhandled exception detected: %s - %s", tp, value)
        self.log.debug("traceback %s", traceback)
        message = "Script failure: %s - %s" % (tp, value)
        # sys.exc_clear() only exists on Python 2. Calling it unconditionally
        # on Python 3 raises AttributeError before critical() is reached.
        if hasattr(sys, 'exc_clear'):
            sys.exc_clear()
        self.critical(message)