Example #1
    def __init__(self):
        self.config.populate()
        threading.current_thread().name = self.config.name  # rename main thread

        self.attach_signals()

        # init database connections, different ones for different threads
        self._db = LibreNMS.DB(self.config)  # main
        self._services_db = LibreNMS.DB(self.config)  # services dispatch
        self._discovery_db = LibreNMS.DB(self.config)  # discovery dispatch

        self._lm = self.create_lock_manager()
        self.daily_timer = LibreNMS.RecurringTimer(
            self.config.update_frequency, self.run_maintenance, 'maintenance')
        self.stats_timer = LibreNMS.RecurringTimer(
            self.config.poller.frequency, self.log_performance_stats,
            'performance')
        self.is_master = False

        self.performance_stats = {
            'poller': PerformanceCounter(),
            'discovery': PerformanceCounter(),
            'services': PerformanceCounter()
        }
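
A note on the two timers above: daily_timer and stats_timer follow a simple recurring-timer pattern, i.e. run a callback every N seconds on its own named thread. The sketch below illustrates that pattern with an assumed start()/stop() interface; it is not the actual LibreNMS.RecurringTimer implementation.

import threading

class SimpleRecurringTimer:
    """Sketch only: call `target` every `interval` seconds on a daemon thread."""

    def __init__(self, interval, target, thread_name=None):
        self.interval = interval
        self.target = target
        self._stop = threading.Event()
        self._thread = threading.Thread(target=self._run, name=thread_name, daemon=True)

    def start(self):
        self._thread.start()

    def stop(self):
        self._stop.set()

    def _run(self):
        # Event.wait() returns False on timeout (fire again) and True once stop() is called
        while not self._stop.wait(self.interval):
            self.target()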
Example #2
    def __init__(self):
        self.start_time = time.time()
        self.config.populate()
        self._db = LibreNMS.DB(self.config)
        self.config.load_poller_config(self._db)

        threading.current_thread().name = self.config.name  # rename main thread
        self.attach_signals()

        self._lm = self.create_lock_manager()
        self.daily_timer = LibreNMS.RecurringTimer(
            self.config.update_frequency, self.run_maintenance, 'maintenance')
        self.stats_timer = LibreNMS.RecurringTimer(
            self.config.poller.frequency, self.log_performance_stats,
            'performance')
        if self.config.watchdog_enabled:
            info("Starting watchdog timer for log file: {}".format(
                self.config.watchdog_logfile))
            self.watchdog_timer = LibreNMS.RecurringTimer(
                self.config.poller.frequency, self.logfile_watchdog,
                'watchdog')
        else:
            info("Watchdog is disabled.")
        self.is_master = False
Example #3
    def __init__(self, config, lock_manager):
        """
        A TimedQueueManager to manage dispatch and workers for Alerts

        :param config: LibreNMS.ServiceConfig reference to the service config object
        :param lock_manager: the single instance of lock manager
        """
        TimedQueueManager.__init__(self, config, lock_manager, "alerting")
        self._db = LibreNMS.DB(self.config)
Example #4
    def __init__(self):
        self.config.populate()
        threading.current_thread().name = self.config.name  # rename main thread

        self.attach_signals()

        self._db = LibreNMS.DB(self.config)

        self._lm = self.create_lock_manager()
        self.daily_timer = LibreNMS.RecurringTimer(self.config.update_frequency, self.run_maintenance, 'maintenance')
        self.stats_timer = LibreNMS.RecurringTimer(self.config.poller.frequency, self.log_performance_stats, 'performance')
        self.is_master = False
Example #5
    def __init__(self, config, lock_manager, auto_start=True):
        """
        A TimedQueueManager to manage dispatch and workers for Alerts

        :param config: LibreNMS.ServiceConfig reference to the service config object
        :param lock_manager: the single instance of lock manager
        :param auto_start: automatically start worker threads
        """
        TimedQueueManager.__init__(self,
                                   config,
                                   lock_manager,
                                   'alerting',
                                   auto_start=auto_start)
        self._db = LibreNMS.DB(self.config)
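
Examples #3 and #5 are two revisions of the same alerting queue-manager constructor; the later one adds an auto_start flag. A rough usage sketch follows; the class name AlertQueueManager, the ThreadingLock lock manager, and the start() call are assumptions, not shown in the examples.

import LibreNMS

# Hypothetical wiring (names marked below are assumptions):
config = LibreNMS.ServiceConfig()          # config class named in the docstring above
config.populate()
lock_manager = LibreNMS.ThreadingLock()    # assumption: any single lock-manager instance
alerting = AlertQueueManager(config, lock_manager, auto_start=False)  # assumed class name
alerting.start()                           # assumed to launch dispatch and worker threads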
Example #6
def wrapper(
    wrapper_type: str,
    amount_of_workers: int,
    config: dict,
    log_dir: str,
    _debug: bool = False,
) -> None:
    """
    Actual code that runs various php scripts, in single node mode or distributed poller mode
    """

    global MEMC
    global IS_NODE
    global DISTRIBUTED_POLLING
    global MASTER_TAG
    global NODES_TAG
    global TIME_TAG
    global STEPPING

    # Set up wrapper-dependent variables
    STEPPING = wrappers[wrapper_type]["stepping"]
    if wrapper_type == "poller":
        if "rrd" in config and "step" in config["rrd"]:
            STEPPING = config["rrd"]["step"]
        TIME_TAG = "." + str(get_time_tag(STEPPING))

    MASTER_TAG = "{}.master{}".format(wrapper_type, TIME_TAG)
    NODES_TAG = "{}.nodes{}".format(wrapper_type, TIME_TAG)

    #  <<<EOC
    if "distributed_poller_group" in config:
        poller_group = str(config["distributed_poller_group"])
    else:
        poller_group = False

    if (
        "distributed_poller" in config
        and "distributed_poller_memcached_host" in config
        and "distributed_poller_memcached_port" in config
        and config["distributed_poller"]
    ):
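        # Master election: the first node to claim MASTER_TAG (set with a short TTL)
        # acts as master; later nodes register under NODES_TAG and join as workers.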
        try:
            import memcache

            MEMC = memcache.Client(
                [
                    config["distributed_poller_memcached_host"]
                    + ":"
                    + str(config["distributed_poller_memcached_port"])
                ]
            )
            if str(MEMC.get(MASTER_TAG)) == config["distributed_poller_name"]:
                logger.info("This system is already joined as the service master.")
                sys.exit(2)
            if memc_alive(wrapper_type):
                if MEMC.get(MASTER_TAG) is None:
                    logger.info("Registered as Master")
                    MEMC.set(MASTER_TAG, config["distributed_poller_name"], 10)
                    MEMC.set(NODES_TAG, 0, wrappers[wrapper_type]["nodes_stepping"])
                    IS_NODE = False
                else:
                    logger.info(
                        "Registered as Node joining Master {}".format(
                            MEMC.get(MASTER_TAG)
                        )
                    )
                    IS_NODE = True
                    MEMC.incr(NODES_TAG)
                DISTRIBUTED_POLLING = True
            else:
                logger.warning(
                    "Could not connect to memcached, disabling distributed service checks."
                )
                DISTRIBUTED_POLLING = False
                IS_NODE = False
        except SystemExit:
            raise
        except ImportError:
            logger.critical("ERROR: missing memcache python module:")
            logger.critical("On deb systems: apt-get install python3-memcache")
            logger.critical("On other systems: pip3 install python-memcached")
            logger.critical("Disabling distributed discovery.")
            DISTRIBUTED_POLLING = False
    else:
        DISTRIBUTED_POLLING = False
    # EOC

    s_time = time.time()

    devices_list = []

    if wrapper_type == "service":
        #  <<<EOC
        if poller_group is not False:
            query = (
                "SELECT DISTINCT(services.device_id) FROM services LEFT JOIN devices ON "
                "services.device_id = devices.device_id WHERE devices.poller_group IN({}) AND "
                "devices.disabled = 0".format(poller_group)
            )
        else:
            query = (
                "SELECT DISTINCT(services.device_id) FROM services LEFT JOIN devices ON "
                "services.device_id = devices.device_id WHERE devices.disabled = 0"
            )
        # EOC
    elif wrapper_type in ["discovery", "poller"]:
        """
        This query specificly orders the results depending on the last_discovered_timetaken variable
        Because this way, we put the devices likely to be slow, in the top of the queue
        thus greatening our chances of completing _all_ the work in exactly the time it takes to
        discover the slowest device! cool stuff he
        """
        #  <<<EOC
        if poller_group is not False:
            query = (
                "SELECT device_id FROM devices WHERE poller_group IN ({}) AND "
                "disabled = 0 ORDER BY last_polled_timetaken DESC".format(poller_group)
            )
        else:
            query = "SELECT device_id FROM devices WHERE disabled = 0 ORDER BY last_polled_timetaken DESC"
        # EOC
    else:
        logger.critical("Bogus wrapper type called")
        sys.exit(3)

    sconfig = DBConfig(config)
    db_connection = LibreNMS.DB(sconfig)
    cursor = db_connection.query(query)
    devices = cursor.fetchall()
    for row in devices:
        devices_list.append(int(row[0]))

    #  <<<EOC
    if DISTRIBUTED_POLLING and not IS_NODE:
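        # Record the device_id range so the master can clear the per-device
        # memcached locks during the cleanup phase below.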
        query = "SELECT max(device_id),min(device_id) FROM {}".format(
            wrappers[wrapper_type]["table_name"]
        )
        cursor = db_connection.query(query)
        devices = cursor.fetchall()
        maxlocks = devices[0][0] or 0
        minlocks = devices[0][1] or 0
    # EOC

    poll_queue = queue.Queue()
    print_queue = queue.Queue()
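    # poll_queue feeds device IDs to the worker threads; print_queue hands their
    # output to the single print_worker thread started below.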

    # Don't spawn more worker threads than there are devices
    amount_of_devices = len(devices_list)
    if amount_of_workers > amount_of_devices:
        amount_of_workers = amount_of_devices

    logger.info(
        "starting the {} check at {} with {} threads for {} devices".format(
            wrapper_type,
            time.strftime("%Y-%m-%d %H:%M:%S"),
            amount_of_workers,
            amount_of_devices,
        )
    )

    for device_id in devices_list:
        poll_queue.put(device_id)

    for _ in range(amount_of_workers):
        worker = threading.Thread(
            target=poll_worker,
            kwargs={
                "poll_queue": poll_queue,
                "print_queue": print_queue,
                "config": config,
                "log_dir": log_dir,
                "wrapper_type": wrapper_type,
                "debug": _debug,
            },
        )
        worker.daemon = True
        worker.start()

    pworker = threading.Thread(
        target=print_worker,
        kwargs={"print_queue": print_queue, "wrapper_type": wrapper_type},
    )
    pworker.daemon = True
    pworker.start()

    try:
        poll_queue.join()
        print_queue.join()
    except (KeyboardInterrupt, SystemExit):
        raise

    total_time = int(time.time() - s_time)

    end_msg = "{}-wrapper checked {} devices in {} seconds with {} workers and {} errors".format(
        wrapper_type, DISCOVERED_DEVICES_COUNT, total_time, amount_of_workers, ERRORS
    )
    if ERRORS == 0:
        logger.info(end_msg)
    else:
        logger.error(end_msg)

    #  <<<EOC
    if DISTRIBUTED_POLLING or memc_alive(wrapper_type):
        master = MEMC.get(MASTER_TAG)
        if master == config["distributed_poller_name"] and not IS_NODE:
            logger.info("Wait for all service-nodes to finish")
            nodes = MEMC.get(NODES_TAG)
            while nodes is not None and nodes > 0:
                try:
                    time.sleep(1)
                    nodes = MEMC.get(NODES_TAG)
                except Exception:
                    # ignore transient memcached errors while waiting for nodes
                    pass
            logger.info("Clearing Locks for {}".format(NODES_TAG))
            x = minlocks
            while x <= maxlocks:
                MEMC.delete("{}.device.{}".format(wrapper_type, x))
                x = x + 1
            logger.info("{} Locks Cleared".format(x))
            logger.info("Clearing Nodes")
            MEMC.delete(MASTER_TAG)
            MEMC.delete(NODES_TAG)
        else:
            MEMC.decr(NODES_TAG)
        logger.info("Finished {}.".format(time.strftime("%Y-%m-%d %H:%M:%S")))
    # EOC

    # Update poller statistics
    if wrapper_type == "poller":
        query = "UPDATE pollers SET last_polled=NOW(), devices='{}', time_taken='{}' WHERE poller_name='{}'".format(
            DISCOVERED_DEVICES_COUNT, total_time, config["distributed_poller_name"]
        )
        cursor = db_connection.query(query)
        if cursor.rowcount < 1:
            query = "INSERT INTO pollers SET poller_name='{}', last_polled=NOW(), devices='{}', time_taken='{}'".format(
                config["distributed_poller_name"], DISCOVERED_DEVICES_COUNT, total_time
            )
            db_connection.query(query)

    db_connection.close()

    if total_time > wrappers[wrapper_type]["total_exec_time"]:
        logger.warning(
            "the process took more than {} seconds to finish; you need faster hardware or more threads".format(
                wrappers[wrapper_type]["total_exec_time"]
            )
        )
        logger.warning(
            "run sequentially, these checks would have taken {} seconds".format(
                REAL_DURATION
            )
        )
        show_stopper = False
        for device in PER_DEVICE_DURATION:
            if PER_DEVICE_DURATION[device] > wrappers[wrapper_type]["nodes_stepping"]:
                logger.warning(
                    "device {} is taking too long: {} seconds".format(
                        device, PER_DEVICE_DURATION[device]
                    )
                )
                show_stopper = True
        if show_stopper:
            logger.error(
                "Some devices are taking more than {} seconds; the script cannot recommend what to do.".format(
                    wrappers[wrapper_type]["nodes_stepping"]
                )
            )
        else:
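            # Scale the current worker count by how far the run overshot the
            # polling step (STEPPING) to estimate a workable thread count.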
            recommend = int(total_time / STEPPING * amount_of_workers + 1)
            logger.warning(
                "Consider setting a minimum of {} threads. (This does not constitute professional advice!)".format(
                    recommend
                )
            )
        sys.exit(2)
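
For context, a wrapper like this is normally driven from a small CLI entry point. The sketch below shows one hedged way to invoke it; the argparse flags and the JSON config loading are assumptions for illustration, and only wrapper()'s own parameters come from the code above.

import argparse
import json

if __name__ == "__main__":
    # Hypothetical entry point; the real LibreNMS wrapper CLI may differ.
    parser = argparse.ArgumentParser(description="Run a poller/discovery/service wrapper")
    parser.add_argument("wrapper_type", choices=["service", "discovery", "poller"])
    parser.add_argument("amount_of_workers", type=int, help="number of worker threads")
    parser.add_argument("--config", default="config.json", help="path to a JSON dump of the config")
    parser.add_argument("--log-dir", default="logs")
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    with open(args.config) as f:
        config = json.load(f)  # assumed: the decoded LibreNMS config dict

    wrapper(args.wrapper_type, args.amount_of_workers, config, args.log_dir, _debug=args.debug)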