Example #1
class DefaultManager(object):
    """The default storage manager used by all shadow classes.

    Mostly uses helper methods in shadow classes to perform its work.

    """

    _logger = ipdevpoll.ContextLogger()

    def __init__(self, cls, containers):
        """Creates a storage manager.

        :param cls: The Shadow subclass this instance will work with.
        :param containers: A ContainerRepository instance.

        """
        self.cls = cls
        self.containers = containers

    def prepare(self):
        """Prepares managed shadows in containers"""
        self.cls.prepare_for_save(self.containers)

    def save(self):
        """Saves managed shadows in containers"""
        for obj in self.get_managed():
            obj.save(self.containers)

    def cleanup(self):
        """Runs any necessary cleanup hooks after save is done"""
        self.cls.cleanup_after_save(self.containers)

    def get_managed(self):
        """Returns the list of container objects managed by this instance"""
        if self.cls in self.containers:
            return self.containers[self.cls].values()
        else:
            return []

    def __repr__(self):
        return "%s(%r, %r(...))" % (
            self.__class__.__name__,
            self.cls,
            self.containers.__class__.__name__,
        )
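
A minimal usage sketch of the manager lifecycle. This is hypothetical: ExampleShadow and containers stand in for a real Shadow subclass and a populated ContainerRepository, neither of which is defined above.

# Hypothetical driver code for DefaultManager.
manager = DefaultManager(ExampleShadow, containers)
manager.prepare()   # class-level preprocessing of the managed shadows
manager.save()      # persist each managed container object
manager.cleanup()   # class-level post-save hooks
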
Example #2
    def __init__(mcs, name, bases, dct):
        """Initializes a new shadow class.

        If the class declares a __shadowclass__ attribute referencing a
        Django model, the model's field names are mirrored onto the shadow
        class as None-valued attributes.

        """
        try:
            shadowclass = dct['__shadowclass__']
        except KeyError:
            shadowclass = dct['__shadowclass__'] = None

        super(MetaShadow, mcs).__init__(name, bases, dct)

        if shadowclass is None:
            return

        _meta = shadowclass._meta
        setattr(mcs, '_meta', _meta)
        field_names = [f.name for f in _meta.fields]
        setattr(mcs, '_fields', field_names)
        for fname in field_names:
            setattr(mcs, fname, None)

        setattr(mcs, '_logger', ipdevpoll.ContextLogger())
        MetaShadow.shadowed_classes[shadowclass] = mcs
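
For context, a sketch of how a shadow class might opt in to this metaclass machinery. Shadow (a base class assumed to use MetaShadow as its metaclass) and the manage.Netbox model are assumptions, not defined in the fragment above.

# Illustrative only: declaring __shadowclass__ makes MetaShadow mirror the
# Django model's field names as None-valued class attributes and register
# the shadow class in MetaShadow.shadowed_classes.
class Netbox(Shadow):
    __shadowclass__ = manage.Netbox
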
Example #3
class NetboxJobScheduler(object):
    """Netbox job schedule handler.

    An instance of this class takes care of scheduling, running and
    rescheduling of a single JobHandler for a single netbox.

    """
    job_counters = {}
    job_queues = {}
    global_job_queue = []
    global_intensity = config.ipdevpoll_conf.getint('ipdevpoll',
                                                    'max_concurrent_jobs')
    _logger = ipdevpoll.ContextLogger()

    def __init__(self, job, netbox, pool):
        self.job = job
        self.netbox = netbox
        self.pool = pool
        self._log_context = dict(job=job.name, sysname=netbox.sysname)
        self._logger.debug("initializing %r job scheduling for %s", job.name,
                           netbox.sysname)
        self.cancelled = False
        self._deferred = Deferred()
        self._next_call = None
        self._last_job_started_at = 0
        self.running = False
        self._start_time = None
        self._current_job = None
        self.callLater = reactor.callLater

    def get_current_runtime(self):
        """Returns time elapsed since the start of the job as a timedelta."""
        return datetime.datetime.now() - self._start_time

    def start(self):
        """Start polling schedule."""
        self._next_call = self.callLater(0, self.run_job)
        return self._deferred

    def cancel(self):
        """Cancel scheduling of this job for this box.

        Future runs will not be scheduled after this.

        """
        if self.cancelled:
            self._logger.debug("cancel: Job %r already cancelled for %s",
                               self.job.name, self.netbox.sysname)
            return

        if self._next_call.active():
            self._next_call.cancel()
            self._logger.debug("cancel: Job %r cancelled for %s",
                               self.job.name, self.netbox.sysname)
        else:
            self._logger.debug(
                "cancel: Job %r cancelled for %s, "
                "though no next run was scheduled", self.job.name,
                self.netbox.sysname)

        self.cancelled = True
        self.cancel_running_job()
        self._deferred.callback(self)

    def cancel_running_job(self):
        if self._current_job:
            self._logger.debug('Cancelling running job')
            self.pool.cancel(self._current_job)

    def run_job(self, dummy=None):
        """Runs the job now, unless a previous run is still in progress or
        an intensity limit requires it to queue up and wait."""
        if self.is_running():
            self._logger.info(
                "Previous %r job is still running for %s, "
                "not running again now.", self.job.name, self.netbox.sysname)
            return

        if self.is_job_limit_reached():
            self._logger.debug(
                "intensity limit reached for %s - waiting to "
                "run for %s", self.job.name, self.netbox.sysname)
            self.queue_myself(self.get_job_queue())
            return

        if self.is_global_limit_reached():
            self._logger.debug(
                "global intensity limit reached - waiting to "
                "run for %s", self.netbox.sysname)
            self.queue_myself(self.global_job_queue)
            return

        # We're ok to start a polling run.
        try:
            self._start_time = datetime.datetime.now()
            deferred = self.pool.execute_job(self.job.name,
                                             self.netbox.id,
                                             plugins=self.job.plugins,
                                             interval=self.job.interval)
            self._current_job = deferred
        except Exception:
            self._log_unhandled_error(Failure())
            self.reschedule(60)
            return

        self.count_job()
        self._last_job_started_at = time.time()

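        # Note on callback ordering: _adjust_intensity_on_snmperror is added
        # first so it sees the raw failure before the reschedule handlers,
        # and _unregister_handler is added last so it runs whether the job
        # succeeded or failed.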
        deferred.addErrback(self._adjust_intensity_on_snmperror)
        deferred.addCallbacks(self._reschedule_on_success,
                              self._reschedule_on_failure)
        deferred.addErrback(self._log_unhandled_error)

        deferred.addCallback(self._unregister_handler)

    def is_running(self):
        return self.running

    @classmethod
    def _adjust_intensity_on_snmperror(cls, failure):
        if (failure.check(AbortedJobError)
                and isinstance(failure.value.cause, SnmpError)):

            open_sessions = AgentProxy.count_open_sessions()
            new_limit = int(ceil(open_sessions * 0.90))
            if new_limit < cls.global_intensity:
                cls._logger.warning("Setting global intensity limit to %d",
                                    new_limit)
                cls.global_intensity = new_limit
        return failure

    def _update_counters(self, success):
        prefix = metric_prefix_for_ipdevpoll_job(self.netbox.sysname,
                                                 self.job.name)
        counter_path = (prefix +
                        (".success-count" if success else ".failure-count"))
        _COUNTERS.increment(counter_path)
        _COUNTERS.start()

    def _reschedule_on_success(self, result):
        """Reschedules the next normal run of this job."""
        delay = max(0, self.job.interval - self.get_runtime())
        self.reschedule(delay)
        if result:
            self._log_finished_job(True)
        else:
            self._logger.debug("job did nothing")
        self._update_counters(bool(result))
        return result

    def _reschedule_on_failure(self, failure):
        """Examines the job failure and reschedules the job if needed."""
        if failure.check(SuggestedReschedule):
            delay = int(failure.value.delay)
        else:
            # within 5-10 minutes, but no longer than set interval
            delay = min(self.job.interval, randint(5 * 60, 10 * 60))
        self.reschedule(delay)
        self._log_finished_job(False)
        self._update_counters(False)
        failure.trap(AbortedJobError)

    def _log_finished_job(self, success=True):
        status = "completed" if success else "failed"
        runtime = datetime.timedelta(seconds=self.get_runtime())
        next_time = self.get_time_to_next_run()
        if next_time is not None:
            if next_time <= 0:
                delta = "right now"
            else:
                delta = "in %s" % datetime.timedelta(seconds=next_time)
            self._logger.info("%s for %s %s in %s. next run %s.",
                              self.job.name, self.netbox.sysname, status,
                              runtime, delta)
        else:
            self._logger.info("%s in %s. no next run scheduled", status,
                              runtime)

    def get_runtime(self):
        """Returns the number of seconds passed since the start of last job"""
        return time.time() - self._last_job_started_at

    def get_time_to_next_run(self):
        """Returns the number of seconds until the next job starts"""
        if self._next_call.active():
            return self._next_call.getTime() - time.time()

    def reschedule(self, delay):
        """Reschedules the next run of of this job"""
        if self.cancelled:
            self._logger.debug("ignoring request to reschedule cancelled job")
            return

        next_time = datetime.datetime.now() + datetime.timedelta(seconds=delay)

        self._logger.debug("Next %r job for %s will be in %d seconds (%s)",
                           self.job.name, self.netbox.sysname, delay,
                           next_time)

        if self._next_call.active():
            self._next_call.reset(delay)
        else:
            self._next_call = self.callLater(delay, self.run_job)

    def _log_unhandled_error(self, failure):
        if not failure.check(db.ResetDBConnectionError):
            log_unhandled_failure(self._logger, failure,
                                  "Unhandled exception raised by JobHandler")

    def _unregister_handler(self, result):
        """Remove a JobHandler from internal data structures."""
        if self.running:
            self.uncount_job()
            self.unqueue_next_job()
            self.unqueue_next_global_job()
        return result

    def count_job(self):
        current_count = self.__class__.job_counters.get(self.job.name, 0)
        current_count += 1
        self.__class__.job_counters[self.job.name] = current_count
        self.running = True

    def uncount_job(self):
        current_count = self.__class__.job_counters.get(self.job.name, 0)
        current_count -= 1
        self.__class__.job_counters[self.job.name] = max(current_count, 0)
        self.running = False
        self._current_job = None

    def get_job_count(self):
        return self.__class__.job_counters.get(self.job.name, 0)

    def is_job_limit_reached(self):
        "Returns True if the number of jobs >= the job intensity limit"
        return (self.job.intensity > 0
                and self.get_job_count() >= self.job.intensity)

    @classmethod
    def is_global_limit_reached(cls):
        "Returns True if the global number of jobs >= global intensity limit"
        return cls.get_global_job_count() >= cls.global_intensity

    @classmethod
    def get_global_job_count(cls):
        if cls.job_counters:
            return sum(cls.job_counters.values())
        else:
            return 0

    def queue_myself(self, queue):
        queue.append(self)

    def unqueue_next_job(self):
        "Unqueues the next waiting job"
        queue = self.get_job_queue()
        if queue and not self.is_job_limit_reached():
            handler = queue.pop(0)
            return handler.start()

    @classmethod
    def unqueue_next_global_job(cls):
        "Unqueues the next job waiting because of the global intensity setting"
        if not cls.is_global_limit_reached():
            for index, handler in enumerate(cls.global_job_queue):
                if not handler.is_job_limit_reached():
                    del cls.global_job_queue[index]
                    return handler.start()

    def get_job_queue(self):
        if self.job.name not in self.job_queues:
            self.job_queues[self.job.name] = []
        return self.job_queues[self.job.name]
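
A hedged sketch of wiring up a single scheduler inside a running Twisted reactor. The job, netbox and pool objects are assumed to provide what the class reads above: job.name/interval/plugins/intensity, netbox.sysname/id, and a pool exposing execute_job() and cancel().

# Hypothetical wiring; none of these objects are constructed in this example.
scheduler = NetboxJobScheduler(job, netbox, pool)
deferred = scheduler.start()  # schedules the first run via reactor.callLater

def on_schedule_ended(sched):
    # start()'s deferred fires with the scheduler itself once cancel() runs.
    print("schedule for %s ended" % sched.netbox.sysname)

deferred.addCallback(on_schedule_ended)
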
Example #4
class JobScheduler(object):
    active_schedulers = set()
    job_logging_loop = None
    netbox_reload_interval = 2 * 60.0  # seconds
    netbox_reload_loop = None
    _logger = ipdevpoll.ContextLogger()

    def __init__(self, job, pool):
        """Initializes a job schedule from the job descriptor."""
        self._log_context = dict(job=job.name)
        self.job = job
        self.pool = pool
        self.netboxes = NetboxLoader()
        self.active_netboxes = {}

        self.active_schedulers.add(self)

    @classmethod
    def initialize_from_config_and_run(cls, pool, onlyjob=None):
        descriptors = config.get_jobs()
        schedulers = [
            cls(d, pool) for d in descriptors
            if not onlyjob or (d.name == onlyjob)
        ]
        for scheduler in schedulers:
            scheduler.run()

    def run(self):
        """Initiate scheduling of this job."""
        signals.netbox_type_changed.connect(self.on_netbox_type_changed)
        self._setup_active_job_logging()
        self._start_netbox_reload_loop()

    def _start_netbox_reload_loop(self):
        if not self.netbox_reload_loop:
            self.netbox_reload_loop = task.LoopingCall(self._reload_netboxes)
        if self.netbox_reload_loop.running:
            self.netbox_reload_loop.stop()
        self.netbox_reload_loop.start(interval=self.netbox_reload_interval,
                                      now=True)

    def on_netbox_type_changed(self, netbox_id, new_type, **_kwargs):
        """Performs various cleanup and reload actions on a netbox type change
        signal.

        The netbox's data is cleaned up, and the next netbox data reload is
        scheduled to take place immediately.

        """
        sysname = (self.netboxes[netbox_id].sysname
                   if netbox_id in self.netboxes else str(netbox_id))
        self._logger.info("Cancelling all jobs for %s due to type change.",
                          sysname)
        self.cancel_netbox_scheduler(netbox_id)

        df = db.run_in_thread(shadows.Netbox.cleanup_replaced_netbox,
                              netbox_id, new_type)
        return df.addCallback(lambda x: self._start_netbox_reload_loop())

    def _setup_active_job_logging(self):
        if self.__class__.job_logging_loop is None:
            loop = task.LoopingCall(self.__class__.log_active_jobs)
            self.__class__.job_logging_loop = loop
            loop.start(interval=5 * 60.0, now=False)

    def _reload_netboxes(self):
        """Reload the set of netboxes to poll and update schedules."""
        deferred = self.netboxes.load_all()
        deferred.addCallbacks(self._process_reloaded_netboxes,
                              self._handle_reload_failures)
        db.django_debug_cleanup()
        return deferred

    def _process_reloaded_netboxes(self, result):
        """Process the result of a netbox reload and update schedules."""
        (new_ids, removed_ids, changed_ids) = result

        # Deschedule removed and changed boxes
        for netbox_id in removed_ids.union(changed_ids):
            self.cancel_netbox_scheduler(netbox_id)

        # Schedule new and changed boxes
        def _lastupdated(netboxid):
            return self.netboxes[netboxid].last_updated.get(
                self.job.name, datetime.datetime.min)

        new_and_changed = sorted(new_ids.union(changed_ids), key=_lastupdated)
        for netbox_id in new_and_changed:
            self.add_netbox_scheduler(netbox_id)

    def _handle_reload_failures(self, failure):
        failure.trap(db.ResetDBConnectionError)
        self._logger.error("Reloading the IP device list failed because the "
                           "database connection was reset")

    def add_netbox_scheduler(self, netbox_id):
        netbox = self.netboxes[netbox_id]
        scheduler = NetboxJobScheduler(self.job, netbox, self.pool)
        self.active_netboxes[netbox_id] = scheduler
        return scheduler.start()

    def cancel_netbox_scheduler(self, netbox_id):
        if netbox_id not in self.active_netboxes:
            return
        scheduler = self.active_netboxes[netbox_id]
        scheduler.cancel()
        del self.active_netboxes[netbox_id]

    @classmethod
    def reload(cls):
        """Reload netboxes for all jobs"""
        for scheduler in cls.active_schedulers:
            scheduler._reload_netboxes()

    @classmethod
    def log_active_jobs(cls, level=logging.DEBUG):
        """Debug logs a list of running job handlers.

        The handlers will be sorted by descending runtime.

        """
        jobs = [(netbox_scheduler.netbox.sysname, netbox_scheduler.job.name,
                 netbox_scheduler.get_current_runtime())
                for scheduler in cls.active_schedulers
                for netbox_scheduler in scheduler.active_netboxes.values()
                if netbox_scheduler.is_running()]
        jobs.sort(key=itemgetter(2), reverse=True)
        table_formatter = SimpleTableFormatter(jobs)

        logger = logging.getLogger("%s.joblist" % __name__)
        if jobs:
            logger.log(level, "currently active jobs (%d):\n%s", len(jobs),
                       table_formatter)
        else:
            logger.log(level, "no active jobs (%d JobHandlers)",
                       JobHandler.get_instance_count())
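
A sketch of the presumed entry point, assuming a Twisted reactor and a worker pool compatible with NetboxJobScheduler above:

from twisted.internet import reactor

def main(pool):
    # One JobScheduler per configured job descriptor; each starts its own
    # periodic netbox reload loop.
    JobScheduler.initialize_from_config_and_run(pool)
    reactor.run()
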
Example #5
class NetboxLoader(dict):
    """Loads netboxes from the database, synchronously or asynchronously.

    Access as a dictionary to retrieve information about the loaded
    netboxes.  The dictionary keys are netbox table primary keys, the
    values are shadows.Netbox objects.

    """
    _logger = ipdevpoll.ContextLogger()

    def __init__(self):
        super(NetboxLoader, self).__init__()
        self.peak_count = 0
        # touch _logger to initialize logging context right away
        # pylint: disable=W0104
        self._logger

    @autocommit
    def load_all_s(self):
        """Synchronously load netboxes from database.

        Returns:

          A three-tuple, (new_ids, lost_ids, changed_ids), whose elements are
          sets of netbox IDs.

          - The first is the set of IDs that are new since the last load
            operation.

          - The second is the set of IDs that have been removed since the last
            load operation.

          - The third is the set of IDs of netboxes whose information has
            changed in the database since the last load operation.

        """
        related = ('room__location', 'type__vendor', 'category',
                   'organization', 'device')
        snmp_up_query = """SELECT COUNT(*) = 0
                           FROM alerthist
                           WHERE alerthist.netboxid = netbox.netboxid
                             AND eventtypeid='snmpAgentState'
                             AND end_time >= 'infinity' """
        queryset = (manage.Netbox.objects.select_related(*related).extra(
            select={'snmp_up': snmp_up_query}))
        netbox_list = storage.shadowify_queryset(queryset)
        netbox_dict = dict((netbox.id, netbox) for netbox in netbox_list)

        times = load_last_updated_times()
        for netbox in netbox_list:
            netbox.last_updated = times.get(netbox.id, {})

        django_debug_cleanup()

        previous_ids = set(self.keys())
        current_ids = set(netbox_dict.keys())
        lost_ids = previous_ids.difference(current_ids)
        new_ids = current_ids.difference(previous_ids)

        same_ids = previous_ids.intersection(current_ids)
        changed_ids = set(i for i in same_ids
                          if is_netbox_changed(self[i], netbox_dict[i]))

        # update self
        for i in lost_ids:
            del self[i]
        for i in new_ids:
            self[i] = netbox_dict[i]
        for i in same_ids:
            self[i].copy(netbox_dict[i])

        self.peak_count = max(self.peak_count, len(self))

        anything_changed = len(new_ids) or len(lost_ids) or len(changed_ids)
        log = self._logger.info if anything_changed else self._logger.debug

        log(
            "Loaded %d netboxes from database "
            "(%d new, %d removed, %d changed, %d peak)", len(netbox_dict),
            len(new_ids), len(lost_ids), len(changed_ids), self.peak_count)

        return (new_ids, lost_ids, changed_ids)

    def load_all(self):
        """Asynchronously load netboxes from database."""
        return run_in_thread(self.load_all_s)
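
A usage sketch: load_all() runs load_all_s() in a worker thread, and the resulting Deferred fires with the same (new_ids, lost_ids, changed_ids) tuple:

loader = NetboxLoader()

def on_loaded(result):
    new_ids, lost_ids, changed_ids = result
    # The loader dict has already been updated in place at this point.
    for netbox_id in new_ids:
        print("new netbox: %s" % loader[netbox_id].sysname)
    return result

loader.load_all().addCallback(on_loaded)
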
Example #6
class NetboxLoader(dict):
    """Loads netboxes from the database, synchronously or asynchronously.

    Access as a dictionary to retrieve information about the loaded
    netboxes.  The dictionary keys are netbox table primary keys, the
    values are shadows.Netbox objects.

    """
    _logger = ipdevpoll.ContextLogger()

    def __init__(self):
        super(NetboxLoader, self).__init__()
        self.peak_count = 0
        # touch _logger to initialize logging context right away
        # pylint: disable=W0104
        self._logger

    def load_all_s(self):
        """Synchronously load netboxes from database.

        Returns:

          A three-tuple, (new_ids, lost_ids, changed_ids), whose elements are
          sets of netbox IDs.

          - The first is the set of IDs that are new since the last load
            operation.

          - The second is the set of IDs that have been removed since the last
            load operation.

          - The third is the set of IDs of netboxes whose information has
            changed in the database since the last load operation.

        """
        related = ('room__location', 'type__vendor', 'category',
                   'organization')
        snmp_down = set(
            event.AlertHistory.objects.unresolved(
                'snmpAgentState').values_list('netbox__id', flat=True))
        self._logger.debug("These netboxes have active snmpAgentStates: %r",
                           snmp_down)
        queryset = manage.Netbox.objects.filter(deleted_at__isnull=True)

        filter_groups_included = get_netbox_filter('groups_included')
        if filter_groups_included:
            queryset = queryset.filter(groups__id__in=filter_groups_included)

        filter_groups_excluded = get_netbox_filter('groups_excluded')
        if filter_groups_excluded:
            queryset = queryset.exclude(groups__id__in=filter_groups_excluded)

        queryset = list(queryset.select_related(*related))
        for netbox in queryset:
            netbox.snmp_up = netbox.id not in snmp_down
        netbox_list = storage.shadowify_queryset(queryset)
        netbox_dict = dict((netbox.id, netbox) for netbox in netbox_list)

        times = load_last_updated_times()
        for netbox in netbox_list:
            netbox.last_updated = times.get(netbox.id, {})

        django_debug_cleanup()

        previous_ids = set(self.keys())
        current_ids = set(netbox_dict.keys())
        lost_ids = previous_ids.difference(current_ids)
        new_ids = current_ids.difference(previous_ids)

        same_ids = previous_ids.intersection(current_ids)
        changed_ids = set(i for i in same_ids
                          if is_netbox_changed(self[i], netbox_dict[i]))

        # update self
        for i in lost_ids:
            del self[i]
        for i in new_ids:
            self[i] = netbox_dict[i]
        for i in same_ids:
            self[i].copy(netbox_dict[i])

        self.peak_count = max(self.peak_count, len(self))

        anything_changed = len(new_ids) or len(lost_ids) or len(changed_ids)
        log = self._logger.info if anything_changed else self._logger.debug

        log(
            "Loaded %d netboxes from database "
            "(%d new, %d removed, %d changed, %d peak)", len(netbox_dict),
            len(new_ids), len(lost_ids), len(changed_ids), self.peak_count)

        return (new_ids, lost_ids, changed_ids)

    def load_all(self):
        """Asynchronously load netboxes from database."""
        return run_in_thread(self.load_all_s)
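
This later variant differs from Example #5 by deriving snmp_up from unresolved snmpAgentState alerts instead of a raw SQL subquery, and by honoring optional group include/exclude filters. Outside the reactor, say in a script or test with a configured Django environment, the synchronous form can be called directly:

# Hedged sketch; assumes Django is set up and the database is reachable.
loader = NetboxLoader()
new_ids, lost_ids, changed_ids = loader.load_all_s()
print("%d netboxes loaded, %d new" % (len(loader), len(new_ids)))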