Esempio n. 1
0
class JobHandler(object):
    """Handles a single polling job against a single netbox.

    An instance of this class performs a polling job, as described by
    a job specification in the config file, for a single netbox.  It
    will handle the dispatch of polling plugins, and contain state
    information for the job.

    """
    _logger = ContextLogger()
    _queue_logger = ContextLogger(suffix='queue')
    _timing_logger = ContextLogger(suffix='timings')
    _start_time = datetime.datetime.min

    def __init__(self, name, netbox, plugins=None):
        """Set up job state for polling a single netbox.

        :param name: job name from the configuration file
        :param netbox: the netbox object this job will poll
        :param plugins: optional list of plugin names to dispatch
        """
        self.name = name
        self.netbox = netbox
        self.cancelled = threading.Event()
        self.plugins = plugins or []

        self._logger.debug("Job %r initialized with plugins: %r",
                           self.name, self.plugins)

        self.containers = storage.ContainerRepository()
        self.storage_queue = []

        # Seed the container repository with the netbox itself
        self.container_factory(shadows.Netbox, key=None,
                               id=netbox.id, sysname=netbox.sysname)

        self.agent = None

    def _create_agentproxy(self):
        if self.agent:
            self._destroy_agentproxy()

        if not self.netbox.read_only:
            self.agent = None
            return

        port = ports.next()
        self.agent = AgentProxy(self.netbox.ip,
                                161,
                                community=self.netbox.read_only,
                                snmpVersion='v%s' % self.netbox.snmp_version,
                                protocol=port.protocol,
                                snmp_parameters=snmp_parameter_factory(
                                    self.netbox))
        try:
            self.agent.open()
        except SnmpError, error:
            self.agent.close()
            session_count = self.agent.count_open_sessions()
            job_count = self.get_instance_count()
            self._logger.error(
                "%s (%d currently open SNMP sessions, %d job handlers)", error,
                session_count, job_count)
            raise AbortedJobError("Cannot open SNMP session", cause=error)
        else:
Esempio n. 2
0
class ProcessAMP(amp.AMP):
    """AMP protocol variant that can run over plain process pipes."""

    _logger = ContextLogger()

    def __init__(self, is_worker, **kwargs):
        """:param is_worker: True when this end runs inside a worker process"""
        super(ProcessAMP, self).__init__(**kwargs)
        self.is_worker = is_worker
        # optional callable invoked as lost_handler(self, reason) on disconnect
        self.lost_handler = None

    def makeConnection(self, transport):
        # Process transports lack the address methods AMP expects;
        # stub them out before handing the transport to the superclass.
        for method, stub in (('getPeer', lambda: "peer"),
                             ('getHost', lambda: "host")):
            if not hasattr(transport, method):
                setattr(transport, method, stub)
        super(ProcessAMP, self).makeConnection(transport)

    def connectionLost(self, reason):
        super(ProcessAMP, self).connectionLost(reason)
        if self.is_worker:
            # a worker that loses its master has nothing left to do
            if reactor.running:
                reactor.stop()
        elif self.lost_handler:
            self.lost_handler(self, reason)
Esempio n. 3
0
class Worker(object):
    """This class holds information about one worker process as seen from
    the worker pool"""

    _logger = ContextLogger()

    def __init__(self, pool, threadpoolsize, max_jobs):
        """
        :param pool: the WorkerPool this worker belongs to
        :param threadpoolsize: threadpool size passed on to the worker process
        :param max_jobs: number of jobs after which the worker is recycled;
                         a false value means unlimited
        """
        self.active_jobs = 0
        self.total_jobs = 0
        self.max_concurrent_jobs = 0
        self.pool = pool
        self.threadpoolsize = threadpoolsize
        self.max_jobs = max_jobs

    @inlineCallbacks
    def start(self):
        """Spawns the worker process and connects an AMP protocol to it.

        :returns: a Deferred firing with this Worker instance.
        """
        args = [control.get_process_command(), '--worker', '-f', '-s', '-P']
        if self.threadpoolsize:
            args.append('--threadpoolsize=%d' % self.threadpoolsize)
        endpoint = ProcessEndpoint(reactor, control.get_process_command(),
                                   args, os.environ)
        factory = protocol.Factory()
        factory.protocol = lambda: ProcessAMP(is_worker=False,
                                              locator=JobHandler())
        self.process = yield endpoint.connect(factory)
        self.process.lost_handler = self._worker_died
        returnValue(self)

    def done(self):
        """Returns True if this worker has reached its max job count."""
        # bool() so callers always get a boolean, never 0/None
        return bool(self.max_jobs and self.total_jobs >= self.max_jobs)

    def _worker_died(self, worker, reason):
        # Lazy %-style args: messages are only formatted when the log
        # level is actually enabled
        if not self.done():
            self._logger.warning("Lost worker %s with %s active jobs",
                                 worker, self.active_jobs)
        elif self.active_jobs:
            self._logger.warning("Worker %s exited with %s active jobs",
                                 worker, self.active_jobs)
        else:
            self._logger.debug("Worker %s exited normally", worker)
        self.pool.worker_died(self)

    def execute(self, serial, command, **kwargs):
        """Executes a job command remotely on this worker.

        :param serial: pool-unique serial number identifying the job
        :returns: a Deferred firing with the remote call's result
        """
        self.active_jobs += 1
        self.total_jobs += 1
        self.max_concurrent_jobs = max(self.active_jobs,
                                       self.max_concurrent_jobs)
        deferred = self.process.callRemote(command, serial=serial, **kwargs)
        if self.done():
            # this job pushed the worker to its limit; ask it to shut
            # down once its remaining jobs have completed
            self.process.callRemote(Shutdown)
        return deferred

    def cancel(self, serial):
        """Asks the worker process to cancel the job with this serial"""
        return self.process.callRemote(Cancel, serial=serial)
Esempio n. 4
0
class JobHandler(amp.CommandLocator):
    """Locates and runs responders for job commands received over AMP"""

    _logger = ContextLogger()

    def __init__(self):
        super(JobHandler, self).__init__()
        self.jobs = {}
        self.done = False

    def job_done(self, result, serial):
        """Forgets a finished job; stops the reactor when shutting down
        and no jobs remain.  Passes the job result through."""
        self.jobs.pop(serial, None)
        if self.done and not self.jobs:
            reactor.callLater(3, reactor.stop)
        return result

    @Job.responder
    def execute_job(self, netbox, job, plugins, interval, serial):
        """Runs a job request in this process, tracked by its serial"""
        self._logger.debug(
            "Process {pid} received job {job} for"
            " netbox {netbox}"
            " with plugins {plugins}".format(pid=os.getpid(),
                                             job=job,
                                             netbox=netbox,
                                             plugins=",".join(plugins)))
        handler = jobs.JobHandler(job, netbox, plugins, interval)
        self.jobs[serial] = handler
        deferred = handler.run()
        deferred.addBoth(self.job_done, serial)
        deferred.addCallback(lambda result: {'result': result,
                                             'reschedule': 0})

        def handle_reschedule(failure):
            failure.trap(jobs.SuggestedReschedule)
            return {'reschedule': failure.value.delay, "result": False}

        deferred.addErrback(handle_reschedule)
        return deferred

    @Cancel.responder
    def cancel(self, serial):
        """Cancels the running job with this serial, if any"""
        handler = self.jobs.get(serial)
        if handler is not None:
            handler.cancel()
        return {}

    @Shutdown.responder
    def shutdown(self):
        """Flags this process for shutdown once all jobs are done"""
        self.done = True
        return {}

    def log_jobs(self):
        """Logs a summary of all currently active jobs"""
        self._logger.info("Got {jobs} active jobs".format(jobs=len(self.jobs)))
        for handler in self.jobs.values():
            self._logger.info("{job} {netbox} {plugins}".format(
                job=handler.name,
                netbox=handler.netbox,
                plugins=", ".join(handler.plugins)))
Esempio n. 5
0
class JobHandler(object):
    """Handles a single polling job against a single netbox.

    An instance of this class performs a polling job, as described by
    a job specification in the config file, for a single netbox.  It
    will handle the dispatch of polling plugins, and contain state
    information for the job.

    """
    _logger = ContextLogger()
    _queue_logger = ContextLogger(suffix='queue')
    _timing_logger = ContextLogger(suffix='timings')
    _start_time = datetime.datetime.min

    def __init__(self, name, netbox, plugins=None, interval=None):
        """Initializes state for a single job run.

        :param name: job name from the configuration file
        :param netbox: primary key of the netbox to poll
        :param plugins: optional list of plugin names to dispatch
        :param interval: the job's scheduling interval
        """
        self.name = name
        self.netbox_id = netbox
        self.netbox = None
        self.cancelled = threading.Event()
        self.interval = interval
        self.plugins = plugins or []

        self._log_context = {}
        self.containers = storage.ContainerRepository()
        self.storage_queue = []
        self.agent = None

    def _create_agentproxy(self):
        """Creates and opens an SNMP agent proxy for this job's netbox.

        :raises AbortedJobError: if the SNMP session cannot be opened.
        """
        if self.agent:
            self._destroy_agentproxy()

        if not self.netbox.read_only:
            # no SNMP community configured; nothing to talk to
            self.agent = None
            return

        port = next(ports)
        self.agent = AgentProxy(
            self.netbox.ip, 161,
            community=self.netbox.read_only,
            snmpVersion='v%s' % self.netbox.snmp_version,
            protocol=port.protocol,
            snmp_parameters=snmp_parameter_factory(self.netbox))
        try:
            self.agent.open()
        except SnmpError as error:
            self.agent.close()
            self._logger.error(
                "%s (%d currently open SNMP sessions, %d job handlers)", error,
                self.agent.count_open_sessions(), self.get_instance_count())
            raise AbortedJobError("Cannot open SNMP session", cause=error)
        else:
            self._logger.debug("AgentProxy created for %s: %s",
                               self.netbox.sysname, self.agent)

    def _destroy_agentproxy(self):
        """Closes and discards the current agent proxy, if any."""
        if self.agent:
            self.agent.close()
        self.agent = None

    @defer.inlineCallbacks
    def _find_plugins(self):
        """Instantiates every willing plugin for this job.

        :returns: a Deferred whose result is a list of plugin instances,
                  or None when no plugin was willing to run.
        """
        from nav.ipdevpoll.config import ipdevpoll_conf

        candidates = [plugin_registry[name]
                      for name in self._get_valid_plugins()]
        willing = yield self._get_willing_plugins(candidates)

        instances = [plugin_class(self.netbox,
                                  agent=self.agent,
                                  containers=self.containers,
                                  config=ipdevpoll_conf)
                     for plugin_class in willing]

        defer.returnValue(instances or None)

    def _get_valid_plugins(self):
        """Returns the configured plugin names that exist in the registry.

        Non-existent plugin names are logged and ignored.
        """
        valid_plugins, invalid_plugins = splitby(
            lambda name: name in plugin_registry, self.plugins)
        # Materialize once: splitby may hand back iterators, and iterating
        # one twice would leave the second pass (the log call) empty.
        invalid = list(invalid_plugins)
        if invalid:
            self._logger.error(
                "Non-existent plugins were configured for job "
                "%r (ignoring them): %r", self.name, invalid)
        return valid_plugins

    @defer.inlineCallbacks
    def _get_willing_plugins(self, plugin_classes):
        """Filters plugin_classes down to those whose can_handle() accepts
        this job's netbox.

        :returns: a Deferred whose result is the list of willing classes.
        """
        willing_plugins = []
        unwilling_plugins = []
        for cls in plugin_classes:
            try:
                can_handle = yield defer.maybeDeferred(cls.can_handle,
                                                       self.netbox)
            except db.ResetDBConnectionError:
                raise
            except Exception:
                self._logger.exception(
                    "Unhandled exception from can_handle(): %r", cls)
                can_handle = False
            bucket = willing_plugins if can_handle else unwilling_plugins
            bucket.append(cls)

        for willingness, plugins in (('unwilling', unwilling_plugins),
                                     ('willing', willing_plugins)):
            if plugins:
                self._logger.debug("%s plugins: %r", willingness,
                                   [cls.__name__ for cls in plugins])
            else:
                self._logger.debug("no %s plugins", willingness)

        defer.returnValue(willing_plugins)

    def _iterate_plugins(self, plugins):
        """Iterates plugins.

        Runs each plugin instance's handle() method in sequence as a
        chain of deferreds; the chain aborts if the job is cancelled or
        a plugin fails.

        :param plugins: an iterable of plugin instances to run.
        :returns: a Deferred for the plugin chain (or the initial result
                  directly, if the iterable is empty).
        """
        plugins = iter(plugins)

        def log_plugin_failure(failure, plugin_instance):
            # Timeouts abort the whole job; a SuggestedReschedule is only
            # logged here and handled further up the chain.
            if failure.check(TimeoutError, defer.TimeoutError):
                self._logger.debug("Plugin %s reported a timeout",
                                   plugin_instance.alias,
                                   exc_info=True)
                raise AbortedJobError("Plugin %s reported a timeout" %
                                      plugin_instance.alias)
            elif failure.check(SuggestedReschedule):
                self._logger.debug(
                    "Plugin %s suggested a reschedule in "
                    "%d seconds", plugin_instance, failure.value.delay)
            elif failure.check(db.ResetDBConnectionError):
                # deliberately not logged here; just propagated
                pass
            else:
                log_unhandled_failure(
                    self._logger, failure,
                    "Plugin %s reported an unhandled failure", plugin_instance)
            return failure

        def next_plugin(result=None):
            # Chains itself as a callback until the iterator is exhausted,
            # passing each plugin's result on to the next link.
            self._raise_if_cancelled()
            try:
                plugin_instance = next(plugins)
            except StopIteration:
                return result

            self._logger.debug("Now calling plugin: %s", plugin_instance)
            self._start_plugin_timer(plugin_instance)

            df = defer.maybeDeferred(plugin_instance.handle)
            # stop the timer on both outcomes; log failures before
            # handing control to the next plugin
            df.addErrback(self._stop_plugin_timer)
            df.addErrback(log_plugin_failure, plugin_instance)
            df.addCallback(self._stop_plugin_timer)
            df.addCallback(next_plugin)
            return df

        return next_plugin()

    @defer.inlineCallbacks
    def run(self):
        """Starts a polling job for netbox.

        Loads the netbox from the database, creates an SNMP agent proxy,
        runs every willing plugin in sequence and finally saves the
        collected container data.

        :returns: A Deferred, whose result will be True when the job did
                  something, or False when the job did nothing (i.e. no
                  plugins ran).

        """
        self.netbox = yield db.run_in_thread(dataloader.load_netbox,
                                             self.netbox_id)
        self._log_context.update(
            dict(job=self.name, sysname=self.netbox.sysname))
        self._logger.debug("Job %r started with plugins: %r", self.name,
                           self.plugins)
        # Initialize netbox in container
        nb = self._container_factory(shadows.Netbox,
                                     key=None,
                                     id=self.netbox.id,
                                     sysname=self.netbox.sysname)

        self._create_agentproxy()
        plugins = yield self._find_plugins()
        self._reset_timers()
        if not plugins:
            # nothing to do; report that the job did nothing
            self._destroy_agentproxy()
            defer.returnValue(False)

        self._logger.debug("Starting job %r for %s", self.name,
                           self.netbox.sysname)

        def wrap_up_job(_result):
            self._logger.debug("Job %s for %s done.", self.name,
                               self.netbox.sysname)
            self._log_timings()
            return True

        def plugin_failure(failure):
            # wrap non-abort failures so callers only see AbortedJobError
            self._log_timings()
            if not failure.check(AbortedJobError):
                raise AbortedJobError("Job aborted due to plugin failure",
                                      cause=failure.value)
            return failure

        def save_failure(failure):
            # DB connection resets are logged elsewhere; log everything else
            if not failure.check(db.ResetDBConnectionError):
                log_unhandled_failure(
                    self._logger, failure,
                    "Save stage failed with unhandled error")
            self._log_timings()
            raise AbortedJobError("Job aborted due to save failure",
                                  cause=failure.value)

        def log_abort(failure):
            if failure.check(SuggestedReschedule):
                return failure
            if failure.check(AbortedJobError):
                self._logger.error("Job %r for %s aborted: %s", self.name,
                                   self.netbox.sysname, failure.value)
            return failure

        def save(result):
            # skip the save stage when the job has been cancelled
            if self.cancelled.isSet():
                return wrap_up_job(result)

            df = self._save_container()
            df.addErrback(save_failure)
            df.addCallback(wrap_up_job)
            return df

        # pylint is unable to find reactor members:
        # pylint: disable=E1101
        shutdown_trigger_id = reactor.addSystemEventTrigger(
            "before", "shutdown", self.cancel)

        def cleanup(result):
            # always runs: release the SNMP session and the shutdown trigger
            self._destroy_agentproxy()
            reactor.removeSystemEventTrigger(shutdown_trigger_id)
            return result

        def log_externally_success(result):
            self._log_job_externally(True if result else None)
            return result

        def log_externally_failure(result):
            self._log_job_externally(False)
            return result

        # The action begins here
        df = self._iterate_plugins(plugins)
        df.addErrback(plugin_failure)
        df.addCallback(save)
        df.addErrback(log_abort)
        df.addBoth(cleanup)
        df.addCallbacks(log_externally_success, log_externally_failure)
        yield df
        defer.returnValue(True)

    def cancel(self):
        """Flags this job as cancelled; it stops at the earliest
        convenience."""
        self.cancelled.set()
        self._logger.info("Cancelling running job")

    def _reset_timers(self):
        """Resets the job timer and the per-plugin timing records."""
        self._start_time = datetime.datetime.now()
        self._plugin_times = []

    def _start_plugin_timer(self, plugin):
        """Records the start time of a plugin invocation."""
        now = datetime.datetime.now()
        # [name, start, stop]; stop is overwritten by _stop_plugin_timer
        self._plugin_times.append([plugin.__class__.__name__, now, now])

    def _stop_plugin_timer(self, result=None):
        """Records the stop time of the most recently started plugin.

        Passes any result through, so it can sit in a deferred chain.
        """
        self._plugin_times[-1][-1] = datetime.datetime.now()
        return result

    def _log_timings(self):
        """Logs a table of per-plugin and total job runtimes to the
        timing logger."""
        stop_time = datetime.datetime.now()
        job_total = stop_time - self._start_time

        times = [(plugin, stop - start)
                 for (plugin, start, stop) in self._plugin_times]
        plugin_total = sum((i[1] for i in times), datetime.timedelta(0))

        times.append(("Plugin total", plugin_total))
        times.append(("Job total", job_total))
        times.append(("Job overhead", job_total - plugin_total))

        log_text = []
        longest_label = max(len(i[0]) for i in times)
        # renamed from "format" to avoid shadowing the builtin
        row_format = "%%-%ds: %%s" % longest_label

        for plugin, delta in times:
            log_text.append(row_format % (plugin, delta))

        # separator lines before the summary rows
        dashes = "-" * max(len(i) for i in log_text)
        log_text.insert(-3, dashes)
        log_text.insert(-2, dashes)

        log_text.insert(
            0, "Job %r timings for %s:" % (self.name, self.netbox.sysname))

        self._timing_logger.debug("\n".join(log_text))

    def get_current_runtime(self):
        """Returns the elapsed time since this job started, as a
        timedelta."""
        now = datetime.datetime.now()
        return now - self._start_time

    def _save_container(self):
        """Saves the container repository to the database in a worker thread.

        A sane storage order is computed first, so ForeignKey targets are
        stored before the objects referring to them.

        :returns: a Deferred firing when the save cycle has completed.
        """
        @db.cleanup_django_debug_after
        def complete_save_cycle():
            # Find a storage order for the container classes
            self._populate_storage_queue()
            # Let every manager prepare its shadow objects for storage
            self._prepare_containers_for_save()
            # Store everything, then run per-class cleanup
            result = self._perform_save()
            self._log_timed_result(result, "Storing to database complete")
            self._cleanup_containers_after_save()

        return db.run_in_thread(complete_save_cycle)

    def _prepare_containers_for_save(self):
        """Runs the prepare routine of every queued manager"""
        for mgr in self.storage_queue:
            self._raise_if_cancelled()
            mgr.prepare()

    def _cleanup_containers_after_save(self):
        """Runs every queued manager's cleanup routine.

        :raises AbortedJobError: if the job is cancelled mid-cleanup.
        """
        self._logger.debug("Running cleanup routines for %d managers: %r",
                           len(self.storage_queue), self.storage_queue)
        manager = None  # defensive init, mirrors _perform_save()
        try:
            for manager in self.storage_queue:
                self._raise_if_cancelled()
                manager.cleanup()
        except AbortedJobError:
            raise
        except Exception:
            self._logger.exception(
                "Caught exception during cleanup. "
                "Last manager = %r", manager)
            import django.db
            if django.db.connection.queries:
                self._logger.error("The last query was: %s",
                                   django.db.connection.queries[-1])
            raise

    def _log_timed_result(self, res, msg):
        """Debug-logs msg annotated with a millisecond timing.

        :param res: elapsed time in milliseconds
        :param msg: message to annotate
        """
        # lazy %-args: only formatted when debug logging is enabled
        self._logger.debug("%s (%0.3f ms)", msg, res)

    def _perform_save(self):
        """Stores all queued containers to the database.

        :returns: the time spent saving, in milliseconds.
        :raises AbortedJobError: if the job is cancelled during the save.
        """
        start_time = time.time()
        manager = None
        try:
            self._log_containers("containers before save")

            for manager in self.storage_queue:
                self._raise_if_cancelled()
                manager.save()

            total_time = (time.time() - start_time) * 1000.0
            self._log_containers("containers after save")
            return total_time
        except AbortedJobError:
            raise
        except Exception:
            self._logger.exception(
                "Caught exception during save. "
                "Last manager = %s. Last model = %s", manager,
                getattr(manager, 'cls', None))
            import django.db
            if django.db.connection.queries:
                self._logger.error("The last query was: %s",
                                   django.db.connection.queries[-1])
            raise

    def _log_containers(self, prefix=None):
        """Dumps the container repository to the queue logger.

        :param prefix: optional label prepended to the dump.
        """
        log = self._queue_logger
        if not log.isEnabledFor(logging.DEBUG):
            return
        # Build the label explicitly: the old `prefix and "%s: " % prefix`
        # expression evaluated to None when prefix was None, causing the
        # literal text "None" to be logged.
        label = "%s: " % prefix if prefix else ""
        log.debug("%s%s", label, pprint.pformat(dict(self.containers)))

    def _populate_storage_queue(self):
        """Naive population of the storage queue.

        Instances of a single shadow class are assumed to have no
        inter-dependencies, so managers are queued only in the dependency
        (topological) order of their container classes.

        """
        self.storage_queue.extend(
            shadow_class.manager(shadow_class, self.containers)
            for shadow_class in self.containers.sortedkeys())

    def _container_factory(self, container_class, key, *args, **kwargs):
        """Container factory; delegates to the container repository"""
        return self.containers.factory(key, container_class, *args, **kwargs)

    def _raise_if_cancelled(self):
        """Raises an AbortedJobError if the current job is cancelled"""
        # is_set() replaces the deprecated camelCase isSet() alias
        if self.cancelled.is_set():
            raise AbortedJobError("Job was already cancelled")

    @classmethod
    def get_instance_count(cls):
        """Returns the number of JobHandler instances as seen by the
        garbage collector.
        """
        return sum(1 for obj in gc.get_objects() if isinstance(obj, cls))

    # pylint: disable=W0703
    @defer.inlineCallbacks
    def _log_job_externally(self, success=True):
        """Logs a job to the database.

        Also reports the job runtime to Graphite.

        :param success: job outcome; run() passes True, False or None
            (None when the job did nothing).
        """
        duration = self.get_current_runtime()
        # convert the timedelta to a float number of seconds
        duration_in_seconds = (duration.days * 86400 + duration.seconds +
                               duration.microseconds / 1e6)
        timestamp = time.time()

        def _create_record(timestamp):
            # runs in a database worker thread (see run_in_thread below)
            netbox = manage.Netbox.objects.get(id=self.netbox.id)
            if netbox.deleted_at:
                _logger.info(
                    "Not logging job to db; delete of this IP device"
                    " was requested at %s", netbox.deleted_at)
                return

            log = manage.IpdevpollJobLog(
                netbox_id=self.netbox.id,
                job_name=self.name,
                end_time=datetime.datetime.fromtimestamp(timestamp),
                duration=duration_in_seconds,
                success=success,
                interval=self.interval)
            log.save()

        def _log_to_graphite():
            prefix = metric_prefix_for_ipdevpoll_job(self.netbox.sysname,
                                                     self.name)
            runtime_path = prefix + ".runtime"
            runtime = (runtime_path, (timestamp, duration_in_seconds))
            send_metrics([runtime])

        _log_to_graphite()
        try:
            yield db.run_in_thread(_create_record, timestamp)
        except db.ResetDBConnectionError:
            pass  # this is being logged all over the place at the moment
        except Exception as error:
            # best effort only; a failed job log should not fail the job
            _logger.warning("failed to log job to database: %s", error)
Esempio n. 6
0
class MibRetriever(object):
    """Base class for functioning MIB retriever classes."""
    mib = None
    __metaclass__ = MibRetrieverMaker
    _logger = ContextLogger()

    def __init__(self, agent_proxy):
        """Create a new instance tied to an AgentProxy instance.

        :param agent_proxy: the AgentProxy used for all SNMP requests
            made by this retriever.
        """
        super(MibRetriever, self).__init__()
        self.agent_proxy = agent_proxy
        # touch _logger to initialize logging context right away
        # pylint: disable=W0104
        self._logger

    @defer.inlineCallbacks
    def get_next(self, object_name):
        """Walks the named object's subtree and returns the first value
        whose OID is prefixed by it.
        """
        oid = self.nodes[object_name].oid
        response = yield self.agent_proxy.walk(str(oid))
        if hasattr(response, 'items'):
            response = response.items()
        for key, value in response:
            if oid.is_a_prefix_of(key):
                defer.returnValue(value)

    def retrieve_column(self, column_name):
        """Retrieve the contents of a single MIB table column.

        Returns a deferred whose result is a dictionary:

          { row_index: column_value }

        """
        node = self.nodes[column_name]
        if node.raw_mib_data['nodetype'] != 'column':
            self._logger.debug("%s is not a table column", column_name)

        def format_result(result):
            # result keys may be OID objects/tuples or strings, depending on
            # snmp library used
            if node.oid not in result and str(node.oid) not in result:
                self._logger.debug(
                    "%s (%s) seems to be unsupported, result "
                    "keys were: %r", column_name, node.oid, result.keys())
                return {}
            varlist = result.get(node.oid, result.get(str(node.oid), None))

            # what remains after stripping the column's own OID prefix is
            # the row index
            return {OID(oid).strip_prefix(node.oid): value
                    for oid, value in varlist.items()}

        deferred = self.agent_proxy.getTable([str(node.oid)])
        deferred.addCallback(format_result)
        return deferred

    def retrieve_columns(self, column_names):
        """Retrieve a set of table columns.

        The table columns may come from different tables, as long as
        the table rows are indexed the same way.

        Returns a deferred whose result is a dictionary:

          { row_index: MibTableResultRow instance }

        """
        def sortkey(col):
            return self.nodes[col].oid

        columns = iter(sorted(column_names, key=sortkey))

        final_result = {}
        my_deferred = defer.Deferred()

        def result_aggregate(result, column):
            # merge one column's results into the shared row dict
            for row_index, value in result.items():
                if row_index not in final_result:
                    final_result[row_index] = \
                        MibTableResultRow(row_index, column_names)
                final_result[row_index][column] = value
            return True

        # schedule the next iteration (i.e. collect next column)
        def schedule_next(result=None):
            try:
                # the next() builtin works on both Python 2 and 3, unlike
                # the iterator .next() method, which Python 3 removed
                column = next(columns)
            except StopIteration:
                my_deferred.callback(final_result)
                return
            deferred = self.retrieve_column(column)
            deferred.addCallback(result_aggregate, column)
            deferred.addCallback(schedule_next)
            deferred.addErrback(my_deferred.errback)

        reactor.callLater(0, schedule_next)
        return my_deferred

    def retrieve_table(self, table_name):
        """Table retriever and formatter.

        Retrieves an entire MIB table.  Returns a deferred whose
        result is a dictionary:

          { row_index: MibTableResultRow instance }

        Each dictionary key is a row index (an oid suffix tuple).  Each
        dictionary value is a MibTableResultRow instance, which can be accessed
        as both a dictionary and a list.

        """
        table = self.tables[table_name]

        def resultFormatter(result):
            formatted_result = {}
            for varlist in result.values():
                # Build a table structure
                for oid in sorted(varlist.keys()):
                    if not table.table.oid.is_a_prefix_of(oid):
                        raise MibRetrieverError(
                            "Received wrong response from client,"
                            "%s is not in %s" % (oid, table.table.oid))

                    # Extract table position of value: the first
                    # sub-identifier after the row OID names the column,
                    # the rest is the row index
                    oid_suffix = OID(oid).strip_prefix(table.row.oid)
                    column_no = oid_suffix[0]
                    row_index = oid_suffix[1:]
                    if column_no not in table.reverse_column_index:
                        self._logger.warning(
                            "device response has bad table index %s in %s::%s, "
                            "ignoring", oid_suffix, self.mib['moduleName'],
                            table_name)
                        continue
                    column_name = table.reverse_column_index[column_no]

                    if row_index not in formatted_result:
                        formatted_result[row_index] = \
                            MibTableResultRow(row_index, table.columns.keys())
                    formatted_result[row_index][column_name] = varlist[oid]

            return formatted_result

        deferred = self.agent_proxy.getTable([str(table.table.oid)])
        deferred.addCallback(resultFormatter)
        return deferred

    @classmethod
    def translate_result(cls, result):
        """Applies each column's to_python translation rules to a table
        result, in place.

        Useful as a callback in a deferred chain after one of this
        class's retriever methods.

        :returns: the same result structure, with translated values.
        """
        nodes = cls.nodes
        for row in result.values():
            for column in row.keys():
                if column in nodes:
                    row[column] = nodes[column].to_python(row[column])
        return result
Esempio n. 7
0
class WorkerPool(object):
    """A pool of worker processes that jobs can be scheduled onto"""

    _logger = ContextLogger()

    def __init__(self, workers, max_jobs, threadpoolsize=None):
        """
        :param workers: the number of worker processes to keep running
        :param max_jobs: max jobs per worker before it is recycled
        :param threadpoolsize: threadpool size handed to each worker
        """
        # route twisted endpoint logging through HackLog
        twisted.internet.endpoints.log = HackLog
        self.workers = set()
        self.target_count = workers
        self.max_jobs = max_jobs
        self.threadpoolsize = threadpoolsize
        self.serial = 0
        self.jobs = {}
        for _count in range(self.target_count):
            self._spawn_worker()

    def worker_died(self, worker):
        """Forgets a dead worker and replaces it unless it was retiring"""
        self.workers.remove(worker)
        if not worker.done():
            self._spawn_worker()

    @inlineCallbacks
    def _spawn_worker(self):
        new_worker = yield Worker(self, self.threadpoolsize,
                                  self.max_jobs).start()
        self.workers.add(new_worker)

    def _cleanup(self, result, deferred):
        # the job finished one way or another; drop the bookkeeping entry
        _serial, worker = self.jobs.pop(deferred)
        worker.active_jobs -= 1
        return result

    def _execute(self, command, **kwargs):
        available = [w for w in self.workers if not w.done()]
        if not available:
            raise RuntimeError("No ready workers")
        # pick the least loaded worker
        worker = min(available, key=lambda w: w.active_jobs)
        self.serial += 1
        deferred = worker.execute(self.serial, command, **kwargs)
        if worker.done():
            # that was the worker's last job; line up a replacement
            self._spawn_worker()
        self.jobs[deferred] = (self.serial, worker)
        deferred.addBoth(self._cleanup, deferred)
        return deferred

    def cancel(self, deferred):
        """Asks the owning worker to cancel the job behind this deferred"""
        try:
            serial, worker = self.jobs[deferred]
        except KeyError:
            self._logger.debug("Cancelling job that isn't known")
            return
        return worker.cancel(serial)

    def execute_job(self, job, netbox, plugins=None, interval=None):
        """Schedules a job on some worker and returns its result deferred"""
        deferred = self._execute(Job,
                                 job=job,
                                 netbox=netbox,
                                 plugins=plugins,
                                 interval=interval)

        def handle_reschedule(result):
            delay = result.get('reschedule', 0)
            if delay:
                raise jobs.SuggestedReschedule(delay=delay)
            return result

        deferred.addCallback(handle_reschedule)
        deferred.addCallback(lambda response: response['result'])
        return deferred

    def log_summary(self):
        """Logs the state of the pool and of each worker"""
        self._logger.info("{active} out of {target} workers running".format(
            active=len(self.workers), target=self.target_count))
        for worker in self.workers:
            self._logger.info(" - ready {ready} active {active}"
                              " max {max} total {total}".format(
                                  ready=not worker.done(),
                                  active=worker.active_jobs,
                                  max=worker.max_concurrent_jobs,
                                  total=worker.total_jobs))