Example #1
    def test_pub_on_different_subtypes(self):
        ar = event.AsyncResult()
        gq = queue.Queue()
        self.count = 0

        def cb(event, *args, **kwargs):
            self.count += 1
            gq.put(event)
            if event.description == "end":
                ar.set()

        sub = EventSubscriber(event_type="ResourceModifiedEvent",
                              sub_type="st1",
                              callback=cb)
        sub.start()

        pub1 = EventPublisher(event_type="ResourceModifiedEvent")
        pub2 = EventPublisher(event_type="ContainerLifecycleEvent")

        pub1.publish_event(origin="two", sub_type="st2", description="2")
        pub2.publish_event(origin="three", sub_type="st1", description="3")
        pub1.publish_event(origin="one", sub_type="st1", description="1")
        pub1.publish_event(origin="four", sub_type="st1", description="end")

        ar.get(timeout=5)
        sub.stop()

        res = []
        for x in xrange(self.count):
            res.append(gq.get(timeout=5))

        self.assertEqual(len(res), 2)
        self.assertEqual(res[0].description, "1")
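The assertions follow from the subscription filter: of the four published events, only the two sent by pub1 with sub_type "st1" ("1" and "end") match both the subscriber's event type and sub-type, so exactly two events reach the callback, in publish order. A minimal sketch of the same pairing outside a test harness, assuming the pyon-style EventPublisher/EventSubscriber API used above (the origin value is hypothetical):

def on_event(event, *args, **kwargs):
    log.info('received %s', event.description)

sub = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="st1", callback=on_event)
sub.start()

pub = EventPublisher(event_type="ResourceModifiedEvent")
pub.publish_event(origin="dev_1", sub_type="st1", description="matches")    # delivered
pub.publish_event(origin="dev_1", sub_type="st2", description="filtered")   # dropped: sub_type mismatch

sub.stop()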
Example #2
class TransformPrime(TransformDataProcess):
    '''
    Transform with an incoming stream and an outgoing stream.

    Parameters:
      process.stream_id       Outgoing stream identifier.
      process.exchange_point  Route's exchange point.
      process.routing_key     Route's routing key.
      process.queue_name      Name of the queue to listen on.
      process.routes          Actor for each route: {stream_input_id: {stream_output_id: actor}}
    Either the stream_id or both the exchange_point and routing_key need to be provided.
    '''
    binding = ['output']
    def on_start(self):
        TransformDataProcess.on_start(self)
        self.pubsub_management = PubsubManagementServiceProcessClient(
            process=self)
        self.stored_values = StoredValueManager(self.container)
        self.input_data_product_ids = self.CFG.get_safe(
            'process.input_products', [])
        self.output_data_product_ids = self.CFG.get_safe(
            'process.output_products', [])
        self.lookup_docs = self.CFG.get_safe('process.lookup_docs', [])
        self.new_lookups = Queue()
        self.lookup_monitor = EventSubscriber(
            event_type=OT.ExternalReferencesUpdatedEvent,
            callback=self._add_lookups,
            auto_delete=True)
        self.lookup_monitor.start()

    def on_quit(self):
        self.lookup_monitor.stop()
        TransformDataProcess.on_quit(self)

    def _add_lookups(self, event, *args, **kwargs):
        if event.origin in self.input_data_product_ids + self.output_data_product_ids:
            if isinstance(event.reference_keys, list):
                self.new_lookups.put(event.reference_keys)

    @memoize_lru(100)
    def read_stream_def(self, stream_id):
        return self.pubsub_management.read_stream_definition(
            stream_id=stream_id)

    def recv_packet(self, msg, stream_route, stream_id):
        process_routes = self.CFG.get_safe('process.routes', {})
        for stream_in_id, routes in process_routes.iteritems():
            if stream_id == stream_in_id:
                for stream_out_id, actor in routes.iteritems():
                    if actor is None:
                        rdt_out = self._execute_transform(
                            msg, (stream_in_id, stream_out_id))
                        self.publish(rdt_out.to_granule(), stream_out_id)
                    else:
                        outgoing = self._execute_actor(
                            msg, actor, (stream_in_id, stream_out_id))
                        self.publish(outgoing, stream_out_id)

    def publish(self, msg, stream_out_id):
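        # Publishers are presumably bound to this process as attributes named
        # after the outgoing stream id, so getattr retrieves the right one.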
        publisher = getattr(self, stream_out_id)
        publisher.publish(msg)

    def _load_actor(self, actor):
        '''
        Returns the actor's callable execute method if it exists; otherwise
        re-raises the underlying ImportError or AttributeError.
        '''
        try:
            module = __import__(actor['module'], fromlist=[''])
        except ImportError:
            log.exception('Actor could not be loaded')
            raise
        try:
            cls = getattr(module, actor['class'])
        except AttributeError:
            log.exception('Module %s does not have class %s', repr(module),
                          actor['class'])
            raise
        try:
            execute = getattr(cls, 'execute')
        except AttributeError:
            log.exception('Actor class does not contain execute method')
            raise
        return execute

    def _execute_actor(self, msg, actor, streams):
        stream_in_id, stream_out_id = streams
        stream_def_out = self.read_stream_def(stream_out_id)
        params = self.CFG.get_safe('process.params', {})
        config = self.CFG.get_safe('process')
        # hand the outgoing stream definition to the actor via its params
        params['stream_def'] = stream_def_out._id
        executor = self._load_actor(actor)
        try:
            rdt_out = executor(msg, None, config, params, None)
        except Exception:
            log.exception('Error running actor for %s', self.id)
            raise
        return rdt_out

    def _merge_pdicts(self, pdict1, pdict2):
        incoming_pdict = ParameterDictionary.load(pdict1)
        outgoing_pdict = ParameterDictionary.load(pdict2)

        merged_pdict = ParameterDictionary()
        for k, v in incoming_pdict.iteritems():
            ordinal, v = v
            if k not in merged_pdict:
                merged_pdict.add_context(v)
        for k, v in outgoing_pdict.iteritems():
            ordinal, v = v
            if k not in merged_pdict:
                merged_pdict.add_context(v)
        return merged_pdict

    def _merge_rdt(self, stream_def_in, stream_def_out):
        incoming_pdict_dump = stream_def_in.parameter_dictionary
        outgoing_pdict_dump = stream_def_out.parameter_dictionary

        merged_pdict = self._merge_pdicts(incoming_pdict_dump,
                                          outgoing_pdict_dump)
        rdt_temp = RecordDictionaryTool(param_dictionary=merged_pdict)
        return rdt_temp

    def _get_lookup_value(self, lookup_value):
        if not self.new_lookups.empty():
            new_values = self.new_lookups.get()
            self.lookup_docs = new_values + self.lookup_docs

        lookup_value_document_keys = self.lookup_docs
        for key in lookup_value_document_keys:
            try:
                document = self.stored_values.read_value(key)
                if lookup_value in document:
                    return document[lookup_value]
            except NotFound:
                log.warning('Specified lookup document does not exist')

        return None

    def _execute_transform(self, msg, streams):
        stream_in_id, stream_out_id = streams
        stream_def_in = self.read_stream_def(stream_in_id)
        stream_def_out = self.read_stream_def(stream_out_id)

        rdt_temp = self._merge_rdt(stream_def_in, stream_def_out)

        rdt_in = RecordDictionaryTool.load_from_granule(msg)
        for field in rdt_temp.fields:
            if not isinstance(
                    rdt_temp._pdict.get_context(field).param_type,
                    ParameterFunctionType):
                try:
                    rdt_temp[field] = rdt_in[field]
                except KeyError:
                    pass

        rdt_temp.fetch_lookup_values()

        for lookup_field in rdt_temp.lookup_values():
            s = lookup_field
            stored_value = self._get_lookup_value(
                rdt_temp.context(s).lookup_value)
            if stored_value is not None:
                rdt_temp[s] = stored_value

        for field in rdt_temp.fields:
            if isinstance(
                    rdt_temp._pdict.get_context(field).param_type,
                    ParameterFunctionType):
                # The self-assignment is intentional: reading a parameter-function
                # field evaluates the function, and assigning stores the result.
                rdt_temp[field] = rdt_temp[field]

        rdt_out = RecordDictionaryTool(stream_definition_id=stream_def_out._id)

        for field in rdt_out.fields:
            rdt_out[field] = rdt_temp[field]

        return rdt_out
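recv_packet() and _load_actor() together imply the shape of the process.routes configuration: a nested mapping of input stream id to {output stream id: actor}, where an actor of None selects the built-in transform path and a dict naming a module and class selects _execute_actor(). A hedged sketch, with hypothetical stream ids and actor names:

# Hypothetical configuration sketch; the shape follows from recv_packet() and _load_actor().
process_config = {
    'routes': {
        'stream_in_id_1': {
            'stream_out_id_1': None,                    # None -> _execute_transform()
            'stream_out_id_2': {                        # dict -> _execute_actor()
                'module': 'mypkg.transforms.my_actor',  # hypothetical module path
                'class': 'MyActor',                     # class must define execute()
            },
        },
    },
}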
Example #3
class ContainerManager(object):
    def __init__(self, container, handlers=DEFAULT_HANDLERS):
        self.container = container
        self.running = False
        # make sure start() completes before an event is handled,
        # and any event is either handled before stop() begins,
        # or the handler begins after stop() completes and the event is dropped
        self.lock = Lock()
        self.handlers = handlers[:]

    def start(self):
        self.container.stats_mgr = ContainerStatsManager(self.container)
        self.container.stats_mgr.start()

        # create queue listener and publisher
        self.sender = EventPublisher(event_type="ContainerManagementResult")
        self.receiver = EventSubscriber(event_type="ContainerManagementRequest", callback=self._receive_event)
        with self.lock:
            self.running = True
            self.receiver.start()
        log.debug('Container ready for container management requests')

    def stop(self):
        log.debug('container management stopping')
        with self.lock:
            self.receiver.stop()
            self.sender.close()
            self.running = False
        log.debug('container management stopped')

        self.container.stats_mgr.stop()

    def add_handler(self, handler):
        self.handlers.append(handler)

    def _get_handlers(self, action):
        out = []
        for handler in self.handlers:
            if handler.can_handle_request(action):
                out.append(handler)
        return out

    def _receive_event(self, event, headers):
        with self.lock:
            if not isinstance(event, ContainerManagementRequest):
                log.trace('ignoring wrong type event: %r', event)
                return
            if not self.running:
                log.warn('ignoring admin message received after shutdown: %s', event.action)
                return
            predicate = ContainerSelector.from_object(event.predicate)
            if predicate.should_handle(self.container):
                log.trace('handling admin message: %s', event.action)
                self._perform_action(event.action)
            else:
                log.trace('ignoring admin action: %s', event.action)
                if SEND_RESULT_IF_NOT_SELECTED:
                    self.sender.publish_event(origin=self.container.id, action=event.action, outcome='not selected')
                    log.debug('received action: %s, outcome: not selected', event.action)

    def _perform_action(self, action):
        handlers = self._get_handlers(action)
        if not handlers:
            log.info('action accepted but no handlers found: %s', action)
            result = 'unhandled'
            self.sender.publish_event(origin=self.container.id, action=action, outcome=str(result))
            log.debug('received action: %s, outcome: %s', action, result)
        else:
            for handler in handlers:
                try:
                    result = handler.handle_request(action) or "completed"
                except Exception as e:
                    log.error("handler %r failed to perform action: %s", handler, action, exc_info=True)
                    result = e
                self.sender.publish_event(origin=self.container.id, action=action, outcome=str(result))
                log.debug('performed action: %s, outcome: %s', action, result)
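_get_handlers() and _perform_action() define the whole handler contract: a handler needs can_handle_request(action) returning a boolean, and handle_request(action) whose return value (or "completed" when falsy) becomes the published outcome. A minimal sketch, with a hypothetical handler class and action check:

class LogLevelHandler(object):
    def can_handle_request(self, action):
        # hypothetical check; match on whatever identifies the action
        return getattr(action, 'type_', None) == 'ChangeLogLevel'

    def handle_request(self, action):
        # perform the action; the return value is published as the outcome
        return 'completed'

manager = ContainerManager(container)   # 'container' assumed available
manager.add_handler(LogLevelHandler())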
Example #4
class EventPersister(SimpleProcess):
    def on_init(self):
        # Time in between event persists
        self.persist_interval = float(
            self.CFG.get_safe("process.event_persister.persist_interval", 1.0))

        self.persist_blacklist = self.CFG.get_safe(
            "process.event_persister.persist_blacklist", {})

        self._event_type_blacklist = [
            entry['event_type'] for entry in self.persist_blacklist
            if entry.get('event_type', None) and len(entry) == 1
        ]
        self._complex_blacklist = [
            entry for entry in self.persist_blacklist
            if not (entry.get('event_type', None) and len(entry) == 1)
        ]
        if self._complex_blacklist:
            log.warn(
                "EventPersister does not yet support complex blacklist expressions: %s",
                self._complex_blacklist)

        # Holds received events FIFO in synchronized queue
        self.event_queue = Queue()

        # Temporarily holds the list of events to persist while datastore operations are not yet complete.
        # Events to persist remain here if a datastore operation occasionally fails.
        self.events_to_persist = None

        # Number of unsuccessful consecutive attempts to persist during loop
        self.failure_count = 0

        # bookkeeping for greenlet
        self._persist_greenlet = None
        self._terminate_persist = Event()  # when set, exits the persister greenlet

        # The event subscriber
        self.event_sub = None

        process_plugin_defs = self.CFG.get_safe(
            "process.event_persister.process_plugins", {}) or {}

        # Registered event process plugins
        self.process_plugins = {}
        for plugin_name, plugin_cls, plugin_args in process_plugin_defs:
            try:
                plugin = named_any(plugin_cls)(**plugin_args)
                self.process_plugins[plugin_name] = plugin
                log.info("Loaded event processing plugin %s (%s)", plugin_name,
                         plugin_cls)
            except Exception as ex:
                log.error(
                    "Cannot instantiate event processing plugin %s (%s): %s",
                    plugin_name, plugin_cls, ex)

    def on_start(self):
        # Persister thread
        self._persist_greenlet = spawn(self._persister_loop,
                                       self.persist_interval)
        log.debug(
            'EventPersister persist greenlet started in "%s" (interval %s)',
            self.__class__.__name__, self.persist_interval)

        # Event subscription
        self.event_sub = EventSubscriber(pattern=EventSubscriber.ALL_EVENTS,
                                         callback=self._on_event,
                                         queue_name="event_persister",
                                         auto_delete=False)

        self.event_sub.start()

    def on_quit(self):
        # Stop event subscriber
        self.event_sub.stop()

        # tell the trigger greenlet we're done
        self._terminate_persist.set()

        # wait on the greenlets to finish cleanly
        self._persist_greenlet.join(timeout=5)

        # Check if there are still unsaved events in the queue and persist them
        leftover_events = self.event_queue.qsize()
        if leftover_events:
            log.info(
                "Storing {} events during event_persister shutdown".format(
                    leftover_events))
            events_to_process = [
                self.event_queue.get() for x in xrange(leftover_events)
            ]
            events_to_persist = [
                x for x in events_to_process if not self._in_blacklist(x)
            ]
            try:
                self._persist_events(events_to_persist)
            except Exception:
                log.exception("Could not persist all events")

    def _on_event(self, event, *args, **kwargs):
        self.event_queue.put(event)

    def _in_blacklist(self, event):
        if event.type_ in self._event_type_blacklist:
            return True
        if event.base_types:
            for base_type in event.base_types:
                if base_type in self._event_type_blacklist:
                    return True
            # TODO: Complex event blacklist
        return False

    def _persister_loop(self, persist_interval):
        log.debug('Starting event persister thread with persist_interval=%s',
                  persist_interval)

        # Event.wait returns False on timeout (and True when set in on_quit), so we use this to both exit cleanly and do our timeout in a loop
        while not self._terminate_persist.wait(timeout=persist_interval):
            try:
                # leftover events_to_persist indicate previous attempt did not succeed
                if self.events_to_persist and self.failure_count > 2:
                    bad_events = []
                    log.warn("Attempting to persist %s events individually" %
                             (len(self.events_to_persist)))
                    for event in self.events_to_persist:
                        try:
                            self.container.event_repository.put_event(event)
                        except Exception:
                            bad_events.append(event)

                    if len(self.events_to_persist) != len(bad_events):
                        log.warn(
                            "Succeeded in persisting some of the events - the rest must be bad"
                        )
                        self._log_events(bad_events)
                    elif bad_events:
                        log.error("Discarding %s events after %s attempts!!" %
                                  (len(bad_events), self.failure_count))
                        self._log_events(bad_events)

                    self.events_to_persist = None
                    self.failure_count = 0

                elif self.events_to_persist:
                    # There was an error last time and we need to retry
                    log.info("Retry persisting %s events" %
                             len(self.events_to_persist))
                    self._persist_events(self.events_to_persist)
                    self.events_to_persist = None

                # process ALL events (not retried on failure like persisting is)
                events_to_process = [
                    self.event_queue.get()
                    for x in xrange(self.event_queue.qsize())
                ]
                # only persist events not in blacklist
                self.events_to_persist = [
                    x for x in events_to_process if not self._in_blacklist(x)
                ]

                try:
                    self._persist_events(self.events_to_persist)
                finally:
                    self._process_events(events_to_process)
                self.events_to_persist = None
                self.failure_count = 0
            except Exception as ex:
                # Note: Persisting events may fail occasionally during test runs (when the "events" datastore is force
                # deleted and recreated). We'll log and keep retrying forever.
                log.exception(
                    "Failed to persist %s received events. Will retry next cycle"
                    % len(self.events_to_persist or []))
                self.failure_count += 1
                self._log_events(self.events_to_persist)

    def _persist_events(self, event_list):
        if event_list:
            self.container.event_repository.put_events(event_list)

    def _process_events(self, event_list):
        for plugin_name, plugin in self.process_plugins.iteritems():
            try:
                plugin.process_events(event_list)
            except Exception as ex:
                log.exception("Error processing events in plugin %s",
                              plugin_name)

    def _log_events(self, events):
        events_str = pprint.pformat([event.__dict__
                                     for event in events]) if events else ""
        log.warn("EVENTS:\n%s", events_str)
Example #5
class EventPersister(StandaloneProcess):

    def on_init(self):
        # Time in between event persists
        self.persist_interval = float(self.CFG.get_safe("process.event_persister.persist_interval", 1.0))

        self.persist_blacklist = self.CFG.get_safe("process.event_persister.persist_blacklist", {})

        self._event_type_blacklist = [entry['event_type'] for entry in self.persist_blacklist if entry.get('event_type', None) and len(entry) == 1]
        self._complex_blacklist = [entry for entry in self.persist_blacklist if not (entry.get('event_type', None) and len(entry) == 1)]
        if self._complex_blacklist:
            log.warn("EventPersister does not yet support complex blacklist expressions: %s", self._complex_blacklist)

        # Holds received events FIFO in a synchronized queue
        self.event_queue = Queue()

        # Temporarily holds the list of events to persist while datastore operations are not yet complete.
        # Events to persist remain here if a datastore operation occasionally fails.
        self.events_to_persist = None

        # Number of unsuccessful attempts to persist in a row
        self.failure_count = 0

        # bookkeeping for greenlet
        self._persist_greenlet = None
        self._terminate_persist = Event() # when set, exits the persister greenlet

        # The event subscriber
        self.event_sub = None

        # Registered event process plugins
        self.process_plugins = {}
        for plugin_name, plugin_cls, plugin_args in PROCESS_PLUGINS:
            try:
                plugin = named_any(plugin_cls)(**plugin_args)
                self.process_plugins[plugin_name] = plugin
                log.info("Loaded event processing plugin %s (%s)", plugin_name, plugin_cls)
            except Exception as ex:
                log.error("Cannot instantiate event processing plugin %s (%s): %s", plugin_name, plugin_cls, ex)


    def on_start(self):
        # Persister thread
        self._persist_greenlet = spawn(self._persister_loop, self.persist_interval)
        log.debug('EventPersister persist greenlet started in "%s" (interval %s)', self.__class__.__name__, self.persist_interval)

        # Event subscription
        self.event_sub = EventSubscriber(pattern=EventSubscriber.ALL_EVENTS,
                                         callback=self._on_event,
                                         queue_name="event_persister")

        self.event_sub.start()

    def on_quit(self):
        # Stop event subscriber
        self.event_sub.stop()

        # tell the trigger greenlet we're done
        self._terminate_persist.set()

        # wait on the greenlets to finish cleanly
        self._persist_greenlet.join(timeout=5)

    def _on_event(self, event, *args, **kwargs):
        self.event_queue.put(event)

    def _in_blacklist(self, event):
        if event.type_ in self._event_type_blacklist:
            return True
        if event.base_types:
            for base_type in event.base_types:
                if base_type in self._event_type_blacklist:
                    return True
            # TODO: Complex event blacklist
        return False

    def _persister_loop(self, persist_interval):
        log.debug('Starting event persister thread with persist_interval=%s', persist_interval)

        # Event.wait returns False on timeout (and True when set in on_quit), so we use this to both exit cleanly and do our timeout in a loop
        while not self._terminate_persist.wait(timeout=persist_interval):
            try:
                # leftover events_to_persist indicate previous attempt did not succeed
                if self.events_to_persist and self.failure_count > 2:
                    bad_events = []
                    log.warn("Attempting to persist %s events individually" % (len(self.events_to_persist)))
                    for event in self.events_to_persist:
                        try:
                            self.container.event_repository.put_event(event)
                        except Exception:
                            bad_events.append(event)

                    if len(self.events_to_persist) != len(bad_events):
                        log.warn("Succeeded to persist some of the events - rest must be bad")
                        self._log_events(bad_events)
                    elif bad_events:
                        log.error("Discarding %s events after %s attempts!!" % (len(bad_events), self.failure_count))
                        self._log_events(bad_events)

                    self.events_to_persist = None
                    self.failure_count = 0

                elif self.events_to_persist:
                    # There was an error last time and we need to retry
                    log.info("Retry persisting %s events" % len(self.events_to_persist))
                    self._persist_events(self.events_to_persist)
                    self.events_to_persist = None

                # process ALL events (not retried on failure like persisting is)
                events_to_process = [self.event_queue.get() for x in xrange(self.event_queue.qsize())]
                # only persist events not in blacklist
                self.events_to_persist = [x for x in events_to_process if not self._in_blacklist(x)]

                try:
                    self._persist_events(self.events_to_persist)
                finally:
                    self._process_events(events_to_process)
                self.events_to_persist = None
                self.failure_count = 0
            except Exception as ex:
                # Note: Persisting events may fail occasionally during test runs (when the "events" datastore is force
                # deleted and recreated). We'll log and keep retrying forever.
                log.exception("Failed to persist %s received events. Will retry next cycle" % len(self.events_to_persist))
                self.failure_count += 1
                self._log_events(self.events_to_persist)

    def _persist_events(self, event_list):
        if event_list:
            self.container.event_repository.put_events(event_list)

    def _process_events(self, event_list):
        for plugin_name, plugin in self.process_plugins.iteritems():
            try:
                plugin.process_events(event_list)
            except Exception as ex:
                log.exception("Error processing events in plugin %s", plugin_name)

    def _log_events(self, events):
        events_str = pprint.pformat([event.__dict__ for event in events]) if events else ""
        log.warn("EVENTS:\n%s", events_str)
Example #6
class GovernanceController(object):
    """
    This is a singleton object which handles governance functionality in the container.
    """

    def __init__(self, container):
        log.debug('GovernanceController.__init__()')
        self.container = container
        self.enabled = False
        self.interceptor_by_name_dict = dict()
        self.interceptor_order = []
        self.policy_decision_point_manager = None
        self.governance_dispatcher = None

        # Holds a list per service operation of policy methods to check before the op in a process is allowed to be called
        self._service_op_preconditions = dict()

        self._is_container_org_boundary = False
        self._container_org_name = None
        self._container_org_id = None

    def start(self):

        log.debug("GovernanceController starting ...")

        self._CFG = CFG

        self.enabled = CFG.get_safe('interceptor.interceptors.governance.config.enabled', False)

        log.info("GovernanceInterceptor enabled: %s" % str(self.enabled))

        self.policy_event_subscriber = None

        # Containers default to not being an Org boundary, and to the ION root Org
        self._is_container_org_boundary = CFG.get_safe('container.org_boundary', False)
        self._container_org_name = CFG.get_safe('container.org_name', CFG.get_safe('system.root_org', 'ION'))
        self._container_org_id = None
        self._system_root_org_name = CFG.get_safe('system.root_org', 'ION')

        self._is_root_org_container = (self._container_org_name == self._system_root_org_name)

        self.system_actor_id = None
        self.system_actor_user_header = None

        if self.enabled:

            config = CFG.get_safe('interceptor.interceptors.governance.config')

            self.initialize_from_config(config)

            self.policy_event_subscriber = EventSubscriber(event_type=OT.PolicyEvent, callback=self.policy_event_callback)
            self.policy_event_subscriber.start()

            self.rr_client = ResourceRegistryServiceProcessClient(node=self.container.node, process=self.container)
            self.policy_client = PolicyManagementServiceProcessClient(node=self.container.node, process=self.container)

    def initialize_from_config(self, config):

        self.governance_dispatcher = GovernanceDispatcher()

        self.policy_decision_point_manager = PolicyDecisionPointManager(self)

        if 'interceptor_order' in config:
            self.interceptor_order = config['interceptor_order']

        if 'governance_interceptors' in config:
            gov_ints = config['governance_interceptors']

            for name in gov_ints:
                interceptor_def = gov_ints[name]

                # Instantiate and put in by_name array
                parts = interceptor_def["class"].split('.')
                modpath = ".".join(parts[:-1])
                classname = parts[-1]
                module = __import__(modpath, fromlist=[classname])
                classobj = getattr(module, classname)
                classinst = classobj()

                # Put in by_name_dict for possible re-use
                self.interceptor_by_name_dict[name] = classinst

    def stop(self):
        log.debug("GovernanceController stopping ...")

        if self.policy_event_subscriber is not None:
            self.policy_event_subscriber.stop()


    @property
    def is_container_org_boundary(self):
        return self._is_container_org_boundary

    @property
    def container_org_name(self):
        return self._container_org_name

    @property
    def system_root_org_name(self):
        return self._system_root_org_name

    @property
    def is_root_org_container(self):
        return self._is_root_org_container

    @property
    def CFG(self):
        return self._CFG


    @property
    def rr(self):
        """
        Returns the active resource registry instance or client.

        Used to directly contact the resource registry via the container if available,
        otherwise the messaging client to the RR service is returned.
        """
        if self.container.has_capability('RESOURCE_REGISTRY'):
            return self.container.resource_registry

        return self.rr_client


    def get_container_org_boundary_id(self):
        """
        Returns the permanent org identifier configured for this container
        @return:
        """

        if not self._is_container_org_boundary:
            return None

        if self._container_org_id is None:
            orgs, _ = self.rr.find_resources(restype=RT.Org)
            for org in orgs:
                if org.org_governance_name == self._container_org_name:
                    self._container_org_id = org._id
                    break

        return self._container_org_id

    def process_incoming_message(self, invocation):
        """
        The GovernanceController hook into the incoming message interceptor stack
        @param invocation:
        @return:
        """
        self.process_message(invocation, self.interceptor_order, 'incoming')
        return self.governance_dispatcher.handle_incoming_message(invocation)

    def process_outgoing_message(self, invocation):
        """
        The GovernanceController hook into the outgoing message interceptor stack
        @param invocation:
        @return:
        """
        self.process_message(invocation, reversed(self.interceptor_order), 'outgoing')
        return self.governance_dispatcher.handle_outgoing_message(invocation)

    def process_message(self, invocation, interceptor_list, method):
        """
        The GovernanceController hook to iterate over the interceptors to call each one and evaluate the annotations
        to see what actions should be done.
        @TODO - may want to make this more dynamic instead of hard coded for the moment.
        @param invocation:
        @param interceptor_list:
        @param method:
        @return:
        """
        for int_name in interceptor_list:
            class_inst = self.interceptor_by_name_dict[int_name]
            getattr(class_inst, method)(invocation)

            # Stop processing the message if an interceptor found an issue with it.
            if invocation.message_annotations.get(GovernanceDispatcher.CONVERSATION__STATUS_ANNOTATION, None) == GovernanceDispatcher.STATUS_REJECT or \
               invocation.message_annotations.get(GovernanceDispatcher.POLICY__STATUS_ANNOTATION, None) == GovernanceDispatcher.STATUS_REJECT:
                break

        return invocation



    #Manage all of the policies in the container

    def policy_event_callback(self, *args, **kwargs):
        """
        The generic policy event callback for dispatching policy-related events

        @param args:
        @param kwargs:
        @return:
        """
        # The system actor may not exist yet at startup, so look it up lazily here
        if self.system_actor_id is None:
            system_actor = get_system_actor()
            if system_actor is not None:
                self.system_actor_id = system_actor._id
                self.system_actor_user_header = get_system_actor_header()

        policy_event = args[0]
        if policy_event.type_ == OT.ResourcePolicyEvent:
            self.resource_policy_event_callback(*args, **kwargs)
        elif policy_event.type_ == OT.RelatedResourcePolicyEvent:
            self.resource_policy_event_callback(*args, **kwargs)
        elif policy_event.type_ == OT.ServicePolicyEvent:
            self.service_policy_event_callback(*args, **kwargs)

    def resource_policy_event_callback(self, *args, **kwargs):
        """
        The ResourcePolicyEvent handler

        @param args:
        @param kwargs:
        @return:
        """
        resource_policy_event = args[0]
        log.debug('Resource policy event received: %s', str(resource_policy_event.__dict__))

        policy_id = resource_policy_event.origin
        resource_id = resource_policy_event.resource_id
        delete_policy = (resource_policy_event.sub_type == 'DeletePolicy')

        self.update_resource_access_policy(resource_id, delete_policy)

    def service_policy_event_callback(self, *args, **kwargs):
        """
        The ServicePolicyEvent handler

        @param args:
        @param kwargs:
        @return:
        """
        service_policy_event = args[0]
        log.debug('Service policy event received: %s', str(service_policy_event.__dict__))

        policy_id = service_policy_event.origin
        service_name = service_policy_event.service_name
        service_op = service_policy_event.op
        delete_policy = (service_policy_event.sub_type == 'DeletePolicy')

        if service_name:
            if self.container.proc_manager.is_local_service_process(service_name):
                self.update_service_access_policy(service_name, service_op, delete_policy=delete_policy)
            elif self.container.proc_manager.is_local_agent_process(service_name):
                self.update_service_access_policy(service_name, service_op, delete_policy=delete_policy)

        else:
            self.update_common_service_access_policy()



    def reset_policy_cache(self):
        """
        The function to empty and reload the container's policy caches

        @return:
        """
        log.info('Resetting policy cache')

        # First remove all cached policies and precondition functions that are not hard-wired
        self._reset_container_policy_caches()

        #Then load the common service access policies since they are shared across services
        self.update_common_service_access_policy()

        #Now iterate over the processes running in the container and reload their policies
        proc_list = self.container.proc_manager.list_local_processes()
        for proc in proc_list:
            self.update_container_policies(proc)


    def _reset_container_policy_caches(self):
        self.policy_decision_point_manager.clear_policy_cache()
        self.unregister_all_process_policy_preconditions()

    def update_container_policies(self, process_instance, safe_mode=False):
        """
        This must be called after registering a new process to load any applicable policies

        @param process_instance:
        @return:
        """

        #This method can be called before policy management service is available during system startup
        if safe_mode and not self._is_policy_management_service_available():
            if not is_testing():
                log.warn("Requested update_container_policies() but ignore - Policy MS not available")
            return

        # The system actor may not exist yet at startup, so look it up lazily here
        if self.system_actor_id is None:
            system_actor = get_system_actor()
            if system_actor is not None:
                self.system_actor_id = system_actor._id
                self.system_actor_user_header = get_system_actor_header()

        if process_instance._proc_type == SERVICE_PROCESS_TYPE:

            # look to load any existing policies for this service

            self.update_service_access_policy(process_instance._proc_listen_name)

        elif process_instance._proc_type == AGENT_PROCESS_TYPE:

            # look to load any existing policies for this agent service
            if process_instance.resource_type is None:
                self.update_service_access_policy(process_instance.name)
            else:
                self.update_service_access_policy(process_instance.resource_type)

            if process_instance.resource_id:
                # look to load any existing policies for this resource
                self.update_resource_access_policy(process_instance.resource_id)


    def update_resource_access_policy(self, resource_id, delete_policy=False):

        if self.policy_decision_point_manager is not None:

            try:
                policy_rules = self.policy_client.get_active_resource_access_policy_rules(resource_id, headers=self.system_actor_user_header)
                self.policy_decision_point_manager.load_resource_policy_rules(resource_id, policy_rules)

            except Exception as e:
                # If the resource does not exist, just ignore it - but log a warning.
                log.warn("The resource %s is not found or there was an error applying access policy: %s" % (resource_id, e.message))
Example #7
class GovernanceController(object):
    """
    This is a singleton object which handles governance functionality in the container.
    Registers event callback for PolicyEvent to update local policies on change.
    """

    def __init__(self, container):
        log.debug('GovernanceController.__init__()')
        self.container = container
        self.enabled = False
        self.interceptor_by_name_dict = {}
        self.interceptor_order = []
        self.policy_decision_point_manager = None
        self.governance_dispatcher = None

        # Holds a list per service operation of policy methods to be called before operation is invoked
        self._service_op_preconditions = {}
        # Holds a list per process operation of policy methods to be called before operation is invoked
        self._process_op_preconditions = {}

        self._is_container_org_boundary = False
        self._container_org_name = None
        self._container_org_id = None

        # For policy debugging purposes. Keeps a list of most recent policy updates for later readout
        self._policy_update_log = []
        self._policy_snapshot = None

    def start(self):
        log.debug("GovernanceController starting ...")
        self._CFG = CFG

        self.enabled = CFG.get_safe('interceptor.interceptors.governance.config.enabled', False)
        if not self.enabled:
            log.warn("GovernanceInterceptor disabled by configuration")
        self.policy_event_subscriber = None

        # Containers default to not Org Boundary and ION Root Org
        self._is_container_org_boundary = CFG.get_safe('container.org_boundary', False)
        self._container_org_name = CFG.get_safe('container.org_name', CFG.get_safe('system.root_org', 'ION'))
        self._container_org_id = None
        self._system_root_org_name = CFG.get_safe('system.root_org', 'ION')

        self._is_root_org_container = (self._container_org_name == self._system_root_org_name)

        self.system_actor_id = None
        self.system_actor_user_header = None

        self.rr_client = ResourceRegistryServiceProcessClient(process=self.container)
        self.policy_client = PolicyManagementServiceProcessClient(process=self.container)

        if self.enabled:
            config = CFG.get_safe('interceptor.interceptors.governance.config')
            self.initialize_from_config(config)

            self.policy_event_subscriber = EventSubscriber(event_type=OT.PolicyEvent, callback=self.policy_event_callback)
            self.policy_event_subscriber.start()

            self._policy_snapshot = self._get_policy_snapshot()
            self._log_policy_update("start_governance_ctrl", message="Container start")

    def initialize_from_config(self, config):
        self.governance_dispatcher = GovernanceDispatcher()
        self.policy_decision_point_manager = PolicyDecisionPointManager(self)

        self.interceptor_order = config.get('interceptor_order', None) or []
        gov_ints = config.get('governance_interceptors', None) or {}
        for name in gov_ints:
            interceptor_def = gov_ints[name]
            classobj = named_any(interceptor_def["class"])
            classinst = classobj()
            self.interceptor_by_name_dict[name] = classinst

    def _ensure_system_actor(self):
        """Make sure we have a handle for the system actor"""
        if self.system_actor_id is None:
            system_actor = get_system_actor()
            if system_actor is not None:
                self.system_actor_id = system_actor._id
                self.system_actor_user_header = get_system_actor_header(system_actor)

    def stop(self):
        log.debug("GovernanceController stopping ...")

        if self.policy_event_subscriber is not None:
            self.policy_event_subscriber.stop()

    @property
    def is_container_org_boundary(self):
        return self._is_container_org_boundary

    @property
    def container_org_name(self):
        return self._container_org_name

    @property
    def system_root_org_name(self):
        return self._system_root_org_name

    @property
    def is_root_org_container(self):
        return self._is_root_org_container

    @property
    def CFG(self):
        return self._CFG


    @property
    def rr(self):
        """Returns the active resource registry instance if available in the container or service client.
        """
        if self.container.has_capability('RESOURCE_REGISTRY'):
            return self.container.resource_registry
        return self.rr_client


    def get_container_org_boundary_id(self):
        """Returns the permanent org identifier configured for this container
        """
        if not self._is_container_org_boundary:
            return None

        if self._container_org_id is None:
            org_ids, _ = self.rr.find_resources_ext(restype=RT.Org, attr_name="org_governance_name",
                                                    attr_value=self._container_org_name, id_only=True)
            if org_ids:
                self._container_org_id = org_ids[0]

        return self._container_org_id

    # --- Interceptor management

    def process_incoming_message(self, invocation):
        """The GovernanceController hook into the incoming message interceptor stack
        """
        self.process_message(invocation, self.interceptor_order, Invocation.PATH_IN)
        return self.governance_dispatcher.handle_incoming_message(invocation)

    def process_outgoing_message(self, invocation):
        """The GovernanceController hook into the outgoing message interceptor stack
        """
        self.process_message(invocation, reversed(self.interceptor_order), Invocation.PATH_OUT)
        return self.governance_dispatcher.handle_outgoing_message(invocation)

    def process_message(self, invocation, interceptor_list, method):
        """
        The GovernanceController hook to iterate over the interceptors to call each one and
        evaluate the annotations to see what actions should be done.
        """
        for int_name in interceptor_list:
            interceptor_obj = self.interceptor_by_name_dict[int_name]
            interceptor_func = getattr(interceptor_obj, method)
            # Invoke interceptor function for designated path
            interceptor_func(invocation)

            # Stop processing message if an issue with the message was found by an interceptor
            if invocation.message_annotations.get(GovernanceDispatcher.CONVERSATION__STATUS_ANNOTATION, None) == GovernanceDispatcher.STATUS_REJECT or \
               invocation.message_annotations.get(GovernanceDispatcher.POLICY__STATUS_ANNOTATION, None) == GovernanceDispatcher.STATUS_REJECT:
                break

        return invocation

    # --- Container policy management

    def policy_event_callback(self, policy_event, *args, **kwargs):
        """Generic policy event handler for dispatching policy related events.
        """
        self._ensure_system_actor()

        log.info("Received policy event: %s", policy_event)

        if policy_event.type_ == OT.ResourcePolicyEvent:
            self.resource_policy_event_callback(policy_event, *args, **kwargs)
        elif policy_event.type_ == OT.RelatedResourcePolicyEvent:
            self.resource_policy_event_callback(policy_event, *args, **kwargs)
        elif policy_event.type_ == OT.ServicePolicyEvent:
            self.service_policy_event_callback(policy_event, *args, **kwargs)

        self._log_policy_update("policy_event_callback",
                                message="Event processed",
                                event=policy_event)

    def service_policy_event_callback(self, service_policy_event, *args, **kwargs):
        """The ServicePolicyEvent handler
        """
        log.debug('Service policy event: %s', str(service_policy_event.__dict__))

        policy_id = service_policy_event.origin
        service_name = service_policy_event.service_name
        service_op = service_policy_event.op
        delete_policy = (service_policy_event.sub_type == 'DeletePolicy')

        if service_name:
            if self.container.proc_manager.is_local_service_process(service_name):
                self.update_service_access_policy(service_name, service_op, delete_policy=delete_policy)
            elif self.container.proc_manager.is_local_agent_process(service_name):
                self.update_service_access_policy(service_name, service_op, delete_policy=delete_policy)

        else:
            self.update_common_service_access_policy()

    def resource_policy_event_callback(self, resource_policy_event, *args, **kwargs):
        """The ResourcePolicyEvent handler
        """
        log.debug('Resource policy event: %s', str(resource_policy_event.__dict__))

        policy_id = resource_policy_event.origin
        resource_id = resource_policy_event.resource_id
        delete_policy = (resource_policy_event.sub_type == 'DeletePolicy')

        self.update_resource_access_policy(resource_id, delete_policy)

    def reset_policy_cache(self):
        """Empty and reload the container's policy caches.
        Reload by getting policy for each of the container's processes and common policy.
        """
        log.info('Resetting policy cache')

        # First remove all cached policies and operation precondition functions
        self._clear_container_policy_caches()

        # Load the common service access policies since they are shared across services
        self.update_common_service_access_policy()

        # Iterate over the processes running in the container and reload their policies
        proc_list = self.container.proc_manager.list_local_processes()
        for proc in proc_list:
            self.update_process_policies(proc, force_update=False)

        self._log_policy_update("reset_policy_cache")

    def _clear_container_policy_caches(self):
        self.policy_decision_point_manager.clear_policy_cache()
        self.unregister_all_process_policy_preconditions()

    def update_process_policies(self, process_instance, safe_mode=False, force_update=True):
        """
        Load any applicable process policies for a container process.
        To be called by when spawning a new process, or when policy is reset.
        @param process_instance  The ION process for which to load policy
        @param safe_mode  If True, will not attempt to read policy if Policy MS not available
        """
        # NOTE: During restart, we rely on the bootstrap code to remove registration of Policy MS
        if safe_mode and not self._is_policy_management_service_available():
            if not is_testing() and (process_instance.name not in {"resource_registry", "system_management",
                    "directory", "identity_management"} and process_instance._proc_name != "event_persister"):
                # We are in the early phases of bootstrapping
                log.warn("update_process_policies(%s) - No update. Policy MS not available", process_instance._proc_name)

            self._log_policy_update("update_process_policies",
                                    message="No update. Policy MS not available",
                                    process=process_instance)
            return

        self._ensure_system_actor()

        if process_instance._proc_type == PROCTYPE_SERVICE:
            self.update_service_access_policy(process_instance._proc_listen_name, force_update=force_update)

        elif process_instance._proc_type == PROCTYPE_AGENT:
            # Load any existing policies for this agent with type or name
            if process_instance.resource_type is None:
                self.update_service_access_policy(process_instance.name, force_update=force_update)
            else:
                self.update_service_access_policy(process_instance.resource_type, force_update=force_update)

            if process_instance.resource_id:
                # Load any existing policies for this resource
                self.update_resource_access_policy(process_instance.resource_id, force_update=force_update)

        self._log_policy_update("update_process_policies",
                                message="Checked",
                                process=process_instance)

    def update_common_service_access_policy(self, delete_policy=False):
        """Update policy common to all services"""
        if self.policy_decision_point_manager is None:
            return

        try:
            rules = self.policy_client.get_active_service_access_policy_rules(
                    service_name='', org_name=self._container_org_name,
                    headers=self.system_actor_user_header)
            self.policy_decision_point_manager.set_common_service_policy_rules(rules)

        except Exception as e:
            # If the resource does not exist, just ignore it - but log a warning.
            log.warn("There was an error applying access policy: %s" % e.message)

    def update_service_access_policy(self, service_name, service_op='', delete_policy=False, force_update=True):
        """Update policy for a service"""
        if self.policy_decision_point_manager is None:
            return
        if not force_update and not service_op and self.policy_decision_point_manager.has_service_policy(service_name):
            log.info("Skipping update of service %s policy - already cached", service_name)
            return

        try:
            if service_op:
                policies = self.policy_client.get_active_service_operation_preconditions(
                        service_name=service_name, op=service_op, org_name=self._container_org_name,
                        headers=self.system_actor_user_header)
            else:
                policies = self.policy_client.get_active_service_access_policy_rules(
                        service_name=service_name, org_name=self._container_org_name,
                        headers=self.system_actor_user_header)

            # First update any access policy rules
            svc_access_policy = [p for p in policies
                                 if p.policy_type in (PolicyTypeEnum.COMMON_SERVICE_ACCESS, PolicyTypeEnum.SERVICE_ACCESS)]
            self.policy_decision_point_manager.set_service_policy_rules(service_name, svc_access_policy)

            # Next update any precondition policies
            svc_preconditions = [p for p in policies
                                 if p.policy_type == PolicyTypeEnum.SERVICE_OP_PRECOND]

            # There can be several local processes for a service
            procs = self.container.proc_manager.get_local_service_processes(service_name)
            for proc in procs:
                if svc_preconditions:
                    for op_pre_policy in svc_preconditions:
                        for pre_check in op_pre_policy.preconditions:
                            self.unregister_process_operation_precondition(proc, op_pre_policy.op, pre_check)
                            if not delete_policy:
                                self.register_process_operation_precondition(proc, op_pre_policy.op, pre_check)
                else:
                    # Unregister all, just in case
                    self.unregister_all_process_operation_precondition(proc, service_op)

        except Exception as ex:
            # If the resource does not exist, just ignore it - but log a warning.
            log.warn("Error applying access policy for service %s: %s" % (service_name, ex.message))

    def update_resource_access_policy(self, resource_id, delete_policy=False, force_update=True):
        """Update policy for a resource (such as a device fronted by an agent process)"""
        if self.policy_decision_point_manager is None:
            return
        if self.policy_decision_point_manager.has_resource_policy(resource_id):
            return

        try:
            policy_list = self.policy_client.get_active_resource_access_policy_rules(
                    resource_id, headers=self.system_actor_user_header)
            self.policy_decision_point_manager.set_resource_policy_rules(resource_id, policy_list)

        except Exception as e:
            # If the resource does not exist, just ignore it - but log a warning.
            log.warn("There was an error applying access policy for resource %s: %s", resource_id, e.message)

    def update_process_access_policy(self, process_key, service_op='', delete_policy=False, force_update=True):
        pass
        # procs, op_preconditions = [], None
        # try:
        #     # There can be several local processes for a service all with different names
        #     procs = self.container.proc_manager.get_local_service_processes(service_name)
        #     if procs:
        #         op_preconditions = self.policy_client.get_active_service_operation_preconditions(
        #                 service_name=service_name, op=service_op, org_name=self._container_org_name,
        #                 headers=self.system_actor_user_header)
        # except Exception as ex:
        #     # If the resource does not exist, just ignore it - but log a warning.
        #     log.warn("Error applying precondition access policy for service %s: %s" % (service_name, ex.message))
        #
        # for proc in procs:
        #     try:
        #         if op_preconditions:
        #             for op in op_preconditions:
        #                 for pre in op.preconditions:
        #                     self.unregister_process_operation_precondition(proc, op.op, pre)
        #                     if not delete_policy:
        #                         self.register_process_operation_precondition(proc, op.op, pre)
        #         else:
        #             # Unregister all, just in case
        #             self.unregister_all_process_operation_precondition(proc, service_op)
        #     except Exception as ex:
        #         # If the resource does not exist, just ignore it - but log a warning.
        #         log.warn("Error applying precondition access policy for process %s of service %s: %s" % (proc, service_name, ex.message))


    def get_active_policies(self):
        container_policies = dict()
        container_policies['common_service_access'] = self.policy_decision_point_manager.load_common_service_pdp
        container_policies['service_access'] = {k: v for (k, v) in self.policy_decision_point_manager.service_policy_decision_point.iteritems() if v is not None}
        container_policies['resource_access'] = {k: v for (k, v) in self.policy_decision_point_manager.resource_policy_decision_point.iteritems() if v is not None}
        container_policies['service_operation'] = dict(self._service_op_preconditions)

        #log.info(container_policies)
        return container_policies

    def _is_policy_management_service_available(self):
        """
        Method to verify if the Policy Management Service is running in the system. If the container cannot connect to
        the RR then assume it is remote container so do not try to access Policy Management Service
        """
        policy_service = get_service_registry().is_service_available('policy_management', True)
        if policy_service:
            return True
        return False

    def _get_policy_snapshot(self):
        """Debugging helper that snapshot copies the current container's policy state.
        """
        policy_snap = {}
        policy_snap["snap_ts"] = get_ion_ts()

        policies = self.get_active_policies()
        common_list = []
        policy_snap["common_pdp"] = common_list
        for rule in policies.get("common_service_access", {}).policy.rules:
            rule_dict = dict(id=rule.id, description=rule.description, effect=rule.effect.value)
            common_list.append(rule_dict)

        service_dict = {}
        policy_snap["service_pdp"] = service_dict
        for (svc_name, sp) in policies.get("service_access", {}).iteritems():
            for rule in sp.policy.rules:
                if svc_name not in service_dict:
                    service_dict[svc_name] = []
                rule_dict = dict(id=rule.id, description=rule.description, effect=rule.effect.value)
                service_dict[svc_name].append(rule_dict)

        service_pre_dict = {}
        policy_snap["service_precondition"] = service_pre_dict
        for (svc_name, sp) in policies.get("service_operation", {}).iteritems():
            for op, f in sp.iteritems():
                if svc_name not in service_pre_dict:
                    service_pre_dict[svc_name] = []
                service_pre_dict[svc_name].append(op)

        resource_dict = {}
        policy_snap["resource_pdp"] = resource_dict
        for (res_name, sp) in policies.get("resource_access", {}).iteritems():
            for rule in sp.policy.rules:
                if res_name not in resource_dict:
                    resource_dict[res_name] = []
                rule_dict = dict(id=rule.id, description=rule.description, effect=rule.effect.value)
                resource_dict[res_name].append(rule_dict)

        return policy_snap
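
    # For reference, the snapshot produced above is a plain dict; its shape is
    # roughly as follows (values illustrative, not taken from a live system):
    #
    #     {'snap_ts': '1446070850123',
    #      'common_pdp': [{'id': 'urn:rule:1', 'description': '...', 'effect': 'Permit'}],
    #      'service_pdp': {'resource_registry': [{'id': 'urn:rule:2', ...}]},
    #      'service_precondition': {'resource_registry': ['create_resource']},
    #      'resource_pdp': {}}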

    def _log_policy_update(self, update_type=None, message=None, event=None, process=None):
        policy_update_dict = {}
        policy_update_dict["update_ts"] = get_ion_ts()
        policy_update_dict["update_type"] = update_type or ""
        policy_update_dict["message"] = message or ""
        if event:
            policy_update_dict["event._id"] = getattr(event, "_id", "")
            policy_update_dict["event.ts_created"] = getattr(event, "ts_created", "")
            policy_update_dict["event.type_"] = getattr(event, "type_", "")
            policy_update_dict["event.sub_type"] = getattr(event, "sub_type", "")
        if process:
            policy_update_dict["proc._proc_name"] = getattr(process, "_proc_name", "")
            policy_update_dict["proc.name"] = getattr(process, "name", "")
            policy_update_dict["proc._proc_listen_name"] = getattr(process, "_proc_listen_name", "")
            policy_update_dict["proc.resource_type"] = getattr(process, "resource_type", "")
            policy_update_dict["proc.resource_id"] = getattr(process, "resource_id", "")
        any_change = False   # Change can only be detected in the number/names of policies, not their content
        snapshot = self._policy_snapshot
        policy_now = self._get_policy_snapshot()
        # Comparison of snapshot to current policy
        try:
            def compare_policy(pol_cur, pol_snap, key, res):
                pol_cur_set = {d["id"] if isinstance(d, dict) else d for d in pol_cur}
                pol_snap_set = {d["id"] if isinstance(d, dict) else d for d in pol_snap}
                if pol_cur_set != pol_snap_set:
                    policy_update_dict["snap.%s.%s.added" % (key, res)] = pol_cur_set - pol_snap_set
                    policy_update_dict["snap.%s.%s.removed" % (key, res)] = pol_snap_set - pol_cur_set
                    log.debug("Policy changed for %s.%s: %s vs %s" % (key, res, pol_cur_set, pol_snap_set))
                    return True
                return False
            policy_update_dict["snap.snap_ts"] = snapshot["snap_ts"]
            for key in ("common_pdp", "service_pdp", "service_precondition", "resource_pdp"):
                pol_snap = snapshot[key]
                pol_cur = policy_now[key]
                if isinstance(pol_cur, dict):
                    for res in pol_cur.keys():
                        pol_list = pol_cur[res]
                        snap_list = pol_snap.get(res, [])
                        any_change = compare_policy(pol_list, snap_list, key, res) or any_change
                elif isinstance(pol_cur, list):
                    any_change = compare_policy(pol_cur, pol_snap, key, "common") or any_change

            policy_update_dict["snap.policy_changed"] = str(any_change)
        except Exception as ex:
            log.warn("Cannot compare current policy to prior snapshot", exc_info=True)

        self._policy_update_log.append(policy_update_dict)
        self._policy_update_log = self._policy_update_log[-100:]
        self._policy_snapshot = policy_now

        if any_change:
            log.debug("Container policy changed. Cause: %s/%s" % (update_type, message))
        else:
            log.debug("Container policy checked but no change. Cause: %s/%s" % (update_type, message))

    # --- Methods for managing operation specific preconditions

    def get_process_operation_dict(self, process_name, auto_add=True):
        if process_name in self._service_op_preconditions:
            return self._service_op_preconditions[process_name]

        if auto_add:
            self._service_op_preconditions[process_name] = dict()
            return self._service_op_preconditions[process_name]

        return None

    def register_process_operation_precondition(self, process, operation, precondition):
        """
        This method is used to register process operation precondition functions
        with the governance controller. The endpoint code will call check_process_operation_preconditions()
        below before calling the business logic operation and if any of
        the precondition functions return False, then the request is denied as Unauthorized.

        At some point, this should be refactored to be another interceptor, but at the operation level.
        """
        if not hasattr(process, operation):
            raise NotFound("The operation %s does not exist for the %s process" % (operation, process.name))

        if type(precondition) == types.MethodType and precondition.im_self != process:
            raise NotFound("The method %s does not exist for the %s process." % (str(precondition), process.name))

        process_op_conditions = self.get_process_operation_dict(process.name)
        if operation in process_op_conditions:
            process_op_conditions[operation].append(precondition)
        else:
            preconditions = list()
            preconditions.append(precondition)
            process_op_conditions[operation] = preconditions
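
    # A minimal sketch of a compatible precondition (function and header names
    # below are illustrative, not part of this codebase): it receives the
    # message and headers and returns an (allowed, reason) tuple, e.g.
    #
    #     def deny_anonymous(msg, headers):
    #         if headers.get('ion-actor-id', 'anonymous') == 'anonymous':
    #             return False, 'Anonymous callers are not authorized'
    #         return True, ''
    #
    #     gov_controller.register_process_operation_precondition(
    #         process, 'read_resource', deny_anonymous)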

    def unregister_all_process_operation_precondition(self, process, operation):
        """
        This method removes all precondition functions registered with an operation on a process.
        Care should be taken with this call, as it can remove "hard wired" preconditions that are
        directly registered by processes in a container.
        """
        process_op_conditions = self.get_process_operation_dict(process.name, auto_add=False)
        if process_op_conditions is not None and operation in process_op_conditions:
            del process_op_conditions[operation]

    def unregister_process_operation_precondition(self, process, operation, precondition):
        """
        This method removes a specific precondition function registered with an operation on a process.
        Care should be taken with this call, as it can remove "hard wired" preconditions that are
        directly registered by processes in a container.
        """
        # Just skip this if the operation is not passed in.
        if operation is None:
            return

        if not hasattr(process, operation):
            raise NotFound("The operation %s does not exist for the %s process" % (operation, process.name))

        process_op_conditions = self.get_process_operation_dict(process.name, auto_add=False)
        if process_op_conditions is not None and operation in process_op_conditions:
            preconditions = process_op_conditions[operation]
            preconditions[:] = [pre for pre in preconditions if not pre == precondition]
            if not preconditions:
                del process_op_conditions[operation]

    def unregister_all_process_policy_preconditions(self):
        """
        This method removes all precondition functions registered with an operation on a process.
        It will not remove "hard wired" preconditions that are directly registered by processes in a container.
        """
        for proc in self._service_op_preconditions:
            process_op_conditions = self.get_process_operation_dict(proc, auto_add=False)
            if process_op_conditions is not None:
                for op in process_op_conditions:
                    preconditions = process_op_conditions[op]
                    preconditions[:] = [pre for pre in preconditions if type(pre) == types.FunctionType]

    def check_process_operation_preconditions(self, process, msg, headers):
        """
        This method is called by the ION endpoint to execute any process operation precondition functions before
        allowing the operation to be called.
        """
        operation = headers.get('op', None)
        if operation is None:
            return

        process_op_conditions = self.get_process_operation_dict(process.name, auto_add=False)
        if process_op_conditions is not None and operation in process_op_conditions:
            preconditions = process_op_conditions[operation]
            for precond in reversed(preconditions):
                if type(precond) in (types.MethodType, types.FunctionType):
                    # Handle preconditions which are callables (functions or bound methods)
                    try:
                        ret_val, ret_message = precond(msg, headers)
                    except Exception as e:
                        # TODO - Catching all exceptions and logging as errors; don't want to stop processing for this right now
                        log.error('Error executing precondition function %s for operation %s: %s - it will be ignored.' %
                                  (precond.__name__, operation, e.message))
                        ret_val = True
                        ret_message = ''

                    if not ret_val:
                        raise Unauthorized(ret_message)

                elif isinstance(precond, basestring):
                    try:
                        # See if this is a method on the endpoint process; if so, call it
                        method = getattr(process, precond, None)
                        if method:
                            ret_val, ret_message = method(msg, headers)
                        else:
                            # It is not a method on the process, so try to execute it as a simple Python function
                            exec precond
                            pref = locals()["precondition_func"]
                            ret_val, ret_message = pref(process, msg, headers)

                    except Exception as e:
                        # TODO - Catching all exceptions and logging as errors; don't want to stop processing for this right now
                        log.error('Error executing precondition function %s for operation %s: %s - it will be ignored.' %
                                  (precond, operation, e.message))
                        ret_val = True
                        ret_message = ''

                    if not ret_val:
                        raise Unauthorized(ret_message)
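
The string form of a precondition handled in the last branch above is exec'd and must define a function named precondition_func. A minimal sketch of such a policy-supplied snippet (the header check is illustrative, not taken from this codebase):

precond_source = '''
def precondition_func(process, msg, headers):
    # Illustrative check: deny calls that carry no actor id
    if not headers.get('ion-actor-id'):
        return False, 'Missing actor id'
    return True, ''
'''

# check_process_operation_preconditions() execs the source, then does:
#     pref = locals()["precondition_func"]
#     ret_val, ret_message = pref(process, msg, headers)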
Ejemplo n.º 9
class EOIRegistrationProcess(SimpleProcess):
    def on_start(self):
        self.data_source_subscriber = EventSubscriber(
            event_type=OT.ResourceModifiedEvent,
            origin_type=RT.DataSource,
            callback=self._register_data_source)
        self.provider_subscriber = EventSubscriber(
            event_type=OT.ResourceModifiedEvent,
            origin_type=RT.ExternalDataProvider,
            callback=self._register_provider)
        self.data_source_subscriber.start()
        self.provider_subscriber.start()

        self.rr = self.container.resource_registry

        self.using_eoi_services = CFG.get_safe('eoi.meta.use_eoi_services',
                                               False)
        self.server = CFG.get_safe(
            'eoi.importer_service.server', "localhost") + ":" + str(
                CFG.get_safe('eoi.importer_service.port', 8844))

        log.info("Using geoservices=" + str(self.using_eoi_services))
        if not self.using_eoi_services:
            log.warn("not using geoservices...")

        self.importer_service_available = self.check_for_importer_service()
        if not self.importer_service_available:
            log.warn("not using importer service...")

    def check_for_importer_service(self):
        '''
        Only runs on start; used to determine whether the importer service is available.
        '''
        try:
            r = requests.get(self.server + '/service=alive&name=ooi&id=ooi')
            log.info("importer service available, status code: %s",
                     str(r.status_code))
            # The alive service returned OK
            return r.status_code == 200
        except Exception as e:
            # Service is genuinely not available
            log.warn("importer service is not available: %s", e)
            return False

    def _register_data_source(self, event, *args, **kwargs):
        '''
        Used to create a harvester when a DataSource resource is created or modified.
        '''
        if self.importer_service_available:
            obj = self.rr.read(event.origin)
            data_fields = []
            for attrname, value in vars(obj).iteritems():
                # Generate the param list to pass to the importer service using field names
                if attrname != "contact":
                    f = attrname.replace("_", "") + "=" + str(value)
                    data_fields.append(f)

            param_list = '&'.join(data_fields)

            request_string = self.server + '/service=' + CREATE_HARVESTER + "&" + param_list
            r = requests.get(request_string)

    def _register_provider(self, event, *args, **kwargs):
        if self.importer_service_available:
            #print "provider id:", event.origin
            pass

    def on_quit(self):
        self.data_source_subscriber.stop()
        self.provider_subscriber.stop()
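
A hedged sketch of what exercises _register_data_source above: creating or updating a DataSource resource emits a ResourceModifiedEvent with origin_type RT.DataSource, which the subscriber routes to the callback. The same path can be driven manually (the resource id below is illustrative):

pub = EventPublisher(event_type=OT.ResourceModifiedEvent)
pub.publish_event(origin='data_source_id',      # _id of a persisted DataSource resource (illustrative)
                  origin_type=RT.DataSource,
                  sub_type='UPDATE')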
Ejemplo n.º 10
class GovernanceController(object):
    """
    This is a singleton object which handles governance functionality in the container.
    Registers event callback for PolicyEvent to update local policies on change.
    """
    def __init__(self, container):
        log.debug('GovernanceController.__init__()')
        self.container = container
        self.enabled = False
        self.interceptor_by_name_dict = {}
        self.interceptor_order = []
        self.policy_decision_point_manager = None
        self.governance_dispatcher = None

        # Holds a list per service operation of policy methods to be called before operation is invoked
        self._service_op_preconditions = {}
        # Holds a list per process operation of policy methods to be called before operation is invoked
        self._process_op_preconditions = {}

        self._is_container_org_boundary = False
        self._container_org_name = None
        self._container_org_id = None

        # For policy debugging purposes. Keeps a list of most recent policy updates for later readout
        self._policy_update_log = []
        self._policy_snapshot = None

    def start(self):
        log.debug("GovernanceController starting ...")
        self._CFG = CFG

        self.enabled = CFG.get_safe(
            'interceptor.interceptors.governance.config.enabled', False)
        if not self.enabled:
            log.warn("GovernanceInterceptor disabled by configuration")
        self.policy_event_subscriber = None

        # Containers default to not Org Boundary and ION Root Org
        self._is_container_org_boundary = CFG.get_safe(
            'container.org_boundary', False)
        self._container_org_name = CFG.get_safe(
            'container.org_name', CFG.get_safe('system.root_org', 'ION'))
        self._container_org_id = None
        self._system_root_org_name = CFG.get_safe('system.root_org', 'ION')

        self._is_root_org_container = (
            self._container_org_name == self._system_root_org_name)

        self.system_actor_id = None
        self.system_actor_user_header = None

        self.rr_client = ResourceRegistryServiceProcessClient(
            process=self.container)
        self.policy_client = PolicyManagementServiceProcessClient(
            process=self.container)

        if self.enabled:
            config = CFG.get_safe('interceptor.interceptors.governance.config')
            self.initialize_from_config(config)

            self.policy_event_subscriber = EventSubscriber(
                event_type=OT.PolicyEvent, callback=self.policy_event_callback)
            self.policy_event_subscriber.start()

            self._policy_snapshot = self._get_policy_snapshot()
            self._log_policy_update("start_governance_ctrl",
                                    message="Container start")

    def initialize_from_config(self, config):
        self.governance_dispatcher = GovernanceDispatcher()
        self.policy_decision_point_manager = PolicyDecisionPointManager(self)

        self.interceptor_order = config.get('interceptor_order', None) or []
        gov_ints = config.get('governance_interceptors', None) or {}
        for name in gov_ints:
            interceptor_def = gov_ints[name]
            classobj = named_any(interceptor_def["class"])
            classinst = classobj()
            self.interceptor_by_name_dict[name] = classinst

    def _ensure_system_actor(self):
        """Make sure we have a handle for the system actor"""
        if self.system_actor_id is None:
            system_actor = get_system_actor()
            if system_actor is not None:
                self.system_actor_id = system_actor._id
                self.system_actor_user_header = get_system_actor_header(
                    system_actor)

    def stop(self):
        log.debug("GovernanceController stopping ...")

        if self.policy_event_subscriber is not None:
            self.policy_event_subscriber.stop()

    @property
    def is_container_org_boundary(self):
        return self._is_container_org_boundary

    @property
    def container_org_name(self):
        return self._container_org_name

    @property
    def system_root_org_name(self):
        return self._system_root_org_name

    @property
    def is_root_org_container(self):
        return self._is_root_org_container

    @property
    def CFG(self):
        return self._CFG

    @property
    def rr(self):
        """Returns the active resource registry instance if available in the container or service client.
        """
        if self.container.has_capability('RESOURCE_REGISTRY'):
            return self.container.resource_registry
        return self.rr_client

    def get_container_org_boundary_id(self):
        """Returns the permanent org identifier configured for this container
        """
        if not self._is_container_org_boundary:
            return None

        if self._container_org_id is None:
            org_ids, _ = self.rr.find_resources_ext(
                restype=RT.Org,
                attr_name="org_governance_name",
                attr_value=self._container_org_name,
                id_only=True)
            if org_ids:
                self._container_org_id = org_ids[0]

        return self._container_org_id

    # --- Interceptor management

    def process_incoming_message(self, invocation):
        """The GovernanceController hook into the incoming message interceptor stack
        """
        self.process_message(invocation, self.interceptor_order,
                             Invocation.PATH_IN)
        return self.governance_dispatcher.handle_incoming_message(invocation)

    def process_outgoing_message(self, invocation):
        """The GovernanceController hook into the outgoing message interceptor stack
        """
        self.process_message(invocation, reversed(self.interceptor_order),
                             Invocation.PATH_OUT)
        return self.governance_dispatcher.handle_outgoing_message(invocation)

    def process_message(self, invocation, interceptor_list, method):
        """
        The GovernanceController hook to iterate over the interceptors to call each one and
        evaluate the annotations to see what actions should be done.
        """
        for int_name in interceptor_list:
            interceptor_obj = self.interceptor_by_name_dict[int_name]
            interceptor_func = getattr(interceptor_obj, method)
            # Invoke interceptor function for designated path
            interceptor_func(invocation)

            # Stop processing message if an issue with the message was found by an interceptor
            if invocation.message_annotations.get(GovernanceDispatcher.CONVERSATION__STATUS_ANNOTATION, None) == GovernanceDispatcher.STATUS_REJECT or \
               invocation.message_annotations.get(GovernanceDispatcher.POLICY__STATUS_ANNOTATION, None) == GovernanceDispatcher.STATUS_REJECT:
                break

        return invocation
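
    # A minimal sketch of a conforming interceptor (class name illustrative):
    # it exposes one method per message path (assumed to be named 'incoming'
    # and 'outgoing' via Invocation.PATH_IN/PATH_OUT) and may reject a message
    # by annotating the invocation:
    #
    #     class DenyAllInterceptor(object):
    #         def incoming(self, invocation):
    #             invocation.message_annotations[GovernanceDispatcher.POLICY__STATUS_ANNOTATION] = \
    #                 GovernanceDispatcher.STATUS_REJECT
    #             return invocation
    #
    #         def outgoing(self, invocation):
    #             return invocation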

    # --- Container policy management

    def policy_event_callback(self, policy_event, *args, **kwargs):
        """Generic policy event handler for dispatching policy related events.
        """
        self._ensure_system_actor()

        log.info("Received policy event: %s", policy_event)

        if policy_event.type_ == OT.ResourcePolicyEvent:
            self.resource_policy_event_callback(policy_event, *args, **kwargs)
        elif policy_event.type_ == OT.RelatedResourcePolicyEvent:
            self.resource_policy_event_callback(policy_event, *args, **kwargs)
        elif policy_event.type_ == OT.ServicePolicyEvent:
            self.service_policy_event_callback(policy_event, *args, **kwargs)

        self._log_policy_update("policy_event_callback",
                                message="Event processed",
                                event=policy_event)

    def service_policy_event_callback(self, service_policy_event, *args,
                                      **kwargs):
        """The ServicePolicyEvent handler
        """
        log.debug('Service policy event: %s',
                  str(service_policy_event.__dict__))

        policy_id = service_policy_event.origin
        service_name = service_policy_event.service_name
        service_op = service_policy_event.op
        delete_policy = (service_policy_event.sub_type == 'DeletePolicy')

        if service_name:
            if self.container.proc_manager.is_local_service_process(service_name) or \
               self.container.proc_manager.is_local_agent_process(service_name):
                self.update_service_access_policy(service_name,
                                                  service_op,
                                                  delete_policy=delete_policy)

        else:
            self.update_common_service_access_policy()

    def resource_policy_event_callback(self, resource_policy_event, *args,
                                       **kwargs):
        """The ResourcePolicyEvent handler
        """
        log.debug('Resource policy event: %s',
                  str(resource_policy_event.__dict__))

        policy_id = resource_policy_event.origin
        resource_id = resource_policy_event.resource_id
        delete_policy = (resource_policy_event.sub_type == 'DeletePolicy')

        self.update_resource_access_policy(resource_id, delete_policy)

    def reset_policy_cache(self):
        """Empty and reload the container's policy caches.
        Reload by getting policy for each of the container's processes and common policy.
        """
        log.info('Resetting policy cache')

        # First remove all cached polices and operation precondition functions
        self._clear_container_policy_caches()

        # Load the common service access policies since they are shared across services
        self.update_common_service_access_policy()

        # Iterate over the processes running in the container and reload their policies
        proc_list = self.container.proc_manager.list_local_processes()
        for proc in proc_list:
            self.update_process_policies(proc, force_update=False)

        self._log_policy_update("reset_policy_cache")

    def _clear_container_policy_caches(self):
        self.policy_decision_point_manager.clear_policy_cache()
        self.unregister_all_process_policy_preconditions()

    def update_process_policies(self,
                                process_instance,
                                safe_mode=False,
                                force_update=True):
        """
        Load any applicable process policies for a container process.
        To be called when spawning a new process, or when policy is reset.
        @param process_instance  The ION process for which to load policy
        @param safe_mode  If True, will not attempt to read policy if Policy MS not available
        """
        # NOTE: During restart, we rely on the bootstrap code to remove registration of Policy MS
        if safe_mode and not self._is_policy_management_service_available():
            if not is_testing() and (process_instance.name not in {
                    "resource_registry", "system_management", "directory",
                    "identity_management"
            } and process_instance._proc_name != "event_persister"):
                # We are in the early phases of bootstrapping
                log.warn(
                    "update_process_policies(%s) - No update. Policy MS not available",
                    process_instance._proc_name)

            self._log_policy_update(
                "update_process_policies",
                message="No update. Policy MS not available",
                process=process_instance)
            return

        self._ensure_system_actor()

        if process_instance._proc_type == PROCTYPE_SERVICE:
            self.update_service_access_policy(
                process_instance._proc_listen_name, force_update=force_update)

        elif process_instance._proc_type == PROCTYPE_AGENT:
            # Load any existing policies for this agent with type or name
            if process_instance.resource_type is None:
                self.update_service_access_policy(process_instance.name,
                                                  force_update=force_update)
            else:
                self.update_service_access_policy(
                    process_instance.resource_type, force_update=force_update)

            if process_instance.resource_id:
                # Load any existing policies for this resource
                self.update_resource_access_policy(
                    process_instance.resource_id, force_update=force_update)

        self._log_policy_update("update_process_policies",
                                message="Checked",
                                process=process_instance)

    def update_common_service_access_policy(self, delete_policy=False):
        """Update policy common to all services"""
        if self.policy_decision_point_manager is None:
            return

        try:
            rules = self.policy_client.get_active_service_access_policy_rules(
                service_name='',
                org_name=self._container_org_name,
                headers=self.system_actor_user_header)
            self.policy_decision_point_manager.set_common_service_policy_rules(
                rules)

        except Exception as e:
            # If the resource does not exist, just ignore it - but log a warning.
            log.warn("There was an error applying access policy: %s" %
                     e.message)

    def update_service_access_policy(self,
                                     service_name,
                                     service_op='',
                                     delete_policy=False,
                                     force_update=True):
        """Update policy for a service"""
        if self.policy_decision_point_manager is None:
            return
        if not force_update and not service_op and self.policy_decision_point_manager.has_service_policy(
                service_name):
            log.info("Skipping update of service %s policy - already cached",
                     service_name)
            return

        try:
            if service_op:
                policies = self.policy_client.get_active_service_operation_preconditions(
                    service_name=service_name,
                    op=service_op,
                    org_name=self._container_org_name,
                    headers=self.system_actor_user_header)
            else:
                policies = self.policy_client.get_active_service_access_policy_rules(
                    service_name=service_name,
                    org_name=self._container_org_name,
                    headers=self.system_actor_user_header)

            # First update any access policy rules
            svc_access_policy = [
                p for p in policies
                if p.policy_type in (PolicyTypeEnum.COMMON_SERVICE_ACCESS,
                                     PolicyTypeEnum.SERVICE_ACCESS)
            ]
            self.policy_decision_point_manager.set_service_policy_rules(
                service_name, svc_access_policy)

            # Next update any precondition policies
            svc_preconditions = [
                p for p in policies
                if p.policy_type == PolicyTypeEnum.SERVICE_OP_PRECOND
            ]

            # There can be several local processes for a service
            procs = self.container.proc_manager.get_local_service_processes(
                service_name)
            for proc in procs:
                if svc_preconditions:
                    for op_pre_policy in svc_preconditions:
                        for pre_check in op_pre_policy.preconditions:
                            self.unregister_process_operation_precondition(
                                proc, op_pre_policy.op, pre_check)
                            if not delete_policy:
                                self.register_process_operation_precondition(
                                    proc, op_pre_policy.op, pre_check)
                else:
                    # Unregister all, just in case
                    self.unregister_all_process_operation_precondition(
                        proc, service_op)

        except Exception as ex:
            # If the resource does not exist, just ignore it - but log a warning.
            log.warn("Error applying access policy for service %s: %s" %
                     (service_name, ex.message))

    def update_resource_access_policy(self,
                                      resource_id,
                                      delete_policy=False,
                                      force_update=True):
        """Update policy for a resource (such as a device fronted by an agent process)"""
        if self.policy_decision_point_manager is None:
            return
        if not force_update and self.policy_decision_point_manager.has_resource_policy(resource_id):
            return

        try:
            policy_list = self.policy_client.get_active_resource_access_policy_rules(
                resource_id, headers=self.system_actor_user_header)
            self.policy_decision_point_manager.set_resource_policy_rules(
                resource_id, policy_list)

        except Exception as e:
            # If the resource does not exist, just ignore it - but log a warning.
            log.warn(
                "There was an error applying access policy for resource %s: %s",
                resource_id, e.message)

    def update_process_access_policy(self,
                                     process_key,
                                     service_op='',
                                     delete_policy=False,
                                     force_update=True):
        pass
        # procs, op_preconditions = [], None
        # try:
        #     # There can be several local processes for a service all with different names
        #     procs = self.container.proc_manager.get_local_service_processes(service_name)
        #     if procs:
        #         op_preconditions = self.policy_client.get_active_service_operation_preconditions(
        #                 service_name=service_name, op=service_op, org_name=self._container_org_name,
        #                 headers=self.system_actor_user_header)
        # except Exception as ex:
        #     # If the resource does not exist, just ignore it - but log a warning.
        #     log.warn("Error applying precondition access policy for service %s: %s" % (service_name, ex.message))
        #
        # for proc in procs:
        #     try:
        #         if op_preconditions:
        #             for op in op_preconditions:
        #                 for pre in op.preconditions:
        #                     self.unregister_process_operation_precondition(proc, op.op, pre)
        #                     if not delete_policy:
        #                         self.register_process_operation_precondition(proc, op.op, pre)
        #         else:
        #             # Unregister all, just in case
        #             self.unregister_all_process_operation_precondition(proc, service_op)
        #     except Exception as ex:
        #         # If the resource does not exist, just ignore it - but log a warning.
        #         log.warn("Error applying precondition access policy for process %s of service %s: %s" % (proc, service_name, ex.message))

    def get_active_policies(self):
        container_policies = dict()
        container_policies[
            'common_service_access'] = self.policy_decision_point_manager.load_common_service_pdp
        container_policies['service_access'] = {
            k: v
            for (k, v) in self.policy_decision_point_manager.
            service_policy_decision_point.iteritems() if v is not None
        }
        container_policies['resource_access'] = {
            k: v
            for (k, v) in self.policy_decision_point_manager.
            resource_policy_decision_point.iteritems() if v is not None
        }
        container_policies['service_operation'] = dict(
            self._service_op_preconditions)

        #log.info(container_policies)
        return container_policies

    def _is_policy_management_service_available(self):
        """
        Method to verify if the Policy Management Service is running in the system. If the container cannot connect to
        the RR then assume it is remote container so do not try to access Policy Management Service
        """
        policy_service = get_service_registry().is_service_available(
            'policy_management', True)
        if policy_service:
            return True
        return False

    def _get_policy_snapshot(self):
        """Debugging helper that snapshot copies the current container's policy state.
        """
        policy_snap = {}
        policy_snap["snap_ts"] = get_ion_ts()

        policies = self.get_active_policies()
        common_list = []
        policy_snap["common_pdp"] = common_list
        for rule in policies.get("common_service_access", {}).policy.rules:
            rule_dict = dict(id=rule.id,
                             description=rule.description,
                             effect=rule.effect.value)
            common_list.append(rule_dict)

        service_dict = {}
        policy_snap["service_pdp"] = service_dict
        for (svc_name, sp) in policies.get("service_access", {}).iteritems():
            for rule in sp.policy.rules:
                if svc_name not in service_dict:
                    service_dict[svc_name] = []
                rule_dict = dict(id=rule.id,
                                 description=rule.description,
                                 effect=rule.effect.value)
                service_dict[svc_name].append(rule_dict)

        service_pre_dict = {}
        policy_snap["service_precondition"] = service_pre_dict
        for (svc_name, sp) in policies.get("service_operation",
                                           {}).iteritems():
            for op, f in sp.iteritems():
                if svc_name not in service_pre_dict:
                    service_pre_dict[svc_name] = []
                service_pre_dict[svc_name].append(op)

        resource_dict = {}
        policy_snap["resource_pdp"] = resource_dict
        for (res_name, sp) in policies.get("resource_access", {}).iteritems():
            for rule in sp.policy.rules:
                if res_name not in resource_dict:
                    resource_dict[res_name] = []
                rule_dict = dict(id=rule.id,
                                 description=rule.description,
                                 effect=rule.effect.value)
                resource_dict[res_name].append(rule_dict)

        return policy_snap

    def _log_policy_update(self,
                           update_type=None,
                           message=None,
                           event=None,
                           process=None):
        policy_update_dict = {}
        policy_update_dict["update_ts"] = get_ion_ts()
        policy_update_dict["update_type"] = update_type or ""
        policy_update_dict["message"] = message or ""
        if event:
            policy_update_dict["event._id"] = getattr(event, "_id", "")
            policy_update_dict["event.ts_created"] = getattr(
                event, "ts_created", "")
            policy_update_dict["event.type_"] = getattr(event, "type_", "")
            policy_update_dict["event.sub_type"] = getattr(
                event, "sub_type", "")
        if process:
            policy_update_dict["proc._proc_name"] = getattr(
                process, "_proc_name", "")
            policy_update_dict["proc.name"] = getattr(process, "name", "")
            policy_update_dict["proc._proc_listen_name"] = getattr(
                process, "_proc_listen_name", "")
            policy_update_dict["proc.resource_type"] = getattr(
                process, "resource_type", "")
            policy_update_dict["proc.resource_id"] = getattr(
                process, "resource_id", "")
        any_change = False  # Change can only be detected in the number/names of policies, not their content
        snapshot = self._policy_snapshot
        policy_now = self._get_policy_snapshot()
        # Comparison of snapshot to current policy
        try:

            def compare_policy(pol_cur, pol_snap, key, res):
                pol_cur_set = {
                    d["id"] if isinstance(d, dict) else d
                    for d in pol_cur
                }
                pol_snap_set = {
                    d["id"] if isinstance(d, dict) else d
                    for d in pol_snap
                }
                if pol_cur_set != pol_snap_set:
                    policy_update_dict["snap.%s.%s.added" %
                                       (key, res)] = pol_cur_set - pol_snap_set
                    policy_update_dict["snap.%s.%s.removed" %
                                       (key, res)] = pol_snap_set - pol_cur_set
                    log.debug("Policy changed for %s.%s: %s vs %s" %
                              (key, res, pol_cur_set, pol_snap_set))
                    return True
                return False

            policy_update_dict["snap.snap_ts"] = snapshot["snap_ts"]
            for key in ("common_pdp", "service_pdp", "service_precondition",
                        "resource_pdp"):
                pol_snap = snapshot[key]
                pol_cur = policy_now[key]
                if isinstance(pol_cur, dict):
                    for res in pol_cur.keys():
                        pol_list = pol_cur[res]
                        snap_list = pol_snap.get(res, [])
                        any_change = compare_policy(pol_list, snap_list, key,
                                                    res) or any_change
                elif isinstance(pol_cur, list):
                    any_change = compare_policy(pol_cur, pol_snap, key,
                                                "common") or any_change

            policy_update_dict["snap.policy_changed"] = str(any_change)
        except Exception as ex:
            log.warn("Cannot compare current policy to prior snapshot",
                     exc_info=True)

        self._policy_update_log.append(policy_update_dict)
        self._policy_update_log = self._policy_update_log[-100:]
        self._policy_snapshot = policy_now

        if any_change:
            log.debug("Container policy changed. Cause: %s/%s" %
                      (update_type, message))
        else:
            log.debug("Container policy checked but no change. Cause: %s/%s" %
                      (update_type, message))

    # --- Methods for managing operation specific preconditions

    def get_process_operation_dict(self, process_name, auto_add=True):
        if process_name in self._service_op_preconditions:
            return self._service_op_preconditions[process_name]

        if auto_add:
            self._service_op_preconditions[process_name] = dict()
            return self._service_op_preconditions[process_name]

        return None

    def register_process_operation_precondition(self, process, operation,
                                                precondition):
        """
        This method is used to register process operation precondition functions
        with the governance controller. The endpoint code will call check_process_operation_preconditions()
        below before calling the business logic operation and if any of
        the precondition functions return False, then the request is denied as Unauthorized.

        At some point, this should be refactored to be another interceptor, but at the operation level.
        """
        if not hasattr(process, operation):
            raise NotFound(
                "The operation %s does not exist for the %s process" %
                (operation, process.name))

        if type(precondition
                ) == types.MethodType and precondition.im_self != process:
            raise NotFound("The method %s does not exist for the %s process." %
                           (str(precondition), process.name))

        process_op_conditions = self.get_process_operation_dict(process.name)
        if operation in process_op_conditions:
            process_op_conditions[operation].append(precondition)
        else:
            preconditions = list()
            preconditions.append(precondition)
            process_op_conditions[operation] = preconditions

    def unregister_all_process_operation_precondition(self, process,
                                                      operation):
        """
        This method removes all precondition functions registered with an operation on a process.
        Care should be taken with this call, as it can remove "hard wired" preconditions that are
        directly registered by processes in a container.
        """
        process_op_conditions = self.get_process_operation_dict(process.name,
                                                                auto_add=False)
        if process_op_conditions is not None and operation in process_op_conditions:
            del process_op_conditions[operation]

    def unregister_process_operation_precondition(self, process, operation,
                                                  precondition):
        """
        This method removes a specific precondition function registered with an operation on a process.
        Care should be taken with this call, as it can remove "hard wired" preconditions that are
        directly registered by processes in a container.
        """
        # Just skip this if the operation is not passed in.
        if operation is None:
            return

        if not hasattr(process, operation):
            raise NotFound(
                "The operation %s does not exist for the %s process" %
                (operation, process.name))

        process_op_conditions = self.get_process_operation_dict(process.name,
                                                                auto_add=False)
        if process_op_conditions is not None and operation in process_op_conditions:
            preconditions = process_op_conditions[operation]
            preconditions[:] = [
                pre for pre in preconditions if not pre == precondition
            ]
            if not preconditions:
                del process_op_conditions[operation]

    def unregister_all_process_policy_preconditions(self):
        """
        This method removes all precondition functions registered with an operation on a process.
        It will not remove "hard wired" preconditions that are directly registered by processes in a container.
        """
        for proc in self._service_op_preconditions:
            process_op_conditions = self.get_process_operation_dict(
                proc, auto_add=False)
            if process_op_conditions is not None:
                for op in process_op_conditions:
                    preconditions = process_op_conditions[op]
                    preconditions[:] = [
                        pre for pre in preconditions
                        if type(pre) == types.FunctionType
                    ]

    def check_process_operation_preconditions(self, process, msg, headers):
        """
        This method is called by the ION endpoint to execute any process operation precondition functions before
        allowing the operation to be called.
        """
        operation = headers.get('op', None)
        if operation is None:
            return

        process_op_conditions = self.get_process_operation_dict(process.name,
                                                                auto_add=False)
        if process_op_conditions is not None and operation in process_op_conditions:
            preconditions = process_op_conditions[operation]
            for precond in reversed(preconditions):
                if type(precond) in (types.MethodType, types.FunctionType):
                    # Handle preconditions which are callables (functions or bound methods)
                    try:
                        ret_val, ret_message = precond(msg, headers)
                    except Exception as e:
                        # TODO - Catching all exceptions and logging as errors; don't want to stop processing for this right now
                        log.error(
                            'Error executing precondition function %s for operation %s: %s - it will be ignored.'
                            % (precond.__name__, operation, e.message))
                        ret_val = True
                        ret_message = ''

                    if not ret_val:
                        raise Unauthorized(ret_message)

                elif isinstance(precond, basestring):
                    try:
                        # See if this is a method on the endpoint process; if so, call it
                        method = getattr(process, precond, None)
                        if method:
                            ret_val, ret_message = method(msg, headers)
                        else:
                            # It is not a method on the process, so try to execute it as a simple Python function
                            exec precond
                            pref = locals()["precondition_func"]
                            ret_val, ret_message = pref(process, msg, headers)

                    except Exception as e:
                        # TODO - Catching all exceptions and logging as errors; don't want to stop processing for this right now
                        log.error(
                            'Error executing precondition function %s for operation %s: %s - it will be ignored.'
                            % (precond, operation, e.message))
                        ret_val = True
                        ret_message = ''

                    if not ret_val:
                        raise Unauthorized(ret_message)
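
A hedged sketch of the event flow that drives the callbacks above: publishing a ServicePolicyEvent (field names as read by service_policy_event_callback) prompts each container's policy_event_subscriber to refresh its cached policy for the named service. The ids and service name are illustrative:

pub = EventPublisher(event_type=OT.ServicePolicyEvent)
pub.publish_event(origin='policy_id',           # id of the policy resource (illustrative)
                  service_name='resource_registry',
                  op='',                        # empty op targets service-level policy
                  sub_type='')                  # 'DeletePolicy' would trigger removal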
Ejemplo n.º 11
class GovernanceController(object):
    """
    This is a singleton object which handles governance functionality in the container.
    """

    def __init__(self, container):
        log.debug('GovernanceController.__init__()')
        self.container = container
        self.enabled = False
        self.interceptor_by_name_dict = dict()
        self.interceptor_order = []
        self.policy_decision_point_manager = None
        self.governance_dispatcher = None

        # Holds a list per service operation of policy methods to check before the op in a process is allowed to be called
        self._service_op_preconditions = dict()

        self._is_container_org_boundary = False
        self._container_org_name = None
        self._container_org_id = None

        # For policy debugging purposes. Keeps a list of most recent policy updates for later readout
        self._policy_update_log = []
        self._policy_snapshot = None

    def start(self):

        log.debug("GovernanceController starting ...")

        self._CFG = CFG

        self.enabled = CFG.get_safe('interceptor.interceptors.governance.config.enabled', False)

        log.info("GovernanceInterceptor enabled: %s" % str(self.enabled))

        self.policy_event_subscriber = None

        # Containers default to not Org Boundary and the ION Root Org
        self._is_container_org_boundary = CFG.get_safe('container.org_boundary', False)
        self._container_org_name = CFG.get_safe('container.org_name', CFG.get_safe('system.root_org', 'ION'))
        self._container_org_id = None
        self._system_root_org_name = CFG.get_safe('system.root_org', 'ION')

        self._is_root_org_container = (self._container_org_name == self._system_root_org_name)

        self.system_actor_id = None
        self.system_actor_user_header = None

        if self.enabled:

            config = CFG.get_safe('interceptor.interceptors.governance.config')

            self.initialize_from_config(config)

            self.policy_event_subscriber = EventSubscriber(event_type=OT.PolicyEvent, callback=self.policy_event_callback)
            self.policy_event_subscriber.start()

            self.rr_client = ResourceRegistryServiceProcessClient(node=self.container.node, process=self.container)
            self.policy_client = PolicyManagementServiceProcessClient(node=self.container.node, process=self.container)

            self._policy_snapshot = self._get_policy_snapshot()
            self._log_policy_update("start_governance_ctrl", message="Container start")

    def initialize_from_config(self, config):

        self.governance_dispatcher = GovernanceDispatcher()

        self.policy_decision_point_manager = PolicyDecisionPointManager(self)

        if 'interceptor_order' in config:
            self.interceptor_order = config['interceptor_order']

        if 'governance_interceptors' in config:
            gov_ints = config['governance_interceptors']

            for name in gov_ints:
                interceptor_def = gov_ints[name]

                # Instantiate and put in by_name array
                parts = interceptor_def["class"].split('.')
                modpath = ".".join(parts[:-1])
                classname = parts[-1]
                module = __import__(modpath, fromlist=[classname])
                classobj = getattr(module, classname)
                classinst = classobj()

                # Put in by_name_dict for possible re-use
                self.interceptor_by_name_dict[name] = classinst

    def stop(self):
        log.debug("GovernanceController stopping ...")

        if self.policy_event_subscriber is not None:
            self.policy_event_subscriber.stop()


    @property
    def is_container_org_boundary(self):
        return self._is_container_org_boundary

    @property
    def container_org_name(self):
        return self._container_org_name

    @property
    def system_root_org_name(self):
        return self._system_root_org_name

    @property
    def is_root_org_container(self):
        return self._is_root_org_container

    @property
    def CFG(self):
        return self._CFG


    @property
    def rr(self):
        """
        Returns the active resource registry instance or client.

        Used to directly contact the resource registry via the container if available,
        otherwise the messaging client to the RR service is returned.
        """
        if self.container.has_capability('RESOURCE_REGISTRY'):
            return self.container.resource_registry

        return self.rr_client


    def get_container_org_boundary_id(self):
        """
        Returns the permanent org identifier configured for this container
        @return:
        """

        if not self._is_container_org_boundary:
            return None

        if self._container_org_id is None:
            orgs, _ = self.rr.find_resources(restype=RT.Org)
            for org in orgs:
                if org.org_governance_name == self._container_org_name:
                    self._container_org_id = org._id
                    break

        return self._container_org_id

    def process_incoming_message(self, invocation):
        """
        The GovernanceController hook into the incoming message interceptor stack
        @param invocation:
        @return:
        """
        self.process_message(invocation, self.interceptor_order, 'incoming')
        return self.governance_dispatcher.handle_incoming_message(invocation)

    def process_outgoing_message(self, invocation):
        """
        The GovernanceController hook into the outgoing message interceptor stack
        @param invocation:
        @return:
        """
        self.process_message(invocation, reversed(self.interceptor_order), 'outgoing')
        return self.governance_dispatcher.handle_outgoing_message(invocation)

    def process_message(self, invocation, interceptor_list, method):
        """
        The GovernanceController hook that iterates over the interceptors, calling each one and
        evaluating the message annotations to decide what actions should be taken.
        @TODO - may want to make this more dynamic instead of hard-coded.
        @param invocation:
        @param interceptor_list:
        @param method:
        @return:
        """
        for int_name in interceptor_list:
            class_inst = self.interceptor_by_name_dict[int_name]
            getattr(class_inst, method)(invocation)

            # Stop processing the message if an interceptor found an issue with it.
            if invocation.message_annotations.get(GovernanceDispatcher.CONVERSATION__STATUS_ANNOTATION) == GovernanceDispatcher.STATUS_REJECT or \
               invocation.message_annotations.get(GovernanceDispatcher.POLICY__STATUS_ANNOTATION) == GovernanceDispatcher.STATUS_REJECT:
                break

        return invocation



    # Manage all of the policies in the container

    def policy_event_callback(self, policy_event, *args, **kwargs):
        """
        The generic policy event call back for dispatching policy related events
        """
        # The system actor may not exist yet at container start, so look it up lazily here
        if self.system_actor_id is None:
            system_actor = get_system_actor()
            if system_actor is not None:
                self.system_actor_id = system_actor._id
                self.system_actor_user_header = get_system_actor_header()

        log.info("Policy event callback received: %s" % policy_event)

        if policy_event.type_ in (OT.ResourcePolicyEvent, OT.RelatedResourcePolicyEvent):
            self.resource_policy_event_callback(policy_event, *args, **kwargs)
        elif policy_event.type_ == OT.ServicePolicyEvent:
            self.service_policy_event_callback(policy_event, *args, **kwargs)

        self._log_policy_update("policy_event_callback",
                                message="Event processed",
                                event=policy_event)

    def resource_policy_event_callback(self, resource_policy_event, *args, **kwargs):
        """
        The ResourcePolicyEvent handler
        """
        log.debug('Resource policy event received: %s', str(resource_policy_event.__dict__))

        policy_id = resource_policy_event.origin
        resource_id = resource_policy_event.resource_id
        delete_policy = (resource_policy_event.sub_type == 'DeletePolicy')

        self.update_resource_access_policy(resource_id, delete_policy)

    def service_policy_event_callback(self, service_policy_event, *args, **kwargs):
        """
        The ServicePolicyEvent handler

        @param args:
        @param kwargs:
        @return:
        """
        log.debug('Service policy event received: %s', str(service_policy_event.__dict__))

        policy_id = service_policy_event.origin
        service_name = service_policy_event.service_name
        service_op = service_policy_event.op
        delete_policy = (service_policy_event.sub_type == 'DeletePolicy')

        if service_name:
            if self.container.proc_manager.is_local_service_process(service_name):
                self.update_service_access_policy(service_name, service_op, delete_policy=delete_policy)
            elif self.container.proc_manager.is_local_agent_process(service_name):
                self.update_service_access_policy(service_name, service_op, delete_policy=delete_policy)

        else:
            self.update_common_service_access_policy()


    def reset_policy_cache(self):
        """
        The function to empty and reload the container's policy caches

        @return:
        """
        log.info('Resetting policy cache')

        # First remove all cached policies and precondition functions that are not hard-wired
        self._reset_container_policy_caches()

        # Then load the common service access policies since they are shared across services
        self.update_common_service_access_policy()

        # Now iterate over the processes running in the container and reload their policies
        proc_list = self.container.proc_manager.list_local_processes()
        for proc in proc_list:
            self.update_container_policies(proc)

        self._log_policy_update("reset_policy_cache")


    def _reset_container_policy_caches(self):
        self.policy_decision_point_manager.clear_policy_cache()
        self.unregister_all_process_policy_preconditions()

    def _get_policy_snapshot(self):
        policy_snap = {}
        policy_snap["snap_ts"] = get_ion_ts()

        policies = self.get_active_policies()
        common_list = []
        policy_snap["common_pdp"] = common_list
        common_policy = policies.get("common_service_access", None)
        if common_policy is not None:
            for rule in common_policy.policy.rules:
                rule_dict = dict(id=rule.id, description=rule.description, effect=rule.effect.value)
                common_list.append(rule_dict)

        service_dict = {}
        policy_snap["service_pdp"] = service_dict
        for (svc_name, sp) in policies.get("service_access", {}).iteritems():
            for rule in sp.policy.rules:
                if svc_name not in service_dict:
                    service_dict[svc_name] = []
                rule_dict = dict(id=rule.id, description=rule.description, effect=rule.effect.value)
                service_dict[svc_name].append(rule_dict)

        service_pre_dict = {}
        policy_snap["service_precondition"] = service_pre_dict
        for (svc_name, sp) in policies.get("service_operation", {}).iteritems():
            for op, f in sp.iteritems():
                if svc_name not in service_pre_dict:
                    service_pre_dict[svc_name] = []
                service_pre_dict[svc_name].append(op)

        resource_dict = {}
        policy_snap["resource_pdp"] = resource_dict
        for (res_name, sp) in policies.get("resource_access", {}).iteritems():
            for rule in sp.policy.rules:
                if res_name not in resource_dict:
                    resource_dict[res_name] = []
                rule_dict = dict(id=rule.id, description=rule.description, effect=rule.effect.value)
                resource_dict[res_name].append(rule_dict)

        return policy_snap
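
    # Shape of the snapshot produced above (ids and names are illustrative):
    #   {"snap_ts": "...",
    #    "common_pdp": [{"id": ..., "description": ..., "effect": ...}],
    #    "service_pdp": {svc_name: [rule_dict, ...]},
    #    "service_precondition": {svc_name: [op_name, ...]},
    #    "resource_pdp": {res_id: [rule_dict, ...]}}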

    def _log_policy_update(self, update_type=None, message=None, event=None, process=None):
        policy_update_dict = {}
        policy_update_dict["update_ts"] = get_ion_ts()
        policy_update_dict["update_type"] = update_type or ""
        policy_update_dict["message"] = message or ""
        if event:
            policy_update_dict["event._id"] = getattr(event, "_id", "")
            policy_update_dict["event.ts_created"] = getattr(event, "ts_created", "")
            policy_update_dict["event.type_"] = getattr(event, "type_", "")
            policy_update_dict["event.sub_type"] = getattr(event, "sub_type", "")
        if process:
            policy_update_dict["proc._proc_name"] = getattr(process, "_proc_name", "")
            policy_update_dict["proc.name"] = getattr(process, "name", "")
            policy_update_dict["proc._proc_listen_name"] = getattr(process, "_proc_listen_name", "")
            policy_update_dict["proc.resource_type"] = getattr(process, "resource_type", "")
            policy_update_dict["proc.resource_id"] = getattr(process, "resource_id", "")
        any_change = False   # Change is detected only by number/ids of policies, not content
        snapshot = self._policy_snapshot
        policy_now = self._get_policy_snapshot()
        # Comparison of snapshot to current policy
        try:
            def compare_policy(pol_cur, pol_snap, key, res):
                pol_cur_set = {d["id"] if isinstance(d, dict) else d for d in pol_cur}
                pol_snap_set = {d["id"] if isinstance(d, dict) else d for d in pol_snap}
                if pol_cur_set != pol_snap_set:
                    policy_update_dict["snap.%s.%s.added" % (key, res)] = pol_cur_set - pol_snap_set
                    policy_update_dict["snap.%s.%s.removed" % (key, res)] = pol_snap_set - pol_cur_set
                    log.debug("Policy changed for %s.%s: %s vs %s" % (key, res, pol_cur_set, pol_snap_set))
                    return True
                return False
            policy_update_dict["snap.snap_ts"] = snapshot["snap_ts"]
            for key in ("common_pdp", "service_pdp", "service_precondition", "resource_pdp"):
                pol_snap = snapshot[key]
                pol_cur = policy_now[key]
                if isinstance(pol_cur, dict):
                    for res in pol_cur.keys():
                        pol_list = pol_cur[res]
                        snap_list = pol_snap.get(res, [])
                        any_change = compare_policy(pol_list, snap_list, key, res) or any_change
                elif isinstance(pol_cur, list):
                    any_change = compare_policy(pol_cur, pol_snap, key, "common") or any_change

            policy_update_dict["snap.policy_changed"] = str(any_change)
        except Exception as ex:
            log.warn("Cannot compare current policy to prior snapshot", exc_info=True)

        self._policy_update_log.append(policy_update_dict)
        self._policy_update_log = self._policy_update_log[-100:]
        self._policy_snapshot = policy_now

        log.info("Policy update logged. Type=%s, message=%s, changed=%s" % (update_type, message, any_change))

    def update_container_policies(self, process_instance, safe_mode=False):
        """
        Load any applicable process policies. To be called by the container proc manager after
        registering a new process.
        @param process_instance  The ION process for which to load policy
        @param safe_mode  If True, will not attempt to read policy if Policy MS not available
        """

        # This method can be called before policy management service is available during system startup
        if safe_mode and not self._is_policy_management_service_available():
            if not is_testing() and (process_instance.name not in (
                "resource_registry", "system_management", "directory", "identity_management") and
                process_instance._proc_name != "event_persister"):
                # We are in the early phases of bootstrapping
                log.warn("update_container_policies(%s) - No update. Policy MS not available" % process_instance._proc_name)

            self._log_policy_update("update_container_policies",
                                    message="No update. Policy MS not available",
                                    process=process_instance)
            return

        # The system actor may not exist yet at container start, so look it up lazily here
        if self.system_actor_id is None:
            system_actor = get_system_actor()
            if system_actor is not None:
                self.system_actor_id = system_actor._id
                self.system_actor_user_header = get_system_actor_header()

        if process_instance._proc_type == SERVICE_PROCESS_TYPE:
            # look to load any existing policies for this service

            self.update_service_access_policy(process_instance._proc_listen_name)

        elif process_instance._proc_type == AGENT_PROCESS_TYPE:
            # look to load any existing policies for this agent service
            if process_instance.resource_type is None:
                self.update_service_access_policy(process_instance.name)
            else:
                self.update_service_access_policy(process_instance.resource_type)

            if process_instance.resource_id:
                # look to load any existing policies for this resource
                self.update_resource_access_policy(process_instance.resource_id)

        self._log_policy_update("update_container_policies",
                                message="Updated",
                                process=process_instance)


    def update_resource_access_policy(self, resource_id, delete_policy=False):

        if self.policy_decision_point_manager is not None:

            try:
                policy_rules = self.policy_client.get_active_resource_access_policy_rules(resource_id, headers=self.system_actor_user_header)
                self.policy_decision_point_manager.load_resource_policy_rules(resource_id, policy_rules)

            except Exception, e:
                # If the resource does not exist, just ignore it - but log a warning.
                log.warn("The resource %s is not found or there was an error applying access policy: %s" % (resource_id, e.message))
Ejemplo n.º 12
0
    def test_pub_on_different_subsubtypes(self):
        res_list = [DotDict(ar=event.AsyncResult(), gq=queue.Queue(), count=0) for i in xrange(4)]

        def cb_gen(num):
            def cb(event, *args, **kwargs):
                res_list[num].count += 1
                res_list[num].gq.put(event)
                if event.description == "end":
                    res_list[num].ar.set()
            return cb

        sub0 = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="st1.*", callback=cb_gen(0))
        sub0.start()

        sub1 = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="st1.a", callback=cb_gen(1))
        sub1.start()

        sub2 = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="*.a", callback=cb_gen(2))
        sub2.start()

        sub3 = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="st1", callback=cb_gen(3))
        sub3.start()

        pub1 = EventPublisher(event_type="ResourceModifiedEvent")

        pub1.publish_event(origin="one", sub_type="st1.a", description="1")
        pub1.publish_event(origin="two", sub_type="st1", description="2")
        pub1.publish_event(origin="three", sub_type="st1.b", description="3")

        pub1.publish_event(origin="four", sub_type="st2.a", description="4")
        pub1.publish_event(origin="five", sub_type="st2", description="5")

        pub1.publish_event(origin="six", sub_type="a", description="6")
        pub1.publish_event(origin="seven", sub_type="", description="7")

        pub1.publish_event(origin="end", sub_type="st1.a", description="end")
        pub1.publish_event(origin="end", sub_type="st1", description="end")

        [res_list[i].ar.get(timeout=5) for i in xrange(3)]

        sub0.stop()
        sub1.stop()
        sub2.stop()
        sub3.stop()

        for i in xrange(4):
            res_list[i].res = []
            for x in xrange(res_list[i].count):
                res_list[i].res.append(res_list[i].gq.get(timeout=5))

        self.assertEquals(len(res_list[0].res), 3)
        self.assertEquals(res_list[0].res[0].description, "1")

        self.assertEquals(len(res_list[1].res), 2)
        self.assertEquals(res_list[1].res[0].description, "1")

        self.assertEquals(len(res_list[2].res), 3)
        self.assertEquals(res_list[2].res[0].description, "1")

        self.assertEquals(len(res_list[3].res), 2)
        self.assertEquals(res_list[3].res[0].description, "2")
Ejemplo n.º 13
0
class TransformPrime(TransformDataProcess):
    binding = ['output']
    '''
    Transforms which have an incoming stream and an outgoing stream.

    Parameters:
      process.stream_id      Outgoing stream identifier.
      process.exchange_point Route's exchange point.
      process.routing_key    Route's routing key.
      process.queue_name     Name of the queue to listen on.
      process.routes         Mapping of (stream_input_id, stream_output_id) to actor for each route
    Either the stream_id or both the exchange_point and routing_key need to be provided.
    '''    
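    # A hedged example of the spawn config read below; all ids and names are
    # made up for illustration:
    #
    #   process:
    #     queue_name: 'transform_prime_queue'
    #     routes: {('in_stream_id', 'out_stream_id'): None}   # None -> run the transform
    #     input_products: ['in_data_product_id']
    #     output_products: ['out_data_product_id']
    #     lookup_docs: ['calibration_doc_key']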
    def on_start(self):
        TransformDataProcess.on_start(self)
        self.pubsub_management = PubsubManagementServiceProcessClient(process=self)
        self.stored_values = StoredValueManager(self.container)
        self.input_data_product_ids = self.CFG.get_safe('process.input_products', [])
        self.output_data_product_ids = self.CFG.get_safe('process.output_products', [])
        self.lookup_docs = self.CFG.get_safe('process.lookup_docs', [])
        self.new_lookups = Queue()
        self.lookup_monitor = EventSubscriber(event_type=OT.ExternalReferencesUpdatedEvent, callback=self._add_lookups, auto_delete=True)
        self.lookup_monitor.start()

    def on_quit(self):
        self.lookup_monitor.stop()
        TransformDataProcess.on_quit(self)

    def _add_lookups(self, event, *args, **kwargs):
        if event.origin in self.input_data_product_ids + self.output_data_product_ids:
            if isinstance(event.reference_keys, list):
                self.new_lookups.put(event.reference_keys)


    @memoize_lru(100)
    def read_stream_def(self, stream_id):
        return self.pubsub_management.read_stream_definition(stream_id=stream_id)

    
    def recv_packet(self, msg, stream_route, stream_id):
        process_routes = self.CFG.get_safe('process.routes', {})
        for stream_in_id, routes in process_routes.iteritems():
            if stream_id == stream_in_id:
                for stream_out_id, actor in routes.iteritems():
                    if actor is None:
                        rdt_out = self._execute_transform(msg, (stream_in_id, stream_out_id))
                        self.publish(rdt_out.to_granule(), stream_out_id)
                    else:
                        outgoing = self._execute_actor(msg, actor, (stream_in_id, stream_out_id))
                        self.publish(outgoing, stream_out_id)

    def publish(self, msg, stream_out_id):
        publisher = getattr(self, stream_out_id)
        publisher.publish(msg)

    def _load_actor(self, actor):
        '''
        Returns callable execute method if it exists, otherwise it raises a BadRequest
        '''
        try:
            module = __import__(actor['module'], fromlist=[''])
        except ImportError:
            log.exception('Actor could not be loaded')
            raise
        try:
            cls = getattr(module, actor['class'])
        except AttributeError:
            log.exception('Module %s does not have class %s', repr(module), actor['class'])
            raise
        try:
            execute = getattr(cls, 'execute')
        except AttributeError:
            log.exception('Actor class does not contain execute method')
            raise
        return execute
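
    # _load_actor expects a dict like the following; module and class names
    # are hypothetical:
    #   {'module': 'mypackage.transforms.example_actor', 'class': 'ExampleActor'}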

   
    def _execute_actor(self, msg, actor, streams):
        stream_in_id, stream_out_id = streams
        stream_def_out = self.read_stream_def(stream_out_id)
        params = self.CFG.get_safe('process.params', {})
        config = self.CFG.get_safe('process')
        # Run the actor's execute method against the incoming message
        params['stream_def'] = stream_def_out._id
        executor = self._load_actor(actor)
        try:
            rdt_out = executor(msg, None, config, params, None)
        except Exception:
            log.exception('Error running actor for %s', self.id)
            raise
        return rdt_out

    def _merge_pdicts(self, pdict1, pdict2):
        incoming_pdict = ParameterDictionary.load(pdict1)
        outgoing_pdict = ParameterDictionary.load(pdict2)
        
        merged_pdict = ParameterDictionary()
        for k, v in incoming_pdict.iteritems():
            ordinal, v = v
            if k not in merged_pdict:
                merged_pdict.add_context(v)
        for k, v in outgoing_pdict.iteritems():
            ordinal, v = v
            if k not in merged_pdict:
                merged_pdict.add_context(v)
        return merged_pdict

    def _merge_rdt(self, stream_def_in, stream_def_out):
        incoming_pdict_dump = stream_def_in.parameter_dictionary
        outgoing_pdict_dump = stream_def_out.parameter_dictionary

        merged_pdict = self._merge_pdicts(incoming_pdict_dump, outgoing_pdict_dump)
        rdt_temp = RecordDictionaryTool(param_dictionary=merged_pdict)
        return rdt_temp


    def _get_lookup_value(self, lookup_value):
        if not self.new_lookups.empty():
            new_values = self.new_lookups.get()
            self.lookup_docs = new_values + self.lookup_docs

        lookup_value_document_keys = self.lookup_docs
        for key in lookup_value_document_keys:
            try:
                document = self.stored_values.read_value(key)
                if lookup_value in document:
                    return document[lookup_value]
            except NotFound:
                log.warning('Specified lookup document does not exist')

        return None
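
    # A lookup document, as assumed here, is a stored dict keyed by lookup
    # value, e.g. self.stored_values.read_value('calibration_doc_key') might
    # return {'temp_offset': 0.25} (key and field names are illustrative).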

    def _execute_transform(self, msg, streams):
        stream_in_id, stream_out_id = streams
        stream_def_in = self.read_stream_def(stream_in_id)
        stream_def_out = self.read_stream_def(stream_out_id)

        rdt_temp = self._merge_rdt(stream_def_in, stream_def_out)
        
        rdt_in = RecordDictionaryTool.load_from_granule(msg)
        for field in rdt_temp.fields:
            if not isinstance(rdt_temp._pdict.get_context(field).param_type, ParameterFunctionType):
                try:
                    rdt_temp[field] = rdt_in[field]
                except KeyError:
                    pass

        rdt_temp.fetch_lookup_values()

        for lookup_field in rdt_temp.lookup_values():
            s = lookup_field
            stored_value = self._get_lookup_value(rdt_temp.context(s).lookup_value)
            if stored_value is not None:
                rdt_temp[s] = stored_value
        
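        # Re-assigning a ParameterFunctionType field to itself presumably forces
        # the parameter function to evaluate now that its inputs are populated
        # (an assumption about RecordDictionaryTool behavior, not confirmed here).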
        for field in rdt_temp.fields:
            if isinstance(rdt_temp._pdict.get_context(field).param_type, ParameterFunctionType):
                rdt_temp[field] = rdt_temp[field]

        
        rdt_out = RecordDictionaryTool(stream_definition_id=stream_def_out._id)

        for field in rdt_out.fields:
            rdt_out[field] = rdt_temp[field]
        
        return rdt_out 
Ejemplo n.º 14
0
    def test_pub_on_different_subsubtypes(self):
        res_list = [
            DotDict(ar=event.AsyncResult(), gq=queue.Queue(), count=0)
            for i in xrange(4)
        ]

        def cb_gen(num):
            def cb(event, *args, **kwargs):
                res_list[num].count += 1
                res_list[num].gq.put(event)
                if event.description == "end":
                    res_list[num].ar.set()

            return cb

        sub0 = EventSubscriber(event_type="ResourceModifiedEvent",
                               sub_type="st1.*",
                               callback=cb_gen(0))
        sub0.start()

        sub1 = EventSubscriber(event_type="ResourceModifiedEvent",
                               sub_type="st1.a",
                               callback=cb_gen(1))
        sub1.start()

        sub2 = EventSubscriber(event_type="ResourceModifiedEvent",
                               sub_type="*.a",
                               callback=cb_gen(2))
        sub2.start()

        sub3 = EventSubscriber(event_type="ResourceModifiedEvent",
                               sub_type="st1",
                               callback=cb_gen(3))
        sub3.start()

        pub1 = EventPublisher(event_type="ResourceModifiedEvent")

        pub1.publish_event(origin="one", sub_type="st1.a", description="1")
        pub1.publish_event(origin="two", sub_type="st1", description="2")
        pub1.publish_event(origin="three", sub_type="st1.b", description="3")

        pub1.publish_event(origin="four", sub_type="st2.a", description="4")
        pub1.publish_event(origin="five", sub_type="st2", description="5")

        pub1.publish_event(origin="six", sub_type="a", description="6")
        pub1.publish_event(origin="seven", sub_type="", description="7")

        pub1.publish_event(origin="end", sub_type="st1.a", description="end")
        pub1.publish_event(origin="end", sub_type="st1", description="end")

        [res_list[i].ar.get(timeout=5) for i in xrange(3)]

        sub0.stop()
        sub1.stop()
        sub2.stop()
        sub3.stop()

        for i in xrange(4):
            res_list[i].res = []
            for x in xrange(res_list[i].count):
                res_list[i].res.append(res_list[i].gq.get(timeout=5))

        self.assertEquals(len(res_list[0].res), 3)
        self.assertEquals(res_list[0].res[0].description, "1")

        self.assertEquals(len(res_list[1].res), 2)
        self.assertEquals(res_list[1].res[0].description, "1")

        self.assertEquals(len(res_list[2].res), 3)
        self.assertEquals(res_list[2].res[0].description, "1")

        self.assertEquals(len(res_list[3].res), 2)
        self.assertEquals(res_list[3].res[0].description, "2")
Ejemplo n.º 15
0
class ContainerManager(object):
    def __init__(self, container, handlers=DEFAULT_HANDLERS):
        self.container = container
        self.running = False
        # make sure start() completes before an event is handled,
        # and any event is either handled before stop() begins,
        # or the handler begins after stop() completes and the event is dropped
        self.lock = Lock()
        self.handlers = handlers[:]

    def start(self):
        # Install the container tracer (could be its own component)
        self.container_tracer = ContainerTracer()
        self.container_tracer.start_tracing()
        self.container.tracer = CallTracer
        self.container.tracer.configure(CFG.get_safe("container.tracer", {}))

        # Create the queue listener and publisher
        self.sender = EventPublisher(event_type="ContainerManagementResult")
        self.receiver = EventSubscriber(event_type="ContainerManagementRequest", callback=self._receive_event)
        with self.lock:
            self.running = True
            self.receiver.start()
        log.info('ready for container management requests')

    def stop(self):
        log.debug('container management stopping')
        with self.lock:
            self.receiver.stop()
            self.sender.close()
            self.running = False
        log.debug('container management stopped')

        self.container_tracer.stop_tracing()

    def add_handler(self, handler):
        self.handlers.append(handler)

    def _get_handlers(self, action):
        out = []
        for handler in self.handlers:
            if handler.can_handle_request(action):
                out.append(handler)
        return out

    def _receive_event(self, event, headers):
        with self.lock:
            if not isinstance(event, ContainerManagementRequest):
                log.trace('ignoring wrong type event: %r', event)
                return
            if not self.running:
                log.warn('ignoring admin message received after shutdown: %s', event.action)
                return
            predicate = ContainerSelector.from_object(event.predicate)
            if predicate.should_handle(self.container):
                log.trace('handling admin message: %s', event.action)
                self._perform_action(event.action)
            else:
                log.trace('ignoring admin action: %s', event.action)
                if SEND_RESULT_IF_NOT_SELECTED:
                    self.sender.publish_event(origin=self.container.id, action=event.action, outcome='not selected')
                    log.debug('received action: %s, outcome: not selected', event.action)

    def _perform_action(self, action):
        handlers = self._get_handlers(action)
        if not handlers:
            log.info('action accepted but no handlers found: %s', action)
            result = 'unhandled'
            self.sender.publish_event(origin=self.container.id, action=action, outcome=str(result))
            log.debug('received action: %s, outcome: %s', action, result)
        else:
            for handler in handlers:
                try:
                    result = handler.handle_request(action) or "completed"
                except Exception, e:
                    log.error("handler %r failed to perform action: %s", handler, action, exc_info=True)
                    result = e
                self.sender.publish_event(origin=self.container.id, action=action, outcome=str(result))
                log.debug('performed action: %s, outcome: %s', action, result)
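
    # A minimal request sketch against this manager; the action name and
    # predicate value are hypothetical, but the fields mirror those read above:
    #
    #   pub = EventPublisher(event_type="ContainerManagementRequest")
    #   pub.publish_event(origin="admin", action="reset_policy_cache",
    #                     predicate=some_container_selector)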
Ejemplo n.º 16
0
class EOIRegistrationProcess(SimpleProcess):

    def on_start(self):
        self.data_source_subscriber = EventSubscriber(event_type=OT.ResourceModifiedEvent,
                                                      origin_type=RT.DataSource,
                                                      callback=self._register_data_source)
        self.provider_subscriber = EventSubscriber(event_type=OT.ResourceModifiedEvent,
                                                      origin_type=RT.ExternalDataProvider,
                                                      callback=self._register_provider)
        self.data_source_subscriber.start()
        self.provider_subscriber.start()

        self.rr = self.container.resource_registry

        self.using_eoi_services = CFG.get_safe('eoi.meta.use_eoi_services', False)
        self.server = CFG.get_safe('eoi.importer_service.server', "localhost")+":"+str(CFG.get_safe('eoi.importer_service.port', 8844))

        log.info("Using geoservices="+str(self.using_eoi_services))
        if not self.using_eoi_services:
            log.warn("not using geoservices...") 

        self.importer_service_available = self.check_for_importer_service()
        if not self.importer_service_available:
            log.warn("not using importer service...")  

    def check_for_importer_service(self):
        '''
        Runs once at startup to determine whether the importer service is reachable
        '''        
        try:
            r = requests.get(self.server+'/service=alive&name=ooi&id=ooi')
            log.info("importer service available, status code: %s", str(r.status_code))
            # Alive service returned OK
            if r.status_code == 200:
                return True
            else:
                return False
        except Exception as e:
            # Service is genuinely unavailable
            log.warn("importer service is not available: %s", e)
            return False    


    def _register_data_source(self, event, *args, **kwargs):        
        '''
        Handles DataSource resource events; used to create a harvester
        '''
        if self.importer_service_available:
            obj = self.rr.read(event.origin)        
            data_fields = []
            for attrname, value in vars(obj).iteritems():           
                # Generate the param list to pass to the importer service using field names
                if attrname != "contact":
                    f = attrname.replace("_", "")+"="+str(obj[attrname])
                    data_fields.append(f)

            param_list = '&'.join(data_fields)

            request_string = self.server+'/service='+CREATE_HARVESTER+"&"+param_list            
            r = requests.get(request_string)


    def _register_provider(self, event, *args, **kwargs):
        if self.importer_service_available:
            #print "provider id:", event.origin
            pass
            

    def on_quit(self):
        self.data_source_subscriber.stop()
        self.provider_subscriber.stop()
Ejemplo n.º 17
0
class QCProcessor(SimpleProcess):
    def __init__(self):
        self.event = Event() # Synchronizes the thread
        self.timeout = 10

    def on_start(self):
        '''
        Process initialization
        '''
        self._thread = self._process.thread_manager.spawn(self.thread_loop)
        self._event_subscriber = EventSubscriber(event_type=OT.ResetQCEvent, callback=self.receive_event, auto_delete=True) # TODO Correct event types
        self._event_subscriber.start()
        self.timeout = self.CFG.get_safe('endpoint.receive.timeout', 10)
        self.resource_registry = self.container.resource_registry
        self.event_queue = Queue()

    def on_quit(self):
        '''
        Stop and cleanup the thread
        '''
        self._event_subscriber.stop()
        self.suspend()

    def receive_event(self, event, *args, **kwargs):
        log.error("Adding event to the event queue")
        self.event_queue.put(event)

    def thread_loop(self):
        '''
        Asynchronous event-loop
        '''
        threading.current_thread().name = '%s-qc-processor' % self.id
        while not self.event.wait(1):
            try:
                self.qc_processing_loop()
            except Exception:
                log.error("Error in QC Processing Loop", exc_info=True)
            try:
                self.event_processing_loop()
            except Exception:
                log.error("Error in QC Event Loop", exc_info=True)

    def qc_processing_loop(self):
        '''
        Iterates through available data products and evaluates QC
        '''
        data_products, _ = self.container.resource_registry.find_resources(restype=RT.DataProduct, id_only=False)
        for data_product in data_products:
            # Get the reference designator
            try:
                rd = self.get_reference_designator(data_product._id)
            except BadRequest:
                continue
            parameters = self.get_parameters(data_product)
            # Create a mapping of inputs to QC
            qc_mapping = {}

            # Creates a dictionary { data_product_name : parameter_name }
            for p in parameters:
                if p.ooi_short_name:
                    sname = p.ooi_short_name
                    g = re.match(r'([a-zA-Z-_]+)(_L[0-9])', sname)
                    if g:
                        sname = g.groups()[0]
                    qc_mapping[sname] = p.name

            for p in parameters:
                # for each parameter, if the name ends in _qc run the qc
                if p.name.endswith('_qc'):
                    self.run_qc(data_product, rd, p, qc_mapping)

            # Break early if we can
            if self.event.is_set(): 
                break

    def event_processing_loop(self):
        '''
        Processes the events in the event queue
        '''
        log.error("Processing event queue")
        self.event_queue.put(StopIteration)
        for event in self.event_queue:
            log.error("My event's reference designator: %s", event.origin)

    def suspend(self):
        '''
        Stops the event loop
        '''
        self.event.set()
        self._thread.join(self.timeout)
        log.info("QC Thread Suspended")


    def get_reference_designator(self, data_product_id=''):
        '''
        Returns the reference designator for a data product if it has one
        '''
        # First try to get the parent data product
        data_product_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasDataProductParent, id_only=True)
        if data_product_ids:
            return self.get_reference_designator(data_product_ids[0])

        device_ids, _ = self.resource_registry.find_subjects(object=data_product_id, predicate=PRED.hasOutputProduct, subject_type=RT.InstrumentDevice, id_only=True)
        if not device_ids: 
            raise BadRequest("No instrument device associated with this data product")
        device_id = device_ids[0]

        sites, _ = self.resource_registry.find_subjects(object=device_id, predicate=PRED.hasDevice, subject_type=RT.InstrumentSite, id_only=False)
        if not sites:
            raise BadRequest("No site is associated with this data product")
        site = sites[0]
        rd = site.reference_designator
        return rd

    def run_qc(self, data_product, reference_designator, parameter, qc_mapping):
        '''
        Determines which algorithm the parameter should run, then evaluates the QC
        '''

        # We key off of the OOI Short Name
        # DATAPRD_ALGRTHM_QC
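        # e.g. 'TEMPWAT_GLBLRNG_QC' -> ('TEMPWAT', 'GLBLRNG', 'QC')  (illustrative)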
        dp_ident, alg, qc = parameter.ooi_short_name.split('_')
        if dp_ident not in qc_mapping:
            return # No input!
        input_name = qc_mapping[dp_ident]

        try:
            doc = self.container.object_store.read_doc(reference_designator)
        except NotFound:
            return # NO QC lookups found
        if dp_ident not in doc:
            log.critical("Data product %s not in doc", dp_ident)
            return # No data product of this listing in the RD's entry
        # Lookup table has the rows for the QC inputs
        lookup_table = doc[dp_ident]

        # An instance of the coverage is loaded if we need to run an algorithm
        dataset_id = self.get_dataset(data_product)
        coverage = self.get_coverage(dataset_id)
        if not coverage.num_timesteps: # No data = no qc
            coverage.close()
            return

        try:
            # Get the lookup table info then run
            if alg.lower() == 'glblrng':
                row = self.recent_row(lookup_table['global_range'])
                min_value = row['min_value']
                max_value = row['max_value']
                self.process_glblrng(coverage, parameter, input_name, min_value, max_value)

            elif alg.lower() == 'stuckvl':
                row = self.recent_row(lookup_table['stuck_value'])
                resolution = row['resolution']
                N = row['consecutive_values']
                self.process_stuck_value(coverage, parameter, input_name, resolution, N)

            elif alg.lower() == 'trndtst':
                row = self.recent_row(lookup_table['trend_test'])
                ord_n = row['polynomial_order']
                nstd = row['standard_deviation']
                self.process_trend_test(coverage, parameter, input_name, ord_n, nstd)

            elif alg.lower() == 'spketst':
                row = self.recent_row(lookup_table['spike_test'])
                acc = row['accuracy']
                N = row['range_multiplier']
                L = row['window_length']
                self.process_spike_test(coverage, parameter, input_name, acc, N, L)

            elif alg.lower() == "gradtst":
                row = self.recent_row(lookup_table["gradient_test"])
                ddatdx = row["ddatdx"]
                mindx = row["mindx"]
                startdat = row["startdat"]
                if isinstance(startdat, basestring) and not startdat:
                    startdat = np.nan
                if isinstance(mindx, basestring) and not mindx:
                    mindx = np.nan
                toldat = row["toldat"]
                self.process_gradient_test(coverage, parameter, input_name, ddatdx, mindx, startdat, toldat)

            elif alg.lower() == 'loclrng':
                pass

        except KeyError: # No lookup table
            self.set_error(coverage, parameter)


        finally:
            coverage.close()

    def set_error(self, coverage, parameter):
        log.error("setting coverage parameter %s to -99", parameter.name)

    def process_glblrng(self, coverage, parameter, input_name, min_value, max_value):
        '''
        Evaluates the QC for global range for all data values that equal -88 (not yet evaluated)
        '''
        log.error("input name: %s", input_name)
        log.info("Num timesteps: %s", coverage.num_timesteps)

        # Get all of the QC values, and find where -88 is set (uninitialized)
        qc_array = coverage.get_parameter_values(parameter.name)
        indexes = np.where(qc_array == -88)[0]

        # Now build a variable, but I need to keep track of the time where the data goes
        time_array = coverage.get_parameter_values(coverage.temporal_parameter_name)[indexes]
        value_array = coverage.get_parameter_values(input_name)[indexes]

        from ion_functions.qc.qc_functions import dataqc_globalrangetest
        qc = dataqc_globalrangetest(value_array, [min_value, max_value])
        return_dictionary = {
                coverage.temporal_parameter_name : time_array,
                parameter.name : qc
        }
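        # NOTE: return_dictionary is assembled but never persisted here;
        # presumably the QC results would be written back to the coverage.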


    def process_stuck_value(self, coverage, parameter, input_name, resolution, N):
        '''
        Evaluates the QC for stuck value for all data values that equal -88 (not yet evaluated)
        '''
        # Get all of the QC values and find out where -88 is set
        qc_array = coverage.get_parameter_values(parameter.name)
        indexes = np.where(qc_array == -88)[0]

        # Horribly inefficient...
        from ion_functions.qc.qc_functions import dataqc_stuckvaluetest_wrapper
        value_array = coverage.get_parameter_values(input_name)
        qc_array = dataqc_stuckvaluetest_wrapper(value_array, resolution, N)
        qc_array = qc_array[indexes]
        time_array = coverage.get_parameter_values(coverage.temporal_parameter_name)[indexes]

        return_dictionary = {
                coverage.temporal_parameter_name : time_array,
                parameter.name : qc_array
        }


    def process_trend_test(self, coverage, parameter, input_name, ord_n, nstd):
        '''
        Evaluates the QC for trend test for all data values that equal -88 (not yet evaluated)
        '''
        # Get all of the QC values and find out where -88 is set
        qc_array = coverage.get_parameter_values(parameter.name)
        indexes = np.where(qc_array == -88)[0]

        from ion_functions.qc.qc_functions import dataqc_polytrendtest_wrapper
        time_array = coverage.get_parameter_values(coverage.temporal_parameter_name)
        value_array = coverage.get_parameter_values(input_name)

        qc_array = dataqc_polytrendtest_wrapper(value_array, time_array, ord_n, nstd)
        qc_array = qc_array[indexes]
        return_dictionary = {
                coverage.temporal_parameter_name : time_array[indexes],
                parameter.name : qc_array
        }

    def process_spike_test(self, coverage, parameter, input_name, acc, N, L):
        '''
        Evaluates the QC for spike test for all data values that equal -88 (not yet evaluated)
        '''
        # Get all of the QC values and find out where -88 is set
        qc_array = coverage.get_parameter_values(parameter.name)
        indexes = np.where(qc_array == -88)[0]

        from ion_functions.qc.qc_functions import dataqc_spiketest_wrapper
        value_array = coverage.get_parameter_values(input_name)
        qc_array = dataqc_spiketest_wrapper(value_array, acc, N, L)
        qc_array = qc_array[indexes]
        time_array = coverage.get_parameter_values(coverage.temporal_parameter_name)[indexes]
        return_dictionary = {
                coverage.temporal_parameter_name : time_array,
                parameter.name : qc_array
        }

    def process_gradient_test(self, coverage, parameter, input_name, ddatdx, mindx, startdat, toldat):
        qc_array = coverage.get_parameter_values(parameter.name)
        indexes = np.where(qc_array == -88)[0]

        from ion_functions.qc.qc_functions import dataqc_gradienttest_wrapper
        value_array = coverage.get_parameter_values(input_name)
        time_array = coverage.get_parameter_values(coverage.temporal_parameter_name)
        
        qc_array = dataqc_gradienttest_wrapper(value_array, time_array, ddatdx, mindx, startdat, toldat)

        return_dictionary = {
                coverage.temporal_parameter_name : time_array[indexes],
                parameter.name : qc_array[indexes]
        }


    def process_local_range_test(self, coverage, parameter, input_name, datlim, datlimz):
        qc_array = coverage.get_parameter_values(parameter.name)
        indexes = np.where(qc_array == -88)[0]

        from ion_functions.qc.qc_functions import dataqc_localrangetest
        value_array = coverage.get_parameter_values(input_name)
        # TODO: z_parameter_name should come from the table column headings;
        # the multiple-axes case is not handled yet.
        z_parameter_name = None
        z_array = coverage.get_parameter_values(z_parameter_name)
        time_array = coverage.get_parameter_values(coverage.temporal_parameter_name)

        qc_array = dataqc_localrangetest(value_array, z_array, datlim, datlimz)
        return_dictionary = {
                coverage.temporal_parameter_name : time_array[indexes],
                parameter.name : qc_array[indexes]
        }





    def get_dataset(self, data_product):
        dataset_ids, _ = self.resource_registry.find_objects(data_product, PRED.hasDataset, id_only=True)
        if not dataset_ids:
            raise BadRequest("No Dataset")
        dataset_id = dataset_ids[0]
        return dataset_id

    def get_coverage(self, dataset_id):
        cov = DatasetManagementService._get_coverage(dataset_id, mode='r+')
        return cov

    def recent_row(self, rows):
        '''
        Determines the most recent data based on the timestamp
        '''
        most_recent = None
        ts = 0
        for row in rows:
            if row['ts_created'] > ts:
                most_recent = row
                ts = row['ts_created']
        return most_recent
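
    # e.g. recent_row([{'ts_created': 10, ...}, {'ts_created': 20, ...}])
    # returns the row with ts_created == 20 (rows shown are illustrative).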


    def get_parameters(self, data_product):
        '''
        Returns the relevant parameter contexts of the data product
        '''

        # DataProduct -> StreamDefinition
        stream_defs, _ = self.resource_registry.find_objects(data_product._id, PRED.hasStreamDefinition, id_only=False)
        stream_def = stream_defs[0]

        # StreamDefinition -> ParameterDictionary
        pdict_ids, _ = self.resource_registry.find_objects(stream_def._id, PRED.hasParameterDictionary, id_only=True)
        pdict_id = pdict_ids[0]

        # ParameterDictionary -> ParameterContext
        pctxts, _ = self.resource_registry.find_objects(pdict_id, PRED.hasParameterContext, id_only=False)
        relevant = [ctx for ctx in pctxts if not stream_def.available_fields or ctx.name in stream_def.available_fields]
        return relevant
Ejemplo n.º 18
0
class QCProcessor(SimpleProcess):
    def __init__(self):
        self.event = Event()  # Synchronizes the thread
        self.timeout = 10

    def on_start(self):
        '''
        Process initialization
        '''
        self._thread = self._process.thread_manager.spawn(self.thread_loop)
        self._event_subscriber = EventSubscriber(
            event_type=OT.ResetQCEvent,
            callback=self.receive_event,
            auto_delete=True)  # TODO Correct event types
        self._event_subscriber.start()
        self.timeout = self.CFG.get_safe('endpoint.receive.timeout', 10)
        self.resource_registry = self.container.resource_registry
        self.event_queue = Queue()

    def on_quit(self):
        '''
        Stop and cleanup the thread
        '''
        self._event_subscriber.stop()
        self.suspend()

    def receive_event(self, event, *args, **kwargs):
        log.error("Adding event to the event queue")
        self.event_queue.put(event)

    def thread_loop(self):
        '''
        Asynchronous event-loop
        '''
        threading.current_thread().name = '%s-qc-processor' % self.id
        while not self.event.wait(1):
            try:
                self.qc_processing_loop()
            except Exception:
                log.error("Error in QC Processing Loop", exc_info=True)
            try:
                self.event_processing_loop()
            except Exception:
                log.error("Error in QC Event Loop", exc_info=True)

    def qc_processing_loop(self):
        '''
        Iterates through available data products and evaluates QC
        '''
        data_products, _ = self.container.resource_registry.find_resources(
            restype=RT.DataProduct, id_only=False)
        for data_product in data_products:
            # Get the reference designator
            try:
                rd = self.get_reference_designator(data_product._id)
            except BadRequest:
                continue
            parameters = self.get_parameters(data_product)
            # Create a mapping of inputs to QC
            qc_mapping = {}

            # Creates a dictionary { data_product_name : parameter_name }
            for p in parameters:
                if p.ooi_short_name:
                    sname = p.ooi_short_name
                    g = re.match(r'([a-zA-Z-_]+)(_L[0-9])', sname)
                    if g:
                        sname = g.groups()[0]
                    qc_mapping[sname] = p.name

            for p in parameters:
                # for each parameter, if the name ends in _qc run the qc
                if p.name.endswith('_qc'):
                    self.run_qc(data_product, rd, p, qc_mapping, parameters)

            # Break early if we can
            if self.event.is_set():
                break

    def event_processing_loop(self):
        '''
        Processes the events in the event queue
        '''
        log.error("Processing event queue")
        self.event_queue.put(StopIteration)
        for event in self.event_queue:
            log.error("My event's reference designator: %s", event.origin)

    def suspend(self):
        '''
        Stops the event loop
        '''
        self.event.set()
        self._thread.join(self.timeout)
        log.info("QC Thread Suspended")

    def get_reference_designator(self, data_product_id=''):
        '''
        Returns the reference designator for a data product if it has one
        '''
        # First try to get the parent data product
        data_product_ids, _ = self.resource_registry.find_objects(
            subject=data_product_id,
            predicate=PRED.hasDataProductParent,
            id_only=True)
        if data_product_ids:
            return self.get_reference_designator(data_product_ids[0])

        device_ids, _ = self.resource_registry.find_subjects(
            object=data_product_id,
            predicate=PRED.hasOutputProduct,
            subject_type=RT.InstrumentDevice,
            id_only=True)
        if not device_ids:
            raise BadRequest(
                "No instrument device associated with this data product")
        device_id = device_ids[0]

        sites, _ = self.resource_registry.find_subjects(
            object=device_id,
            predicate=PRED.hasDevice,
            subject_type=RT.InstrumentSite,
            id_only=False)
        if not sites:
            raise BadRequest("No site is associated with this data product")
        site = sites[0]
        rd = site.reference_designator
        return rd

    def calibrated_candidates(self, data_product, parameter, qc_mapping,
                              parameters):
        '''
        Returns the highest-priority candidate parameter name to use as the input parameter
        '''

        # 1st Priority is *b_interp
        # 2nd Priority is *b_pd
        # 3rd Priority is input_name
        parameters = {p.name: p for p in parameters}

        dp_ident, alg, qc = parameter.ooi_short_name.split('_')
        input_name = qc_mapping[dp_ident]  # input_name is the third priority

        sname = parameters[input_name].ooi_short_name  # should be something like tempwat_l1

        interp = sname.lower() + 'b_interp'
        pd = sname.lower() + 'b_pd'

        print "1st priority:", interp  # 1st priority
        print "2nd priority:", pd  # 2nd priority
        print "3rd priority:", input_name  # 3rd priority

        if interp in parameters:
            return interp
        elif pd in parameters:
            return pd
        else:
            return input_name
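
    # e.g. if the input's ooi_short_name is 'tempwat_l1', the candidates checked
    # are 'tempwat_l1b_interp', then 'tempwat_l1b_pd', then the original
    # input_name (names illustrative).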

    def run_qc(self, data_product, reference_designator, parameter, qc_mapping,
               parameters):
        '''
        Determines which algorithm the parameter should run, then evaluates the QC

        data_product         - Data Product Resource
        reference_designator - reference designator string
        parameter            - parameter context resource
        qc_mapping           - a dictionary of { data_product_name : parameter_name }
        parameters           - list of parameter context resources for the data product
        '''

        # We key off of the OOI Short Name
        # DATAPRD_ALGRTHM_QC
        dp_ident, alg, qc = parameter.ooi_short_name.split('_')
        if dp_ident not in qc_mapping:
            return  # No input!
        input_name = self.calibrated_candidates(data_product, parameter,
                                                qc_mapping, parameters)

        try:
            doc = self.container.object_store.read_doc(reference_designator)
        except NotFound:
            return  # NO QC lookups found
        if dp_ident not in doc:
            log.critical("Data product %s not in doc", dp_ident)
            return  # No data product of this listing in the RD's entry
        # Lookup table has the rows for the QC inputs
        lookup_table = doc[dp_ident]

        # An instance of the coverage is loaded if we need to run an algorithm
        dataset_id = self.get_dataset(data_product)
        coverage = self.get_coverage(dataset_id)
        if not coverage.num_timesteps:  # No data = no qc
            coverage.close()
            return

        try:
            # Get the lookup table info then run
            if alg.lower() == 'glblrng':
                row = self.recent_row(lookup_table['global_range'])
                min_value = row['min_value']
                max_value = row['max_value']
                self.process_glblrng(coverage, parameter, input_name,
                                     min_value, max_value)

            elif alg.lower() == 'stuckvl':
                row = self.recent_row(lookup_table['stuck_value'])
                resolution = row['resolution']
                N = row['consecutive_values']
                self.process_stuck_value(coverage, parameter, input_name,
                                         resolution, N)

            elif alg.lower() == 'trndtst':
                row = self.recent_row(lookup_table['trend_test'])
                ord_n = row['polynomial_order']
                nstd = row['standard_deviation']
                self.process_trend_test(coverage, parameter, input_name, ord_n,
                                        nstd)

            elif alg.lower() == 'spketst':
                row = self.recent_row(lookup_table['spike_test'])
                acc = row['accuracy']
                N = row['range_multiplier']
                L = row['window_length']
                self.process_spike_test(coverage, parameter, input_name, acc,
                                        N, L)

            elif alg.lower() == "gradtst":
                row = self.recent_row(lookup_table["gradient_test"])
                ddatdx = row["ddatdx"]
                mindx = row["mindx"]
                startdat = row["startdat"]
                if isinstance(startdat, basestring) and not startdat:
                    startdat = np.nan
                if isinstance(mindx, basestring) and not mindx:
                    mindx = np.nan
                toldat = row["toldat"]
                self.process_gradient_test(coverage, parameter, input_name,
                                           ddatdx, mindx, startdat, toldat)

            elif alg.lower() == 'loclrng':
                row = self.recent_row(lookup_table["local_range"])
                table = row['table']
                dims = []
                datlimz = []
                for key in table.iterkeys():
                    # Skip the datlims
                    if 'datlim' in key:
                        continue
                    dims.append(key)
                    datlimz.append(table[key])

                datlimz = np.column_stack(datlimz)
                datlim = np.column_stack([table['datlim1'], table['datlim2']])
                self.process_local_range_test(coverage, parameter, input_name,
                                              datlim, datlimz, dims)

        except KeyError:  # No lookup table
            self.set_error(coverage, parameter)

        finally:
            coverage.close()

    def set_error(self, coverage, parameter):
        log.error("setting coverage parameter %s to -99", parameter.name)

    def get_parameter_values(self, coverage, name):
        array = coverage.get_parameter_values(
            [name], fill_empty_params=True).get_data()[name]
        return array

    def process_glblrng(self, coverage, parameter, input_name, min_value,
                        max_value):
        '''
        Evaluates the global-range QC wherever the QC flag is still -88 (not yet evaluated)
        '''
        log.error("input name: %s", input_name)
        log.info("Num timesteps: %s", coverage.num_timesteps)

        # Get all of the QC values, and find where -88 is set (uninitialized)
        qc_array = self.get_parameter_values(coverage, parameter.name)
        indexes = np.where(qc_array == -88)[0]

        # Subset the time and data arrays so each QC flag stays paired with its timestamp
        time_array = self.get_parameter_values(
            coverage, coverage.temporal_parameter_name)[indexes]
        value_array = self.get_parameter_values(coverage, input_name)[indexes]

        from ion_functions.qc.qc_functions import dataqc_globalrangetest
        qc = dataqc_globalrangetest(value_array, [min_value, max_value])
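        # Pair each flag with its timestamp; note that nothing writes this
        # dictionary back into the coverage yet (see the log message at the
        # end of process_local_range_test)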
        return_dictionary = {
            coverage.temporal_parameter_name: time_array,
            parameter.name: qc
        }
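
    # Illustration (assuming the ion_functions convention of 1 = pass,
    # 0 = fail) of what dataqc_globalrangetest computes:
    #
    #     >>> import numpy as np
    #     >>> from ion_functions.qc.qc_functions import dataqc_globalrangetest
    #     >>> dataqc_globalrangetest(np.array([1., 5., 20.]), [0., 10.])
    #     array([1, 1, 0], dtype=int8)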

    def process_stuck_value(self, coverage, parameter, input_name, resolution,
                            N):
        '''
        Evaluates the stuck-value QC wherever the QC flag is still -88 (not yet evaluated)
        '''
        # Get all of the QC values and find where -88 is set
        qc_array = self.get_parameter_values(coverage, parameter.name)
        indexes = np.where(qc_array == -88)[0]

        # Horribly inefficient: the stuck-value test looks at runs of
        # consecutive samples, so evaluate the full series and then keep only
        # the flags at the not-yet-evaluated indexes
        from ion_functions.qc.qc_functions import dataqc_stuckvaluetest_wrapper
        value_array = self.get_parameter_values(coverage, input_name)
        qc_array = dataqc_stuckvaluetest_wrapper(value_array, resolution, N)
        qc_array = qc_array[indexes]
        time_array = self.get_parameter_values(
            coverage, coverage.temporal_parameter_name)[indexes]

        return_dictionary = {
            coverage.temporal_parameter_name: time_array,
            parameter.name: qc_array
        }
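
    # Roughly: a sample is flagged when it sits in a run of N or more
    # consecutive values that agree to within `resolution`; see the OOI
    # stuck-value DPS for the exact criterion.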

    def process_trend_test(self, coverage, parameter, input_name, ord_n, nstd):
        '''
        Evaluates the trend-test QC wherever the QC flag is still -88 (not yet evaluated)
        '''
        # Get all of the QC values and find where -88 is set
        qc_array = self.get_parameter_values(coverage, parameter.name)
        indexes = np.where(qc_array == -88)[0]

        from ion_functions.qc.qc_functions import dataqc_polytrendtest_wrapper
        # The trend test fits a polynomial over the whole series, so evaluate
        # the full record and then keep the flags at the unevaluated indexes
        time_array = self.get_parameter_values(
            coverage, coverage.temporal_parameter_name)
        value_array = self.get_parameter_values(coverage, input_name)

        qc_array = dataqc_polytrendtest_wrapper(value_array, time_array, ord_n,
                                                nstd)
        qc_array = qc_array[indexes]
        time_array = time_array[indexes]
        return_dictionary = {
            coverage.temporal_parameter_name: time_array,
            parameter.name: qc_array
        }
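
    # Roughly: a polynomial of order ord_n is fit against time, and the
    # record is flagged when the fitted trend dominates the residual scatter
    # by more than nstd standard deviations; see the OOI trend-test DPS.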

    def process_spike_test(self, coverage, parameter, input_name, acc, N, L):
        '''
        Evaluates the spike-test QC wherever the QC flag is still -88 (not yet evaluated)
        '''
        # Get all of the QC values and find where -88 is set
        qc_array = self.get_parameter_values(coverage, parameter.name)
        indexes = np.where(qc_array == -88)[0]

        from ion_functions.qc.qc_functions import dataqc_spiketest_wrapper
        # The spike test uses a sliding window of neighbors, so evaluate the
        # full series and then keep the flags at the unevaluated indexes
        value_array = self.get_parameter_values(coverage, input_name)
        qc_array = dataqc_spiketest_wrapper(value_array, acc, N, L)
        qc_array = qc_array[indexes]
        time_array = self.get_parameter_values(
            coverage, coverage.temporal_parameter_name)[indexes]
        return_dictionary = {
            coverage.temporal_parameter_name: time_array,
            parameter.name: qc_array
        }
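
    # Roughly: each point is compared against its L-point neighborhood and
    # flagged when it deviates by more than a tolerance built from acc and
    # the neighborhood range scaled by N; see the OOI spike-test DPS.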

    def process_gradient_test(self, coverage, parameter, input_name, ddatdx,
                              mindx, startdat, toldat):
        qc_array = self.get_parameter_values(coverage, parameter.name)
        indexes = np.where(qc_array == -88)[0]

        from ion_functions.qc.qc_functions import dataqc_gradienttest_wrapper
        # The gradient test looks at point-to-point rates of change, so
        # evaluate the full series and subset the flags afterwards
        value_array = self.get_parameter_values(coverage, input_name)
        time_array = self.get_parameter_values(
            coverage, coverage.temporal_parameter_name)

        qc_array = dataqc_gradienttest_wrapper(value_array, time_array, ddatdx,
                                               mindx, startdat, toldat)

        return_dictionary = {
            coverage.temporal_parameter_name: time_array[indexes],
            parameter.name: qc_array[indexes]
        }
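
    # Roughly: ddatdx bounds the allowed change in the data per unit time,
    # mindx drops points spaced closer than that interval, startdat anchors
    # the first trusted value (NaN means "use the first point"), and toldat
    # is the tolerance for re-anchoring; see the OOI gradient-test DPS.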

    def process_local_range_test(self, coverage, parameter, input_name, datlim,
                                 datlimz, dims):
        return  # Not ready -- the code below never runs; it sketches the intended implementation
        qc_array = self.get_parameter_values(coverage, parameter.name)
        indexes = np.where(qc_array == -88)[0]

        from ion_functions.qc.qc_functions import dataqc_localrangetest_wrapper
        # dat: evaluate the full record; the results are subset below
        value_array = self.get_parameter_values(coverage, input_name)
        time_array = self.get_parameter_values(
            coverage, coverage.temporal_parameter_name)

        # datlim, datlimz, and dims come from the lookup table (dims is built
        # from its column headings in the dispatcher above); pval_callback is
        # supplied via parameter_callback below
        # TODO: slice_ should be the window of the site data product, but for
        # now we'll just use a global slice
        slice_ = slice(None)

        def parameter_callback(param_name):
            return coverage.get_parameter_values(param_name, slice_)

        qc_array = dataqc_localrangetest_wrapper(value_array, datlim, datlimz,
                                                 dims, parameter_callback)
        return_dictionary = {
            coverage.temporal_parameter_name: time_array[indexes],
            parameter.name: qc_array[indexes]
        }
        log.error("Here's what it would look like\n%s", return_dictionary)

    def get_dataset(self, data_product):
        dataset_ids, _ = self.resource_registry.find_objects(data_product,
                                                             PRED.hasDataset,
                                                             id_only=True)
        if not dataset_ids:
            raise BadRequest("No Dataset")
        dataset_id = dataset_ids[0]
        return dataset_id

    def get_coverage(self, dataset_id):
        cov = DatasetManagementService._get_coverage(dataset_id, mode='r+')
        return cov

    def recent_row(self, rows):
        '''
        Returns the most recent row, judged by its ts_created timestamp
        '''
        most_recent = None
        ts = 0
        for row in rows:
            if row['ts_created'] > ts:
                most_recent = row
                ts = row['ts_created']
        return most_recent
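
    # Worked example (hypothetical rows):
    #
    #     rows = [{'ts_created': 10, 'min_value': 0.0},
    #             {'ts_created': 20, 'min_value': 1.0}]
    #     recent_row(rows)  # -> {'ts_created': 20, 'min_value': 1.0}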

    def get_parameters(self, data_product):
        '''
        Returns the relevant parameter contexts of the data product
        '''

        # DataProduct -> StreamDefinition
        stream_defs, _ = self.resource_registry.find_objects(
            data_product._id, PRED.hasStreamDefinition, id_only=False)
        stream_def = stream_defs[0]

        # StreamDefinition -> ParameterDictionary
        pdict_ids, _ = self.resource_registry.find_objects(
            stream_def._id, PRED.hasParameterDictionary, id_only=True)
        pdict_id = pdict_ids[0]

        # ParameterDictionary -> ParameterContext
        pctxts, _ = self.resource_registry.find_objects(
            pdict_id, PRED.hasParameterContext, id_only=False)
        relevant = [
            ctx for ctx in pctxts
            if not stream_def.available_fields
            or ctx.name in stream_def.available_fields
        ]
        return relevant
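
    # Example (hypothetical): with stream_def.available_fields set to
    # ['time', 'temp'], only the contexts named 'time' and 'temp' are
    # returned; with available_fields empty, every context is relevant.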