Example #1
    def on_start(self): #pragma no cover
        #--------------------------------------------------------------------------------
        # Explicit on_start
        #--------------------------------------------------------------------------------

        # Skip TransformStreamListener and go to StreamProcess to avoid the subscriber being created
        # We want explicit management of the thread and subscriber object for ingestion

        TransformStreamProcess.on_start(self)
        
        self.queue_name = self.CFG.get_safe('process.queue_name',self.id)
        self.subscriber = StreamSubscriber(process=self, exchange_name=self.queue_name, callback=self.receive_callback)
        self.thread_lock = RLock()
        
        #--------------------------------------------------------------------------------
        # Normal on_start after this point
        #--------------------------------------------------------------------------------

        BaseIngestionWorker.on_start(self)
        self._rpc_server = self.container.proc_manager._create_listening_endpoint(from_name=self.id, process=self)
        self.add_endpoint(self._rpc_server)

        self.event_publisher = EventPublisher(OT.DatasetModified)
        self.stored_value_manager = StoredValueManager(self.container)

        self.lookup_docs = self.CFG.get_safe('process.lookup_docs',[])
        self.input_product = self.CFG.get_safe('process.input_product','')
        self.new_lookups = Queue()
        self.lookup_monitor = EventSubscriber(event_type=OT.ExternalReferencesUpdatedEvent, callback=self._add_lookups, auto_delete=True)
        self.add_endpoint(self.lookup_monitor)
        self.connection_id = ''
        self.connection_index = None
        
        self.start_listener()
    def process_execution(self, temp_vector, qc_params, bad_times):
        interval_key = uuid4().hex
        data_product_id = self.make_large_dataset(temp_vector)
        async_queue = Queue()

        def cb(event, *args, **kwargs):
            if '_'.join(event.qc_parameter.split('_')[1:]) not in qc_params:
                # Not one of the QC parameters under test; ignore it
                return
            times = event.temporal_values
            self.assertEquals(len(times), bad_times)
            async_queue.put(1)

        es = EventSubscriber(event_type=OT.ParameterQCEvent,
                             origin=data_product_id,
                             callback=cb,
                             auto_delete=True)
        es.start()
        self.addCleanup(es.stop)
        config = DotDict()
        config.process.interval_key = interval_key
        config.process.qc_params = qc_params
        self.sync_launch(config)

        # So now the process is started, time to throw an event at it
        ep = EventPublisher(event_type='TimerEvent')
        ep.publish_event(origin=interval_key)

        try:
            async_queue.get(timeout=120)
        except Empty:
            raise AssertionError('QC was not flagged in time')
Example #3
    def start(self):
        self.container.stats_mgr = ContainerStatsManager(self.container)
        self.container.stats_mgr.start()

        ## create queue listener and publisher
        self.sender = EventPublisher(event_type="ContainerManagementResult")
        self.receiver = EventSubscriber(event_type="ContainerManagementRequest", callback=self._receive_event)
        with self.lock:
            self.running = True
            self.receiver.start()
        log.debug('Container ready for container management requests')
Example #4
    def start(self):
        if self.events_enabled:
            # init change event publisher
            self.event_pub = EventPublisher()

            # Register to receive directory changes
            # self.event_sub = EventSubscriber(event_type="ContainerConfigModifiedEvent",
            #                                  origin="Directory",
            #                                  callback=self.receive_directory_change_event)

        # Create directory root entry (for current org) if not existing
        self.register("/", "DIR", sys_name=bootstrap.get_sys_name(), create_only=True)
    def process(self, dataset_id, start_time=0, end_time=0):
        if not dataset_id:
            raise BadRequest('No dataset id specified.')
        now = time.time()
        # Look back run_interval hours, plus 1 hour of overlap with the previous run
        start_time = start_time or (now - 3600 * (self.run_interval + 1))
        end_time = end_time or now

        qc_params = [i for i in self.qc_params if i in self.qc_suffixes] or self.qc_suffixes

        self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
        log.debug('Iterating over the data blocks')

        for st, et in self.chop(int(start_time), int(end_time)):
            log.debug('Chopping %s:%s', st, et)
            log.debug(
                "Retrieving data: data_retriever.retrieve('%s', query={'start_time':%s, 'end_time':%s})",
                dataset_id, st, et)
            try:
                granule = self.data_retriever.retrieve(dataset_id,
                                                       query={
                                                           'start_time': st,
                                                           'end_time': et
                                                       })
            except BadRequest:
                data_products, _ = self.container.resource_registry.find_subjects(
                    object=dataset_id,
                    predicate=PRED.hasDataset,
                    subject_type=RT.DataProduct)
                for data_product in data_products:
                    log.exception('Failed to perform QC Post Processing on %s',
                                  data_product.name)
                    log.error('Calculated Start Time: %s', st)
                    log.error('Calculated End Time:   %s', et)
                raise
            log.debug('Retrieved Data')
            rdt = RecordDictionaryTool.load_from_granule(granule)
            qc_fields = [
                i for i in rdt.fields
                if any([i.endswith(j) for j in qc_params])
            ]
            log.debug('QC Fields: %s', qc_fields)
            for field in qc_fields:
                val = rdt[field]
                if val is None:
                    continue
                if not np.all(val):
                    log.debug('Found QC Alerts')
                    indexes = np.where(val == 0)
                    timestamps = rdt[rdt.temporal_parameter][indexes[0]]
                    self.flag_qc_parameter(dataset_id, field,
                                           timestamps.tolist(), {})
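The inner loop above treats a QC flag of 0 as a failed check: np.where(val == 0) picks out the failing rows, and the temporal parameter is indexed with them to get the timestamps handed to flag_qc_parameter. A minimal standalone sketch of that indexing, with hypothetical sample data:

import numpy as np

qc_flags = np.array([1, 1, 0, 1, 0, 1])                 # 0 marks a failed QC check
times = np.array([10.0, 20.0, 30.0, 40.0, 50.0, 60.0])  # stand-in temporal parameter

if not np.all(qc_flags):                  # at least one sample failed
    indexes = np.where(qc_flags == 0)     # tuple of index arrays, one per dimension
    flagged = times[indexes[0]].tolist()  # -> [30.0, 50.0]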
Example #6
    def test_base_subscriber_as_catchall(self):
        ar = event.AsyncResult()
        gq = queue.Queue()
        self.count = 0

        def cb(*args, **kwargs):
            self.count += 1
            gq.put(args[0])
            if self.count == 2:
                ar.set()

        sub = EventSubscriber(callback=cb)
        pub1 = EventPublisher(event_type="ResourceEvent")
        pub2 = EventPublisher(event_type="ContainerLifecycleEvent")

        self._listen(sub)

        pub1.publish_event(origin="some", description="1")
        pub2.publish_event(origin="other", description="2")

        ar.get(timeout=5)

        res = []
        for x in xrange(self.count):
            res.append(gq.get(timeout=5))

        self.assertEquals(len(res), 2)
        self.assertEquals(res[0].description, "1")
        self.assertEquals(res[1].description, "2")
Example #7
    def process(self, dataset_id, start_time=0, end_time=0):
        if not dataset_id:
            raise BadRequest('No dataset id specified.')
        now = time.time()
        # Look back run_interval hours, plus 1 hour of overlap with the previous run
        start_time = start_time or (now - 3600 * (self.run_interval + 1))
        end_time = end_time or now

        qc_params = [i for i in self.qc_params if i in self.qc_suffixes] or self.qc_suffixes

        self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
        log.debug('Iterating over the data blocks')

        for st, et in self.chop(int(start_time), int(end_time)):
            log.debug('Chopping %s:%s', st, et)
            log.debug(
                "Retrieving data: data_retriever.retrieve('%s', query={'start_time':%s, 'end_time':%s})",
                dataset_id, st, et)
            granule = self.data_retriever.retrieve(dataset_id,
                                                   query={
                                                       'start_time': st,
                                                       'end_time': et
                                                   })
            log.debug('Retrieved Data')
            rdt = RecordDictionaryTool.load_from_granule(granule)
            qc_fields = [
                i for i in rdt.fields
                if any([i.endswith(j) for j in qc_params])
            ]
            log.debug('QC Fields: %s', qc_fields)
            for field in qc_fields:
                val = rdt[field]
                if val is None:
                    continue
                if not np.all(val):
                    log.debug('Found QC Alerts')
                    indexes = np.where(val == 0)
                    timestamps = rdt[rdt.temporal_parameter][indexes[0]]
                    self.flag_qc_parameter(dataset_id, field,
                                           timestamps.tolist(), {})
Example #8
    def test_pub_on_different_origins(self):
        ar = event.AsyncResult()
        gq = queue.Queue()
        self.count = 0

        def cb(*args, **kwargs):
            self.count += 1
            gq.put(args[0])
            if self.count == 3:
                ar.set()

        sub = EventSubscriber(event_type="ResourceEvent", callback=cb)
        pub = EventPublisher(event_type="ResourceEvent")

        self._listen(sub)

        pub.publish_event(origin="one", description="1")
        pub.publish_event(origin="two", description="2")
        pub.publish_event(origin="three", description="3")

        ar.get(timeout=5)

        res = []
        for x in xrange(self.count):
            res.append(gq.get(timeout=5))

        self.assertEquals(len(res), 3)
        self.assertEquals(res[0].description, "1")
        self.assertEquals(res[1].description, "2")
        self.assertEquals(res[2].description, "3")
Example #9
def fail(x):
    '''
    The goal behind this function is to publish an event so that threads
    can synchronize with it to verify that it was run, regardless of context
    '''
    event_publisher = EventPublisher(OT.GranuleIngestionErrorEvent)
    try:
        event_publisher.publish_event(error_msg='failure')

        raise StandardError('Something you tried to do failed')
    finally:
        event_publisher.close()
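Per the docstring, fail() exists so another thread can observe the GranuleIngestionErrorEvent and confirm the function ran. Below is a minimal sketch of that subscriber side, patterned on the other examples in this listing; the gevent queue import and the helper name are assumptions, not part of the original snippet, and EventSubscriber/OT are the same imports the surrounding examples use.

from gevent.queue import Queue

def wait_for_failure_event(timeout=10):
    verified = Queue()

    def cb(event, *args, **kwargs):
        verified.put(event)              # signal that fail() actually published

    sub = EventSubscriber(event_type=OT.GranuleIngestionErrorEvent, callback=cb)
    sub.start()
    try:
        return verified.get(timeout=timeout)   # raises gevent.queue.Empty on timeout
    finally:
        sub.stop()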
Example #10
    def test_subscriber_listening_for_specific_origin(self):
        ar = event.AsyncResult()
        self.count = 0

        def cb(*args, **kwargs):
            self.count += 1
            ar.set(args[0])

        sub = EventSubscriber(event_type="ResourceEvent",
                              origin="specific",
                              callback=cb)
        pub = EventPublisher(event_type="ResourceEvent")

        self._listen(sub)

        pub.publish_event(origin="notspecific", description="1")
        pub.publish_event(origin="notspecific", description="2")
        pub.publish_event(origin="specific", description="3")
        pub.publish_event(origin="notspecific", description="4")

        evmsg = ar.get(timeout=5)
        self.assertEquals(self.count, 1)
        self.assertEquals(evmsg.description, "3")
Example #11
    def start(self):
        self.container.event_pub = EventPublisher()
Example #12
class QCPostProcessing(SimpleProcess):
    '''
    QC Post Processing Process

    This process gives ION clients and operators the ability to evaluate the automated quality control
    flags on various data products. It should be run periodically with overlapping spans of data to
    ensure complete dataset QC verification.

    The parameters that this process accepts as configuration are:
        - dataset_id: The dataset identifier, required.
        - start_time: Unix timestamp, defaults to 24 hours in the past
        - end_time: Unix timestamp, defaults to current time
        - qc_params: a list of qc functions to evaluate, currently supported functions are: ['glblrng_qc',
          'spketst_qc', 'stuckvl_qc'], defaults to all

    '''

    qc_suffixes = ['glblrng_qc', 'spketst_qc', 'stuckvl_qc']

    def on_start(self):
        SimpleProcess.on_start(self)
        self.data_retriever = DataRetrieverServiceProcessClient(process=self)
        self.interval_key = self.CFG.get_safe('process.interval_key', None)
        self.qc_params = self.CFG.get_safe('process.qc_params', [])
        validate_is_not_none(
            self.interval_key,
            'An interval key is necessary to launch this process')
        self.event_subscriber = EventSubscriber(event_type=OT.TimerEvent,
                                                origin=self.interval_key,
                                                callback=self._event_callback,
                                                auto_delete=True)
        self.add_endpoint(self.event_subscriber)
        self.resource_registry = self.container.resource_registry
        self.run_interval = self.CFG.get_safe(
            'service.qc_processing.run_interval', 24)

    def _event_callback(self, *args, **kwargs):
        log.info('QC Post Processing Triggered')
        dataset_ids, _ = self.resource_registry.find_resources(
            restype=RT.Dataset, id_only=True)
        for dataset_id in dataset_ids:
            log.info('QC Post Processing for dataset %s', dataset_id)
            try:
                self.process(dataset_id)
            except BadRequest as e:
                if 'Problems reading from the coverage' in e.message:
                    log.error('Failed to read from dataset')

    def process(self, dataset_id, start_time=0, end_time=0):
        if not dataset_id:
            raise BadRequest('No dataset id specified.')
        now = time.time()
        # Look back run_interval hours, plus 1 hour of overlap with the previous run
        start_time = start_time or (now - 3600 * (self.run_interval + 1))
        end_time = end_time or now

        qc_params = [i for i in self.qc_params if i in self.qc_suffixes] or self.qc_suffixes

        self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
        log.debug('Iterating over the data blocks')

        for st, et in self.chop(int(start_time), int(end_time)):
            log.debug('Chopping %s:%s', st, et)
            log.debug(
                "Retrieving data: data_retriever.retrieve('%s', query={'start_time':%s, 'end_time':%s})",
                dataset_id, st, et)
            granule = self.data_retriever.retrieve(dataset_id,
                                                   query={
                                                       'start_time': st,
                                                       'end_time': et
                                                   })
            log.debug('Retrieved Data')
            rdt = RecordDictionaryTool.load_from_granule(granule)
            qc_fields = [
                i for i in rdt.fields
                if any([i.endswith(j) for j in qc_params])
            ]
            log.debug('QC Fields: %s', qc_fields)
            for field in qc_fields:
                val = rdt[field]
                if val is None:
                    continue
                if not np.all(val):
                    log.debug('Found QC Alerts')
                    indexes = np.where(val == 0)
                    timestamps = rdt[rdt.temporal_parameter][indexes[0]]
                    self.flag_qc_parameter(dataset_id, field,
                                           timestamps.tolist(), {})

    def flag_qc_parameter(self, dataset_id, parameter, temporal_values,
                          configuration):
        log.info('Flagging QC for %s', parameter)
        data_product_ids, _ = self.resource_registry.find_subjects(
            object=dataset_id,
            subject_type=RT.DataProduct,
            predicate=PRED.hasDataset,
            id_only=True)
        for data_product_id in data_product_ids:
            self.qc_publisher.publish_event(origin=data_product_id,
                                            qc_parameter=parameter,
                                            temporal_values=temporal_values,
                                            configuration=configuration)

    @classmethod
    def chop(cls, start_time, end_time):
        while start_time < end_time:
            yield (start_time, min(start_time + 3600, end_time))
            start_time = min(start_time + 3600, end_time)
        return
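Pulling the pieces together: on_start subscribes to TimerEvent on process.interval_key, so the process is driven by publishing a TimerEvent with that origin, while process.qc_params narrows the checks. A minimal sketch of that configuration and trigger, lifted from the patterns in the process_execution test earlier in this listing; the actual launch step depends on the test or deployment harness and is omitted.

from uuid import uuid4

interval_key = uuid4().hex

config = DotDict()
config.process.interval_key = interval_key     # origin the TimerEvent subscriber listens on
config.process.qc_params = ['glblrng_qc']      # subset of qc_suffixes; an empty list means all

# ... launch QCPostProcessing with this configuration (harness-specific, omitted) ...

ep = EventPublisher(event_type='TimerEvent')
ep.publish_event(origin=interval_key)          # wakes _event_callback and starts the sweep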
Example #13
class ContainerManager(object):
    def __init__(self, container, handlers=DEFAULT_HANDLERS):
        self.container = container
        self.running = False
        # make sure start() completes before an event is handled,
        # and any event is either handled before stop() begins,
        # or the handler begins after stop() completes and the event is dropped
        self.lock = Lock()
        self.handlers = handlers[:]

    def start(self):
        self.container.stats_mgr = ContainerStatsManager(self.container)
        self.container.stats_mgr.start()

        ## create queue listener and publisher
        self.sender = EventPublisher(event_type="ContainerManagementResult")
        self.receiver = EventSubscriber(event_type="ContainerManagementRequest", callback=self._receive_event)
        with self.lock:
            self.running = True
            self.receiver.start()
        log.debug('Container ready for container management requests')

    def stop(self):
        log.debug('container management stopping')
        with self.lock:
            self.receiver.stop()
            self.sender.close()
            self.running = False
        log.debug('container management stopped')

        self.container.stats_mgr.stop()

    def add_handler(self, handler):
        self.handlers.append(handler)

    def _get_handlers(self, action):
        out = []
        for handler in self.handlers:
            if handler.can_handle_request(action):
                out.append(handler)
        return out

    def _receive_event(self, event, headers):
        with self.lock:
            if not isinstance(event, ContainerManagementRequest):
                log.trace('ignoring wrong type event: %r', event)
                return
            if not self.running:
                log.warn('ignoring admin message received after shutdown: %s', event.action)
                return
            predicate = ContainerSelector.from_object(event.predicate)
            if predicate.should_handle(self.container):
                log.trace('handling admin message: %s', event.action)
                self._perform_action(event.action)
            else:
                log.trace('ignoring admin action: %s', event.action)
                if SEND_RESULT_IF_NOT_SELECTED:
                    self.sender.publish_event(origin=self.container.id, action=event.action, outcome='not selected')
                    log.debug('received action: %s, outcome: not selected', event.action)

    def _perform_action(self, action):
        handlers = self._get_handlers(action)
        if not handlers:
            log.info('action accepted but no handlers found: %s', action)
            result = 'unhandled'
            self.sender.publish_event(origin=self.container.id, action=action, outcome=str(result))
            log.debug('received action: %s, outcome: %s', action, result)
        else:
            for handler in handlers:
                try:
                    result = handler.handle_request(action) or "completed"
                except Exception as e:
                    log.error("handler %r failed to perform action: %s", handler, action, exc_info=True)
                    result = e
                self.sender.publish_event(origin=self.container.id, action=action, outcome=str(result))
                log.debug('performed action: %s, outcome: %s', action, result)
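The manager delegates work to handler objects: _get_handlers filters self.handlers by can_handle_request(action), and _perform_action calls handle_request(action), publishing the outcome either way. Below is a minimal sketch of an object satisfying that protocol; the class, its action check, and the attribute names read off action are hypothetical, not part of the container management code.

import logging
log = logging.getLogger(__name__)

class LoggingAdminHandler(object):
    def can_handle_request(self, action):
        # accept only the request types this handler understands (attribute name assumed)
        return getattr(action, 'type_', None) == 'ChangeLogLevel'

    def handle_request(self, action):
        log.info('would change log level to %s', getattr(action, 'level', None))
        return 'completed'   # a truthy result becomes the published outcome

# manager = ContainerManager(container)
# manager.add_handler(LoggingAdminHandler())
# manager.start()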
Example #14
class ScienceGranuleIngestionWorker(TransformStreamListener,
                                    BaseIngestionWorker):
    CACHE_LIMIT = CFG.get_safe('container.ingestion_cache', 5)

    def __init__(self, *args, **kwargs):
        TransformStreamListener.__init__(self, *args, **kwargs)
        BaseIngestionWorker.__init__(self, *args, **kwargs)

        #--------------------------------------------------------------------------------
        # Ingestion Cache
        # - Datasets
        # - Coverage instances
        #--------------------------------------------------------------------------------
        self._datasets = collections.OrderedDict()
        self._coverages = collections.OrderedDict()

        self._bad_coverages = {}

        self.time_stats = Accumulator(format='%3f')
        # unique ID to identify this worker in log msgs
        self._id = uuid.uuid1()

    def on_start(self):  #pragma no cover
        #--------------------------------------------------------------------------------
        # Explicit on_start
        #--------------------------------------------------------------------------------

        # Skip TransformStreamListener and go to StreamProcess to avoid the subscriber being created
        # We want explicit management of the thread and subscriber object for ingestion

        TransformStreamProcess.on_start(self)

        self.queue_name = self.CFG.get_safe('process.queue_name', self.id)
        self.subscriber = StreamSubscriber(process=self,
                                           exchange_name=self.queue_name,
                                           callback=self.receive_callback)
        self.thread_lock = RLock()

        #--------------------------------------------------------------------------------
        # Normal on_start after this point
        #--------------------------------------------------------------------------------

        BaseIngestionWorker.on_start(self)
        self._rpc_server = self.container.proc_manager._create_listening_endpoint(
            from_name=self.id, process=self)
        self.add_endpoint(self._rpc_server)

        self.event_publisher = EventPublisher(OT.DatasetModified)
        self.stored_value_manager = StoredValueManager(self.container)

        self.lookup_docs = self.CFG.get_safe('process.lookup_docs', [])
        self.input_product = self.CFG.get_safe('process.input_product', '')
        self.new_lookups = Queue()
        self.lookup_monitor = EventSubscriber(
            event_type=OT.ExternalReferencesUpdatedEvent,
            callback=self._add_lookups,
            auto_delete=True)
        self.add_endpoint(self.lookup_monitor)
        self.connection_id = ''
        self.connection_index = None

        self.start_listener()

    def on_quit(self):  #pragma no cover
        self.event_publisher.close()
        if self.subscriber_thread:
            self.stop_listener()
        for stream, coverage in self._coverages.iteritems():
            try:
                coverage.close(timeout=5)
            except:
                log.exception('Problems closing the coverage')
        self._coverages.clear()
        TransformStreamListener.on_quit(self)
        BaseIngestionWorker.on_quit(self)

    def start_listener(self):
        # We use a lock here to prevent possible race conditions from starting multiple listeners and coverage clobbering
        with self.thread_lock:
            self.subscriber_thread = self._process.thread_manager.spawn(
                self.subscriber.listen, thread_name='%s-subscriber' % self.id)

    def stop_listener(self):
        # Avoid race conditions with coverage operations (Don't start a listener at the same time as closing one)
        with self.thread_lock:
            self.subscriber.close()
            self.subscriber_thread.join(timeout=10)
            for stream, coverage in self._coverages.iteritems():
                try:
                    coverage.close(timeout=5)
                except:
                    log.exception('Problems closing the coverage')
            self._coverages.clear()
            self.subscriber_thread = None

    def pause(self):
        if self.subscriber_thread is not None:
            self.stop_listener()

    def resume(self):
        if self.subscriber_thread is None:
            self.start_listener()

    def _add_lookups(self, event, *args, **kwargs):
        if event.origin == self.input_product:
            if isinstance(event.reference_keys, list):
                self.new_lookups.put(event.reference_keys)

    def _new_dataset(self, stream_id):
        '''
        Adds a new dataset to the internal cache of the ingestion worker
        '''
        rr_client = self.container.resource_registry
        datasets, _ = rr_client.find_subjects(subject_type=RT.Dataset,
                                              predicate=PRED.hasStream,
                                              object=stream_id,
                                              id_only=True)
        if datasets:
            return datasets[0]
        return None

    def _get_data_products(self, dataset_id):
        rr_client = self.container.resource_registry
        data_products, _ = rr_client.find_subjects(object=dataset_id,
                                                   predicate=PRED.hasDataset,
                                                   subject_type=RT.DataProduct,
                                                   id_only=False)
        return data_products

    #--------------------------------------------------------------------------------
    # Metadata Handlers
    #--------------------------------------------------------------------------------

    def initialize_metadata(self, dataset_id, rdt):
        '''
        Initializes a metadata document in the object store. The document
        contains information about the bounds and extents of the dataset as
        well other metadata to improve performance.

        '''

        object_store = self.container.object_store
        key = dataset_id
        bounds = {}
        extents = {}
        last_values = {}
        rough_size = 0
        for k, v in rdt.iteritems():
            v = v[:].flatten()
            if v.dtype.char not in ('S', 'O', 'U', 'V'):
                bounds[k] = (np.min(v), np.max(v))
                last_values[k] = v[-1]
            extents[k] = len(rdt)
            rough_size += len(rdt) * 4

        doc = {
            'bounds': bounds,
            'extents': extents,
            'last_values': last_values,
            'size': rough_size
        }
        doc = numpy_walk(doc)
        object_store.create_doc(doc, object_id=key)
        return

    def update_metadata(self, dataset_id, rdt):
        '''
        Updates the metadata document with the latest information available
        '''

        self.update_data_product_metadata(dataset_id, rdt)

        # Grab the document
        object_store = self.container.object_store
        key = dataset_id
        try:
            doc = object_store.read_doc(key)
        except NotFound:
            return self.initialize_metadata(dataset_id, rdt)
        # These are the fields we're interested in
        bounds = doc['bounds']
        extents = doc['extents']
        last_values = doc['last_values']
        rough_size = doc['size']
        for k, v in rdt.iteritems():
            if k not in bounds:
                continue

            v = v[:].flatten()  # Get the numpy representation (dense array).
            if v.dtype.char not in ('S', 'O', 'U', 'V'):
                l_min = np.min(v)
                l_max = np.max(v)
                o_min, o_max = bounds[k]
                bounds[k] = (min(l_min, o_min), max(l_max, o_max))
                last_values[k] = v[-1]
            # Update the bounds
            # Increase the extents
            extents[k] = extents[k] + len(rdt)
            # How about the last value?

            rough_size += len(rdt) * 4
            doc['size'] = rough_size
        # Sanitize it
        doc = numpy_walk(doc)
        object_store.update_doc(doc)

    def update_data_product_metadata(self, dataset_id, rdt):
        data_products = self._get_data_products(dataset_id)
        for data_product in data_products:
            self.update_time(data_product, rdt[rdt.temporal_parameter][:])
            self.update_geo(data_product, rdt)
            try:
                self.container.resource_registry.update(data_product)
            except:  # TODO: figure out WHICH Exception gets raised here when the bounds are off
                log.error("Problem updating the data product metadata",
                          exc_info=True)
                # Carry on :(

    def update_time(self, data_product, t):
        '''
        Sets the nominal_datetime for a data product correctly
        Accounts for things like NTP and out of order data
        '''

        t0, t1 = self.get_datetime_bounds(data_product)
        #TODO: Account for non NTP-based timestamps
        min_t = np.min(t) - 2208988800
        max_t = np.max(t) - 2208988800
        if t0:
            t0 = min(t0, min_t)
        else:
            t0 = min_t

        if t1:
            t1 = max(t1, max_t)
        else:
            t1 = max_t

        if t0 > t1:
            log.error("This should never happen but t0 > t1")

        data_product.nominal_datetime.start_datetime = float(t0)
        data_product.nominal_datetime.end_datetime = float(t1)

    def get_datetime(self, nominal_datetime):
        '''
        Returns a floating point value for the datetime or None if it's an
        empty string
        '''
        t = None
        # So normally this is a string
        if isinstance(nominal_datetime, (float, int)):
            t = nominal_datetime  # simple enough
        elif isinstance(nominal_datetime, basestring):
            if nominal_datetime:  # not an empty string
                # Try to convert it to a float
                try:
                    t = float(nominal_datetime)
                except ValueError:
                    pass
        return t

    def get_datetime_bounds(self, data_product):
        '''Returns the min and max for the bounds in the nominal_datetime
        attr
        '''

        t0 = self.get_datetime(data_product.nominal_datetime.start_datetime)
        t1 = self.get_datetime(data_product.nominal_datetime.end_datetime)
        return (t0, t1)

    def update_geo(self, data_product, rdt):
        '''
        Finds the maximum bounding box
        '''
        lat = None
        lon = None
        for p in rdt:
            if rdt._rd[p] is None:
                continue
            # TODO: Not an all encompassing list of acceptable names for lat and lon
            if p.lower() in ('lat', 'latitude', 'y_axis'):
                lat = np.asscalar(rdt[p][-1])
            elif p.lower() in ('lon', 'longitude', 'x_axis'):
                lon = np.asscalar(rdt[p][-1])
            if lat and lon:
                break

        if lat and lon:
            data_product.geospatial_bounds.geospatial_latitude_limit_north = lat
            data_product.geospatial_bounds.geospatial_latitude_limit_south = lat
            data_product.geospatial_bounds.geospatial_longitude_limit_east = lon
            data_product.geospatial_bounds.geospatial_longitude_limit_west = lon

    #--------------------------------------------------------------------------------
    # Cache management
    #--------------------------------------------------------------------------------

    def get_dataset(self, stream_id):
        '''
        Memoization (LRU) of _new_dataset
        '''
        try:
            result = self._datasets.pop(stream_id)
        except KeyError:
            result = self._new_dataset(stream_id)
            if result is None:
                return None
            if len(self._datasets) >= self.CACHE_LIMIT:
                self._datasets.popitem(0)
        self._datasets[stream_id] = result
        return result

    def get_coverage(self, stream_id):
        '''
        Memoization (LRU) of _get_coverage
        '''
        try:
            result = self._coverages.pop(stream_id)
        except KeyError:
            dataset_id = self.get_dataset(stream_id)
            if dataset_id is None:
                return None
            result = DatasetManagementService._get_simplex_coverage(dataset_id,
                                                                    mode='a')
            if result is None:
                return None
            if len(self._coverages) >= self.CACHE_LIMIT:
                k, coverage = self._coverages.popitem(0)
                coverage.close(timeout=5)
        self._coverages[stream_id] = result
        return result

    #--------------------------------------------------------------------------------
    # Granule Parsing and Handling
    #--------------------------------------------------------------------------------

    @handle_stream_exception()
    def recv_packet(self, msg, stream_route, stream_id):
        '''
        The consumer callback to parse and manage the granule.
        The message is ACK'd once the function returns
        '''
        log.trace('received granule for stream %s', stream_id)

        if msg == {}:
            log.error('Received empty message from stream: %s', stream_id)
            return
        # Message validation
        if not isinstance(msg, Granule):
            log.error('Ingestion received a message that is not a granule: %s',
                      msg)
            return

        rdt = RecordDictionaryTool.load_from_granule(msg)
        if rdt is None:
            log.error('Invalid granule (no RDT) for stream %s', stream_id)
            return
        if not len(rdt):
            log.debug('Empty granule for stream %s', stream_id)
            return

        self.persist_or_timeout(stream_id, rdt)

    def persist_or_timeout(self, stream_id, rdt):
        '''
        A loop that tries to parse and store a granule for up to five minutes,
        and waits an increasing amount of time each iteration.
        '''
        done = False
        timeout = 2
        start = time.time()
        while not done:
            if self.parse_granule(stream_id, rdt, start, done):
                return  # We're all done, everything worked

            if (time.time() - start) > MAX_RETRY_TIME:  # After a while, give up
                dataset_id = self.get_dataset(stream_id)
                log.error(
                    "We're giving up, the coverage needs to be inspected %s",
                    DatasetManagementService._get_coverage_path(dataset_id))
                raise

            if stream_id in self._coverages:
                log.info('Popping coverage for stream %s', stream_id)
                self._coverages.pop(stream_id)

            gevent.sleep(timeout)

            timeout = min(60 * 5, timeout * 2)

    def parse_granule(self, stream_id, rdt, start, done):
        try:
            self.add_granule(stream_id, rdt)
            return True
        except Exception:
            log.exception('An issue with coverage, retrying after a bit')
            return False

    def dataset_changed(self, dataset_id, window):
        self.event_publisher.publish_event(origin=dataset_id,
                                           author=self.id,
                                           window=window)

    def build_data_dict(self, rdt):
        np_dict = {}

        time_array = rdt[rdt.temporal_parameter]
        if time_array is None:
            raise ValueError("A granule needs a time array")
        for k, v in rdt.iteritems():
            # Sparse values are different and aren't constructed using NumpyParameterData
            if isinstance(rdt.param_type(k), SparseConstantType):
                value = v[0]
                if hasattr(value, 'dtype'):
                    value = np.asscalar(value)
                time_start = np.asscalar(time_array[0])
                np_dict[k] = ConstantOverTime(k,
                                              value,
                                              time_start=time_start,
                                              time_end=None)  # From now on
                continue
            elif isinstance(rdt.param_type(k), CategoryType):
                log.warning("Category types temporarily unsupported")
                continue
            elif isinstance(rdt.param_type(k), RecordType):
                value = v
            else:
                value = v

            try:
                if k == 'temp_sample':
                    print repr(value)
                np_dict[k] = NumpyParameterData(k, value, time_array)
            except:
                raise

        return np_dict

    def insert_values(self, coverage, rdt, stream_id):

        np_dict = self.build_data_dict(rdt)

        if 'ingestion_timestamp' in coverage.list_parameters():
            timestamps = np.array([(time.time() + 2208988800)
                                   for i in rdt[rdt.temporal_parameter]])
            np_dict['ingestion_timestamp'] = NumpyParameterData(
                'ingestion_timestamp', timestamps, rdt[rdt.temporal_parameter])

        # If it's sparse only
        if self.sparse_only(rdt):
            del np_dict[rdt.temporal_parameter]

        try:
            coverage.set_parameter_values(np_dict)
        except IOError as e:
            log.error("Couldn't insert values for coverage: %s",
                      coverage.persistence_dir,
                      exc_info=True)
            try:
                coverage.close()
            finally:
                self._bad_coverages[stream_id] = 1
                raise CorruptionError(e.message)
        except KeyError as e:
            if 'has not been initialized' in e.message:
                coverage.refresh()
            raise
        except Exception as e:
            print repr(rdt)
            raise

    def add_granule(self, stream_id, rdt):
        ''' Appends the granule's data to the coverage and persists it. '''
        if stream_id in self._bad_coverages:
            log.info(
                'Message attempting to be inserted into bad coverage: %s',
                DatasetManagementService._get_coverage_path(
                    self.get_dataset(stream_id)))

        #--------------------------------------------------------------------------------
        # Coverage determination and appending
        #--------------------------------------------------------------------------------
        dataset_id = self.get_dataset(stream_id)
        if not dataset_id:
            log.error('No dataset could be determined on this stream: %s',
                      stream_id)
            return

        try:
            coverage = self.get_coverage(stream_id)
        except IOError as e:
            log.error(
                "Couldn't open coverage: %s",
                DatasetManagementService._get_coverage_path(
                    self.get_dataset(stream_id)))
            raise CorruptionError(e.message)

        if not coverage:
            log.error(
                'Could not persist coverage from granule, coverage is None')
            return
        #--------------------------------------------------------------------------------
        # Actual persistence
        #--------------------------------------------------------------------------------

        if rdt[rdt.temporal_parameter] is None:
            log.warning("Empty granule received")
            return

        # Parse the RDT and set the values in the coverage
        self.insert_values(coverage, rdt, stream_id)

        # Force the data to be flushed
        DatasetManagementService._save_coverage(coverage)

        self.update_metadata(dataset_id, rdt)

        try:
            window = rdt[rdt.temporal_parameter][[0, -1]]
            window = window.tolist()
        except (ValueError, IndexError):
            window = None
        self.dataset_changed(dataset_id, window)

    def sparse_only(self, rdt):
        '''
        A sparse-only RDT contains only a time array and sparse values, no other data
        '''
        if rdt[rdt.temporal_parameter] is None:
            return False  # No time, so it's just empty

        at_least_one = False

        for key in rdt.iterkeys():
            # Skip time, that needs to be there
            if key == rdt.temporal_parameter:
                continue
            if not isinstance(rdt.param_type(key), SparseConstantType):
                return False
            else:
                at_least_one = True

        return at_least_one
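get_dataset and get_coverage above memoize on a collections.OrderedDict: a hit pops the key and re-inserts it at the end (most recently used), and a miss evicts the oldest entry with popitem(0) once CACHE_LIMIT is reached. The same pattern in isolation, with hypothetical names and a stand-in for the expensive lookup:

import collections

CACHE_LIMIT = 3
_cache = collections.OrderedDict()

def expensive_lookup(key):
    return 'value-for-%s' % key          # stand-in for _new_dataset / _get_simplex_coverage

def get_cached(key):
    try:
        value = _cache.pop(key)          # hit: remove so the re-insert below refreshes recency
    except KeyError:
        value = expensive_lookup(key)    # miss: do the real work
        if len(_cache) >= CACHE_LIMIT:
            _cache.popitem(0)            # evict the least recently used entry
    _cache[key] = value                  # most recently used entry lives at the end
    return value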
Example #15
    def test_pub_on_different_subtypes(self):
        ar = event.AsyncResult()
        gq = queue.Queue()
        self.count = 0

        def cb(event, *args, **kwargs):
            self.count += 1
            gq.put(event)
            if event.description == "end":
                ar.set()

        sub = EventSubscriber(event_type="ResourceModifiedEvent",
                              sub_type="st1",
                              callback=cb)
        sub.start()

        pub1 = EventPublisher(event_type="ResourceModifiedEvent")
        pub2 = EventPublisher(event_type="ContainerLifecycleEvent")

        pub1.publish_event(origin="two", sub_type="st2", description="2")
        pub2.publish_event(origin="three", sub_type="st1", description="3")
        pub1.publish_event(origin="one", sub_type="st1", description="1")
        pub1.publish_event(origin="four", sub_type="st1", description="end")

        ar.get(timeout=5)
        sub.stop()

        res = []
        for x in xrange(self.count):
            res.append(gq.get(timeout=5))

        self.assertEquals(len(res), 2)
        self.assertEquals(res[0].description, "1")
class ScienceGranuleIngestionWorker(TransformStreamListener,
                                    BaseIngestionWorker):
    CACHE_LIMIT = CFG.get_safe('container.ingestion_cache', 5)

    def __init__(self, *args, **kwargs):
        TransformStreamListener.__init__(self, *args, **kwargs)
        BaseIngestionWorker.__init__(self, *args, **kwargs)

        #--------------------------------------------------------------------------------
        # Ingestion Cache
        # - Datasets
        # - Coverage instances
        #--------------------------------------------------------------------------------
        self._datasets = collections.OrderedDict()
        self._coverages = collections.OrderedDict()

        self._bad_coverages = {}

        self.time_stats = Accumulator(format='%3f')
        # unique ID to identify this worker in log msgs
        self._id = uuid.uuid1()

    def on_start(self):  #pragma no cover
        #--------------------------------------------------------------------------------
        # Explicit on_start
        #--------------------------------------------------------------------------------

        # Skip TransformStreamListener and go to StreamProcess to avoid the subscriber being created
        # We want explicit management of the thread and subscriber object for ingestion

        TransformStreamProcess.on_start(self)

        self.queue_name = self.CFG.get_safe('process.queue_name', self.id)
        self.subscriber = StreamSubscriber(process=self,
                                           exchange_name=self.queue_name,
                                           callback=self.receive_callback)
        self.thread_lock = RLock()

        #--------------------------------------------------------------------------------
        # Normal on_start after this point
        #--------------------------------------------------------------------------------

        BaseIngestionWorker.on_start(self)
        self._rpc_server = self.container.proc_manager._create_listening_endpoint(
            from_name=self.id, process=self)
        self.add_endpoint(self._rpc_server)

        self.event_publisher = EventPublisher(OT.DatasetModified)
        self.stored_value_manager = StoredValueManager(self.container)

        self.lookup_docs = self.CFG.get_safe('process.lookup_docs', [])
        self.input_product = self.CFG.get_safe('process.input_product', '')
        self.qc_enabled = self.CFG.get_safe('process.qc_enabled', True)
        self.ignore_gaps = self.CFG.get_safe('service.ingestion.ignore_gaps',
                                             False)
        self.new_lookups = Queue()
        self.lookup_monitor = EventSubscriber(
            event_type=OT.ExternalReferencesUpdatedEvent,
            callback=self._add_lookups,
            auto_delete=True)
        self.add_endpoint(self.lookup_monitor)
        self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
        self.connection_id = ''
        self.connection_index = None

        self.start_listener()

    def on_quit(self):  #pragma no cover
        self.event_publisher.close()
        self.qc_publisher.close()
        if self.subscriber_thread:
            self.stop_listener()
        for stream, coverage in self._coverages.iteritems():
            try:
                coverage.close(timeout=5)
            except:
                log.exception('Problems closing the coverage')
        self._coverages.clear()
        TransformStreamListener.on_quit(self)
        BaseIngestionWorker.on_quit(self)

    def start_listener(self):
        # We use a lock here to prevent possible race conditions from starting multiple listeners and coverage clobbering
        with self.thread_lock:
            self.subscriber_thread = self._process.thread_manager.spawn(
                self.subscriber.listen, thread_name='%s-subscriber' % self.id)

    def stop_listener(self):
        # Avoid race conditions with coverage operations (Don't start a listener at the same time as closing one)
        with self.thread_lock:
            self.subscriber.close()
            self.subscriber_thread.join(timeout=10)
            for stream, coverage in self._coverages.iteritems():
                try:
                    coverage.close(timeout=5)
                except:
                    log.exception('Problems closing the coverage')
            self._coverages.clear()
            self.subscriber_thread = None

    def pause(self):
        if self.subscriber_thread is not None:
            self.stop_listener()

    def resume(self):
        if self.subscriber_thread is None:
            self.start_listener()

    def _add_lookups(self, event, *args, **kwargs):
        if event.origin == self.input_product:
            if isinstance(event.reference_keys, list):
                self.new_lookups.put(event.reference_keys)

    def _new_dataset(self, stream_id):
        '''
        Adds a new dataset to the internal cache of the ingestion worker
        '''
        rr_client = ResourceRegistryServiceClient()
        datasets, _ = rr_client.find_subjects(subject_type=RT.Dataset,
                                              predicate=PRED.hasStream,
                                              object=stream_id,
                                              id_only=True)
        if datasets:
            return datasets[0]
        return None

    def get_dataset(self, stream_id):
        '''
        Memoization (LRU) of _new_dataset
        '''
        try:
            result = self._datasets.pop(stream_id)
        except KeyError:
            result = self._new_dataset(stream_id)
            if result is None:
                return None
            if len(self._datasets) >= self.CACHE_LIMIT:
                self._datasets.popitem(0)
        self._datasets[stream_id] = result
        return result

    def get_coverage(self, stream_id):
        '''
        Memoization (LRU) of _get_coverage
        '''
        try:
            result = self._coverages.pop(stream_id)
        except KeyError:
            dataset_id = self.get_dataset(stream_id)
            if dataset_id is None:
                return None
            result = DatasetManagementService._get_simplex_coverage(dataset_id,
                                                                    mode='a')
            if result is None:
                return None
            if len(self._coverages) >= self.CACHE_LIMIT:
                k, coverage = self._coverages.popitem(0)
                coverage.close(timeout=5)
        self._coverages[stream_id] = result
        return result

    def gap_coverage(self, stream_id):
        try:
            old_cov = self._coverages.pop(stream_id)
            dataset_id = self.get_dataset(stream_id)
            sdom, tdom = time_series_domain()
            new_cov = DatasetManagementService._create_simplex_coverage(
                dataset_id, old_cov.parameter_dictionary, sdom, tdom,
                old_cov._persistence_layer.inline_data_writes)
            old_cov.close()
            result = new_cov
        except KeyError:
            result = self.get_coverage(stream_id)
        self._coverages[stream_id] = result
        return result

    def dataset_changed(self, dataset_id, extents, window):
        self.event_publisher.publish_event(origin=dataset_id,
                                           author=self.id,
                                           extents=extents,
                                           window=window)

    def evaluate_qc(self, rdt, dataset_id):
        if self.qc_enabled:
            for field in rdt.fields:
                if not (field.endswith('glblrng_qc')
                        or field.endswith('loclrng_qc')):
                    continue
                try:
                    values = rdt[field]
                    if values is not None:
                        if not all(values):
                            topology = np.where(values == 0)
                            timestamps = rdt[rdt.temporal_parameter][
                                topology[0]]
                            self.flag_qc_parameter(dataset_id, field,
                                                   timestamps.tolist(), {})
                except:
                    continue

    def flag_qc_parameter(self, dataset_id, parameter, temporal_values,
                          configuration):
        data_product_ids, _ = self.container.resource_registry.find_subjects(
            object=dataset_id,
            predicate=PRED.hasDataset,
            subject_type=RT.DataProduct,
            id_only=True)
        for data_product_id in data_product_ids:
            description = 'Automated Quality Control Alerted on %s' % parameter
            self.qc_publisher.publish_event(origin=data_product_id,
                                            qc_parameter=parameter,
                                            temporal_values=temporal_values,
                                            configuration=configuration,
                                            description=description)

    def update_connection_index(self, connection_id, connection_index):
        self.connection_id = connection_id
        try:
            connection_index = int(connection_index)
            self.connection_index = connection_index
        except ValueError:
            pass

    def has_gap(self, connection_id, connection_index):
        if connection_id:
            if not self.connection_id:
                self.update_connection_index(connection_id, connection_index)
                return False
            else:
                if connection_id != self.connection_id:
                    return True
        if connection_index:
            if self.connection_index is None:
                self.update_connection_index(connection_id, connection_index)
                return False
            try:
                connection_index = int(connection_index)
                if connection_index != self.connection_index + 1:
                    return True
            except ValueError:
                pass

        return False

    def splice_coverage(self, dataset_id, coverage):
        log.info('Splicing new coverage')
        DatasetManagementService._splice_coverage(dataset_id, coverage)

    @handle_stream_exception()
    def recv_packet(self, msg, stream_route, stream_id):
        ''' receive packet for ingestion '''
        log.trace('received granule for stream %s', stream_id)

        if msg == {}:
            log.error('Received empty message from stream: %s', stream_id)
            return
        # Message validation
        if not isinstance(msg, Granule):
            log.error('Ingestion received a message that is not a granule: %s',
                      msg)
            return

        rdt = RecordDictionaryTool.load_from_granule(msg)
        if rdt is None:
            log.error('Invalid granule (no RDT) for stream %s', stream_id)
            return
        if not len(rdt):
            log.debug('Empty granule for stream %s', stream_id)
            return

        self.persist_or_timeout(stream_id, rdt)

    def persist_or_timeout(self, stream_id, rdt):
        """ retry writing coverage multiple times and eventually time out """
        done = False
        timeout = 2
        start = time.time()
        while not done:
            try:
                self.add_granule(stream_id, rdt)
                done = True
            except:
                log.exception('An issue with coverage, retrying after a bit')
                if (time.time() - start) > MAX_RETRY_TIME:  # After an hour just give up
                    dataset_id = self.get_dataset(stream_id)
                    log.error(
                        "We're giving up, the coverage needs to be inspected %s",
                        DatasetManagementService._get_coverage_path(
                            dataset_id))
                    raise

                if stream_id in self._coverages:
                    log.info('Popping coverage for stream %s', stream_id)
                    self._coverages.pop(stream_id)

                gevent.sleep(timeout)
                if timeout > (60 * 5):
                    timeout = 60 * 5
                else:
                    timeout *= 2

    def expand_coverage(self, coverage, elements, stream_id):
        try:
            coverage.insert_timesteps(elements, oob=False)
        except IOError as e:
            log.error("Couldn't insert time steps for coverage: %s",
                      coverage.persistence_dir,
                      exc_info=True)
            try:
                coverage.close()
            finally:
                self._bad_coverages[stream_id] = 1
                raise CorruptionError(e.message)

    def get_stored_values(self, lookup_value):
        if not self.new_lookups.empty():
            new_values = self.new_lookups.get()
            self.lookup_docs = new_values + self.lookup_docs
        lookup_value_document_keys = self.lookup_docs
        for key in lookup_value_document_keys:
            try:
                document = self.stored_value_manager.read_value(key)
                if lookup_value in document:
                    return document[lookup_value]
            except NotFound:
                log.warning('Specified lookup document does not exist')
        return None

    def fill_lookup_values(self, rdt):
        rdt.fetch_lookup_values()
        for field in rdt.lookup_values():
            value = self.get_stored_values(rdt.context(field).lookup_value)
            if value:
                rdt[field] = value

    def insert_sparse_values(self, coverage, rdt, stream_id):

        self.fill_lookup_values(rdt)
        for field in rdt.fields:
            if rdt[field] is None:
                continue
            if not isinstance(
                    rdt.context(field).param_type, SparseConstantType):
                # We only set sparse values before insert
                continue
            value = rdt[field]
            try:
                coverage.set_parameter_values(param_name=field, value=value)
            except ValueError as e:
                if "'lower_bound' cannot be >= 'upper_bound'" in e.message:
                    continue
                else:
                    raise
            except IOError as e:
                log.error("Couldn't insert values for coverage: %s",
                          coverage.persistence_dir,
                          exc_info=True)
                try:
                    coverage.close()
                finally:
                    self._bad_coverages[stream_id] = 1
                    raise CorruptionError(e.message)

    def insert_values(self, coverage, rdt, stream_id):
        elements = len(rdt)

        start_index = coverage.num_timesteps - elements

        slice_ = slice(start_index, None)  # the newly appended timesteps
        for k, v in rdt.iteritems():
            if isinstance(v, SparseConstantValue):
                continue
            try:
                coverage.set_parameter_values(param_name=k,
                                              tdoa=slice_,
                                              value=v)
            except IOError as e:
                log.error("Couldn't insert values for coverage: %s",
                          coverage.persistence_dir,
                          exc_info=True)
                try:
                    coverage.close()
                finally:
                    self._bad_coverages[stream_id] = 1
                    raise CorruptionError(e.message)

        if 'ingestion_timestamp' in coverage.list_parameters():
            t_now = time.time()
            ntp_time = TimeUtils.ts_to_units(
                coverage.get_parameter_context('ingestion_timestamp').uom,
                t_now)
            coverage.set_parameter_values(param_name='ingestion_timestamp',
                                          tdoa=slice_,
                                          value=ntp_time)

    def add_granule(self, stream_id, rdt):
        ''' Appends the granule's data to the coverage and persists it. '''
        debugging = log.isEnabledFor(DEBUG)
        timer = Timer() if debugging else None
        if stream_id in self._bad_coverages:
            log.info(
                'Message attempting to be inserted into bad coverage: %s',
                DatasetManagementService._get_coverage_path(
                    self.get_dataset(stream_id)))

        #--------------------------------------------------------------------------------
        # Gap Analysis
        #--------------------------------------------------------------------------------
        if not self.ignore_gaps:
            gap_found = self.has_gap(rdt.connection_id, rdt.connection_index)
            if gap_found:
                log.error(
                    'Gap Found!   New connection: (%s,%s)\tOld Connection: (%s,%s)',
                    rdt.connection_id, rdt.connection_index,
                    self.connection_id, self.connection_index)
                self.gap_coverage(stream_id)

        #--------------------------------------------------------------------------------
        # Coverage determination and appending
        #--------------------------------------------------------------------------------
        dataset_id = self.get_dataset(stream_id)
        if not dataset_id:
            log.error('No dataset could be determined on this stream: %s',
                      stream_id)
            return

        try:
            coverage = self.get_coverage(stream_id)
        except IOError as e:
            log.error(
                "Couldn't open coverage: %s",
                DatasetManagementService._get_coverage_path(
                    self.get_dataset(stream_id)))
            raise CorruptionError(e.message)

        if debugging:
            path = DatasetManagementService._get_coverage_path(dataset_id)
            log.debug(
                '%s: add_granule stream %s dataset %s coverage %r file %s',
                self._id, stream_id, dataset_id, coverage, path)

        if not coverage:
            log.error(
                'Could not persist coverage from granule, coverage is None')
            return
        #--------------------------------------------------------------------------------
        # Actual persistence
        #--------------------------------------------------------------------------------

        elements = len(rdt)
        if rdt[rdt.temporal_parameter] is None:
            elements = 0

        self.insert_sparse_values(coverage, rdt, stream_id)

        if debugging:
            timer.complete_step('checks')  # lightweight ops, should be zero

        self.expand_coverage(coverage, elements, stream_id)

        if debugging:
            timer.complete_step('insert')

        self.insert_values(coverage, rdt, stream_id)

        if debugging:
            timer.complete_step('keys')

        DatasetManagementService._save_coverage(coverage)

        if debugging:
            timer.complete_step('save')

        start_index = coverage.num_timesteps - elements
        self.dataset_changed(dataset_id, coverage.num_timesteps,
                             (start_index, start_index + elements))

        if not self.ignore_gaps and gap_found:
            self.splice_coverage(dataset_id, coverage)

        self.evaluate_qc(rdt, dataset_id)

        if debugging:
            timer.complete_step('notify')
            self._add_timing_stats(timer)

        self.update_connection_index(rdt.connection_id, rdt.connection_index)

    def _add_timing_stats(self, timer):
        """ add stats from latest coverage operation to Accumulator and periodically log results """
        self.time_stats.add(timer)
        if self.time_stats.get_count() % REPORT_FREQUENCY > 0:
            return

        if log.isEnabledFor(TRACE):
            # report per step
            for step in 'checks', 'insert', 'keys', 'save', 'notify':
                log.debug('%s step %s times: %s', self._id, step,
                          self.time_stats.to_string(step))
        # report totals
        log.debug('%s total times: %s', self._id, self.time_stats)
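# The persist_or_timeout loop above is a capped exponential backoff; a minimal
# standalone sketch of that pattern (function and constant names here are
# illustrative, not from the original source):
import time

MAX_RETRY_TIME = 3600   # assumed: one hour, mirroring the "give up" comment above
MAX_BACKOFF = 60 * 5    # cap individual waits at five minutes

def retry_with_backoff(operation, initial_delay=2):
    """Call operation() until it succeeds, backing off exponentially between tries."""
    delay = initial_delay
    start = time.time()
    while True:
        try:
            return operation()
        except Exception:
            if time.time() - start > MAX_RETRY_TIME:
                raise                   # give up and surface the last error
            time.sleep(delay)           # the worker above uses gevent.sleep instead
            delay = min(delay * 2, MAX_BACKOFF)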
Example #17
class Directory(object):
    """
    Frontend to a directory functionality backed by a datastore.
    A directory is a system-wide, datastore-backed tree of entries with attributes and child entries.
    Entries can be identified by a path. The root is '/'.
    Every Org can have its own directory. The default directory is for the root Org (ION).
    """

    def __init__(self, orgname=None, datastore_manager=None, container=None):
        self.container = container or bootstrap.container_instance
        # Get an instance of datastore configured as directory.
        datastore_manager = datastore_manager or self.container.datastore_manager
        self.dir_store = datastore_manager.get_datastore(DataStore.DS_DIRECTORY, DataStore.DS_PROFILE.DIRECTORY)

        self.orgname = orgname or CFG.system.root_org
        self.is_root = (self.orgname == CFG.system.root_org)
        self.events_enabled = CFG.get_safe("service.directory.publish_events") is True   # Publish change events?

        self.event_pub = None
        self.event_sub = None

    def start(self):
        if self.events_enabled:
            # init change event publisher
            self.event_pub = EventPublisher()

            # Register to receive directory changes
            # self.event_sub = EventSubscriber(event_type="ContainerConfigModifiedEvent",
            #                                  origin="Directory",
            #                                  callback=self.receive_directory_change_event)

        # Create directory root entry (for current org) if not existing
        self.register("/", "DIR", sys_name=bootstrap.get_sys_name(), create_only=True)

    def stop(self):
        self.close()

    def close(self):
        """
        Close directory and all resources including datastore and event listener.
        """
        if self.event_sub:
            self.event_sub.deactivate()
        self.dir_store.close()

    # -------------------------------------------------------------------------
    # Directory register, lookup and find

    def lookup(self, parent, key=None, return_entry=False):
        """
        Read directory entry by key and parent node.
        @param return_entry  If True, return the DirEntry object if found, otherwise the DirEntry attributes dict
        @retval The DirEntry attributes dict (or the DirEntry object if return_entry), or None if not found.
        """
        path = self._get_path(parent, key) if key else parent
        direntry = self._read_by_path(path)
        if return_entry:
            return direntry
        else:
            return direntry.attributes if direntry else None

    def lookup_mult(self, parent, keys=None, return_entry=False):
        """
        Read several directory entries by keys from the same parent node.
        @param return_entry  If True, return DirEntry objects, otherwise DirEntry attributes dicts
        @retval List containing, for each key, the DirEntry attributes dict (or DirEntry object), or None if not found.
        """
        direntry_list = self._read_by_path(parent, mult_keys=keys)
        if return_entry:
            return direntry_list
        else:
            return [direntry.attributes if direntry else None for direntry in direntry_list]
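
    # Usage sketch (illustrative paths and keys, not from the original source):
    #   directory.lookup("/Config", "CFG1")                  -> attributes dict or None
    #   directory.lookup("/Config/CFG1")                     -> same entry via the full path
    #   directory.lookup_mult("/Config", ["CFG1", "CFG2"])   -> list of attribute dicts / None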

    def register(self, parent, key, create_only=False, return_entry=False, ensure_parents=True, **kwargs):
        """
        Add or replace an entry in the directory, below a parent node or the "/" root.
        Note: Replaces (does not merge) the attribute values of an existing entry.
        If a concurrent write is detected, register gives up and the other writer wins.
        @param create_only  If True, does not change an already existing entry
        @param return_entry  If True, returns the DirEntry object of the prior entry, otherwise its attributes dict
        @param ensure_parents  If True, make sure that parent nodes exist
        @retval  The prior DirEntry (or its attributes) if one existed, else None
        """
        if not (parent and key):
            raise BadRequest("Illegal arguments")
        if not type(parent) is str or not parent.startswith("/"):
            raise BadRequest("Illegal arguments: parent")

        dn = self._get_path(parent, key)
        log.debug("Directory.register(%s): %s", dn, kwargs)

        entry_old = None
        cur_time = get_ion_ts()
        # Must read existing entry by path to make sure to not create path twice
        direntry = self._read_by_path(dn)
        if direntry and create_only:
            # We only wanted to make sure the entry exists; do not change it.
            # NOTE: The caller cannot tell that the entry already existed; that is acceptable here.
            return direntry if return_entry else direntry.attributes
        elif direntry:
            old_rev, old_ts, old_attr = direntry._rev, direntry.ts_updated, direntry.attributes
            direntry.attributes = kwargs
            direntry.ts_updated = cur_time
            try:
                self.dir_store.update(direntry)

                if self.events_enabled and self.container.has_capability(CCAP.EXCHANGE_MANAGER):
                    self.event_pub.publish_event(event_type="DirectoryModifiedEvent",
                                                 origin=self.orgname + ".DIR", origin_type="DIR",
                                                 key=key, parent=parent, org=self.orgname,
                                                 sub_type="REGISTER." + parent[1:].replace("/", "."),
                                                 mod_type=DirectoryModificationType.UPDATE)
            except Conflict:
                # Concurrent update - we accept that we finished the race second and give up
                log.warn("Concurrent update to %s detected. We lost: %s", dn, kwargs)

            if return_entry:
                # Reset object back to prior state
                direntry.attributes = old_attr
                direntry.ts_updated = old_ts
                direntry._rev = old_rev
                entry_old = direntry
            else:
                entry_old = old_attr
        else:
            direntry = self._create_dir_entry(parent, key, attributes=kwargs, ts=cur_time)
            if ensure_parents:
                self._ensure_parents_exist([direntry])
            try:
                self.dir_store.create(direntry, create_unique_directory_id())
                if self.events_enabled and self.container.has_capability(CCAP.EXCHANGE_MANAGER):
                    self.event_pub.publish_event(event_type="DirectoryModifiedEvent",
                                                 origin=self.orgname + ".DIR", origin_type="DIR",
                                                 key=key, parent=parent, org=self.orgname,
                                                 sub_type="REGISTER." + parent[1:].replace("/", "."),
                                                 mod_type=DirectoryModificationType.CREATE)
            except BadRequest as ex:
                if not ex.message.startswith("DirEntry already exists"):
                    raise
                # Concurrent create - we accept that we finished the race second and give up
                log.warn("Concurrent create of %s detected. We lost: %s", dn, kwargs)

        return entry_old
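
    # Usage sketch (illustrative values): registering replaces attributes wholesale
    # and returns the prior entry's attributes (or None if the entry did not exist);
    # create_only=True leaves an existing entry untouched.
    #   prev = directory.register("/Config", "CFG1", foo="bar")
    #   directory.register("/Config", "CFG1", create_only=True, foo="ignored")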

    def register_safe(self, parent, key, **kwargs):
        """
        Use this method to protect caller from any form of directory register error
        """
        try:
            return self.register(parent, key, **kwargs)
        except Exception as ex:
            log.exception("Error registering path=%s/%s, args=%s", parent, key, kwargs)

    def register_mult(self, entries):
        """
        Registers multiple directory entries efficiently in one datastore access.
        Note: this fails if entries already exist, so it is suitable for create only.
        """
        if type(entries) not in (list, tuple):
            raise BadRequest("Bad entries type")
        de_list = []
        cur_time = get_ion_ts()
        for parent, key, attrs in entries:
            direntry = self._create_dir_entry(parent, key, attributes=attrs, ts=cur_time)
            de_list.append(direntry)
        pe_list = self._ensure_parents_exist(de_list, create=False)
        de_list.extend(pe_list)
        deid_list = [create_unique_directory_id() for i in xrange(len(de_list))]
        self.dir_store.create_mult(de_list, deid_list)

        if self.events_enabled and self.container.has_capability(CCAP.EXCHANGE_MANAGER):
            for de in de_list:
                self.event_pub.publish_event(event_type="DirectoryModifiedEvent",
                                             origin=self.orgname + ".DIR", origin_type="DIR",
                                             key=de.key, parent=de.parent, org=self.orgname,
                                             sub_type="REGISTER." + de.parent[1:].replace("/", "."),
                                             mod_type=DirectoryModificationType.CREATE)

    def unregister(self, parent, key=None, return_entry=False):
        """
        Remove entry from directory.
        Returns attributes of deleted DirEntry
        """
        path = self._get_path(parent, key) if key else parent
        log.debug("Removing content at path %s" % path)

        direntry = self._read_by_path(path)
        if direntry:
            self.dir_store.delete(direntry)
            if self.events_enabled and self.container.has_capability(CCAP.EXCHANGE_MANAGER):
                self.event_pub.publish_event(event_type="DirectoryModifiedEvent",
                                             origin=self.orgname + ".DIR", origin_type="DIR",
                                             key=key, parent=parent, org=self.orgname,
                                             sub_type="UNREGISTER." + parent[1:].replace("/", "."),
                                             mod_type=DirectoryModificationType.DELETE)

        if direntry and not return_entry:
            return direntry.attributes
        else:
            return direntry

    def unregister_safe(self, parent, key):
        try:
            return self.unregister(parent, key)
        except Exception as ex:
            log.exception("Error unregistering path=%s/%s", parent, key)

    def find_child_entries(self, parent='/', direct_only=True, **kwargs):
        """
        Return all child entries (ordered by path) for the given parent path.
        Does not return the parent itself. Optionally returns descendant entries (children of children).
        Additional kwargs are applied to constrain the search results (limit, descending, skip).
        @param parent  Path to parent (must start with "/")
        @param direct_only  If False, also includes descendant entries (children of children)
        @retval  A list of DirEntry objects for the matches
        """
        if not type(parent) is str or not parent.startswith("/"):
            raise BadRequest("Illegal argument parent: %s" % parent)
        if direct_only:
            start_key = [self.orgname, parent, 0]
            end_key = [self.orgname, parent]
            res = self.dir_store.find_by_view('directory', 'by_parent',
                start_key=start_key, end_key=end_key, id_only=True, convert_doc=True, **kwargs)
        else:
            path = parent[1:].split("/")
            start_key = [self.orgname, path, 0]
            end_key = [self.orgname, list(path) + ["ZZZZZZ"]]
            res = self.dir_store.find_by_view('directory', 'by_path',
                start_key=start_key, end_key=end_key, id_only=True, convert_doc=True, **kwargs)

        match = [value for docid, indexkey, value in res]
        return match

    def find_by_key(self, key=None, parent='/', **kwargs):
        """
        Returns a list of DirEntry objects for all directory entries that match the given key name.
        If a parent is provided, only that parent and its subtree are searched.
        Matches live in the same org's directory but may have different parents.
        """
        if key is None:
            raise BadRequest("Illegal arguments")
        if parent is None:
            raise BadRequest("Illegal arguments")
        start_key = [self.orgname, key, parent]
        end_key = [self.orgname, key, parent + "ZZZZZZ"]
        res = self.dir_store.find_by_view('directory', 'by_key',
            start_key=start_key, end_key=end_key, id_only=True, convert_doc=True, **kwargs)

        match = [value for docid, indexkey, value in res]
        return match

    def find_by_value(self, subtree='/', attribute=None, value=None, **kwargs):
        """
        Returns a list of DirEntry objects for entries that have the given attribute with the given value.
        """
        if attribute is None:
            raise BadRequest("Illegal arguments")
        if subtree is None:
            raise BadRequest("Illegal arguments")
        start_key = [self.orgname, attribute, value, subtree]
        end_key = [self.orgname, attribute, value, subtree + "ZZZZZZ"]
        res = self.dir_store.find_by_view('directory', 'by_attribute',
                        start_key=start_key, end_key=end_key, id_only=True, convert_doc=True, **kwargs)

        match = [value for docid, indexkey, value in res]
        return match
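
    # Usage sketch (illustrative arguments, not from the original source):
    #   directory.find_child_entries("/Agents")                        # direct children only
    #   directory.find_child_entries("/Agents", direct_only=False)     # whole subtree
    #   directory.find_by_key("CFG1", parent="/Config")                # by key name
    #   directory.find_by_value("/", attribute="name", value="foo")    # by attribute value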

    def remove_child_entries(self, parent, delete_parent=False):
        pass

    # -------------------------------------------------------------------------
    #  Concurrency Control

    def acquire_lock(self, key, timeout=LOCK_EXPIRES_DEFAULT, lock_holder=None, lock_info=None):
        """
        Attempts to atomically acquire a lock with the given key.
        If lock_holder is given and already holds the lock, the lock is renewed.
        Expired locks are detected and removed before reacquiring.
        @param timeout  Seconds until lock expiration, or 0 for no expiration
        @param lock_holder  Str value identifying the lock holder for subsequent exclusive access
        @param lock_info  Dict of additional attributes describing the lock
        @retval  bool - True if the lock was acquired (or renewed)
        """
        if not key:
            raise BadRequest("Missing argument: key")
        if "/" in key:
            raise BadRequest("Invalid argument value: key")

        lock_attrs = {LOCK_EXPIRES_ATTR: get_ion_ts_millis() + int(1000*timeout) if timeout else 0,
                      LOCK_HOLDER_ATTR: lock_holder or ""}
        if lock_info:
            lock_attrs.update(lock_info)
        expires = int(lock_attrs[LOCK_EXPIRES_ATTR])  # Check type just to be sure
        if expires and get_ion_ts_millis() > expires:
            raise BadRequest("Invalid lock expiration value: %s", expires)

        direntry = self._create_dir_entry(LOCK_DIR_PATH, key, attributes=lock_attrs)
        lock_result = False
        try:
            # This is an atomic operation. It relies on the unique key constraint of the directory service
            self.dir_store.create(direntry, create_unique_directory_id())
            lock_result = True
        except BadRequest as ex:
            if ex.message.startswith("DirEntry already exists"):
                de_old = self.lookup(LOCK_DIR_PATH, key, return_entry=True)
                if de_old:
                    if self._is_lock_expired(de_old):
                        # Lock is expired: remove, try to relock
                        # Note: even as holder, it's safer to reacquire in this case than to renew
                        log.warn("Removing expired lock: %s/%s", de_old.parent, de_old.key)
                        try:
                            # This is safe, because if the lock was deleted and recreated in the meantime, it has a different id
                            self._delete_lock(de_old)
                            # Try recreate - may fail again due to concurrency
                            self.dir_store.create(direntry, create_unique_directory_id())
                            lock_result = True
                        except BadRequest as ex:
                            if not ex.message.startswith("DirEntry already exists"):
                                log.exception("Error releasing/reacquiring expired lock %s", de_old.key)
                        except Exception:
                            log.exception("Error releasing/reacquiring expired lock %s", de_old.key)
                    elif lock_holder and de_old.attributes[LOCK_HOLDER_ATTR] == lock_holder:
                        # Holder currently holds the lock: renew
                        log.debug("Renewing lock %s/%s for holder %s", de_old.parent, de_old.key, lock_holder)
                        de_old.attributes = lock_attrs
                        try:
                            self.dir_store.update(de_old)
                            lock_result = True
                        except Exception:
                            log.exception("Error renewing expired lock %s", de_old.key)
                # We do nothing if we could not find the lock now...
            else:
                raise

        log.debug("Directory.acquire_lock(%s): %s -> %s", key, lock_attrs, lock_result)

        return lock_result

    def is_locked(self, key):
        if not key:
            raise BadRequest("Missing argument: key")
        if "/" in key:
            raise BadRequest("Invalid argument value: key")

        lock_entry = self.lookup(LOCK_DIR_PATH, key, return_entry=True)
        return lock_entry and not self._is_lock_expired(lock_entry)

    def release_lock(self, key, lock_holder=None):
        """
        Releases lock identified by key.
        Raises NotFound if lock does not exist.
        """
        if not key:
            raise BadRequest("Missing argument: key")
        if "/" in key:
            raise BadRequest("Invalid argument value: key")

        log.debug("Directory.release_lock(%s)", key)

        dir_entry = self.lookup(LOCK_DIR_PATH, key, return_entry=True)
        if dir_entry:
            if lock_holder and dir_entry.attributes[LOCK_HOLDER_ATTR] != lock_holder:
                raise BadRequest("Cannot release lock - not currently lock holder")
            self._delete_lock(dir_entry)
        else:
            raise NotFound("Lock %s not found" % key)

    def release_expired_locks(self):
        """Removes all expired locks
        """
        de_list = self.find_child_entries(LOCK_DIR_PATH, direct_only=True)
        for de in de_list:
            if self._is_lock_expired(de):
                log.warn("Removing expired lock %s/%s", de.parent, de.key)
                try:
                    # This is safe, because if lock was deleted + recreated in the meantime, it has different id
                    self._delete_lock(de)
                except Exception:
                    log.exception("Error releasing expired lock %s", de.key)

    def _is_lock_expired(self, lock_entry):
        if not lock_entry:
            raise BadRequest("No lock entry provided")
        return 0 < lock_entry.attributes[LOCK_EXPIRES_ATTR] <= get_ion_ts_millis()

    def _delete_lock(self, lock_entry):
        lock_entry_id = lock_entry._id
        self.dir_store.delete(lock_entry_id)
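
    # Usage sketch of the lock primitives (lock key and holder names are illustrative):
    #   if directory.acquire_lock("ingest_leader", timeout=30, lock_holder=proc_id):
    #       try:
    #           ...  # exclusive work; call acquire_lock again with the same holder to renew
    #       finally:
    #           directory.release_lock("ingest_leader", lock_holder=proc_id)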

    # -------------------------------------------------------------------------
    # Internal functions

    def receive_directory_change_event(self, event_msg, headers):
        # @TODO add support to fold updated config into container config
        pass


    def _get_path(self, parent, key):
        """
        Returns the qualified directory path for a directory entry.
        """
        if parent == "/":
            return parent + key
        elif parent.startswith("/"):
            return parent + "/" + key
        else:
            raise BadRequest("Illegal parent: %s" % parent)

    def _get_key(self, path):
        """
        Returns the key from a qualified directory path
        """
        parent, key = path.rsplit("/", 1)
        return key
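
    # e.g. _get_path("/Config", "CFG1") -> "/Config/CFG1" and _get_key("/Config/CFG1") -> "CFG1"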

    def _create_dir_entry(self, parent, key, orgname=None, ts=None, attributes=None):
        """
        Standard way to create a DirEntry object.
        """
        orgname = orgname or self.orgname
        ts = ts or get_ion_ts()
        attributes = attributes if attributes is not None else {}
        parent = parent or "/"
        de = DirEntry(org=orgname, parent=parent, key=key, attributes=attributes, ts_created=ts, ts_updated=ts)
        return de

    def _read_by_path(self, path, orgname=None, mult_keys=None):
        """
        Given a qualified path, find the entry in the directory and return the DirEntry object, or None if not found.
        With mult_keys, path is the parent and a list of DirEntry objects (or None), one per key, is returned.
        """
        if path is None:
            raise BadRequest("Illegal arguments")
        orgname = orgname or self.orgname
        if mult_keys:
            parent = path or "/"
            key = mult_keys
        else:
            parent, key = path.rsplit("/", 1)
            parent = parent or "/"
        find_key = [orgname, key, parent]
        view_res = self.dir_store.find_by_view('directory', 'by_key', key=find_key, id_only=True, convert_doc=True)

        match = [doc for docid, index, doc in view_res]
        if mult_keys:
            entries_by_key = {doc.key: doc for doc in match}
            entries = [entries_by_key.get(key, None) for key in mult_keys]
            return entries
        else:
            if len(match) > 1:
                log.error("More than one directory entry found for key %s" % path)
                return match[0]
            elif match:
                return match[0]
            return None

    def _get_unique_parents(self, entry_list):
        """Returns a sorted, unique list of parents of DirEntries (excluding the root /)"""
        if entry_list and type(entry_list) not in (list, tuple):
            entry_list = [entry_list]
        parents = set()
        for entry in entry_list:
            parents.add(entry.parent)
        if "/" in parents:
            parents.remove("/")
        return sorted(parents)

    def _ensure_parents_exist(self, entry_list, create=True):
        parents_list = self._get_unique_parents(entry_list)
        pe_list = []
        try:
            for parent in parents_list:
                pe = self.lookup(parent)
                if pe is None:
                    pp, pk = parent.rsplit("/", 1)
                    direntry = self._create_dir_entry(parent=pp, key=pk)
                    pe_list.append(direntry)
                    if create:
                        try:
                            self.dir_store.create(direntry, create_unique_directory_id())
                        except BadRequest as ex:
                            if not ex.message.startswith("DirEntry already exists"):
                                raise
                            # Else: Concurrent create
        except Exception as ex:
            log.warn("_ensure_parents_exist(): Error creating directory parents", exc_info=True)
        return pe_list

    def _cleanup_outdated_entries(self, dir_entries, common="key"):
        """
        Takes all DirEntries in the list and removes all but the most recent one, based on the
        ts_updated timestamp. Returns the most recent DirEntry and removes the others via
        direct datastore operations. If multiple entries share the most recent timestamp, the
        first one encountered is kept and the others are removed non-deterministically.
        Note: This operation can be called for DirEntries without common keys, e.g. for all
        entries registering an agent for a device.
        """
        if not dir_entries:
            return
        newest_entry = dir_entries[0]
        try:
            for de in dir_entries:
                if int(de.ts_updated) > int(newest_entry.ts_updated):
                    newest_entry = de

            remove_list = [de for de in dir_entries if de is not newest_entry]

            log.info("Attempting to cleanup these directory entries: %s" % remove_list)
            for de in remove_list:
                try:
                    self.dir_store.delete(de)
                except Exception as ex:
                    log.warn("Removal of outdated %s directory entry failed: %s" % (common, de))
            log.info("Cleanup of %s old %s directory entries succeeded" % (len(remove_list), common))

        except Exception as ex:
            log.warn("Cleanup of multiple directory entries for %s failed: %s" % (
                common, str(ex)))

        return newest_entry

    def test_lookup_values(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_lookups()
        stream_def_id = self.pubsubcli.create_stream_definition(
            'lookup', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsubcli.delete_stream_definition, stream_def_id)

        data_product = DataProduct(name='lookup data product')
        tdom, sdom = time_series_domain()
        data_product.temporal_domain = tdom.dump()
        data_product.spatial_domain = sdom.dump()

        data_product_id = self.dpsc_cli.create_data_product(
            data_product, stream_definition_id=stream_def_id)
        self.addCleanup(self.dpsc_cli.delete_data_product, data_product_id)
        data_producer = DataProducer(name='producer')
        data_producer.producer_context = DataProcessProducerContext()
        data_producer.producer_context.configuration['qc_keys'] = [
            'offset_document'
        ]
        data_producer_id, _ = self.rrclient.create(data_producer)
        self.addCleanup(self.rrclient.delete, data_producer_id)
        assoc, _ = self.rrclient.create_association(
            subject=data_product_id,
            object=data_producer_id,
            predicate=PRED.hasDataProducer)
        self.addCleanup(self.rrclient.delete_association, assoc)

        document_keys = self.damsclient.list_qc_references(data_product_id)

        self.assertEquals(document_keys, ['offset_document'])
        svm = StoredValueManager(self.container)
        svm.stored_value_cas('offset_document', {'offset_a': 2.0})
        self.dpsc_cli.activate_data_product_persistence(data_product_id)
        dataset_ids, _ = self.rrclient.find_objects(subject=data_product_id,
                                                    predicate=PRED.hasDataset,
                                                    id_only=True)
        dataset_id = dataset_ids[0]

        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [0]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()

        stream_ids, _ = self.rrclient.find_objects(subject=data_product_id,
                                                   predicate=PRED.hasStream,
                                                   id_only=True)
        stream_id = stream_ids[0]
        route = self.pubsubcli.read_stream_route(stream_id=stream_id)

        publisher = StandaloneStreamPublisher(stream_id, route)
        publisher.publish(granule)

        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt['temp'], rdt2['temp'])
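        # With offset_a=2.0 from 'offset_document', the lookup-backed calibration
        # is expected to yield 20.0 + 2.0 = 22.0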
        np.testing.assert_array_almost_equal(rdt2['calibrated'],
                                             np.array([22.0]))

        svm.stored_value_cas('updated_document', {'offset_a': 3.0})
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        ep = EventPublisher(event_type=OT.ExternalReferencesUpdatedEvent)
        ep.publish_event(origin=data_product_id,
                         reference_keys=['updated_document'])

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = [1]
        rdt['temp'] = [20.]
        granule = rdt.to_granule()
        gevent.sleep(2)  # Yield so that the event goes through
        publisher.publish(granule)
        self.assertTrue(dataset_monitor.event.wait(10))

        granule = self.data_retriever.retrieve(dataset_id)
        rdt2 = RecordDictionaryTool.load_from_granule(granule)
        np.testing.assert_array_equal(rdt2['temp'], np.array([20., 20.]))
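        # The second granule should reflect the updated offset_a=3.0 (20.0 + 3.0 = 23.0)
        # picked up via the ExternalReferencesUpdatedEvent published above.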
        np.testing.assert_array_almost_equal(rdt2['calibrated'],
                                             np.array([22.0, 23.0]))
Example #19
    def test_pub_and_sub(self):
        ar = event.AsyncResult()
        gq = queue.Queue()
        self.count = 0

        def cb(*args, **kwargs):
            self.count += 1
            gq.put(args[0])
            if self.count == 2:
                ar.set()

        sub = EventSubscriber(event_type="ResourceEvent",
                              callback=cb,
                              origin="specific")
        pub = EventPublisher(event_type="ResourceEvent")

        self._listen(sub)
        pub.publish_event(origin="specific", description="hello")

        event_obj = bootstrap.IonObject('ResourceEvent',
                                        origin='specific',
                                        description='more testing')
        self.assertEqual(event_obj, pub.publish_event_object(event_obj))

        with self.assertRaises(BadRequest) as cm:
            event_obj = bootstrap.IonObject('ResourceEvent',
                                            origin='specific',
                                            description='more testing',
                                            ts_created='2423')
            pub.publish_event_object(event_obj)
        self.assertIn('The ts_created value is not a valid timestamp',
                      cm.exception.message)

        with self.assertRaises(BadRequest) as cm:
            event_obj = bootstrap.IonObject('ResourceEvent',
                                            origin='specific',
                                            description='more testing',
                                            ts_created='1000494978462')
            pub.publish_event_object(event_obj)
        self.assertIn('This ts_created value is too old', cm.exception.message)

        with self.assertRaises(BadRequest) as cm:
            event_obj = bootstrap.IonObject('ResourceEvent',
                                            origin='specific',
                                            description='more testing')
            event_obj._id = '343434'
            pub.publish_event_object(event_obj)
        self.assertIn('The event object cannot contain a _id field',
                      cm.exception.message)

        ar.get(timeout=5)
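
        # Only the two successful publishes should have reached the subscriber;
        # the three invalid events above raised BadRequest before being sent.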

        res = []
        for x in xrange(self.count):
            res.append(gq.get(timeout=5))

        self.assertEquals(len(res), self.count)
        self.assertEquals(res[0].description, "hello")
        self.assertAlmostEquals(int(res[0].ts_created),
                                int(get_ion_ts()),
                                delta=5000)

        self.assertEquals(res[1].description, "more testing")
        self.assertAlmostEquals(int(res[1].ts_created),
                                int(get_ion_ts()),
                                delta=5000)
Example #20
    def test_pub_on_different_subsubtypes(self):
        res_list = [
            DotDict(ar=event.AsyncResult(), gq=queue.Queue(), count=0)
            for i in xrange(4)
        ]

        def cb_gen(num):
            def cb(event, *args, **kwargs):
                res_list[num].count += 1
                res_list[num].gq.put(event)
                if event.description == "end":
                    res_list[num].ar.set()

            return cb

        sub0 = EventSubscriber(event_type="ResourceModifiedEvent",
                               sub_type="st1.*",
                               callback=cb_gen(0))
        sub0.start()

        sub1 = EventSubscriber(event_type="ResourceModifiedEvent",
                               sub_type="st1.a",
                               callback=cb_gen(1))
        sub1.start()

        sub2 = EventSubscriber(event_type="ResourceModifiedEvent",
                               sub_type="*.a",
                               callback=cb_gen(2))
        sub2.start()

        sub3 = EventSubscriber(event_type="ResourceModifiedEvent",
                               sub_type="st1",
                               callback=cb_gen(3))
        sub3.start()

        pub1 = EventPublisher(event_type="ResourceModifiedEvent")

        pub1.publish_event(origin="one", sub_type="st1.a", description="1")
        pub1.publish_event(origin="two", sub_type="st1", description="2")
        pub1.publish_event(origin="three", sub_type="st1.b", description="3")

        pub1.publish_event(origin="four", sub_type="st2.a", description="4")
        pub1.publish_event(origin="five", sub_type="st2", description="5")

        pub1.publish_event(origin="six", sub_type="a", description="6")
        pub1.publish_event(origin="seven", sub_type="", description="7")

        pub1.publish_event(origin="end", sub_type="st1.a", description="end")
        pub1.publish_event(origin="end", sub_type="st1", description="end")

        [res_list[i].ar.get(timeout=5) for i in xrange(3)]

        sub0.stop()
        sub1.stop()
        sub2.stop()
        sub3.stop()

        for i in xrange(4):
            res_list[i].res = []
            for x in xrange(res_list[i].count):
                res_list[i].res.append(res_list[i].gq.get(timeout=5))
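
        # Expected routing: sub0 ("st1.*") sees st1.a, st1.b and the st1.a "end" event;
        # sub1 ("st1.a") sees the two st1.a events; sub2 ("*.a") sees st1.a, st2.a and
        # the st1.a "end" event; sub3 ("st1") sees st1 and the st1 "end" event.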

        self.assertEquals(len(res_list[0].res), 3)
        self.assertEquals(res_list[0].res[0].description, "1")

        self.assertEquals(len(res_list[1].res), 2)
        self.assertEquals(res_list[1].res[0].description, "1")

        self.assertEquals(len(res_list[2].res), 3)
        self.assertEquals(res_list[2].res[0].description, "1")

        self.assertEquals(len(res_list[3].res), 2)
        self.assertEquals(res_list[3].res[0].description, "2")