def test_base_subscriber_as_catchall(self):
    ar = event.AsyncResult()
    gq = queue.Queue()
    self.count = 0

    def cb(*args, **kwargs):
        self.count += 1
        gq.put(args[0])
        if self.count == 2:
            ar.set()

    sub = EventSubscriber(callback=cb)
    pub1 = EventPublisher(event_type="ResourceEvent")
    pub2 = EventPublisher(event_type="ContainerLifecycleEvent")

    self._listen(sub)

    pub1.publish_event(origin="some", description="1")
    pub2.publish_event(origin="other", description="2")

    ar.get(timeout=5)

    res = []
    for x in xrange(self.count):
        res.append(gq.get(timeout=5))

    self.assertEquals(len(res), 2)
    self.assertEquals(res[0].description, "1")
    self.assertEquals(res[1].description, "2")

def test_pub_on_different_subtypes(self):
    ar = event.AsyncResult()
    gq = queue.Queue()
    self.count = 0

    def cb(event, *args, **kwargs):
        self.count += 1
        gq.put(event)
        if event.description == "end":
            ar.set()

    sub = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="st1", callback=cb)
    sub.start()

    pub1 = EventPublisher(event_type="ResourceModifiedEvent")
    pub2 = EventPublisher(event_type="ContainerLifecycleEvent")

    pub1.publish_event(origin="two", sub_type="st2", description="2")
    pub2.publish_event(origin="three", sub_type="st1", description="3")
    pub1.publish_event(origin="one", sub_type="st1", description="1")
    pub1.publish_event(origin="four", sub_type="st1", description="end")

    ar.get(timeout=5)
    sub.stop()

    res = []
    for x in xrange(self.count):
        res.append(gq.get(timeout=5))

    self.assertEquals(len(res), 2)
    self.assertEquals(res[0].description, "1")

def on_start(self):  # pragma no cover
    #--------------------------------------------------------------------------------
    # Explicit on_start
    #--------------------------------------------------------------------------------

    # Skip TransformStreamListener and go to StreamProcess to avoid the subscriber
    # being created. We want explicit management of the thread and subscriber object
    # for ingestion.
    TransformStreamProcess.on_start(self)

    self.queue_name = self.CFG.get_safe('process.queue_name', self.id)
    self.subscriber = StreamSubscriber(process=self, exchange_name=self.queue_name, callback=self.receive_callback)
    self.thread_lock = RLock()

    #--------------------------------------------------------------------------------
    # Normal on_start after this point
    #--------------------------------------------------------------------------------

    BaseIngestionWorker.on_start(self)
    self._rpc_server = self.container.proc_manager._create_listening_endpoint(from_name=self.id, process=self)
    self.add_endpoint(self._rpc_server)

    self.event_publisher = EventPublisher(OT.DatasetModified)
    self.stored_value_manager = StoredValueManager(self.container)

    self.lookup_docs = self.CFG.get_safe('process.lookup_docs', [])
    self.input_product = self.CFG.get_safe('process.input_product', '')
    self.new_lookups = Queue()
    self.lookup_monitor = EventSubscriber(event_type=OT.ExternalReferencesUpdatedEvent, callback=self._add_lookups, auto_delete=True)
    self.add_endpoint(self.lookup_monitor)

    self.connection_id = ''
    self.connection_index = None

    self.start_listener()

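# The _add_lookups callback wired to lookup_monitor above is not shown in this
# section. A minimal sketch of what it might look like, assuming the
# ExternalReferencesUpdatedEvent carries a reference_keys list (as published in
# test_lookup_values below) and that only updates for this worker's input product matter:
def _add_lookups(self, event, *args, **kwargs):
    if event.origin == self.input_product:
        if isinstance(event.reference_keys, list):
            # Hand the updated document keys to the worker thread via the queue
            self.new_lookups.put(event.reference_keys)
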
def process_execution(self, temp_vector, qc_params, bad_times):
    interval_key = uuid4().hex
    data_product_id = self.make_large_dataset(temp_vector)
    async_queue = Queue()

    def cb(event, *args, **kwargs):
        if '_'.join(event.qc_parameter.split('_')[1:]) not in qc_params:
            return  # I don't care about QC parameters outside the set under test
        times = event.temporal_values
        self.assertEquals(len(times), bad_times)
        async_queue.put(1)

    es = EventSubscriber(event_type=OT.ParameterQCEvent, origin=data_product_id, callback=cb, auto_delete=True)
    es.start()
    self.addCleanup(es.stop)

    config = DotDict()
    config.process.interval_key = interval_key
    config.process.qc_params = qc_params
    self.sync_launch(config)

    # So now the process is started, time to throw an event at it
    ep = EventPublisher(event_type='TimerEvent')
    ep.publish_event(origin=interval_key)

    try:
        async_queue.get(timeout=120)
    except Empty:
        raise AssertionError('QC was not flagged in time')

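# The process launched via sync_launch above is expected to wake on the TimerEvent
# published at the end of process_execution. A minimal sketch of that wiring inside
# such a process, assuming it reads process.interval_key from its CFG and that
# self.process() runs a single QC pass:
def on_start(self):
    interval_key = self.CFG.get_safe('process.interval_key')
    self.timer_sub = EventSubscriber(event_type='TimerEvent',
                                     origin=interval_key,
                                     callback=self._on_timer)
    self.timer_sub.start()

def _on_timer(self, event, *args, **kwargs):
    self.process()  # hypothetical entry point for a single QC pass
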
def test_pub_on_different_origins(self):
    ar = event.AsyncResult()
    gq = queue.Queue()
    self.count = 0

    def cb(*args, **kwargs):
        self.count += 1
        gq.put(args[0])
        if self.count == 3:
            ar.set()

    sub = EventSubscriber(event_type="ResourceEvent", callback=cb)
    pub = EventPublisher(event_type="ResourceEvent")

    self._listen(sub)

    pub.publish_event(origin="one", description="1")
    pub.publish_event(origin="two", description="2")
    pub.publish_event(origin="three", description="3")

    ar.get(timeout=5)

    res = []
    for x in xrange(self.count):
        res.append(gq.get(timeout=5))

    self.assertEquals(len(res), 3)
    self.assertEquals(res[0].description, "1")
    self.assertEquals(res[1].description, "2")
    self.assertEquals(res[2].description, "3")

def start(self):
    self.container.stats_mgr = ContainerStatsManager(self.container)
    self.container.stats_mgr.start()

    ## create queue listener and publisher
    self.sender = EventPublisher(event_type="ContainerManagementResult")
    self.receiver = EventSubscriber(event_type="ContainerManagementRequest", callback=self._receive_event)
    with self.lock:
        self.running = True
    self.receiver.start()
    log.debug('Container ready for container management requests')

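# _receive_event is not shown in this section. A rough sketch of the request/reply
# shape it implies, assuming results are reported on the ContainerManagementResult
# event created above; the dispatch helper and the outcome field are hypothetical,
# not the actual ContainerManagementRequest/Result schema:
def _receive_event(self, event, *args, **kwargs):
    with self.lock:
        if not self.running:
            return  # drop requests once stop() has begun
    result = self._perform_action(event)  # hypothetical dispatch of the request
    self.sender.publish_event(origin=self.container.id, outcome=str(result))  # assumed field
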
def fail(x):
    '''
    The goal behind this function is to publish an event so that threads can
    synchronize with it to verify that it was run, regardless of context
    '''
    event_publisher = EventPublisher(OT.GranuleIngestionErrorEvent)
    try:
        event_publisher.publish_event(error_msg='failure')
        raise StandardError('Something you tried to do failed')
    finally:
        event_publisher.close()

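# A sketch of the synchronization that fail() enables: a listener subscribes to the
# error event before the failure is triggered, then blocks until it observes it.
# This block is illustrative; it assumes a gevent-style Event for signaling and
# reuses the error_msg field published above.
from gevent.event import Event

verified = Event()

def _on_error(event, *args, **kwargs):
    if event.error_msg == 'failure':
        verified.set()

es = EventSubscriber(event_type=OT.GranuleIngestionErrorEvent, callback=_on_error)
es.start()
try:
    fail(None)
except StandardError:
    pass  # the error event was published in the finally block regardless
assert verified.wait(timeout=10), 'fail() was never observed'
es.stop()
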
def start(self):
    if self.events_enabled:
        # init change event publisher
        self.event_pub = EventPublisher()

        # Register to receive directory changes
        # self.event_sub = EventSubscriber(event_type="ContainerConfigModifiedEvent",
        #                                  origin="Directory",
        #                                  callback=self.receive_directory_change_event)

    # Create directory root entry (for current org) if not existing
    self.register("/", "DIR", sys_name=bootstrap.get_sys_name(), create_only=True)

def process(self, dataset_id, start_time=0, end_time=0):
    if not dataset_id:
        raise BadRequest('No dataset id specified.')
    now = time.time()
    start_time = start_time or (now - (3600 * (self.run_interval + 1)))  # Every N hours with 1 hour of overlap
    end_time = end_time or now

    qc_params = [i for i in self.qc_params if i in self.qc_suffixes] or self.qc_suffixes

    self.qc_publisher = EventPublisher(event_type=OT.ParameterQCEvent)
    log.debug('Iterating over the data blocks')

    for st, et in self.chop(int(start_time), int(end_time)):
        log.debug('Chopping %s:%s', st, et)
        log.debug("Retrieving data: data_retriever.retrieve('%s', query={'start_time': %s, 'end_time': %s})", dataset_id, st, et)
        try:
            granule = self.data_retriever.retrieve(dataset_id, query={'start_time': st, 'end_time': et})
        except BadRequest:
            data_products, _ = self.container.resource_registry.find_subjects(object=dataset_id, predicate=PRED.hasDataset, subject_type=RT.DataProduct)
            for data_product in data_products:
                log.exception('Failed to perform QC Post Processing on %s', data_product.name)
                log.error('Calculated Start Time: %s', st)
                log.error('Calculated End Time: %s', et)
            raise
        log.debug('Retrieved Data')

        rdt = RecordDictionaryTool.load_from_granule(granule)
        qc_fields = [i for i in rdt.fields if any([i.endswith(j) for j in qc_params])]
        log.debug('QC Fields: %s', qc_fields)

        for field in qc_fields:
            val = rdt[field]
            if val is None:
                continue
            if not np.all(val):
                log.debug('Found QC Alerts')
                indexes = np.where(val == 0)
                timestamps = rdt[rdt.temporal_parameter][indexes[0]]
                self.flag_qc_parameter(dataset_id, field, timestamps.tolist(), {})

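# flag_qc_parameter is not shown here. Given that self.qc_publisher publishes
# ParameterQCEvent and that subscribers (see process_execution above) read
# event.qc_parameter and event.temporal_values with origin set to a data product id,
# a plausible sketch; anything beyond those fields is an assumption:
def flag_qc_parameter(self, dataset_id, parameter, temporal_values, configuration):
    data_product_ids, _ = self.container.resource_registry.find_subjects(
        object=dataset_id, predicate=PRED.hasDataset, id_only=True)
    for data_product_id in data_product_ids:
        self.qc_publisher.publish_event(origin=data_product_id,
                                        qc_parameter=parameter,
                                        temporal_values=temporal_values)
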
def test_subscriber_listening_for_specific_origin(self):
    ar = event.AsyncResult()
    self.count = 0

    def cb(*args, **kwargs):
        self.count += 1
        ar.set(args[0])

    sub = EventSubscriber(event_type="ResourceEvent", origin="specific", callback=cb)
    pub = EventPublisher(event_type="ResourceEvent")

    self._listen(sub)

    pub.publish_event(origin="notspecific", description="1")
    pub.publish_event(origin="notspecific", description="2")
    pub.publish_event(origin="specific", description="3")
    pub.publish_event(origin="notspecific", description="4")

    evmsg = ar.get(timeout=5)
    self.assertEquals(self.count, 1)
    self.assertEquals(evmsg.description, "3")

def start(self):
    self.container.event_pub = EventPublisher()

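# With a catch-all publisher attached to the container, any component can emit a
# typed event without building its own publisher. A hedged usage sketch, assuming
# publish_event accepts a per-call event_type when the publisher was constructed
# without one; the origin and description values are illustrative:
self.container.event_pub.publish_event(event_type="ResourceEvent",
                                       origin="some_resource_id",
                                       description="state changed")
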
def test_lookup_values(self):
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_lookups()
    stream_def_id = self.pubsubcli.create_stream_definition('lookup', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsubcli.delete_stream_definition, stream_def_id)

    data_product = DataProduct(name='lookup data product')
    tdom, sdom = time_series_domain()
    data_product.temporal_domain = tdom.dump()
    data_product.spatial_domain = sdom.dump()

    data_product_id = self.dpsc_cli.create_data_product(data_product, stream_definition_id=stream_def_id)
    self.addCleanup(self.dpsc_cli.delete_data_product, data_product_id)

    data_producer = DataProducer(name='producer')
    data_producer.producer_context = DataProcessProducerContext()
    data_producer.producer_context.configuration['qc_keys'] = ['offset_document']
    data_producer_id, _ = self.rrclient.create(data_producer)
    self.addCleanup(self.rrclient.delete, data_producer_id)
    assoc, _ = self.rrclient.create_association(subject=data_product_id, object=data_producer_id, predicate=PRED.hasDataProducer)
    self.addCleanup(self.rrclient.delete_association, assoc)

    document_keys = self.damsclient.list_qc_references(data_product_id)
    self.assertEquals(document_keys, ['offset_document'])

    svm = StoredValueManager(self.container)
    svm.stored_value_cas('offset_document', {'offset_a': 2.0})

    self.dpsc_cli.activate_data_product_persistence(data_product_id)
    dataset_ids, _ = self.rrclient.find_objects(subject=data_product_id, predicate=PRED.hasDataset, id_only=True)
    dataset_id = dataset_ids[0]

    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [0]
    rdt['temp'] = [20.]
    granule = rdt.to_granule()

    stream_ids, _ = self.rrclient.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
    stream_id = stream_ids[0]
    route = self.pubsubcli.read_stream_route(stream_id=stream_id)

    publisher = StandaloneStreamPublisher(stream_id, route)
    publisher.publish(granule)
    self.assertTrue(dataset_monitor.event.wait(10))

    granule = self.data_retriever.retrieve(dataset_id)
    rdt2 = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['temp'], rdt2['temp'])
    np.testing.assert_array_almost_equal(rdt2['calibrated'], np.array([22.0]))

    svm.stored_value_cas('updated_document', {'offset_a': 3.0})

    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    ep = EventPublisher(event_type=OT.ExternalReferencesUpdatedEvent)
    ep.publish_event(origin=data_product_id, reference_keys=['updated_document'])

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = [1]
    rdt['temp'] = [20.]
    granule = rdt.to_granule()
    gevent.sleep(2)  # Yield so that the event goes through
    publisher.publish(granule)
    self.assertTrue(dataset_monitor.event.wait(10))

    granule = self.data_retriever.retrieve(dataset_id)
    rdt2 = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt2['temp'], np.array([20., 20.]))
    np.testing.assert_array_almost_equal(rdt2['calibrated'], np.array([22.0, 23.0]))

def test_pub_and_sub(self):
    ar = event.AsyncResult()
    gq = queue.Queue()
    self.count = 0

    def cb(*args, **kwargs):
        self.count += 1
        gq.put(args[0])
        if self.count == 2:
            ar.set()

    sub = EventSubscriber(event_type="ResourceEvent", callback=cb, origin="specific")
    pub = EventPublisher(event_type="ResourceEvent")

    self._listen(sub)
    pub.publish_event(origin="specific", description="hello")

    event_obj = bootstrap.IonObject('ResourceEvent', origin='specific', description='more testing')
    self.assertEqual(event_obj, pub.publish_event_object(event_obj))

    with self.assertRaises(BadRequest) as cm:
        event_obj = bootstrap.IonObject('ResourceEvent', origin='specific', description='more testing', ts_created='2423')
        pub.publish_event_object(event_obj)
    self.assertIn('The ts_created value is not a valid timestamp', cm.exception.message)

    with self.assertRaises(BadRequest) as cm:
        event_obj = bootstrap.IonObject('ResourceEvent', origin='specific', description='more testing', ts_created='1000494978462')
        pub.publish_event_object(event_obj)
    self.assertIn('This ts_created value is too old', cm.exception.message)

    with self.assertRaises(BadRequest) as cm:
        event_obj = bootstrap.IonObject('ResourceEvent', origin='specific', description='more testing')
        event_obj._id = '343434'
        pub.publish_event_object(event_obj)
    self.assertIn('The event object cannot contain a _id field', cm.exception.message)

    ar.get(timeout=5)

    res = []
    for x in xrange(self.count):
        res.append(gq.get(timeout=5))

    self.assertEquals(len(res), self.count)
    self.assertEquals(res[0].description, "hello")
    self.assertAlmostEquals(int(res[0].ts_created), int(get_ion_ts()), delta=5000)
    self.assertEquals(res[1].description, "more testing")
    self.assertAlmostEquals(int(res[1].ts_created), int(get_ion_ts()), delta=5000)

def test_pub_on_different_subsubtypes(self):
    res_list = [DotDict(ar=event.AsyncResult(), gq=queue.Queue(), count=0) for i in xrange(4)]

    def cb_gen(num):
        def cb(event, *args, **kwargs):
            res_list[num].count += 1
            res_list[num].gq.put(event)
            if event.description == "end":
                res_list[num].ar.set()
        return cb

    sub0 = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="st1.*", callback=cb_gen(0))
    sub0.start()
    sub1 = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="st1.a", callback=cb_gen(1))
    sub1.start()
    sub2 = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="*.a", callback=cb_gen(2))
    sub2.start()
    sub3 = EventSubscriber(event_type="ResourceModifiedEvent", sub_type="st1", callback=cb_gen(3))
    sub3.start()

    pub1 = EventPublisher(event_type="ResourceModifiedEvent")
    pub1.publish_event(origin="one", sub_type="st1.a", description="1")
    pub1.publish_event(origin="two", sub_type="st1", description="2")
    pub1.publish_event(origin="three", sub_type="st1.b", description="3")
    pub1.publish_event(origin="four", sub_type="st2.a", description="4")
    pub1.publish_event(origin="five", sub_type="st2", description="5")
    pub1.publish_event(origin="six", sub_type="a", description="6")
    pub1.publish_event(origin="seven", sub_type="", description="7")

    pub1.publish_event(origin="end", sub_type="st1.a", description="end")
    pub1.publish_event(origin="end", sub_type="st1", description="end")

    # All four subscribers receive one of the "end" events above; wait on every
    # AsyncResult so the queue drain below does not race the last delivery.
    [res_list[i].ar.get(timeout=5) for i in xrange(4)]

    sub0.stop()
    sub1.stop()
    sub2.stop()
    sub3.stop()

    for i in xrange(4):
        res_list[i].res = []
        for x in xrange(res_list[i].count):
            res_list[i].res.append(res_list[i].gq.get(timeout=5))

    self.assertEquals(len(res_list[0].res), 3)
    self.assertEquals(res_list[0].res[0].description, "1")

    self.assertEquals(len(res_list[1].res), 2)
    self.assertEquals(res_list[1].res[0].description, "1")

    self.assertEquals(len(res_list[2].res), 3)
    self.assertEquals(res_list[2].res[0].description, "1")

    self.assertEquals(len(res_list[3].res), 2)
    self.assertEquals(res_list[3].res[0].description, "2")