def test_execute_advanced_transform(self):
    """Publish one L0 sample and wait for the expected density downstream.

    Runs a transform across L0-L2 with stream definitions that include
    available fields.  The test passes once the validator receives a granule
    whose ``rho`` value is close to the expected one.
    """
    streams = self.setup_advanced_transform()
    in_stream_id, in_stream_def_id = streams[0]
    # The output pair is unpacked only to keep the same shape check on it.
    _out_stream_id, _out_stream_def_id = streams[1]

    expected_rho = np.array([1001.0055034])
    validation_event = Event()

    def validator(msg, route, stream_id):
        # Signal success only for a granule carrying the expected density.
        incoming = RecordDictionaryTool.load_from_granule(msg)
        if np.allclose(incoming['rho'], expected_rho):
            validation_event.set()

    self.setup_validator(validator)

    in_route = self.pubsub_management.read_stream_route(in_stream_id)
    publisher = StandaloneStreamPublisher(in_stream_id, in_route)

    outbound_rdt = RecordDictionaryTool(
        stream_definition_id=in_stream_def_id)
    sample = (
        ('time', [0]),
        ('TEMPWAT_L0', [280000]),
        ('CONDWAT_L0', [100000]),
        ('PRESWAT_L0', [2789]),
        ('lat', [45]),
        ('lon', [-71]),
    )
    for field, values in sample:
        outbound_rdt[field] = values

    publisher.publish(outbound_rdt.to_granule())
    self.assertTrue(validation_event.wait(2))
def test_serialize_compatability(self):
    """Check that published granule values reach a subscriber as ndarrays."""
    helper = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = helper.create_extended_parsed()

    stream_def_id = self.pubsub_management.create_stream_definition(
        'ctd extended', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition,
                    stream_def_id)

    stream_id, route = self.pubsub_management.create_stream(
        'ctd1', 'xp1', stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream, stream_id)

    sub_id = self.pubsub_management.create_subscription(
        'sub1', stream_ids=[stream_id])
    self.addCleanup(self.pubsub_management.delete_subscription, sub_id)
    self.pubsub_management.activate_subscription(sub_id)
    self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

    verified = Event()

    def verifier(msg, route, stream_id):
        # Raw record dictionary: every populated entry must be an ndarray.
        for _key, raw in msg.record_dictionary.iteritems():
            if v_is_set(raw):
                self.assertIsInstance(raw, np.ndarray)
        # Loaded RDT: both item access and iteration must yield ndarrays.
        loaded = RecordDictionaryTool.load_from_granule(msg)
        for name, value in loaded.iteritems():
            self.assertIsInstance(loaded[name], np.ndarray)
            self.assertIsInstance(value, np.ndarray)
        verified.set()

    def v_is_set(value):
        # Entries left as None are skipped by the raw-dictionary check.
        return value is not None

    subscriber = StandaloneStreamSubscriber('sub1', callback=verifier)
    subscriber.start()
    self.addCleanup(subscriber.stop)

    publisher = StandaloneStreamPublisher(stream_id, route)
    outgoing = RecordDictionaryTool(stream_definition_id=stream_def_id)
    helper.fill_rdt(outgoing, 10)
    publisher.publish(outgoing.to_granule())

    self.assertTrue(verified.wait(60))
def _publish_to_transform(self, stream_id='', stream_route=None):
    """Publish a CTD L0 packet of length 5 on the given stream and log it."""
    publisher = StandaloneStreamPublisher(stream_id, stream_route)
    granule = self._get_new_ctd_L0_packet(
        stream_definition_id=self.in_stream_def_id_for_L0,
        length=5)
    publisher.publish(granule)
    log.debug("Published the following granule: %s", granule)
def test_ingestion_pause(self):
    """Check that data published while paused is ingested after resuming."""
    (ctd_stream_id, route,
     stream_def_id, dataset_id) = self.make_simple_dataset()
    ingestion_config_id = self.get_ingestion_config()
    self.start_ingestion(ctd_stream_id, dataset_id)
    self.addCleanup(self.stop_ingestion, ctd_stream_id)

    outgoing = RecordDictionaryTool(stream_definition_id=stream_def_id)
    outgoing['time'] = np.arange(10)

    publisher = StandaloneStreamPublisher(ctd_stream_id, route)
    monitor = DatasetMonitor(dataset_id)
    self.addCleanup(monitor.stop)

    # First batch: ingestion is running, so the monitor must fire.
    publisher.publish(outgoing.to_granule())
    self.assertTrue(monitor.wait())
    granule = self.data_retriever.retrieve(dataset_id)

    # Second batch: published while paused, so nothing may arrive yet.
    self.ingestion_management.pause_data_stream(ctd_stream_id,
                                                ingestion_config_id)
    monitor.event.clear()
    outgoing['time'] = np.arange(10, 20)
    publisher.publish(outgoing.to_granule())
    self.assertFalse(monitor.event.wait(1))

    # After resuming, the dataset must hold both batches.
    self.ingestion_management.resume_data_stream(ctd_stream_id,
                                                 ingestion_config_id)
    self.assertTrue(monitor.wait())

    granule = self.data_retriever.retrieve(dataset_id)
    replayed = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_almost_equal(replayed['time'], np.arange(20))
def set_configuration(self, config):
    """Store the driver configuration and build the stream publisher.

    The configuration is expected to have:
      - parser module/class
      - directory, wildcard to find data files
      - optional timestamp of last granule
      - optional poll rate
      - publish info

    Only ``max_records`` (default 100) and ``last_time`` (default 0) are read
    from ``config`` here; the stream details are taken from the
    ``stream_config`` entry of ``self.CFG``.

    Raises:
        ValueError: if ``self.CFG`` holds no ``stream_config`` entries, since
            no publisher can be built without one.
    """
    log.warn("DRIVER: set_configuration")
    # NOTE(review): this is emitted at error level although it is not an
    # error; presumably a leftover diagnostic. Level kept unchanged.
    log.error("Log level: %s", log.getEffectiveLevel())
    log.debug('using configuration: %s', config)

    self.config = config
    self.max_records = get_safe(config, 'max_records', 100)
    self.stream_config = self.CFG.get('stream_config', {})

    if not self.stream_config:
        # Previously this fell through and failed with UnboundLocalError.
        raise ValueError(
            "set_configuration: no 'stream_config' entries available")

    # Whether one or several streams are configured, the first one is used.
    stream_cfg = next(iter(self.stream_config.values()))

    stream_id = stream_cfg['stream_id']
    stream_route = IonObject(OT.StreamRoute,
                             routing_key=stream_cfg['routing_key'],
                             exchange_point=stream_cfg['exchange_point'])
    param_dict = stream_cfg['stream_def_dict']['parameter_dictionary']

    self.publisher = StandaloneStreamPublisher(stream_id=stream_id,
                                               stream_route=stream_route)
    self.parameter_dictionary = ParameterDictionary.load(param_dict)
    self.time_field = self.parameter_dictionary.get_temporal_context()
    self.latest_granule_time = get_safe(config, 'last_time', 0)
def test_granule(self):
    """Publish a CTD granule and exercise RecordDictionaryTool round trips."""
    pdict_name = 'ctd_parsed_param_dict'
    ref_designator = "GA03FLMA-RI001-13-CTDMOG999"
    pubsub = self.pubsub_management

    pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
        pdict_name, id_only=True)
    stream_def_id = pubsub.create_stream_definition(
        'ctd',
        parameter_dictionary_id=pdict_id,
        stream_configuration={'reference_designator': ref_designator})
    pdict = DatasetManagementService.get_parameter_dictionary_by_name(
        pdict_name)
    self.addCleanup(pubsub.delete_stream_definition, stream_def_id)

    stream_id, route = pubsub.create_stream(
        'ctd_stream', 'xp1', stream_definition_id=stream_def_id)
    self.addCleanup(pubsub.delete_stream, stream_id)
    publisher = StandaloneStreamPublisher(stream_id, route)

    subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
    subscriber.start()
    self.addCleanup(subscriber.stop)

    subscription_id = pubsub.create_subscription('sub',
                                                 stream_ids=[stream_id])
    pubsub.activate_subscription(subscription_id)

    # Build the granule that self.verify_incoming receives.
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10)
    rdt['temp'] = np.random.randn(10) * 10 + 30
    rdt['pressure'] = [20] * 10

    self.assertEqual(set(pdict.keys()), set(rdt.fields))
    self.assertEqual(pdict.temporal_parameter_name, rdt.temporal_parameter)
    self.assertEqual(rdt._stream_config['reference_designator'],
                     ref_designator)

    # Stored on the test case alongside the values passed to to_granule.
    self.rdt = rdt
    self.data_producer_id = 'data_producer'
    self.provider_metadata_update = {1: 1}

    publisher.publish(rdt.to_granule(data_producer_id='data_producer',
                                     provider_metadata_update={1: 1}))
    self.assertTrue(self.event.wait(10))

    pubsub.deactivate_subscription(subscription_id)
    pubsub.delete_subscription(subscription_id)

    # An all-None array reads back as None; a partial one gets fill values.
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.array([None, None, None])
    self.assertIsNone(rdt['time'])

    rdt['time'] = np.array([None, 1, 2])
    self.assertEqual(rdt['time'][0], rdt.fill_value('time'))

    # Build an RDT from the stream definition object and round-trip it.
    stream_def_obj = pubsub.read_stream_definition(stream_def_id)
    rdt = RecordDictionaryTool(stream_definition=stream_def_obj)
    rdt['time'] = np.arange(20)
    rdt['temp'] = np.arange(20)

    granule = rdt.to_granule()
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['time'], np.arange(20))
    np.testing.assert_array_equal(rdt['temp'], np.arange(20))
def test_granule_publish(self):
    """Publish one parsed CTD sample on a data product's stream.

    A logger process is attached to the stream, a single sample is copied
    into a RecordDictionaryTool and published, and the logger processes and
    all data products are cleaned up afterwards.
    """
    log.debug("test_granule_publish ")
    self.loggerpids = []

    # Retrieve the param dict from the repository
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
        'ctd_parsed_param_dict', id_only=True)
    stream_definition_id = self.pubsubclient.create_stream_definition(
        'parsed stream', parameter_dictionary_id=pdict_id)

    dp_obj = IonObject(RT.DataProduct,
                       name=str(uuid.uuid4()),
                       description='ctd stream test')
    data_product_id1 = self.dpclient.create_data_product(
        data_product=dp_obj, stream_definition_id=stream_definition_id)

    # Retrieve the id of the output stream of the out Data Product
    stream_ids, _ = self.rrclient.find_objects(
        data_product_id1, PRED.hasStream, None, True)
    log.debug('test_granule_publish: Data product streams1 = %s', stream_ids)

    pid = self.create_logger('ctd_parsed', stream_ids[0])
    self.loggerpids.append(pid)

    try:
        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)

        # Create the publisher from the stream route
        stream_route = self.pubsubclient.read_stream_route(stream_ids[0])
        publisher = StandaloneStreamPublisher(stream_ids[0], stream_route)

        # This is one sample from the ctd driver
        tomato = {"driver_timestamp": 3555971105.1268806,
                  "instrument_id": "ABC-123",
                  "pkt_format_id": "JSON_Data",
                  "pkt_version": 1,
                  "preferred_timestamp": "driver_timestamp",
                  "quality_flag": "ok",
                  "stream_name": "parsed",
                  "values": [{"value": 22.9304, "value_id": "temp"},
                             {"value": 51.57381, "value_id": "conductivity"},
                             {"value": 915.551, "value_id": "pressure"}]}

        for value in tomato['values']:
            # Arguments now follow the "key ... val" labels in the message;
            # they used to be passed the other way round.
            log.debug("test_granule_publish: Looping tomato values key: %s val: %s ",
                      value['value_id'], value['value'])

            if value['value_id'] in rdt:
                rdt[value['value_id']] = numpy.array([value['value']])
                log.debug("test_granule_publish: Added data item %s val: %s ",
                          value['value_id'], value['value'])

        g = rdt.to_granule()
        publisher.publish(g)

        gevent.sleep(3)
    finally:
        # Cancel the logger processes even when building or publishing the
        # granule fails, so they are not left running.
        for pid in self.loggerpids:
            self.processdispatchclient.cancel_process(pid)

    #--------------------------------------------------------------------------------
    # Cleanup data products
    #--------------------------------------------------------------------------------
    dp_ids, _ = self.rrclient.find_resources(restype=RT.DataProduct,
                                             id_only=True)
    for dp_id in dp_ids:
        self.dataproductclient.delete_data_product(dp_id)
def publish_and_wait(self, dataset_id, granule):
    """Publish ``granule`` on the dataset's first stream and await the monitor."""
    stream_ids, _ = self.resource_registry.find_objects(
        dataset_id, PRED.hasStream, id_only=True)
    target_stream = stream_ids[0]

    publisher = StandaloneStreamPublisher(
        target_stream,
        self.pubsub_management.read_stream_route(target_stream))

    # The monitor is created before publishing and stopped during cleanup.
    monitor = DatasetMonitor(dataset_id)
    self.addCleanup(monitor.stop)

    publisher.publish(granule)
    self.assertTrue(monitor.wait())
def test_coverage_transform(self):
    """Ingest a parsed granule and compare the replayed fields."""
    helper = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = helper.create_parsed()

    stream_def_id = self.pubsub_management.create_stream_definition(
        'ctd parsed', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition,
                    stream_def_id)

    stream_id, route = self.pubsub_management.create_stream(
        'example',
        exchange_point=self.exchange_point_name,
        stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream, stream_id)

    ingestion_config_id = self.get_ingestion_config()
    dataset_id = self.create_dataset(pdict_id)
    self.ingestion_management.persist_data_stream(
        stream_id=stream_id,
        ingestion_configuration_id=ingestion_config_id,
        dataset_id=dataset_id)
    self.addCleanup(self.ingestion_management.unpersist_data_stream,
                    stream_id, ingestion_config_id)

    publisher = StandaloneStreamPublisher(stream_id, route)
    sent = helper.get_rdt(stream_def_id)
    helper.fill_parsed_rdt(sent)

    monitor = DatasetMonitor(dataset_id)
    self.addCleanup(monitor.stop)

    publisher.publish(sent.to_granule())
    self.assertTrue(monitor.event.wait(30))

    replay_granule = self.data_retriever.retrieve(dataset_id)
    received = RecordDictionaryTool.load_from_granule(replay_granule)

    assert_close = np.testing.assert_array_almost_equal
    # Fields that must come back as they were published.
    assert_close(received['time'], sent['time'])
    assert_close(received['temp'], sent['temp'])
    # Remaining fields are compared against fixed expected values.
    assert_close(received['conductivity_L1'], np.array([42.914]))
    assert_close(received['temp_L1'], np.array([20.]))
    assert_close(received['pressure_L1'], np.array([3.068]))
    assert_close(received['density'],
                 np.array([1021.7144739593881], dtype='float32'))
    assert_close(received['salinity'],
                 np.array([30.935132729668283], dtype='float32'))
def publish_hifi(self, stream_id, stream_route, offset=0):
    """Publish deterministic data.

    Sends one granule whose ``time`` and ``temp`` fields both hold ten
    consecutive integers starting at ``offset * 10``.
    """
    publisher = StandaloneStreamPublisher(stream_id, stream_route)

    stream_def = self.pubsub_management.read_stream_definition(
        stream_id=stream_id)
    rdt = RecordDictionaryTool(stream_definition_id=stream_def._id)

    shift = offset * 10
    rdt['time'] = np.arange(10) + shift
    rdt['temp'] = np.arange(10) + shift

    publisher.publish(rdt.to_granule())
def test_move_activated_subscription(self):
    """Move an activated subscription to another queue and check delivery.

    After the move, the subscription must be associated with the
    ``second_queue`` exchange name only, and a message published on the
    stream must reach the subscriber listening on that queue.
    """
    stream_id, route = self.pubsub_management.create_stream(
        name='test_stream', exchange_point='test_xp')

    #--------------------------------------------------------------------------------
    # Test moving after activate
    #--------------------------------------------------------------------------------
    subscription_id = self.pubsub_management.create_subscription(
        'first_queue', stream_ids=[stream_id])
    self.pubsub_management.activate_subscription(subscription_id)

    xn_ids, _ = self.resource_registry.find_resources(
        restype=RT.ExchangeName, name='first_queue', id_only=True)
    subjects, _ = self.resource_registry.find_subjects(
        object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
    self.assertEquals(xn_ids[0], subjects[0])

    self.verified = Event()

    def verify(m, r, s):
        self.assertEquals(m, 'verified')
        self.verified.set()

    subscriber = StandaloneStreamSubscriber('second_queue', verify)
    subscriber.start()
    # The subscriber was previously never stopped; register the stop so it
    # is released whether the test passes or fails.
    self.addCleanup(subscriber.stop)

    self.pubsub_management.move_subscription(subscription_id,
                                             exchange_name='second_queue')

    xn_ids, _ = self.resource_registry.find_resources(
        restype=RT.ExchangeName, name='second_queue', id_only=True)
    subjects, _ = self.resource_registry.find_subjects(
        object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
    self.assertEquals(len(subjects), 1)
    self.assertEquals(subjects[0], xn_ids[0])

    publisher = StandaloneStreamPublisher(stream_id, route)
    publisher.publish('verified')
    self.assertTrue(self.verified.wait(2))

    self.pubsub_management.deactivate_subscription(subscription_id)
    self.pubsub_management.delete_subscription(subscription_id)
    self.pubsub_management.delete_stream(stream_id)
def test_demux(self):
    """Publish on one stream and expect the demuxer to feed two streams."""
    create_stream = self.pubsub_client.create_stream
    self.stream0, self.route0 = create_stream('stream0',
                                              exchange_point='test')
    self.stream1, self.route1 = create_stream('stream1',
                                              exchange_point='main_data')
    self.stream2, self.route2 = create_stream('stream2',
                                              exchange_point='alt_data')

    self.r_stream1 = gevent.event.Event()
    self.r_stream2 = gevent.event.Event()

    def process(msg, stream_route, stream_id):
        # Flag whichever output stream the message arrived on.
        if stream_id == self.stream1:
            self.r_stream1.set()
            return
        if stream_id == self.stream2:
            self.r_stream2.set()

    demux_config = {
        'process': {
            'out_streams': [self.stream1, self.stream2]
        }
    }
    self.container.spawn_process('demuxer',
                                 'ion.processes.data.transforms.mux',
                                 'DemuxTransform',
                                 demux_config,
                                 'demuxer_pid')
    self.queue_cleanup.append('demuxer_pid')

    ex_manager = self.container.ex_manager
    sub1 = StandaloneStreamSubscriber('sub1', process)
    sub2 = StandaloneStreamSubscriber('sub2', process)
    sub1.xn.bind(self.route1.routing_key, ex_manager.create_xp('main_data'))
    sub2.xn.bind(self.route2.routing_key, ex_manager.create_xp('alt_data'))
    sub1.start()
    sub2.start()
    self.queue_cleanup.append(sub1.xn)
    self.queue_cleanup.append(sub2.xn)

    # Bind the demuxer's queue to the input stream's route.
    demux_queue = ex_manager.create_xn_queue('demuxer_pid')
    demux_queue.bind(self.route0.routing_key,
                     ex_manager.create_xp(self.route0.exchange_point))

    domino = StandaloneStreamPublisher(self.stream0, self.route0)
    domino.publish('test')

    self.assertTrue(self.r_stream1.wait(2))
    self.assertTrue(self.r_stream2.wait(2))

    self.container.proc_manager.terminate_process('demuxer_pid')
    sub1.stop()
    sub2.stop()
def write_to_data_product(self, data_product_id):
    """Publish 40 records to a data product and compare them after replay.

    Returns the list of field names whose retrieved values differ from what
    was published.  Returns None, after printing a notice, when the
    parameter dictionary has no temporal parameter.
    """
    record_count = 40
    registry = self.resource_registry

    dataset_ids, _ = registry.find_objects(
        data_product_id, 'hasDataset', id_only=True)
    dataset_id = dataset_ids.pop()

    stream_ids, _ = registry.find_objects(
        data_product_id, 'hasStream', id_only=True)
    stream_id = stream_ids.pop()

    stream_def_ids, _ = registry.find_objects(
        stream_id, 'hasStreamDefinition', id_only=True)
    stream_def_id = stream_def_ids.pop()

    route = self.pubsub_management.read_stream_route(stream_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    time_param = rdt._pdict.temporal_parameter_name
    if time_param is None:
        # Single-argument parenthesised form prints the same text in Python 2.
        print('%s has no temporal parameter' %
              registry.read(data_product_id).name)
        return

    rdt[time_param] = np.arange(record_count)
    for field in rdt.fields:
        if field != rdt._pdict.temporal_parameter_name:
            param_type = rdt._pdict.get_context(field).param_type
            rdt[field] = self.fill_values(param_type, record_count)

    publisher = StandaloneStreamPublisher(stream_id, route)
    publisher.publish(rdt.to_granule())

    self.wait_until_we_have_enough_granules(dataset_id, record_count)

    granule = self.data_retriever.retrieve(dataset_id)
    rdt_out = RecordDictionaryTool.load_from_granule(granule)

    mismatched = []
    for field in rdt.fields:
        if np.array_equal(rdt[field], rdt_out[field]):
            continue
        print('%s' % field)
        print('%s != %s' % (rdt[field], rdt_out[field]))
        mismatched.append(field)

    return mismatched
def test_activation_and_deactivation(self):
    """Messages must only be delivered while the subscription is active."""
    pubsub = self.pubsub_management
    stream_id, route = pubsub.create_stream('stream1', 'xp1')
    subscription_id = pubsub.create_subscription('sub1',
                                                 stream_ids=[stream_id])

    self.check1 = Event()

    def verifier(msg, stream_route, sid):
        self.check1.set()

    subscriber = StandaloneStreamSubscriber('sub1', verifier)
    subscriber.start()

    publisher = StandaloneStreamPublisher(stream_id, route)

    # Not yet activated: nothing may arrive.
    publisher.publish('should not receive')
    self.assertFalse(self.check1.wait(0.25))

    # Activated: the message must arrive.
    pubsub.activate_subscription(subscription_id)
    publisher.publish('should receive')
    self.assertTrue(self.check1.wait(2))

    self.check1.clear()
    self.assertFalse(self.check1.is_set())

    # Deactivated again: nothing may arrive.
    pubsub.deactivate_subscription(subscription_id)
    publisher.publish('should not receive')
    self.assertFalse(self.check1.wait(0.5))

    # Re-activated: delivery must resume.
    pubsub.activate_subscription(subscription_id)
    publisher.publish('should receive')
    self.assertTrue(self.check1.wait(2))

    subscriber.stop()

    pubsub.deactivate_subscription(subscription_id)
    pubsub.delete_subscription(subscription_id)
    pubsub.delete_stream(stream_id)
def _publish_granules(self, stream_id=None, stream_route=None, values = None,number=None):
    """Publish ``number`` granules carrying ``values`` on the given stream.

    Every granule sets ``input_voltage`` to ``values``, uses ``number`` for
    each of the ``self.length`` entries of ``time``, and is tagged with the
    data producer id ``'instrument_1'``.

    ``number`` must be an integer: it is used as the loop count, so the
    default of None raises TypeError.
    """
    pub = StandaloneStreamPublisher(stream_id, stream_route)

    stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
    stream_def_id = stream_def._id
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    times = numpy.array([number] * self.length)

    for i in xrange(number):
        rdt['input_voltage'] = values
        rdt['preferred_timestamp'] = ['time'] * len(times)
        rdt['time'] = times

        g = rdt.to_granule()
        g.data_producer_id = 'instrument_1'

        # Log the index of this granule (the total count used to be logged on
        # every iteration) and let the logger do the formatting lazily.
        log.debug("granule #%s published by instrument:: %s", i, g)

        pub.publish(g)
def test_retrieve_and_transform(self):
    """Retrieve ingested CTD data through the L2 salinity transform.

    Two granules of ten records each are ingested, then retrieved with the
    salinity transform algorithm applied; no salinity value may equal 0.
    Ingestion is stopped even when the test fails part-way through.
    """
    # Make a simple dataset and start ingestion, pretty standard stuff.
    ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
    self.start_ingestion(ctd_stream_id, dataset_id)

    try:
        # Stream definition for the salinity data
        salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        sal_stream_def_id = self.pubsub_management.create_stream_definition(
            'sal data', parameter_dictionary_id=salinity_pdict_id)

        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
        rdt['time'] = np.arange(10)
        rdt['temp'] = np.random.randn(10) * 10 + 30
        rdt['conductivity'] = np.random.randn(10) * 2 + 10
        rdt['pressure'] = np.random.randn(10) * 1 + 12

        publisher = StandaloneStreamPublisher(ctd_stream_id, route)
        publisher.publish(rdt.to_granule())

        # Second granule reuses the same values with later timestamps.
        rdt['time'] = np.arange(10, 20)
        publisher.publish(rdt.to_granule())

        self.wait_until_we_have_enough_granules(dataset_id, 20)

        granule = self.data_retriever.retrieve(
            dataset_id,
            None,
            None,
            'ion.processes.data.transforms.ctd.ctd_L2_salinity',
            'CTDL2SalinityTransformAlgorithm',
            kwargs=dict(params=sal_stream_def_id))
        rdt = RecordDictionaryTool.load_from_granule(granule)
        for salinity in rdt['salinity']:
            self.assertNotEqual(salinity, 0)

        self.streams.append(ctd_stream_id)
    finally:
        # Previously only reached on success, leaving ingestion running
        # after any failed assertion.
        self.stop_ingestion(ctd_stream_id)
def test_execute_transform(self):
    """Publish one L0 sample and wait for the expected L1 values downstream."""
    streams = self.setup_transform()
    in_stream_id, in_stream_def_id = streams[0]
    # The output pair is unpacked only to keep the same shape check on it.
    _out_stream_id, _out_stream_def_id = streams[1]

    # Checked in this order; the first mismatch rejects the granule.
    expected_l1 = (
        ('TEMPWAT_L1', np.array([18.])),
        ('CONDWAT_L1', np.array([0.5])),
        ('PRESWAT_L1', np.array([0.04536611])),
    )
    validation_event = Event()

    def validator(msg, route, stream_id):
        incoming = RecordDictionaryTool.load_from_granule(msg)
        for field, expected in expected_l1:
            if not np.allclose(incoming[field], expected):
                return
        validation_event.set()

    self.setup_validator(validator)

    in_route = self.pubsub_management.read_stream_route(in_stream_id)
    publisher = StandaloneStreamPublisher(in_stream_id, in_route)

    outbound_rdt = RecordDictionaryTool(
        stream_definition_id=in_stream_def_id)
    sample = (
        ('time', [0]),
        ('TEMPWAT_L0', [280000]),
        ('CONDWAT_L0', [100000]),
        ('PRESWAT_L0', [2789]),
        ('lat', [45]),
        ('lon', [-71]),
    )
    for field, values in sample:
        outbound_rdt[field] = values

    publisher.publish(outbound_rdt.to_granule())
    self.assertTrue(validation_event.wait(2))
def setUp(self):
    """Start the container and create a stream with an active subscription.

    Creates the stream definition, stream and subscription used by the
    tests, registers their deletion as cleanups, and builds a publisher for
    the stream.
    """
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2deploy.yml')

    node = self.container.node
    self.dataset_management_client = DatasetManagementServiceClient(node=node)
    self.pubsub_client = PubsubManagementServiceClient(node=node)

    self.time_dom, self.spatial_dom = time_series_domain()

    self.parameter_dict_id = (
        self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True))

    self.stream_def_id = self.pubsub_client.create_stream_definition(
        name='stream_def',
        parameter_dictionary_id=self.parameter_dict_id)
    self.addCleanup(self.pubsub_client.delete_stream_definition,
                    self.stream_def_id)

    self.stream_id, self.route_id = self.pubsub_client.create_stream(
        name='parsed_stream',
        stream_definition_id=self.stream_def_id,
        exchange_point='science_data')
    self.addCleanup(self.pubsub_client.delete_stream, self.stream_id)

    self.subscription_id = self.pubsub_client.create_subscription(
        name='parsed_subscription',
        stream_ids=[self.stream_id],
        exchange_name='parsed_subscription')
    self.addCleanup(self.pubsub_client.delete_subscription,
                    self.subscription_id)

    # Deactivation is registered after deletion, so it runs first.
    self.pubsub_client.activate_subscription(self.subscription_id)
    self.addCleanup(self.pubsub_client.deactivate_subscription,
                    self.subscription_id)

    self.publisher = StandaloneStreamPublisher(self.stream_id, self.route_id)
def test_ctd_L2_salinity(self):
    """Test that packets are processed by the ctd_L2_salinity transform."""
    # ---- Launch a ctd transform ------------------------------------------
    proc_def = ProcessDefinition(
        name='SalinityTransform',
        description='For testing SalinityTransform')
    proc_def.executable['module'] = (
        'ion.processes.data.transforms.ctd.ctd_L2_salinity')
    proc_def.executable['class'] = 'SalinityTransform'
    proc_def_id = self.process_dispatcher.create_process_definition(
        process_definition=proc_def)

    # Build the config
    config = DotDict()
    config.process.queue_name = self.exchange_name
    config.process.exchange_point = self.exchange_point

    pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
        'ctd_parsed_param_dict', id_only=True)
    stream_def_id = self.pubsub.create_stream_definition(
        'sal_stream_def', parameter_dictionary_id=pdict_id)
    sal_stream_id, _ = self.pubsub.create_stream(
        'test_salinity',
        stream_definition_id=stream_def_id,
        exchange_point='science_data')

    config.process.publish_streams.salinity = sal_stream_id

    # Schedule the process
    self.process_dispatcher.schedule_process(
        process_definition_id=proc_def_id, configuration=config)

    # ---- Subscribe to the salinity granule the transform publishes -------
    salinity_result = gevent.event.AsyncResult()

    def on_salinity(msg, route, stream_id):
        salinity_result.set(msg)

    sub_sal = StandaloneStreamSubscriber('sub_sal', on_salinity)
    self.addCleanup(sub_sal.stop)

    sub_sal_id = self.pubsub.create_subscription(
        'subscription_sal',
        stream_ids=[sal_stream_id],
        exchange_name='sub_sal')
    self.pubsub.activate_subscription(sub_sal_id)

    self.queue_cleanup.append(sub_sal.xn.queue)
    sub_sal.start()

    # ---- Publish a packet for the transform to pick up -------------------
    # Do all the routing stuff for the publishing
    routing_key = 'stream_id.stream'
    stream_route = StreamRoute(self.exchange_point, routing_key)

    xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
    xp = self.container.ex_manager.create_xp(self.exchange_point)
    xn.bind(routing_key, xp)

    pub = StandaloneStreamPublisher('stream_id', stream_route)

    # Build a packet that can be published, then publish it
    publish_granule = self._get_new_ctd_packet(
        stream_definition_id=stream_def_id, length=5)
    pub.publish(publish_granule)

    # ---- Check what the transform published ------------------------------
    result = salinity_result.get(timeout=10)
    self.assertIsInstance(result, Granule)

    rdt = RecordDictionaryTool.load_from_granule(result)
    self.assertIn('salinity', rdt)

    self.check_salinity_algorithm_execution(publish_granule, result)
def test_lookup_values_ingest_replay(self):
    """Ingest two granules around a lookup-document update and replay them."""
    helper = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = helper.create_lookups()

    stream_def_id = self.pubsub_management.create_stream_definition(
        'lookups', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition,
                    stream_def_id)

    stream_id, route = self.pubsub_management.create_stream(
        'example',
        exchange_point=self.exchange_point_name,
        stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream, stream_id)

    ingestion_config_id = self.get_ingestion_config()
    dataset_id = self.create_dataset(pdict_id)

    config = DotDict()
    config.process.lookup_docs = ['test1', 'test2']
    self.ingestion_management.persist_data_stream(
        stream_id=stream_id,
        ingestion_configuration_id=ingestion_config_id,
        dataset_id=dataset_id,
        config=config)
    self.addCleanup(self.ingestion_management.unpersist_data_stream,
                    stream_id, ingestion_config_id)

    stored_values = StoredValueManager(self.container)
    stored_values.stored_value_cas('test1', {
        'offset_a': 10.0,
        'offset_b': 13.1
    })

    assert_close = np.testing.assert_array_almost_equal
    assert_same = np.testing.assert_array_equal

    # ---- First granule: only the 'test1' document has been stored --------
    publisher = StandaloneStreamPublisher(stream_id, route)
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(20)
    rdt['temp'] = [20.0] * 20
    granule = rdt.to_granule()

    monitor = DatasetMonitor(dataset_id)
    self.addCleanup(monitor.stop)

    publisher.publish(granule)
    self.assertTrue(monitor.event.wait(30))

    replay_granule = self.data_retriever.retrieve(dataset_id)
    rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

    assert_close(rdt_out['time'], np.arange(20))
    assert_close(rdt_out['temp'], np.array([20.] * 20))
    assert_close(rdt_out['calibrated'], np.array([30.] * 20))
    assert_same(rdt_out['offset_b'],
                np.array([rdt_out.fill_value('offset_b')] * 20))

    # ---- Second granule: published after both documents are updated ------
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(20, 40)
    rdt['temp'] = [20.0] * 20
    granule = rdt.to_granule()

    monitor.event.clear()

    stored_values.stored_value_cas('test1', {'offset_a': 20.0})
    stored_values.stored_value_cas('coefficient_document',
                                   {'offset_b': 10.0})
    gevent.sleep(2)

    publisher.publish(granule)
    self.assertTrue(monitor.event.wait(30))

    replay_granule = self.data_retriever.retrieve(dataset_id)
    rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

    assert_close(rdt_out['time'], np.arange(40))
    assert_close(rdt_out['temp'], np.array([20.] * 20 + [20.] * 20))
    assert_same(rdt_out['offset_b'], np.array([10.] * 40))
    assert_close(rdt_out['calibrated'],
                 np.array([30.] * 20 + [40.] * 20))
    assert_close(rdt_out['calibrated_b'],
                 np.array([40.] * 20 + [50.] * 20))
def test_ctdbp_L0_all(self):
    """Test packets processed by the ctdbp_L0_all transform."""
    dpm = self.data_process_management
    dp_mgmt = self.dataproduct_management

    # ----------- Data Process Definition ----------------------------------
    definition = IonObject(
        RT.DataProcessDefinition,
        name='CTDBP_L0_all',
        description='Take parsed stream and put the C, T and P into three separate L0 streams.',
        module='ion.processes.data.transforms.ctdbp.ctdbp_L0',
        class_name='CTDBP_L0_all')
    dprocdef_id = dpm.create_data_process_definition(definition)
    self.addCleanup(dpm.delete_data_process_definition, dprocdef_id)
    log.debug("created data process definition: id = %s", dprocdef_id)

    # ----------- Data Products --------------------------------------------
    # Construct temporal and spatial Coordinate Reference System objects
    tdom, sdom = time_series_domain()
    sdom = sdom.dump()
    tdom = tdom.dump()

    param_dict_id = self._create_input_param_dict_for_test(
        parameter_dict_name='fictitious_ctdp_param_dict')

    # Get the stream definition for the stream using the parameter dictionary
    stream_def_id = self.pubsub.create_stream_definition(
        name='parsed', parameter_dictionary_id=param_dict_id)
    self.addCleanup(self.pubsub.delete_stream_definition, stream_def_id)

    log.debug("Got the parsed parameter dictionary: id: %s", param_dict_id)
    log.debug("Got the stream def for parsed input: %s", stream_def_id)

    # Input data product
    input_dp_obj = IonObject(
        RT.DataProduct,
        name='parsed_stream',
        description='Parsed stream input to CTBP L0 transform',
        temporal_domain=tdom,
        spatial_domain=sdom)
    input_dp_id = dp_mgmt.create_data_product(
        data_product=input_dp_obj, stream_definition_id=stream_def_id)
    self.addCleanup(dp_mgmt.delete_data_product, input_dp_id)

    # Output data product
    output_dp_obj = IonObject(
        RT.DataProduct,
        name='L0_stream',
        description='L0_stream output of CTBP L0 transform',
        temporal_domain=tdom,
        spatial_domain=sdom)
    L0_stream_dp_id = dp_mgmt.create_data_product(
        data_product=output_dp_obj, stream_definition_id=stream_def_id)
    self.addCleanup(dp_mgmt.delete_data_product, L0_stream_dp_id)

    # We need the key name here to be "L0_stream", since when the data
    # process is launched, this name goes into the config as in
    # config.process.publish_streams.L0_stream when the config is used to
    # launch the data process
    out_stream_ids, _ = self.resource_registry.find_objects(
        L0_stream_dp_id, PRED.hasStream, RT.Stream, True)
    self.assertTrue(len(out_stream_ids))
    output_stream_id = out_stream_ids[0]

    dproc_id = dpm.create_data_process(
        data_process_definition_id=dprocdef_id,
        in_data_product_ids=[input_dp_id],
        out_data_product_ids=[L0_stream_dp_id],
        configuration=None)
    self.addCleanup(dpm.delete_data_process, dproc_id)
    log.debug("Created a data process for ctdbp_L0. id: %s", dproc_id)

    # Activate the data process
    dpm.activate_data_process(dproc_id)
    self.addCleanup(dpm.deactivate_data_process, dproc_id)

    # ----------- Find the stream created for the input data product -------
    stream_ids, _ = self.resource_registry.find_objects(
        input_dp_id, PRED.hasStream, RT.Stream, True)
    self.assertTrue(len(stream_ids))
    input_stream_id = stream_ids[0]

    stream_route = self.pubsub.read_stream_route(input_stream_id)
    log.debug("The input stream for the L0 transform: %s", input_stream_id)

    # ----------- Subscribe to the transform's output ----------------------
    transform_output = gevent.event.AsyncResult()

    def subscriber(msg, route, stream_id):
        transform_output.set(msg)

    sub = StandaloneStreamSubscriber(exchange_name='sub',
                                     callback=subscriber)
    sub_id = self.pubsub.create_subscription('subscriber_to_transform',
                                             stream_ids=[output_stream_id],
                                             exchange_name='sub')
    self.addCleanup(self.pubsub.delete_subscription, sub_id)

    self.pubsub.activate_subscription(sub_id)
    self.addCleanup(self.pubsub.deactivate_subscription, sub_id)

    sub.start()
    self.addCleanup(sub.stop)

    # ----------- Publish on the input stream for the transform ------------
    pub = StandaloneStreamPublisher(input_stream_id, stream_route)
    publish_granule = self._get_new_ctd_packet(
        stream_definition_id=stream_def_id, length=5)
    pub.publish(publish_granule)
    log.debug("Published the following granule: %s", publish_granule)

    granule_from_transform = transform_output.get(timeout=20)
    log.debug("Got the following granule from the transform: %s",
              granule_from_transform)

    # Check that the granule published by the L0 transform has the right
    # properties
    self._check_granule_from_transform(granule_from_transform)
def test_sparse_values(self):
    """
    Ingest granules from a stream built on the 'sparse' parameter dictionary
    and check the values returned by the data retriever.

    Four granules are published: a full record with lat/lon 45/-71, a
    lat/lon-only update (46/-73), a second lat/lon-only update (1000/3) that
    is not waited on, and another full record without lat/lon. The final
    replay is expected to report lat [45, 46] and lon [-71, -73].
    """
    def _assign(target_rdt, pairs):
        # Assign fields in sequence order so 'time' is always set first.
        for field, values in pairs:
            target_rdt[field] = values

    def _await_ingestion():
        # Block until the dataset monitor fires, then re-arm it for the next granule.
        self.assertTrue(dataset_monitor.event.wait(30))
        dataset_monitor.event.clear()

    param_helper = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = param_helper.create_sparse()

    stream_def_id = self.pubsub_management.create_stream_definition(
        'sparse', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition,
                    stream_def_id)

    stream_id, route = self.pubsub_management.create_stream(
        'example',
        exchange_point=self.exchange_point_name,
        stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream, stream_id)

    dataset_id = self.create_dataset(pdict_id)
    self.start_ingestion(stream_id, dataset_id)
    self.addCleanup(self.stop_ingestion, stream_id)

    # 2208988800 is the number of seconds between the NTP (1900) and Unix (1970) epochs.
    ntp_now = time.time() + 2208988800

    # --- Granule 1: full record, including the lat/lon values -------------
    first_rdt = param_helper.get_rdt(stream_def_id)
    _assign(first_rdt, (
        ('time', [ntp_now]),
        ('internal_timestamp', [ntp_now]),
        ('temp', [300000]),
        ('preferred_timestamp', ['driver_timestamp']),
        ('port_timestamp', [ntp_now]),
        ('quality_flag', [None]),
        ('lat', [45]),
        ('conductivity', [4341400]),
        ('driver_timestamp', [ntp_now]),
        ('lon', [-71]),
        ('pressure', [256.8]),
    ))

    publisher = StandaloneStreamPublisher(stream_id, route)
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    publisher.publish(first_rdt.to_granule())
    _await_ingestion()

    replayed = RecordDictionaryTool.load_from_granule(
        self.data_retriever.retrieve(dataset_id))

    almost_equal = np.testing.assert_array_almost_equal
    almost_equal(replayed['time'], first_rdt['time'])
    almost_equal(replayed['temp'], first_rdt['temp'])
    expected_values = (
        ('lat', np.array([45])),
        ('lon', np.array([-71])),
        ('conductivity_L1', np.array([42.914])),
        ('temp_L1', np.array([20.])),
        ('pressure_L1', np.array([3.068])),
        ('density', np.array([1021.7144739593881], dtype='float32')),
        ('salinity', np.array([30.935132729668283], dtype='float32')),
    )
    for field, expected in expected_values:
        almost_equal(replayed[field], expected)

    # --- Granule 2: lat/lon only ------------------------------------------
    position_rdt = param_helper.get_rdt(stream_def_id)
    _assign(position_rdt, (('lat', [46]), ('lon', [-73])))
    publisher.publish(position_rdt.to_granule())
    _await_ingestion()

    # --- Granule 3: lat/lon only, published without waiting ---------------
    position_rdt = param_helper.get_rdt(stream_def_id)
    _assign(position_rdt, (('lat', [1000]), ('lon', [3])))
    publisher.publish(position_rdt.to_granule())

    # --- Granule 4: full record again, but without lat/lon ----------------
    last_rdt = param_helper.get_rdt(stream_def_id)
    _assign(last_rdt, (
        ('time', [ntp_now]),
        ('internal_timestamp', [ntp_now]),
        ('temp', [300000]),
        ('preferred_timestamp', ['driver_timestamp']),
        ('port_timestamp', [ntp_now]),
        ('quality_flag', [None]),
        ('conductivity', [4341400]),
        ('driver_timestamp', [ntp_now]),
        ('pressure', [256.8]),
    ))
    publisher.publish(last_rdt.to_granule())
    _await_ingestion()

    replayed = RecordDictionaryTool.load_from_granule(
        self.data_retriever.retrieve(dataset_id))
    almost_equal(replayed['lat'], np.array([45, 46]))
    almost_equal(replayed['lon'], np.array([-71, -73]))
def test_transform_worker(self):
    """
    Define and launch a data process (data-product-in / data-product-out).

    Checks that:
      * the data process definition, data process and products are registered
        and associated with each other,
      * the provenance of the output data product links back to the input,
      * publishing 100 granules triggers the granule, worker-assigned,
        data-process-created and heartbeat verification events,
      * the resources can still be queried after the data process and its
        definition have been deleted.
    """
    self.dp_list = []
    self.data_process_objs = []
    self._output_stream_ids = []
    self.granule_verified = Event()
    self.worker_assigned_event_verified = Event()
    self.dp_created_event_verified = Event()
    self.heartbeat_event_verified = Event()

    self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
        name='ctd_parsed_param_dict', id_only=True)

    # create the StreamDefinition
    self.stream_def_id = self.pubsub_client.create_stream_definition(
        name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
    self.addCleanup(self.pubsub_client.delete_stream_definition,
                    self.stream_def_id)

    # create the DataProduct that is the input to the data processes
    input_dp_obj = IonObject(RT.DataProduct,
                             name='input_data_product',
                             description='input test stream')
    self.input_dp_id = self.dataproductclient.create_data_product(
        data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

    # retrieve the Stream for this data product
    stream_ids, _ = self.rrclient.find_objects(
        self.input_dp_id, PRED.hasStream, RT.Stream, True)
    self.stream_id = stream_ids[0]

    self.start_event_listener()

    # create the DPD, DataProcess and output DataProduct
    dataprocessdef_id, dataprocess_id, _ = self.create_data_process()
    self.dp_list.append(dataprocess_id)

    # validate the repository for data product algorithms persists the new resources NEW SA-1
    # create_data_process call created one of each
    dpd_ids, _ = self.rrclient.find_resources(
        restype=OT.DataProcessDefinition, id_only=False)
    # there will be more than one because of the DPDs that represent the PFs in the data product above
    self.assertIsNotNone(dpd_ids)

    dp_ids, _ = self.rrclient.find_resources(restype=OT.DataProcess,
                                             id_only=False)
    # only one DP because the PFs that are in the code dataproduct above are not activated yet.
    self.assertEqual(len(dp_ids), 1)

    # validate the name and version label NEW SA - 2
    dataprocessdef_obj = self.dataprocessclient.read_data_process_definition(
        dataprocessdef_id)
    self.assertEqual(dataprocessdef_obj.version_label, '1.0a')
    self.assertEqual(dataprocessdef_obj.name, 'add_arrays')

    # validate that the DPD has an attachment NEW SA - 21
    attachment_ids, _ = self.rrclient.find_objects(
        dataprocessdef_id, PRED.hasAttachment, RT.Attachment, True)
    self.assertEqual(len(attachment_ids), 1)
    attachment_obj = self.rrclient.read_attachment(attachment_ids[0])
    log.debug('attachment: %s', attachment_obj)

    # validate that the data process resource has input and output data products associated
    # L4-CI-SA-RQ-364 and NEW SA-3
    outproduct_ids, _ = self.rrclient.find_objects(
        dataprocess_id, PRED.hasOutputProduct, RT.DataProduct, True)
    self.assertEqual(len(outproduct_ids), 1)
    inproduct_ids, _ = self.rrclient.find_objects(
        dataprocess_id, PRED.hasInputProduct, RT.DataProduct, True)
    self.assertEqual(len(inproduct_ids), 1)

    # Test for provenance. Get Data product produced by the data processes
    output_data_product_id, _ = self.rrclient.find_objects(
        subject=dataprocess_id,
        object_type=RT.DataProduct,
        predicate=PRED.hasOutputProduct,
        id_only=True)
    output_dp_id = output_data_product_id[0]
    output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
        output_dp_id)

    # Basic check of the provenance graph: two entries are expected
    # (presumably the input and the output data product -- TODO confirm),
    # and the output's parent entry names the DataProcessDefinition that
    # created the child from the parent.
    self.assertEqual(len(output_data_product_provenance), 2)
    parents = output_data_product_provenance[output_dp_id]['parents']
    self.assertIn(self.input_dp_id, parents)
    self.assertEqual(
        parents[self.input_dp_id]['data_process_definition_id'],
        dataprocessdef_id)

    # NEW SA - 4 | Data processing shall include the appropriate data product algorithm name and version number in
    # the metadata of each output data product created by the data product algorithm.
    output_data_product_obj, _ = self.rrclient.find_objects(
        subject=dataprocess_id,
        object_type=RT.DataProduct,
        predicate=PRED.hasOutputProduct,
        id_only=False)
    self.assertIsNotNone(output_data_product_obj[0].name)
    self.assertIsNotNone(output_data_product_obj[0]._rev)

    # retrieve subscription from data process
    subscription_objs, _ = self.rrclient.find_objects(
        subject=dataprocess_id,
        predicate=PRED.hasSubscription,
        object_type=RT.Subscription,
        id_only=False)
    log.debug('test_transform_worker subscription_obj: %s',
              subscription_objs[0])

    # create a queue to catch the published granules
    self.subscription_id = self.pubsub_client.create_subscription(
        name='parsed_subscription',
        stream_ids=[self.stream_id],
        exchange_name=subscription_objs[0].exchange_name)
    self.addCleanup(self.pubsub_client.delete_subscription,
                    self.subscription_id)

    self.pubsub_client.activate_subscription(self.subscription_id)
    self.addCleanup(self.pubsub_client.deactivate_subscription,
                    self.subscription_id)

    stream_route = self.pubsub_client.read_stream_route(self.stream_id)
    self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id,
                                               stream_route=stream_route)

    # Publish 100 identical granules; the heartbeat check below is
    # described as firing for every hundred granules processed.
    for _ in range(100):
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]
        self.publisher.publish(rdt.to_granule())

    # validate that the output granule is received and the updated value is correct
    self.assertTrue(self.granule_verified.wait(self.wait_time))

    # validate that the data process loaded into worker event is received (L4-CI-SA-RQ-182)
    self.assertTrue(
        self.worker_assigned_event_verified.wait(self.wait_time))

    # validate that the data process create (with data product ids) event is received (NEW SA -42)
    self.assertTrue(self.dp_created_event_verified.wait(self.wait_time))

    # validate that the data process heartbeat event is received (for every hundred granules processed) (L4-CI-SA-RQ-182)
    # this takes a while so set wait limit to large value
    self.assertTrue(self.heartbeat_event_verified.wait(200))

    # validate that the code from the transform function can be retrieved via inspect_data_process_definition
    src = self.dataprocessclient.inspect_data_process_definition(
        dataprocessdef_id)
    self.assertIn('def add_arrays(a, b)', src)

    # now delete the DPD and DP then verify that the resources are retired so that information required
    # for provenance is still available
    self.dataprocessclient.delete_data_process(dataprocess_id)
    self.dataprocessclient.delete_data_process_definition(
        dataprocessdef_id)

    in_dp_objs, _ = self.rrclient.find_objects(
        subject=dataprocess_id,
        predicate=PRED.hasInputProduct,
        object_type=RT.DataProduct,
        id_only=True)
    self.assertIsNotNone(in_dp_objs)

    dpd_objs, _ = self.rrclient.find_subjects(
        subject_type=RT.DataProcessDefinition,
        predicate=PRED.hasDataProcess,
        object=dataprocess_id,
        id_only=True)
    self.assertIsNotNone(dpd_objs)
def test_stream_processing(self):
    """
    Test that streams are processed by the transforms according to a
    provided algorithm.

    A StreamAlertTransform process is launched with a threshold value of 10,
    a message carrying "VALUE = 5" is published on its queue, and the test
    expects a DeviceEvent originating from "StreamAlertTransform".
    """
    # todo: In this simple implementation, we are checking if the stream has the word, PUBLISH,
    # todo(contd) and if the word VALUE=<number> exists and that number is less than something
    # todo later on we are going to use complex algorithms to make this prototype powerful

    # One definition each for the queue name and routing key, so the process
    # config, the queue and the binding cannot drift apart.
    queue_name = 'a_queue'
    exchange_point = 'test_exchange'
    routing_key = 'stream_id.stream'

    #-------------------------------------------------------------------------------------
    # Start a subscriber to listen for an alert event from the Stream Alert Transform
    #-------------------------------------------------------------------------------------
    queue = gevent.queue.Queue()

    def event_received(message, headers):
        queue.put(message)

    event_subscriber = EventSubscriber(origin="StreamAlertTransform",
                                       event_type="DeviceEvent",
                                       callback=event_received)
    event_subscriber.start()
    self.addCleanup(event_subscriber.stop)

    #-------------------------------------------------------------------------------------
    # The configuration for the Stream Alert Transform... set up the event types to listen to
    #-------------------------------------------------------------------------------------
    config = {
        'process': {
            'queue_name': queue_name,
            'value': 10,
            'event_type': 'DeviceEvent',
        }
    }

    #-------------------------------------------------------------------------------------
    # Create the process
    #-------------------------------------------------------------------------------------
    pid = TransformPrototypeIntTest.create_process(
        name='transform_data_process',
        module='ion.processes.data.transforms.event_alert_transform',
        class_name='StreamAlertTransform',
        configuration=config)
    # Check the pid before registering the cleanup, so that a failed launch
    # does not also try to cancel a process with id None.
    self.assertIsNotNone(pid)
    self.addCleanup(self.process_dispatcher.cancel_process, pid)

    #-------------------------------------------------------------------------------------
    # Publish streams and make assertions about alerts
    #-------------------------------------------------------------------------------------
    stream_route = StreamRoute(exchange_point, routing_key)

    xn = self.container.ex_manager.create_xn_queue(queue_name)
    xp = self.container.ex_manager.create_xp(exchange_point)
    xn.bind(routing_key, xp)

    pub = StandaloneStreamPublisher('stream_id', stream_route)

    message = ("A dummy example message containing the word PUBLISH, and with VALUE = 5 . This message"
               " will trigger an alert event from the StreamAlertTransform because the value provided is "
               "less than 10 that was passed in through the config.")

    pub.publish(message)

    event = queue.get(timeout=10)
    self.assertEqual(event.type_, "DeviceEvent")
    self.assertEqual(event.origin, "StreamAlertTransform")
def test_multi_subscriptions(self):
    """
    Run two data processes, each fed by its own input stream, and wait for
    both verification events.

    Stream ONE receives the same granule twice (before and after the second
    subscription is set up); stream TWO receives one granule. The test passes
    when event2_verified and event1_verified are both set within wait_time.
    """
    def _first_stream_of(data_product_id):
        # Id of the first Stream associated with the data product.
        found_ids, _ = self.rrclient.find_objects(
            data_product_id, PRED.hasStream, RT.Stream, True)
        return found_ids[0]

    def _subscriptions_of(data_process_id):
        # Subscription objects associated with the data process.
        found_objs, _ = self.rrclient.find_objects(
            subject=data_process_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        return found_objs

    def _ctd_granule(conductivity, pressure, salinity):
        # Build a single-record granule on the shared stream definition.
        record = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        record['time'] = [0]  # time should always come first
        record['conductivity'] = [conductivity]
        record['pressure'] = [pressure]
        record['salinity'] = [salinity]
        return record.to_granule()

    self.dp_list = []
    self.event1_verified = Event()
    self.event2_verified = Event()

    self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
        name='ctd_parsed_param_dict', id_only=True)

    # Stream definition shared by both input data products
    self.stream_def_id = self.pubsub_client.create_stream_definition(
        name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
    self.addCleanup(self.pubsub_client.delete_stream_definition,
                    self.stream_def_id)

    # The two input data products
    dp_one_obj = IonObject(RT.DataProduct,
                           name='input_data_product_one',
                           description='input test stream one')
    self.input_dp_one_id = self.dataproductclient.create_data_product(
        data_product=dp_one_obj, stream_definition_id=self.stream_def_id)

    dp_two_obj = IonObject(RT.DataProduct,
                           name='input_data_product_two',
                           description='input test stream two')
    self.input_dp_two_id = self.dataproductclient.create_data_product(
        data_product=dp_two_obj, stream_definition_id=self.stream_def_id)

    # Streams behind the two input data products
    self.stream_one_id = _first_stream_of(self.input_dp_one_id)
    self.stream_two_id = _first_stream_of(self.input_dp_two_id)

    dpd_id = self.create_data_process_definition()
    dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products()
    first_dp_id = self.create_data_process_one(dpd_id, dp1_func_output_dp_id)
    second_dp_id = self.create_data_process_two(
        dpd_id, self.input_dp_two_id, dp2_func_output_dp_id)

    # ---- stream ONE: subscribe on data process one's exchange, then publish ----
    dp_one_subscriptions = _subscriptions_of(first_dp_id)
    log.debug('test_transform_worker subscription_obj: %s',
              dp_one_subscriptions[0])

    self.subscription_one_id = self.pubsub_client.create_subscription(
        name='parsed_subscription_one',
        stream_ids=[self.stream_one_id],
        exchange_name=dp_one_subscriptions[0].exchange_name)
    self.addCleanup(self.pubsub_client.delete_subscription,
                    self.subscription_one_id)

    self.pubsub_client.activate_subscription(self.subscription_one_id)
    self.addCleanup(self.pubsub_client.deactivate_subscription,
                    self.subscription_one_id)

    route_one = self.pubsub_client.read_stream_route(self.stream_one_id)
    self.publisher_one = StandaloneStreamPublisher(
        stream_id=self.stream_one_id, stream_route=route_one)

    self.start_event_listener()

    # data process 1 adds conductivity + pressure and puts the result in salinity
    self.publisher_one.publish(msg=_ctd_granule(1, 2, 8),
                               stream_id=self.stream_one_id)

    # ---- stream TWO: subscribe on data process two's exchange ----
    dp_two_subscriptions = _subscriptions_of(second_dp_id)
    log.debug('test_transform_worker subscription_obj: %s',
              dp_two_subscriptions[0])

    self.subscription_two_id = self.pubsub_client.create_subscription(
        name='parsed_subscription_one_two',
        stream_ids=[self.stream_two_id],
        exchange_name=dp_two_subscriptions[0].exchange_name)
    self.addCleanup(self.pubsub_client.delete_subscription,
                    self.subscription_two_id)

    self.pubsub_client.activate_subscription(self.subscription_two_id)
    self.addCleanup(self.pubsub_client.deactivate_subscription,
                    self.subscription_two_id)

    route_two = self.pubsub_client.read_stream_route(self.stream_two_id)
    self.publisher_two = StandaloneStreamPublisher(
        stream_id=self.stream_two_id, stream_route=route_two)

    # data process 1 adds conductivity + pressure and puts the result in salinity
    self.publisher_one.publish(msg=_ctd_granule(1, 2, 8),
                               stream_id=self.stream_one_id)

    # data process 2 adds salinity + pressure and puts the result in conductivity
    self.publisher_two.publish(msg=_ctd_granule(22, 4, 1),
                               stream_id=self.stream_two_id)

    self.assertTrue(self.event2_verified.wait(self.wait_time))
    self.assertTrue(self.event1_verified.wait(self.wait_time))
def test_two_transforms_inline(self):
    """
    Chain two data processes so the output data product of the first is the
    input of the second, then verify provenance and event delivery.

    The provenance of the second process' output product must list the first
    process' output product as a parent, which in turn must list the original
    input product. One granule is then published on the input stream and both
    event1_verified and event2_verified must be set within wait_time.
    """
    self.dp_list = []
    self.event1_verified = Event()
    self.event2_verified = Event()

    self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
        name='ctd_parsed_param_dict', id_only=True)

    # create the StreamDefinition
    self.stream_def_id = self.pubsub_client.create_stream_definition(
        name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
    self.addCleanup(self.pubsub_client.delete_stream_definition,
                    self.stream_def_id)

    # create the DataProduct
    input_dp_obj = IonObject(RT.DataProduct,
                             name='input_data_product_one',
                             description='input test stream one')
    self.input_dp_one_id = self.dataproductclient.create_data_product(
        data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

    dpd_id = self.create_data_process_definition()
    dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products()
    first_dp_id = self.create_data_process_one(dpd_id, dp1_func_output_dp_id)
    second_dp_id = self.create_data_process_two(
        dpd_id, dp1_func_output_dp_id, dp2_func_output_dp_id)

    # retrieve subscription from data process one
    subscription_objs, _ = self.rrclient.find_objects(
        subject=first_dp_id,
        predicate=PRED.hasSubscription,
        object_type=RT.Subscription,
        id_only=False)
    log.debug('test_transform_worker subscription_obj: %s',
              subscription_objs[0])

    # retrieve the Stream for these data products
    stream_ids, _ = self.rrclient.find_objects(
        self.input_dp_one_id, PRED.hasStream, RT.Stream, True)
    self.stream_one_id = stream_ids[0]

    # the input to data process two is the output from data process one
    stream_ids, _ = self.rrclient.find_objects(
        dp1_func_output_dp_id, PRED.hasStream, RT.Stream, True)
    self.stream_two_id = stream_ids[0]

    # Run provenance on the output dataproduct of the second data process to see all the links
    # are as expected
    output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
        dp2_func_output_dp_id)

    # Basic check of the provenance graph: three entries are expected. Two of
    # them are looked up below (the outputs of data process two and one); the
    # third is presumably the original input product -- TODO confirm.
    self.assertEqual(len(output_data_product_provenance), 3)

    # confirm that the linking from the output dataproduct to input dataproduct is correct
    self.assertIn(
        dp1_func_output_dp_id,
        output_data_product_provenance[dp2_func_output_dp_id]['parents'])
    self.assertIn(
        self.input_dp_one_id,
        output_data_product_provenance[dp1_func_output_dp_id]['parents'])

    # create a subscription covering stream ONE and stream TWO on the exchange
    # of data process one's subscription, to catch the published granules
    subscription_id = self.pubsub_client.create_subscription(
        name='parsed_subscription',
        stream_ids=[self.stream_one_id, self.stream_two_id],
        exchange_name=subscription_objs[0].exchange_name)
    self.addCleanup(self.pubsub_client.delete_subscription,
                    subscription_id)

    self.pubsub_client.activate_subscription(subscription_id)
    self.addCleanup(self.pubsub_client.deactivate_subscription,
                    subscription_id)

    stream_route_one = self.pubsub_client.read_stream_route(
        self.stream_one_id)
    self.publisher_one = StandaloneStreamPublisher(
        stream_id=self.stream_one_id, stream_route=stream_route_one)

    # retrieve subscription from data process two (only logged)
    subscription_objs, _ = self.rrclient.find_objects(
        subject=second_dp_id,
        predicate=PRED.hasSubscription,
        object_type=RT.Subscription,
        id_only=False)
    log.debug('test_transform_worker subscription_obj: %s',
              subscription_objs[0])

    # data process 1 adds conductivity + pressure and puts the result in salinity
    # data process 2 adds salinity + pressure and puts the result in conductivity
    self.start_event_listener()

    rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
    rdt['time'] = [0]  # time should always come first
    rdt['conductivity'] = [1]
    rdt['pressure'] = [2]
    rdt['salinity'] = [8]
    self.publisher_one.publish(msg=rdt.to_granule(),
                               stream_id=self.stream_one_id)

    self.assertTrue(self.event2_verified.wait(self.wait_time))
    self.assertTrue(self.event1_verified.wait(self.wait_time))
def execute_acquire_sample(self, *args):
    """
    Creates a copy of self._dh_config, creates a publisher, and spawns a greenlet to perform a data acquisition cycle
    If the args[0] is a dict, any entries keyed with one of the 'PATCHABLE_CONFIG_KEYS' are used to patch the config

    Greenlet binds to BaseDataHandler._acquire_sample and passes the publisher and config
    Disallows multiple "new data" (unconstrained) requests using BaseDataHandler._semaphore lock

    Called from:
                  InstrumentAgent._handler_observatory_execute_resource
                   |-->  ExternalDataAgent._handler_streaming_execute_resource

    @parameter args First argument can be a config dictionary; a missing or non-dict first
                    argument is not an error, the config is simply left unpatched
    @throws ConfigurationError if the 'stream_id' or 'stream_route' members aren't present
    @retval (ResourceAgentState.COMMAND, None) once the acquisition greenlet has been spawned;
            None if a "new data" request is already in progress and this one is dropped
    """
    log.debug('Executing acquire_sample: args = {0}'.format(args))

    # Make a copy of the config to ensure no cross-pollution
    # (dict.copy() is shallow: nested values are still shared with self._dh_config)
    config = self._dh_config.copy()

    # Patch the config if mods are passed in
    config_mods = args[0] if args else None
    if isinstance(config_mods, dict):
        log.debug('Configuration modifications provided: {0}'.format(config_mods))
        for k in self._params['PATCHABLE_CONFIG_KEYS']:
            p = get_safe(config_mods, k)
            if p is not None:
                config[k] = p
    else:
        log.info('No configuration modifications were provided')

    # Verify that there is a stream_id member in the config
    stream_id = get_safe(config, 'stream_id', None)
    if not stream_id:
        raise ConfigurationError('Configuration does not contain required \'stream_id\' member')
    stream_route = get_safe(config, 'stream_route', None)
    if not stream_route:
        raise ConfigurationError('Configuration does not contain required \'stream_route\' member')

    # A request without constraints is a "new data" request; only one may run at a time
    is_new = get_safe(config, 'constraints') is None

    if is_new and not self._semaphore.acquire(blocking=False):
        log.warn('Already acquiring new data - action not duplicated')
        return

    try:
        ndc = None
        if is_new:
            # Get the NewDataCheck attachment and add its content to the config
            ext_ds_id = get_safe(config, 'external_dataset_res_id')
            if ext_ds_id:
                ndc = self._find_new_data_check_attachment(ext_ds_id)

        config['new_data_check'] = ndc

        # Create a publisher to pass into the greenlet
        publisher = StandaloneStreamPublisher(stream_id=stream_id, stream_route=stream_route)

        # Spawn a greenlet to do the data acquisition and publishing
        g = spawn(self._acquire_sample, config, publisher, self._unlock_new_data_callback, self._update_new_data_check_attachment)
    except Exception:
        # The greenlet never started, so nothing will hand the lock back
        # (presumably _unlock_new_data_callback does that on completion -- TODO confirm).
        # Release it here, otherwise every later "new data" request is refused.
        if is_new:
            self._semaphore.release()
        raise

    log.debug('** Spawned {0}'.format(g))
    self._glet_queue.append(g)
    return ResourceAgentState.COMMAND, None
def test_event_transform_worker(self):
    """
    Test that a data process (type: data-product-in / event-out) can be
    defined and launched, and that the event it raises is verified.

    One granule is published on the input stream; the test passes when
    event_verified is set within wait_time.
    """
    self.data_process_objs = []
    self._output_stream_ids = []
    self.event_verified = Event()

    rr = self.rrclient
    pubsub = self.pubsub_client

    self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
        name='ctd_parsed_param_dict', id_only=True)

    # Stream definition for the input data product
    self.stream_def_id = pubsub.create_stream_definition(
        name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
    self.addCleanup(pubsub.delete_stream_definition, self.stream_def_id)

    # Input data product
    product = IonObject(RT.DataProduct,
                        name='input_data_product',
                        description='input test stream')
    self.input_dp_id = self.dataproductclient.create_data_product(
        data_product=product, stream_definition_id=self.stream_def_id)

    # Stream that backs the input data product
    found_streams, _ = rr.find_objects(
        self.input_dp_id, PRED.hasStream, RT.Stream, True)
    self.stream_id = found_streams[0]

    # Data process definition and event data process
    self.event_data_process_id = self.create_event_data_processes()

    # Subscription registered for the data process
    dp_subscriptions, _ = rr.find_objects(
        subject=self.event_data_process_id,
        predicate=PRED.hasSubscription,
        object_type=RT.Subscription,
        id_only=False)
    log.debug('test_event_transform_worker subscription_obj: %s',
              dp_subscriptions[0])

    # Subscribe the input stream on the data process' exchange so the
    # published granules are caught
    self.subscription_id = pubsub.create_subscription(
        name='parsed_subscription',
        stream_ids=[self.stream_id],
        exchange_name=dp_subscriptions[0].exchange_name)
    self.addCleanup(pubsub.delete_subscription, self.subscription_id)

    pubsub.activate_subscription(self.subscription_id)
    self.addCleanup(pubsub.deactivate_subscription, self.subscription_id)

    route = pubsub.read_stream_route(self.stream_id)
    self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id,
                                               stream_route=route)

    self.start_event_transform_listener()

    self.data_modified = Event()

    record = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
    # time should always come first, hence an ordered sequence of pairs
    for field, values in (('time', [0]),
                          ('conductivity', [1]),
                          ('pressure', [2]),
                          ('salinity', [8])):
        record[field] = values

    self.publisher.publish(record.to_granule())

    self.assertTrue(self.event_verified.wait(self.wait_time))
def test_sparse_values(self):
    """
    Ingest granules from a stream built on the 'sparse' parameter dictionary
    and check that lat/lon values carry over to later records on replay.

    The first granule sets lat/lon to 45/-71, the second changes them to
    46/-73, and nine further granules are published without touching lat/lon.
    The final replay is expected to hold ten pressure values, with lat
    [45] + [46] * 9 and lon [-71] + [-73] * 9.
    """
    def _ntp_now():
        # 2208988800 is the number of seconds between the NTP (1900) and Unix (1970) epochs.
        return time.time() + 2208988800

    def _await_ingestion():
        # Block until the dataset monitor reports ingestion, then re-arm it.
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.reset()

    def _replay():
        # Retrieve the whole dataset as a RecordDictionaryTool.
        granule = self.data_retriever.retrieve(dataset_id)
        return RecordDictionaryTool.load_from_granule(granule)

    param_helper = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = param_helper.create_sparse()

    stream_def_id = self.pubsub_management.create_stream_definition(
        'sparse', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition,
                    stream_def_id)

    stream_id, route = self.pubsub_management.create_stream(
        'example',
        exchange_point=self.exchange_point_name,
        stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream, stream_id)

    dataset_id = self.create_dataset(pdict_id)
    self.start_ingestion(stream_id, dataset_id)
    self.addCleanup(self.stop_ingestion, stream_id)

    # --- Initial granule: carries the sparse values, lat 45 and lon -71 ---
    stamp = _ntp_now()
    rdt = param_helper.get_rdt(stream_def_id)
    for field, values in (
            ('time', [stamp]),
            ('internal_timestamp', [stamp]),
            ('temp', [300000]),
            ('preferred_timestamp', ['driver_timestamp']),
            ('port_timestamp', [stamp]),
            ('quality_flag', ['']),
            ('lat', [45]),
            ('conductivity', [4341400]),
            ('driver_timestamp', [stamp]),
            ('lon', [-71]),
            ('pressure', [256.8])):
        rdt[field] = values

    publisher = StandaloneStreamPublisher(stream_id, route)
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    publisher.publish(rdt.to_granule())
    _await_ingestion()

    rdt_out = _replay()

    # Check the replayed values against what was published
    close = np.testing.assert_allclose
    close(rdt_out['time'], rdt['time'])
    close(rdt_out['temp'], rdt['temp'])
    for field, expected in (
            ('lat', np.array([45])),
            ('lon', np.array([-71])),
            ('conductivity_L1', np.array([42.914])),
            ('temp_L1', np.array([20.])),
            ('pressure_L1', np.array([3.068])),
            ('density', np.array([1021.7144739593881], dtype='float32')),
            ('salinity', np.array([30.935132729668283], dtype='float32'))):
        close(rdt_out[field], expected)

    # --- Second granule: change the lat/lon --------------------------------
    rdt = param_helper.get_rdt(stream_def_id)
    # NOTE(review): 'time' is assigned a bare scalar here, not a one-element
    # list as everywhere else in this test -- confirm this is intentional.
    rdt['time'] = _ntp_now()
    rdt['lat'] = [46]
    rdt['lon'] = [-73]

    publisher.publish(rdt.to_granule())
    _await_ingestion()

    rdt_out = _replay()
    close(rdt_out['time'], rdt['time'])

    # --- Nine more granules, re-using the same rdt object ------------------
    for _ in xrange(9):
        stamp = _ntp_now()
        for field, values in (
                ('time', [stamp]),
                ('internal_timestamp', [stamp]),
                ('temp', [300000]),
                ('preferred_timestamp', ['driver_timestamp']),
                ('port_timestamp', [stamp]),
                ('quality_flag', [None]),
                ('conductivity', [4341400]),
                ('driver_timestamp', [stamp]),
                ('pressure', [256.8])):
            rdt[field] = values

        publisher.publish(rdt.to_granule())
        _await_ingestion()

    rdt_out = _replay()

    close(rdt_out['pressure'], np.array([256.8] * 10))
    close(rdt_out['lat'], np.array([45] + [46] * 9))
    close(rdt_out['lon'], np.array([-71] + [-73] * 9))
def publish_on_stream(self, stream_id, msg):
    """
    Publish msg on the stream identified by stream_id.

    The stream is read from pubsub management to obtain its stream_route,
    and a StandaloneStreamPublisher created for this one call does the publish.

    @parameter stream_id Id of the stream to publish on
    @parameter msg Message handed to the publisher unchanged
    """
    route = self.pubsub_management.read_stream(stream_id).stream_route
    StandaloneStreamPublisher(stream_id=stream_id,
                              stream_route=route).publish(msg)