def test_serialize_compatability(self):
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_extended_parsed()

    stream_def_id = self.pubsub_management.create_stream_definition('ctd extended', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    stream_id, route = self.pubsub_management.create_stream('ctd1', 'xp1', stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream, stream_id)

    sub_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id])
    self.addCleanup(self.pubsub_management.delete_subscription, sub_id)

    self.pubsub_management.activate_subscription(sub_id)
    self.addCleanup(self.pubsub_management.deactivate_subscription, sub_id)

    verified = Event()

    def verifier(msg, route, stream_id):
        for k, v in msg.record_dictionary.iteritems():
            if v is not None:
                self.assertIsInstance(v, np.ndarray)
        rdt = RecordDictionaryTool.load_from_granule(msg)
        for k, v in rdt.iteritems():
            self.assertIsInstance(rdt[k], np.ndarray)
            self.assertIsInstance(v, np.ndarray)
        verified.set()

    subscriber = StandaloneStreamSubscriber('sub1', callback=verifier)
    subscriber.start()
    self.addCleanup(subscriber.stop)

    publisher = StandaloneStreamPublisher(stream_id, route)
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    ph.fill_rdt(rdt, 10)
    publisher.publish(rdt.to_granule())
    self.assertTrue(verified.wait(60))
def test_granule_publish(self):
    log.debug("test_granule_publish ")
    self.loggerpids = []

    # retrieve the param dict from the repository
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    stream_definition_id = self.pubsubclient.create_stream_definition('parsed stream', parameter_dictionary_id=pdict_id)

    tdom, sdom = time_series_domain()

    dp_obj = IonObject(RT.DataProduct,
                       name=str(uuid.uuid4()),
                       description='ctd stream test',
                       temporal_domain=tdom.dump(),
                       spatial_domain=sdom.dump())

    data_product_id1 = self.dpclient.create_data_product(data_product=dp_obj, stream_definition_id=stream_definition_id)

    # Retrieve the id of the output stream of the out Data Product
    stream_ids, _ = self.rrclient.find_objects(data_product_id1, PRED.hasStream, None, True)
    log.debug('test_granule_publish: Data product streams1 = %s', stream_ids)

    pid = self.create_logger('ctd_parsed', stream_ids[0])
    self.loggerpids.append(pid)

    rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)

    # create the publisher from the stream route
    stream_route = self.pubsubclient.read_stream_route(stream_ids[0])
    publisher = StandaloneStreamPublisher(stream_ids[0], stream_route)

    # this is one sample from the ctd driver
    tomato = {"driver_timestamp": 3555971105.1268806,
              "instrument_id": "ABC-123",
              "pkt_format_id": "JSON_Data",
              "pkt_version": 1,
              "preferred_timestamp": "driver_timestamp",
              "quality_flag": "ok",
              "stream_name": "parsed",
              "values": [{"value": 22.9304, "value_id": "temp"},
                         {"value": 51.57381, "value_id": "conductivity"},
                         {"value": 915.551, "value_id": "pressure"}]}

    for value in tomato['values']:
        log.debug("test_granule_publish: Looping tomato values key: %s val: %s ", str(value['value']), str(value['value_id']))
        if value['value_id'] in rdt:
            rdt[value['value_id']] = numpy.array([value['value']])
            log.debug("test_granule_publish: Added data item %s val: %s ", str(value['value']), str(value['value_id']))

    g = rdt.to_granule()
    publisher.publish(g)

    gevent.sleep(3)

    for pid in self.loggerpids:
        self.processdispatchclient.cancel_process(pid)

    #--------------------------------------------------------------------------------
    # Cleanup data products
    #--------------------------------------------------------------------------------
    dp_ids, _ = self.rrclient.find_resources(restype=RT.DataProduct, id_only=True)
    for dp_id in dp_ids:
        self.dataproductclient.delete_data_product(dp_id)
def test_data_product_subscription(self):
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    tdom, sdom = time_series_domain()
    dp = DataProduct(name='ctd parsed')
    dp.spatial_domain = sdom.dump()
    dp.temporal_domain = tdom.dump()

    data_product_id = self.data_product_management.create_data_product(data_product=dp, stream_definition_id=stream_def_id)
    self.addCleanup(self.data_product_management.delete_data_product, data_product_id)

    subscription_id = self.pubsub_management.create_subscription('validator', data_product_ids=[data_product_id])
    self.addCleanup(self.pubsub_management.delete_subscription, subscription_id)

    validated = Event()

    def validation(msg, route, stream_id):
        validated.set()

    stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
    dp_stream_id = stream_ids.pop()

    validator = StandaloneStreamSubscriber('validator', callback=validation)
    validator.start()
    self.addCleanup(validator.stop)

    self.pubsub_management.activate_subscription(subscription_id)
    self.addCleanup(self.pubsub_management.deactivate_subscription, subscription_id)

    route = self.pubsub_management.read_stream_route(dp_stream_id)
    publisher = StandaloneStreamPublisher(dp_stream_id, route)
    publisher.publish('hi')
    self.assertTrue(validated.wait(10))
def publish_to_data_product(self, data_product_id):
    stream_ids, _ = self.resource_registry.find_objects(subject=data_product_id, predicate=PRED.hasStream, id_only=True)
    self.assertTrue(len(stream_ids))
    stream_id = stream_ids.pop()
    route = self.pubsub_management.read_stream_route(stream_id)
    stream_definition = self.pubsub_management.read_stream_definition(stream_id=stream_id)
    stream_def_id = stream_definition._id
    publisher = StandaloneStreamPublisher(stream_id, route)
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    now = time.time()
    ntp_now = now + 2208988800  # Do not use in production, this is a loose translation

    rdt['internal_timestamp'] = [ntp_now]
    rdt['temp'] = [300000]
    rdt['preferred_timestamp'] = ['driver_timestamp']
    rdt['time'] = [ntp_now]
    rdt['port_timestamp'] = [ntp_now]
    rdt['quality_flag'] = [None]
    rdt['lat'] = [45]
    rdt['conductivity'] = [4341400]
    rdt['driver_timestamp'] = [ntp_now]
    rdt['lon'] = [-71]
    rdt['pressure'] = [256.8]

    granule = rdt.to_granule()
    publisher.publish(granule)
def test_execute_advanced_transform(self):
    # Runs a transform across L0-L2 with stream definitions including available fields
    streams = self.setup_advanced_transform()
    in_stream_id, in_stream_def_id = streams[0]
    out_stream_id, out_stream_defs_id = streams[1]

    validation_event = Event()

    def validator(msg, route, stream_id):
        rdt = RecordDictionaryTool.load_from_granule(msg)
        if not np.allclose(rdt['rho'], np.array([1001.0055034])):
            return
        validation_event.set()

    self.setup_validator(validator)

    in_route = self.pubsub_management.read_stream_route(in_stream_id)
    publisher = StandaloneStreamPublisher(in_stream_id, in_route)
    outbound_rdt = RecordDictionaryTool(stream_definition_id=in_stream_def_id)
    outbound_rdt['time'] = [0]
    outbound_rdt['TEMPWAT_L0'] = [280000]
    outbound_rdt['CONDWAT_L0'] = [100000]
    outbound_rdt['PRESWAT_L0'] = [2789]
    outbound_rdt['lat'] = [45]
    outbound_rdt['lon'] = [-71]

    outbound_granule = outbound_rdt.to_granule()

    publisher.publish(outbound_granule)

    self.assertTrue(validation_event.wait(2))
def test_qc_events(self):
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_qc_pdict()
    stream_def_id = self.pubsub_management.create_stream_definition('qc stream def', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    stream_id, route = self.pubsub_management.create_stream('qc stream', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream, stream_id)

    ingestion_config_id = self.get_ingestion_config()
    dataset_id = self.create_dataset(pdict_id)
    config = DotDict()

    self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id, config=config)
    self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)

    publisher = StandaloneStreamPublisher(stream_id, route)
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10)
    rdt['temp'] = np.arange(10) * 3

    verified = Event()

    def verification(event, *args, **kwargs):
        self.assertEquals(event.qc_parameter, 'temp_qc')
        self.assertEquals(event.temporal_value, 7)
        verified.set()

    es = EventSubscriber(event_type=OT.ParameterQCEvent, origin=dataset_id, callback=verification, auto_delete=True)
    es.start()
    self.addCleanup(es.stop)

    publisher.publish(rdt.to_granule())
    self.assertTrue(verified.wait(10))
def _publish_to_transform(self, stream_id='', stream_route=None):
    pub = StandaloneStreamPublisher(stream_id, stream_route)
    publish_granule = self._get_new_ctd_L0_packet(stream_definition_id=self.in_stream_def_id_for_L0, length=5)
    pub.publish(publish_granule)

    log.debug("Published the following granule: %s", publish_granule)
def test_execute_advanced_transform(self):
    # Runs a transform across L0-L2 with stream definitions including available fields
    streams = self.setup_advanced_transform()
    in_stream_id, in_stream_def_id = streams[0]
    out_stream_id, out_stream_defs_id = streams[1]

    validation_event = Event()

    def validator(msg, route, stream_id):
        rdt = RecordDictionaryTool.load_from_granule(msg)
        if not np.allclose(rdt['rho'], np.array([1001.0055034])):
            return
        validation_event.set()

    self.setup_validator(validator)

    in_route = self.pubsub_management.read_stream_route(in_stream_id)
    publisher = StandaloneStreamPublisher(in_stream_id, in_route)
    outbound_rdt = RecordDictionaryTool(stream_definition_id=in_stream_def_id)
    outbound_rdt['time'] = [0]
    outbound_rdt['TEMPWAT_L0'] = [280000]
    outbound_rdt['CONDWAT_L0'] = [100000]
    outbound_rdt['PRESWAT_L0'] = [2789]
    outbound_rdt['lat'] = [45]
    outbound_rdt['lon'] = [-71]

    outbound_granule = outbound_rdt.to_granule()

    publisher.publish(outbound_granule)

    self.assertTrue(validation_event.wait(2))
def test_derived_data_product(self):
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    ctd_stream_def_id = self.pubsubcli.create_stream_definition(name='ctd parsed', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsubcli.delete_stream_definition, ctd_stream_def_id)

    tdom, sdom = time_series_domain()
    dp = DataProduct(name='Instrument DP', temporal_domain=tdom.dump(), spatial_domain=sdom.dump())
    dp_id = self.dpsc_cli.create_data_product(dp, stream_definition_id=ctd_stream_def_id)
    self.addCleanup(self.dpsc_cli.force_delete_data_product, dp_id)

    self.dpsc_cli.activate_data_product_persistence(dp_id)
    self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, dp_id)

    dataset_ids, _ = self.rrclient.find_objects(subject=dp_id, predicate=PRED.hasDataset, id_only=True)
    if not dataset_ids:
        raise NotFound("Data Product %s dataset does not exist" % str(dp_id))
    dataset_id = dataset_ids[0]

    # Make the derived data product
    simple_stream_def_id = self.pubsubcli.create_stream_definition(name='TEMPWAT stream def', parameter_dictionary_id=pdict_id, available_fields=['time', 'temp'])
    tempwat_dp = DataProduct(name='TEMPWAT')
    tempwat_dp_id = self.dpsc_cli.create_data_product(tempwat_dp, stream_definition_id=simple_stream_def_id, parent_data_product_id=dp_id)
    self.addCleanup(self.dpsc_cli.delete_data_product, tempwat_dp_id)
    self.dpsc_cli.activate_data_product_persistence(tempwat_dp_id)
    self.addCleanup(self.dpsc_cli.suspend_data_product_persistence, tempwat_dp_id)

    # Check that the streams associated with the data product are persisted with
    stream_ids, _ = self.rrclient.find_objects(dp_id, PRED.hasStream, RT.Stream, True)
    for stream_id in stream_ids:
        self.assertTrue(self.ingestclient.is_persisted(stream_id))
    stream_id = stream_ids[0]

    route = self.pubsubcli.read_stream_route(stream_id=stream_id)
    rdt = RecordDictionaryTool(stream_definition_id=ctd_stream_def_id)
    rdt['time'] = np.arange(20)
    rdt['temp'] = np.arange(20)
    rdt['pressure'] = np.arange(20)

    publisher = StandaloneStreamPublisher(stream_id, route)

    dataset_modified = Event()

    def cb(*args, **kwargs):
        dataset_modified.set()

    es = EventSubscriber(event_type=OT.DatasetModified, callback=cb, origin=dataset_id, auto_delete=True)
    es.start()
    self.addCleanup(es.stop)

    publisher.publish(rdt.to_granule())

    self.assertTrue(dataset_modified.wait(30))

    tempwat_dataset_ids, _ = self.rrclient.find_objects(tempwat_dp_id, PRED.hasDataset, id_only=True)
    tempwat_dataset_id = tempwat_dataset_ids[0]
    granule = self.data_retriever.retrieve(tempwat_dataset_id, delivery_format=simple_stream_def_id)
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['time'], np.arange(20))
    self.assertEquals(set(rdt.fields), set(['time', 'temp']))
def publish_rdt_to_data_product(cls, data_product_id, rdt, connection_id='', connection_index=''):
    resource_registry = Container.instance.resource_registry
    pubsub_management = PubsubManagementServiceClient()
    stream_ids, _ = resource_registry.find_objects(data_product_id, 'hasStream', id_only=True)
    stream_id = stream_ids[0]
    route = pubsub_management.read_stream_route(stream_id)
    publisher = StandaloneStreamPublisher(stream_id, route)
    publisher.publish(rdt.to_granule(connection_id=connection_id, connection_index=connection_index))
def publish_and_wait(self, dataset_id, granule):
    stream_ids, _ = self.resource_registry.find_objects(dataset_id, PRED.hasStream, id_only=True)
    stream_id = stream_ids[0]
    route = self.pubsub_management.read_stream_route(stream_id)
    publisher = StandaloneStreamPublisher(stream_id, route)
    dataset_monitor = DatasetMonitor(dataset_id)
    publisher.publish(granule)
    self.assertTrue(dataset_monitor.event.wait(10))
def test_granule(self):
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    stream_def_id = self.pubsub_management.create_stream_definition('ctd', parameter_dictionary_id=pdict_id, stream_configuration={'reference_designator': "GA03FLMA-RI001-13-CTDMOG999"})
    pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    stream_id, route = self.pubsub_management.create_stream('ctd_stream', 'xp1', stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream, stream_id)
    publisher = StandaloneStreamPublisher(stream_id, route)

    subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
    subscriber.start()
    self.addCleanup(subscriber.stop)

    subscription_id = self.pubsub_management.create_subscription('sub', stream_ids=[stream_id])
    self.pubsub_management.activate_subscription(subscription_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10)
    rdt['temp'] = np.random.randn(10) * 10 + 30
    rdt['pressure'] = [20] * 10

    self.assertEquals(set(pdict.keys()), set(rdt.fields))
    self.assertEquals(pdict.temporal_parameter_name, rdt.temporal_parameter)
    self.assertEquals(rdt._stream_config['reference_designator'], "GA03FLMA-RI001-13-CTDMOG999")

    self.rdt = rdt
    self.data_producer_id = 'data_producer'
    self.provider_metadata_update = {1: 1}

    publisher.publish(rdt.to_granule(data_producer_id='data_producer', provider_metadata_update={1: 1}))

    self.assertTrue(self.event.wait(10))

    self.pubsub_management.deactivate_subscription(subscription_id)
    self.pubsub_management.delete_subscription(subscription_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.array([None, None, None])
    self.assertTrue(rdt['time'] is None)

    rdt['time'] = np.array([None, 1, 2])
    self.assertEquals(rdt['time'][0], rdt.fill_value('time'))

    stream_def_obj = self.pubsub_management.read_stream_definition(stream_def_id)
    rdt = RecordDictionaryTool(stream_definition=stream_def_obj)
    rdt['time'] = np.arange(20)
    rdt['temp'] = np.arange(20)

    granule = rdt.to_granule()
    rdt = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_equal(rdt['time'], np.arange(20))
    np.testing.assert_array_equal(rdt['temp'], np.arange(20))
def publish_hifi(self, stream_id, stream_route, offset=0):
    pub = StandaloneStreamPublisher(stream_id, stream_route)

    stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
    stream_def_id = stream_def._id
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10) + (offset * 10)
    rdt['temp'] = np.arange(10) + (offset * 10)
    pub.publish(rdt.to_granule())
def test_granule_publish(self):
    log.debug("test_granule_publish ")
    self.loggerpids = []

    # retrieve the param dict from the repository
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    stream_definition_id = self.pubsubclient.create_stream_definition('parsed stream', parameter_dictionary_id=pdict_id)

    dp_obj = IonObject(RT.DataProduct,
                       name=str(uuid.uuid4()),
                       description='ctd stream test')

    data_product_id1 = self.dpclient.create_data_product(data_product=dp_obj, stream_definition_id=stream_definition_id)

    # Retrieve the id of the output stream of the out Data Product
    stream_ids, _ = self.rrclient.find_objects(data_product_id1, PRED.hasStream, None, True)
    log.debug('test_granule_publish: Data product streams1 = %s', stream_ids)

    pid = self.create_logger('ctd_parsed', stream_ids[0])
    self.loggerpids.append(pid)

    rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)

    # create the publisher from the stream route
    stream_route = self.pubsubclient.read_stream_route(stream_ids[0])
    publisher = StandaloneStreamPublisher(stream_ids[0], stream_route)

    # this is one sample from the ctd driver
    tomato = {"driver_timestamp": 3555971105.1268806,
              "instrument_id": "ABC-123",
              "pkt_format_id": "JSON_Data",
              "pkt_version": 1,
              "preferred_timestamp": "driver_timestamp",
              "quality_flag": "ok",
              "stream_name": "parsed",
              "values": [{"value": 22.9304, "value_id": "temp"},
                         {"value": 51.57381, "value_id": "conductivity"},
                         {"value": 915.551, "value_id": "pressure"}]}

    for value in tomato['values']:
        log.debug("test_granule_publish: Looping tomato values key: %s val: %s ", str(value['value']), str(value['value_id']))
        if value['value_id'] in rdt:
            rdt[value['value_id']] = numpy.array([value['value']])
            log.debug("test_granule_publish: Added data item %s val: %s ", str(value['value']), str(value['value_id']))

    g = rdt.to_granule()
    publisher.publish(g)

    gevent.sleep(3)

    for pid in self.loggerpids:
        self.processdispatchclient.cancel_process(pid)

    #--------------------------------------------------------------------------------
    # Cleanup data products
    #--------------------------------------------------------------------------------
    dp_ids, _ = self.rrclient.find_resources(restype=RT.DataProduct, id_only=True)
    for dp_id in dp_ids:
        self.dataproductclient.delete_data_product(dp_id)
def publish_and_wait(self, dataset_id, granule):
    stream_ids, _ = self.resource_registry.find_objects(dataset_id, PRED.hasStream, id_only=True)
    stream_id = stream_ids[0]
    route = self.pubsub_management.read_stream_route(stream_id)
    publisher = StandaloneStreamPublisher(stream_id, route)
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)
    publisher.publish(granule)
    self.assertTrue(dataset_monitor.wait())
def test_coverage_transform(self):
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_parsed()
    stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream, stream_id)

    ingestion_config_id = self.get_ingestion_config()
    dataset_id = self.create_dataset(pdict_id)

    self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id)
    self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)
    publisher = StandaloneStreamPublisher(stream_id, route)

    rdt = ph.get_rdt(stream_def_id)
    ph.fill_parsed_rdt(rdt)

    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    publisher.publish(rdt.to_granule())
    self.assertTrue(dataset_monitor.event.wait(30))

    replay_granule = self.data_retriever.retrieve(dataset_id)
    rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

    np.testing.assert_array_almost_equal(rdt_out['time'], rdt['time'])
    np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp'])

    np.testing.assert_array_almost_equal(rdt_out['conductivity_L1'], np.array([42.914]))
    np.testing.assert_array_almost_equal(rdt_out['temp_L1'], np.array([20.]))
    np.testing.assert_array_almost_equal(rdt_out['pressure_L1'], np.array([3.068]))
    np.testing.assert_array_almost_equal(rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
    np.testing.assert_array_almost_equal(rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))
def _publish_granules(self, stream_id=None, stream_route=None, values=None, number=None, length=None):
    pub = StandaloneStreamPublisher(stream_id, stream_route)

    stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
    stream_def_id = stream_def._id
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

    for i in xrange(number):
        rdt['input_voltage'] = values
        rdt['preferred_timestamp'] = numpy.array([random.uniform(0, 1000) for l in xrange(length)])
        g = rdt.to_granule()
        pub.publish(g)
def publish_hifi(self, stream_id, stream_route, offset=0):
    '''
    Publish deterministic data
    '''
    pub = StandaloneStreamPublisher(stream_id, stream_route)

    stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
    stream_def_id = stream_def._id
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10) + (offset * 10)
    rdt['temp'] = np.arange(10) + (offset * 10)
    pub.publish(rdt.to_granule())
def test_move_activated_subscription(self):
    stream_id, route = self.pubsub_management.create_stream(name='test_stream', exchange_point='test_xp')

    #--------------------------------------------------------------------------------
    # Test moving after activate
    #--------------------------------------------------------------------------------

    subscription_id = self.pubsub_management.create_subscription('first_queue', stream_ids=[stream_id])
    self.pubsub_management.activate_subscription(subscription_id)

    xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='first_queue', id_only=True)
    subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
    self.assertEquals(xn_ids[0], subjects[0])

    self.verified = Event()

    def verify(m, r, s):
        self.assertEquals(m, 'verified')
        self.verified.set()

    subscriber = StandaloneStreamSubscriber('second_queue', verify)
    subscriber.start()

    self.pubsub_management.move_subscription(subscription_id, exchange_name='second_queue')

    xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name='second_queue', id_only=True)
    subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)

    self.assertEquals(len(subjects), 1)
    self.assertEquals(subjects[0], xn_ids[0])

    publisher = StandaloneStreamPublisher(stream_id, route)
    publisher.publish('verified')

    self.assertTrue(self.verified.wait(2))

    self.pubsub_management.deactivate_subscription(subscription_id)
    self.pubsub_management.delete_subscription(subscription_id)
    self.pubsub_management.delete_stream(stream_id)
def write_to_data_product(self, data_product_id):
    dataset_ids, _ = self.resource_registry.find_objects(data_product_id, 'hasDataset', id_only=True)
    dataset_id = dataset_ids.pop()

    stream_ids, _ = self.resource_registry.find_objects(data_product_id, 'hasStream', id_only=True)
    stream_id = stream_ids.pop()
    stream_def_ids, _ = self.resource_registry.find_objects(stream_id, 'hasStreamDefinition', id_only=True)
    stream_def_id = stream_def_ids.pop()

    route = self.pubsub_management.read_stream_route(stream_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    time_param = rdt._pdict.temporal_parameter_name
    if time_param is None:
        print '%s has no temporal parameter' % self.resource_registry.read(data_product_id).name
        return
    rdt[time_param] = np.arange(40)

    for field in rdt.fields:
        if field == rdt._pdict.temporal_parameter_name:
            continue
        rdt[field] = self.fill_values(rdt._pdict.get_context(field).param_type, 40)

    publisher = StandaloneStreamPublisher(stream_id, route)
    publisher.publish(rdt.to_granule())

    self.wait_until_we_have_enough_granules(dataset_id, 40)

    granule = self.data_retriever.retrieve(dataset_id)
    rdt_out = RecordDictionaryTool.load_from_granule(granule)

    bad = []
    for field in rdt.fields:
        if not np.array_equal(rdt[field], rdt_out[field]):
            print '%s' % field
            print '%s != %s' % (rdt[field], rdt_out[field])
            bad.append(field)
    return bad
def test_demux(self):
    self.stream0, self.route0 = self.pubsub_client.create_stream('stream0', exchange_point='test')
    self.stream1, self.route1 = self.pubsub_client.create_stream('stream1', exchange_point='main_data')
    self.stream2, self.route2 = self.pubsub_client.create_stream('stream2', exchange_point='alt_data')

    self.r_stream1 = gevent.event.Event()
    self.r_stream2 = gevent.event.Event()

    def process(msg, stream_route, stream_id):
        if stream_id == self.stream1:
            self.r_stream1.set()
        elif stream_id == self.stream2:
            self.r_stream2.set()

    self.container.spawn_process('demuxer', 'ion.processes.data.transforms.mux', 'DemuxTransform', {'process': {'out_streams': [self.stream1, self.stream2]}}, 'demuxer_pid')
    self.queue_cleanup.append('demuxer_pid')

    sub1 = StandaloneStreamSubscriber('sub1', process)
    sub2 = StandaloneStreamSubscriber('sub2', process)

    sub1.xn.bind(self.route1.routing_key, self.container.ex_manager.create_xp('main_data'))
    sub2.xn.bind(self.route2.routing_key, self.container.ex_manager.create_xp('alt_data'))

    sub1.start()
    sub2.start()

    self.queue_cleanup.append(sub1.xn)
    self.queue_cleanup.append(sub2.xn)

    xn = self.container.ex_manager.create_xn_queue('demuxer_pid')
    xn.bind(self.route0.routing_key, self.container.ex_manager.create_xp(self.route0.exchange_point))

    domino = StandaloneStreamPublisher(self.stream0, self.route0)
    domino.publish('test')

    self.assertTrue(self.r_stream1.wait(2))
    self.assertTrue(self.r_stream2.wait(2))

    self.container.proc_manager.terminate_process('demuxer_pid')

    sub1.stop()
    sub2.stop()
def set_configuration(self, config):
    log.warn("DRIVER: set_configuration")
    """
    expect configuration to have:
    - parser module/class
    - directory, wildcard to find data files
    - optional timestamp of last granule
    - optional poll rate
    - publish info
    """
    log.error("Log level: %s", log.getEffectiveLevel())
    log.debug('using configuration: %s', config)
    self.config = config
    self.max_records = get_safe(config, 'max_records', 100)
    self.stream_config = self.CFG.get('stream_config', {})
    if len(self.stream_config) == 1:
        stream_cfg = self.stream_config.values()[0]
    elif len(self.stream_config) > 1:
        stream_cfg = self.stream_config.values()[0]

    stream_id = stream_cfg['stream_id']
    stream_route = IonObject(OT.StreamRoute, routing_key=stream_cfg['routing_key'], exchange_point=stream_cfg['exchange_point'])
    param_dict = stream_cfg['stream_def_dict']['parameter_dictionary']

    self.publisher = StandaloneStreamPublisher(stream_id=stream_id, stream_route=stream_route)
    self.parameter_dictionary = ParameterDictionary.load(param_dict)
    self.time_field = self.parameter_dictionary.get_temporal_context()
    self.latest_granule_time = get_safe(config, 'last_time', 0)
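# Illustrative sketch only (an assumption, not taken from the source above): the shape of a
# single entry in self.CFG['stream_config'] that the set_configuration method above reads.
# The key names mirror what the code accesses; every value shown here is a placeholder.
example_stream_config = {
    'parsed': {                                    # hypothetical stream name
        'stream_id': '<stream resource id>',
        'routing_key': '<amqp routing key>',
        'exchange_point': 'science_data',
        'stream_def_dict': {
            'parameter_dictionary': {},            # a ParameterDictionary dump() would go here
        },
    },
}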
def test_transform_prime_no_available_fields(self):
    available_fields_in = []
    available_fields_out = []
    exchange_pt1 = 'xp1'
    exchange_pt2 = 'xp2'

    stream_id_in, stream_id_out, stream_route_in, stream_route_out, stream_def_in_id, stream_def_out_id = self._setup_streams(exchange_pt1, exchange_pt2, available_fields_in, available_fields_out)

    # launch transform
    config = {'process': {'routes': {(stream_id_in, stream_id_out): None}, 'queue_name': exchange_pt1, 'publish_streams': {str(stream_id_out): stream_id_out}, 'process_type': 'stream_process'}}
    pid = self.container.spawn_process('transform_stream', 'ion.processes.data.transforms.transform_prime', 'TransformPrime', config)

    # create publisher
    publisher = StandaloneStreamPublisher(stream_id_in, stream_route_in)
    self.container.proc_manager.procs[pid].subscriber.xn.bind(stream_route_in.routing_key, publisher.xp)

    # data
    rdt_in = RecordDictionaryTool(stream_definition_id=stream_def_in_id)
    dt = 20
    rdt_in['time'] = np.arange(dt)
    rdt_in['lat'] = [40.992469] * dt
    rdt_in['lon'] = [-71.727069] * dt
    rdt_in['TEMPWAT_L0'] = self._get_param_vals('TEMPWAT_L0', slice(None), (dt,))
    rdt_in['CONDWAT_L0'] = self._get_param_vals('CONDWAT_L0', slice(None), (dt,))
    rdt_in['PRESWAT_L0'] = self._get_param_vals('PRESWAT_L0', slice(None), (dt,))
    msg = rdt_in.to_granule()

    # publish granule to transform and have transform publish it to subscriber
    # validate transformed data
    e = gevent.event.Event()

    def cb(msg, sr, sid):
        self.assertEqual(sid, stream_id_out)
        rdt_out = RecordDictionaryTool.load_from_granule(msg)
        self.assertEquals(set([k for k, v in rdt_out.iteritems()]), set(available_fields_out))
        for k, v in rdt_out.iteritems():
            self.assertEquals(rdt_out[k], None)
        e.set()

    sub = StandaloneStreamSubscriber('stream_subscriber', cb)
    sub.xn.bind(stream_route_out.routing_key, getattr(self.container.proc_manager.procs[pid], stream_id_out).xp)
    self.addCleanup(sub.stop)
    sub.start()

    # publish msg to transform
    publisher.publish(msg)

    # wait to receive msg
    self.assertTrue(e.wait(4))
def test_retrieve_and_transform(self):
    # Stream definition for the CTD data
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    stream_def_id = self.pubsub_management.create_stream_definition('ctd data', parameter_dictionary_id=pdict_id)
    ctd_stream_id, route = self.pubsub_management.create_stream('ctd stream', 'xp1', stream_definition_id=stream_def_id)

    # Stream definition for the salinity data
    salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    sal_stream_def_id = self.pubsub_management.create_stream_definition('sal data', parameter_dictionary_id=salinity_pdict_id)

    ingest_config_id = self.get_ingestion_config()
    dataset_id = self.create_dataset(pdict_id)

    #--------------------------------------------------------------------------------
    # Again with this ridiculous problem
    #--------------------------------------------------------------------------------
    self.get_datastore(dataset_id)

    self.ingestion_management.persist_data_stream(stream_id=ctd_stream_id, ingestion_configuration_id=ingest_config_id, dataset_id=dataset_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10)
    rdt['temp'] = np.random.randn(10) * 10 + 30
    rdt['conductivity'] = np.random.randn(10) * 2 + 10

    publisher = StandaloneStreamPublisher(ctd_stream_id, route)
    publisher.publish(rdt.to_granule())

    rdt['time'] = np.arange(10, 20)
    publisher.publish(rdt.to_granule())

    self.wait_until_we_have_enough_granules(dataset_id, 2)

    granule = self.data_retriever.retrieve(dataset_id, None, None, 'ion.processes.data.transforms.ctd.ctd_L2_salinity', 'CTDL2SalinityTransformAlgorithm', kwargs=dict(params=sal_stream_def_id))

    rdt = RecordDictionaryTool.load_from_granule(granule)
    for i in rdt['salinity']:
        self.assertNotEquals(i, 0)
def write_to_data_product(self, data_product_id):
    dataset_ids, _ = self.resource_registry.find_objects(data_product_id, 'hasDataset', id_only=True)
    dataset_id = dataset_ids.pop()

    stream_ids, _ = self.resource_registry.find_objects(data_product_id, 'hasStream', id_only=True)
    stream_id = stream_ids.pop()
    stream_def_ids, _ = self.resource_registry.find_objects(stream_id, 'hasStreamDefinition', id_only=True)
    stream_def_id = stream_def_ids.pop()

    route = self.pubsub_management.read_stream_route(stream_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    time_param = rdt._pdict.temporal_parameter_name
    if time_param is None:
        print '%s has no temporal parameter' % self.resource_registry.read(data_product_id).name
        return
    rdt[time_param] = np.arange(40)

    for field in rdt.fields:
        if field == rdt._pdict.temporal_parameter_name:
            continue
        rdt[field] = self.fill_values(rdt._pdict.get_context(field).param_type, 40)

    publisher = StandaloneStreamPublisher(stream_id, route)
    publisher.publish(rdt.to_granule())

    self.wait_until_we_have_enough_granules(dataset_id, 40)

    granule = self.data_retriever.retrieve(dataset_id)
    rdt_out = RecordDictionaryTool.load_from_granule(granule)

    bad = []
    for field in rdt.fields:
        if not np.array_equal(rdt[field], rdt_out[field]):
            print '%s' % field
            print '%s != %s' % (rdt[field], rdt_out[field])
            bad.append(field)
    return bad
def test_move_activated_subscription(self):
    stream_id, route = self.pubsub_management.create_stream(name="test_stream", exchange_point="test_xp")

    # --------------------------------------------------------------------------------
    # Test moving after activate
    # --------------------------------------------------------------------------------

    subscription_id = self.pubsub_management.create_subscription("first_queue", stream_ids=[stream_id])
    self.pubsub_management.activate_subscription(subscription_id)

    xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name="first_queue", id_only=True)
    subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)
    self.assertEquals(xn_ids[0], subjects[0])

    self.verified = Event()

    def verify(m, r, s):
        self.assertEquals(m, "verified")
        self.verified.set()

    subscriber = StandaloneStreamSubscriber("second_queue", verify)
    subscriber.start()

    self.pubsub_management.move_subscription(subscription_id, exchange_name="second_queue")

    xn_ids, _ = self.resource_registry.find_resources(restype=RT.ExchangeName, name="second_queue", id_only=True)
    subjects, _ = self.resource_registry.find_subjects(object=subscription_id, predicate=PRED.hasSubscription, id_only=True)

    self.assertEquals(len(subjects), 1)
    self.assertEquals(subjects[0], xn_ids[0])

    publisher = StandaloneStreamPublisher(stream_id, route)
    publisher.publish("verified")

    self.assertTrue(self.verified.wait(2))

    self.pubsub_management.deactivate_subscription(subscription_id)
    self.pubsub_management.delete_subscription(subscription_id)
    self.pubsub_management.delete_stream(stream_id)
def test_event_transform_worker(self):
    self.data_process_objs = []
    self._output_stream_ids = []
    self.event_verified = Event()

    # test that a data process (type: data-product-in / event-out) can be defined and launched.
    # verify that event fields are correctly populated

    self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

    # create the StreamDefinition
    self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
    self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

    # create the DataProduct
    input_dp_obj = IonObject(RT.DataProduct,
                             name='input_data_product',
                             description='input test stream',
                             temporal_domain=self.time_dom.dump(),
                             spatial_domain=self.spatial_dom.dump())
    self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

    # retrieve the Stream for this data product
    stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True)
    self.stream_id = stream_ids[0]

    # create the DPD and two DPs
    self.event_data_process_id = self.create_event_data_processes()

    # retrieve subscription from data process
    subscription_objs, _ = self.rrclient.find_objects(subject=self.event_data_process_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False)
    log.debug('test_event_transform_worker subscription_obj: %s', subscription_objs[0])

    # create a queue to catch the published granules
    self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name)
    self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id)

    self.pubsub_client.activate_subscription(self.subscription_id)
    self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id)

    stream_route = self.pubsub_client.read_stream_route(self.stream_id)
    self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route)

    self.start_event_transform_listener()

    self.data_modified = Event()

    rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
    rdt['time'] = [0]  # time should always come first
    rdt['conductivity'] = [1]
    rdt['pressure'] = [2]
    rdt['salinity'] = [8]

    self.publisher.publish(rdt.to_granule())

    self.assertTrue(self.event_verified.wait(self.wait_time))
def test_activation_and_deactivation(self):
    stream_id, route = self.pubsub_management.create_stream('stream1', 'xp1')
    subscription_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id])

    self.check1 = Event()

    def verifier(m, r, s):
        self.check1.set()

    subscriber = StandaloneStreamSubscriber('sub1', verifier)
    subscriber.start()

    publisher = StandaloneStreamPublisher(stream_id, route)
    publisher.publish('should not receive')
    self.assertFalse(self.check1.wait(0.25))

    self.pubsub_management.activate_subscription(subscription_id)

    publisher.publish('should receive')
    self.assertTrue(self.check1.wait(2))

    self.check1.clear()
    self.assertFalse(self.check1.is_set())

    self.pubsub_management.deactivate_subscription(subscription_id)

    publisher.publish('should not receive')
    self.assertFalse(self.check1.wait(0.5))

    self.pubsub_management.activate_subscription(subscription_id)

    publisher.publish('should receive')
    self.assertTrue(self.check1.wait(2))

    subscriber.stop()

    self.pubsub_management.deactivate_subscription(subscription_id)
    self.pubsub_management.delete_subscription(subscription_id)
    self.pubsub_management.delete_stream(stream_id)
def test_retrieve_and_transform(self):
    # Make a simple dataset and start ingestion, pretty standard stuff.
    ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
    self.start_ingestion(ctd_stream_id, dataset_id)

    # Stream definition for the salinity data
    salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name("ctd_parsed_param_dict", id_only=True)
    sal_stream_def_id = self.pubsub_management.create_stream_definition("sal data", parameter_dictionary_id=salinity_pdict_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt["time"] = np.arange(10)
    rdt["temp"] = np.random.randn(10) * 10 + 30
    rdt["conductivity"] = np.random.randn(10) * 2 + 10
    rdt["pressure"] = np.random.randn(10) * 1 + 12

    publisher = StandaloneStreamPublisher(ctd_stream_id, route)
    publisher.publish(rdt.to_granule())

    rdt["time"] = np.arange(10, 20)
    publisher.publish(rdt.to_granule())

    self.wait_until_we_have_enough_granules(dataset_id, 20)

    granule = self.data_retriever.retrieve(dataset_id, None, None, "ion.processes.data.transforms.ctd.ctd_L2_salinity", "CTDL2SalinityTransformAlgorithm", kwargs=dict(params=sal_stream_def_id))

    rdt = RecordDictionaryTool.load_from_granule(granule)
    for i in rdt["salinity"]:
        self.assertNotEquals(i, 0)

    self.streams.append(ctd_stream_id)
    self.stop_ingestion(ctd_stream_id)
def _publish_granules(self, stream_id=None, stream_route=None, values=None, number=None):
    pub = StandaloneStreamPublisher(stream_id, stream_route)

    stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
    stream_def_id = stream_def._id
    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    times = numpy.array([number for l in xrange(self.length)])

    for i in xrange(number):
        rdt['input_voltage'] = values
        rdt['preferred_timestamp'] = ['time' for l in xrange(len(times))]
        rdt['time'] = times
        g = rdt.to_granule()
        g.data_producer_id = 'instrument_1'
        log.debug("granule #%s published by instrument:: %s" % (number, g))
        pub.publish(g)
def test_transform_worker(self):
    self.loggerpids = []
    self.data_process_objs = []
    self._output_stream_ids = []

    self.start_transform_worker()

    self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

    # create the StreamDefinition
    self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
    self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

    # create the DataProduct
    input_dp_obj = IonObject(RT.DataProduct,
                             name='input_data_product',
                             description='input test stream',
                             temporal_domain=self.time_dom.dump(),
                             spatial_domain=self.spatial_dom.dump())
    self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

    # retrieve the Stream for this data product
    stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True)
    self.stream_id = stream_ids[0]

    # create a queue to catch the published granules
    self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name='parsed_subscription')
    self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id)

    self.pubsub_client.activate_subscription(self.subscription_id)
    self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id)

    stream_route = self.pubsub_client.read_stream_route(self.stream_id)
    self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route)

    self.start_event_listener()

    self.dp_list = self.create_data_processes()

    self.data_modified = Event()
    self.data_modified.wait(5)

    rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
    rdt['time'] = [0]  # time should always come first
    rdt['conductivity'] = [1]
    rdt['pressure'] = [2]
    rdt['salinity'] = [8]

    self.publisher.publish(rdt.to_granule())
    self.data_modified.wait(5)

    # Cleanup processes
    for pid in self.loggerpids:
        self.processdispatchclient.cancel_process(pid)
def test_ingestion_pause(self):
    ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
    ingestion_config_id = self.get_ingestion_config()
    self.start_ingestion(ctd_stream_id, dataset_id)
    self.addCleanup(self.stop_ingestion, ctd_stream_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10)

    publisher = StandaloneStreamPublisher(ctd_stream_id, route)
    monitor = DatasetMonitor(dataset_id)
    self.addCleanup(monitor.stop)

    publisher.publish(rdt.to_granule())
    self.assertTrue(monitor.wait())
    granule = self.data_retriever.retrieve(dataset_id)

    self.ingestion_management.pause_data_stream(ctd_stream_id, ingestion_config_id)

    monitor.event.clear()
    rdt['time'] = np.arange(10, 20)
    publisher.publish(rdt.to_granule())
    self.assertFalse(monitor.event.wait(1))

    self.ingestion_management.resume_data_stream(ctd_stream_id, ingestion_config_id)
    self.assertTrue(monitor.wait())

    granule = self.data_retriever.retrieve(dataset_id)
    rdt2 = RecordDictionaryTool.load_from_granule(granule)
    np.testing.assert_array_almost_equal(rdt2['time'], np.arange(20))
def test_demux(self):
    self.stream0, self.route0 = self.pubsub_client.create_stream('stream0', exchange_point='test')
    self.stream1, self.route1 = self.pubsub_client.create_stream('stream1', exchange_point='main_data')
    self.stream2, self.route2 = self.pubsub_client.create_stream('stream2', exchange_point='alt_data')

    self.r_stream1 = gevent.event.Event()
    self.r_stream2 = gevent.event.Event()

    def process(msg, stream_route, stream_id):
        if stream_id == self.stream1:
            self.r_stream1.set()
        elif stream_id == self.stream2:
            self.r_stream2.set()

    self.container.spawn_process('demuxer', 'ion.processes.data.transforms.mux', 'DemuxTransform', {'process': {'out_streams': [self.stream1, self.stream2]}}, 'demuxer_pid')
    self.queue_cleanup.append('demuxer_pid')

    sub1 = StandaloneStreamSubscriber('sub1', process)
    sub2 = StandaloneStreamSubscriber('sub2', process)

    sub1.xn.bind(self.route1.routing_key, self.container.ex_manager.create_xp('main_data'))
    sub2.xn.bind(self.route2.routing_key, self.container.ex_manager.create_xp('alt_data'))

    sub1.start()
    sub2.start()

    self.queue_cleanup.append(sub1.xn)
    self.queue_cleanup.append(sub2.xn)

    xn = self.container.ex_manager.create_xn_queue('demuxer_pid')
    xn.bind(self.route0.routing_key, self.container.ex_manager.create_xp(self.route0.exchange_point))

    domino = StandaloneStreamPublisher(self.stream0, self.route0)
    domino.publish('test')

    self.assertTrue(self.r_stream1.wait(2))
    self.assertTrue(self.r_stream2.wait(2))

    self.container.proc_manager.terminate_process('demuxer_pid')

    sub1.stop()
    sub2.stop()
def setUp(self):
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2deploy.yml')

    self.dataset_management_client = DatasetManagementServiceClient(node=self.container.node)
    self.pubsub_client = PubsubManagementServiceClient(node=self.container.node)

    self.time_dom, self.spatial_dom = time_series_domain()

    self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

    self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
    self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

    self.stream_id, self.route_id = self.pubsub_client.create_stream(name='parsed_stream', stream_definition_id=self.stream_def_id, exchange_point='science_data')
    self.addCleanup(self.pubsub_client.delete_stream, self.stream_id)

    self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name='parsed_subscription')
    self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id)

    self.pubsub_client.activate_subscription(self.subscription_id)
    self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id)

    self.publisher = StandaloneStreamPublisher(self.stream_id, self.route_id)
def test_execute_transform(self):
    streams = self.setup_transform()
    in_stream_id, in_stream_def_id = streams[0]
    out_stream_id, out_stream_def_id = streams[1]

    validation_event = Event()

    def validator(msg, route, stream_id):
        rdt = RecordDictionaryTool.load_from_granule(msg)
        if not np.allclose(rdt['TEMPWAT_L1'], np.array([18.])):
            return
        if not np.allclose(rdt['CONDWAT_L1'], np.array([0.5])):
            return
        if not np.allclose(rdt['PRESWAT_L1'], np.array([0.04536611])):
            return
        validation_event.set()

    self.setup_validator(validator)

    in_route = self.pubsub_management.read_stream_route(in_stream_id)
    publisher = StandaloneStreamPublisher(in_stream_id, in_route)
    outbound_rdt = RecordDictionaryTool(stream_definition_id=in_stream_def_id)
    outbound_rdt['time'] = [0]
    outbound_rdt['TEMPWAT_L0'] = [280000]
    outbound_rdt['CONDWAT_L0'] = [100000]
    outbound_rdt['PRESWAT_L0'] = [2789]
    outbound_rdt['lat'] = [45]
    outbound_rdt['lon'] = [-71]

    outbound_granule = outbound_rdt.to_granule()

    publisher.publish(outbound_granule)

    self.assertTrue(validation_event.wait(2))
def test_granule(self):
    pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    stream_def_id = self.pubsub_management.create_stream_definition('ctd', parameter_dictionary_id=pdict_id)
    pdict = DatasetManagementService.get_parameter_dictionary_by_name('ctd_parsed_param_dict')

    stream_id, route = self.pubsub_management.create_stream('ctd_stream', 'xp1', stream_definition_id=stream_def_id)
    self.xps.append('xp1')
    publisher = StandaloneStreamPublisher(stream_id, route)

    subscriber = StandaloneStreamSubscriber('sub', self.verify_incoming)
    subscriber.start()

    subscription_id = self.pubsub_management.create_subscription('sub', stream_ids=[stream_id])
    self.xns.append('sub')
    self.pubsub_management.activate_subscription(subscription_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10)
    rdt['temp'] = np.random.randn(10) * 10 + 30
    rdt['pressure'] = [20] * 10

    self.assertEquals(set(pdict.keys()), set(rdt.fields))
    self.assertEquals(pdict.temporal_parameter_name, rdt.temporal_parameter)

    self.rdt = rdt
    self.data_producer_id = 'data_producer'
    self.provider_metadata_update = {1: 1}

    publisher.publish(rdt.to_granule(data_producer_id='data_producer', provider_metadata_update={1: 1}))

    self.assertTrue(self.event.wait(10))

    self.pubsub_management.deactivate_subscription(subscription_id)
    self.pubsub_management.delete_subscription(subscription_id)
def test_activation_and_deactivation(self):
    stream_id, route = self.pubsub_management.create_stream('stream1', 'xp1')
    subscription_id = self.pubsub_management.create_subscription('sub1', stream_ids=[stream_id])

    self.check1 = Event()

    def verifier(m, r, s):
        self.check1.set()

    subscriber = StandaloneStreamSubscriber('sub1', verifier)
    subscriber.start()

    publisher = StandaloneStreamPublisher(stream_id, route)
    publisher.publish('should not receive')
    self.assertFalse(self.check1.wait(0.25))

    self.pubsub_management.activate_subscription(subscription_id)

    publisher.publish('should receive')
    self.assertTrue(self.check1.wait(2))

    self.check1.clear()
    self.assertFalse(self.check1.is_set())

    self.pubsub_management.deactivate_subscription(subscription_id)

    publisher.publish('should not receive')
    self.assertFalse(self.check1.wait(0.5))

    self.pubsub_management.activate_subscription(subscription_id)

    publisher.publish('should receive')
    self.assertTrue(self.check1.wait(2))

    subscriber.stop()

    self.pubsub_management.deactivate_subscription(subscription_id)
    self.pubsub_management.delete_subscription(subscription_id)
    self.pubsub_management.delete_stream(stream_id)
def test_execute_transform(self):
    streams = self.setup_transform()
    in_stream_id, in_stream_def_id = streams[0]
    out_stream_id, out_stream_def_id = streams[1]

    validation_event = Event()

    def validator(msg, route, stream_id):
        rdt = RecordDictionaryTool.load_from_granule(msg)
        if not np.allclose(rdt['TEMPWAT_L1'], np.array([18.])):
            return
        if not np.allclose(rdt['CONDWAT_L1'], np.array([0.5])):
            return
        if not np.allclose(rdt['PRESWAT_L1'], np.array([0.04536611])):
            return
        validation_event.set()

    self.setup_validator(validator)

    in_route = self.pubsub_management.read_stream_route(in_stream_id)
    publisher = StandaloneStreamPublisher(in_stream_id, in_route)
    outbound_rdt = RecordDictionaryTool(stream_definition_id=in_stream_def_id)
    outbound_rdt['time'] = [0]
    outbound_rdt['TEMPWAT_L0'] = [280000]
    outbound_rdt['CONDWAT_L0'] = [100000]
    outbound_rdt['PRESWAT_L0'] = [2789]
    outbound_rdt['lat'] = [45]
    outbound_rdt['lon'] = [-71]

    outbound_granule = outbound_rdt.to_granule()

    publisher.publish(outbound_granule)

    self.assertTrue(validation_event.wait(2))
def test_coverage_transform(self):
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_parsed()
    stream_def_id = self.pubsub_management.create_stream_definition('ctd parsed', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream, stream_id)

    ingestion_config_id = self.get_ingestion_config()
    dataset_id = self.create_dataset(pdict_id)

    self.ingestion_management.persist_data_stream(stream_id=stream_id, ingestion_configuration_id=ingestion_config_id, dataset_id=dataset_id)
    self.addCleanup(self.ingestion_management.unpersist_data_stream, stream_id, ingestion_config_id)
    publisher = StandaloneStreamPublisher(stream_id, route)

    rdt = ph.get_rdt(stream_def_id)
    ph.fill_parsed_rdt(rdt)

    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)

    publisher.publish(rdt.to_granule())
    self.assertTrue(dataset_monitor.wait())

    replay_granule = self.data_retriever.retrieve(dataset_id)
    rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

    np.testing.assert_array_almost_equal(rdt_out['time'], rdt['time'])
    np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp'])

    np.testing.assert_allclose(rdt_out['conductivity_L1'], np.array([42.914]))
    np.testing.assert_allclose(rdt_out['temp_L1'], np.array([20.]))
    np.testing.assert_allclose(rdt_out['pressure_L1'], np.array([3.068]))
    np.testing.assert_allclose(rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
    np.testing.assert_allclose(rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))
def set_configuration(self, config):
    """
    expect configuration to have:
    - parser module/class
    - directory, wildcard to find data files
    - optional timestamp of last granule
    - optional poll rate
    - publish info
    """
    log.debug('using configuration: %s', config)
    self.config = config
    self.max_records = get_safe(config, 'max_records', 100)
    stream_id = config['stream_id']
    stream_route_param = config['stream_route']
    stream_route = IonObject(OT.StreamRoute, stream_route_param)
    self.publisher = StandaloneStreamPublisher(stream_id=stream_id, stream_route=stream_route)
    self.parameter_dictionary = ParameterDictionary.load(config['parameter_dict'])
    self.time_field = self.parameter_dictionary.get_temporal_context()
    self.latest_granule_time = get_safe(config, 'last_time', 0)
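# Illustrative sketch only (an assumption, not taken from the source above): a config dict
# shaped the way the simpler set_configuration above reads it. Key names mirror what the
# code accesses; every value shown here is a placeholder.
example_config = {
    'stream_id': '<stream resource id>',                  # stream to publish granules on
    'stream_route': {'routing_key': '<amqp routing key>',
                     'exchange_point': 'science_data'},   # fields used to build the OT.StreamRoute
    'parameter_dict': {},                                 # a ParameterDictionary dump() would go here
    'max_records': 100,                                   # optional, defaults to 100
    'last_time': 0,                                       # optional timestamp of the last granule
}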
def setUp(self):
    self._start_container()
    self.container.start_rel_from_url('res/deploy/r2deploy.yml')

    self.dataset_management_client = DatasetManagementServiceClient(node=self.container.node)
    self.pubsub_client = PubsubManagementServiceClient(node=self.container.node)

    self.time_dom, self.spatial_dom = time_series_domain()

    self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

    self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
    self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

    self.stream_id, self.route_id = self.pubsub_client.create_stream(name='parsed_stream', stream_definition_id=self.stream_def_id, exchange_point='science_data')
    self.addCleanup(self.pubsub_client.delete_stream, self.stream_id)

    self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name='parsed_subscription')
    self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id)

    self.pubsub_client.activate_subscription(self.subscription_id)
    self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id)

    self.publisher = StandaloneStreamPublisher(self.stream_id, self.route_id)
def test_retrieve_and_transform(self):
    # Make a simple dataset and start ingestion, pretty standard stuff.
    ctd_stream_id, route, stream_def_id, dataset_id = self.make_simple_dataset()
    self.start_ingestion(ctd_stream_id, dataset_id)

    # Stream definition for the salinity data
    salinity_pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict', id_only=True)
    sal_stream_def_id = self.pubsub_management.create_stream_definition('sal data', parameter_dictionary_id=salinity_pdict_id)

    rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
    rdt['time'] = np.arange(10)
    rdt['temp'] = np.random.randn(10) * 10 + 30
    rdt['conductivity'] = np.random.randn(10) * 2 + 10
    rdt['pressure'] = np.random.randn(10) * 1 + 12

    publisher = StandaloneStreamPublisher(ctd_stream_id, route)
    publisher.publish(rdt.to_granule())

    rdt['time'] = np.arange(10, 20)
    publisher.publish(rdt.to_granule())

    self.wait_until_we_have_enough_granules(dataset_id, 20)

    granule = self.data_retriever.retrieve(dataset_id, None, None, 'ion.processes.data.transforms.ctd.ctd_L2_salinity', 'CTDL2SalinityTransformAlgorithm', kwargs=dict(params=sal_stream_def_id))

    rdt = RecordDictionaryTool.load_from_granule(granule)
    for i in rdt['salinity']:
        self.assertNotEquals(i, 0)

    self.streams.append(ctd_stream_id)
    self.stop_ingestion(ctd_stream_id)
def test_sparse_values(self):
    ph = ParameterHelper(self.dataset_management, self.addCleanup)
    pdict_id = ph.create_sparse()
    stream_def_id = self.pubsub_management.create_stream_definition('sparse', parameter_dictionary_id=pdict_id)
    self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)

    stream_id, route = self.pubsub_management.create_stream('example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
    self.addCleanup(self.pubsub_management.delete_stream, stream_id)

    dataset_id = self.create_dataset(pdict_id)
    self.start_ingestion(stream_id, dataset_id)
    self.addCleanup(self.stop_ingestion, stream_id)

    # Publish initial granule
    # the first one has the sparse value set inside it, sets lat to 45 and lon to -71
    ntp_now = time.time() + 2208988800
    rdt = ph.get_rdt(stream_def_id)
    rdt['time'] = [ntp_now]
    rdt['internal_timestamp'] = [ntp_now]
    rdt['temp'] = [300000]
    rdt['preferred_timestamp'] = ['driver_timestamp']
    rdt['port_timestamp'] = [ntp_now]
    rdt['quality_flag'] = ['']
    rdt['lat'] = [45]
    rdt['conductivity'] = [4341400]
    rdt['driver_timestamp'] = [ntp_now]
    rdt['lon'] = [-71]
    rdt['pressure'] = [256.8]

    publisher = StandaloneStreamPublisher(stream_id, route)
    dataset_monitor = DatasetMonitor(dataset_id)
    self.addCleanup(dataset_monitor.stop)
    publisher.publish(rdt.to_granule())
    self.assertTrue(dataset_monitor.wait())
    dataset_monitor.reset()

    replay_granule = self.data_retriever.retrieve(dataset_id)
    rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

    # Check the values and make sure they're correct
    np.testing.assert_allclose(rdt_out['time'], rdt['time'])
    np.testing.assert_allclose(rdt_out['temp'], rdt['temp'])
    np.testing.assert_allclose(rdt_out['lat'], np.array([45]))
    np.testing.assert_allclose(rdt_out['lon'], np.array([-71]))

    np.testing.assert_allclose(rdt_out['conductivity_L1'], np.array([42.914]))
    np.testing.assert_allclose(rdt_out['temp_L1'], np.array([20.]))
    np.testing.assert_allclose(rdt_out['pressure_L1'], np.array([3.068]))
    np.testing.assert_allclose(rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
    np.testing.assert_allclose(rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))

    # We're going to change the lat/lon
    rdt = ph.get_rdt(stream_def_id)
    rdt['time'] = time.time() + 2208988800
    rdt['lat'] = [46]
    rdt['lon'] = [-73]

    publisher.publish(rdt.to_granule())
    self.assertTrue(dataset_monitor.wait())
    dataset_monitor.reset()

    replay_granule = self.data_retriever.retrieve(dataset_id)
    rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

    np.testing.assert_allclose(rdt_out['time'], rdt['time'])

    for i in xrange(9):
        ntp_now = time.time() + 2208988800
        rdt['time'] = [ntp_now]
        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [300000]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = [None]
        rdt['conductivity'] = [4341400]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['pressure'] = [256.8]

        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.wait())
        dataset_monitor.reset()

    replay_granule = self.data_retriever.retrieve(dataset_id)
    rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

    np.testing.assert_allclose(rdt_out['pressure'], np.array([256.8] * 10))
    np.testing.assert_allclose(rdt_out['lat'], np.array([45] + [46] * 9))
    np.testing.assert_allclose(rdt_out['lon'], np.array([-71] + [-73] * 9))
def publish_on_stream(self, stream_id, msg): stream = self.pubsub_management.read_stream(stream_id) stream_route = stream.stream_route publisher = StandaloneStreamPublisher(stream_id=stream_id, stream_route=stream_route) publisher.publish(msg)
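# Typical use of the helper above with a RecordDictionaryTool granule (illustrative only; mirrors the pattern used
# elsewhere in these tests):
#   rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)
#   rdt['time'] = [0]
#   self.publish_on_stream(stream_id, rdt.to_granule())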
class TestTransformWorkerSubscriptions(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') self.dataset_management_client = DatasetManagementServiceClient( node=self.container.node) self.pubsub_client = PubsubManagementServiceClient( node=self.container.node) self.dataproductclient = DataProductManagementServiceClient( node=self.container.node) self.dataprocessclient = DataProcessManagementServiceClient( node=self.container.node) self.processdispatchclient = ProcessDispatcherServiceClient( node=self.container.node) self.damsclient = DataAcquisitionManagementServiceClient( node=self.container.node) self.rrclient = ResourceRegistryServiceClient(node=self.container.node) self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_multi_subscriptions(self): self.dp_list = [] self.event1_verified = Event() self.event2_verified = Event() self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct input_dp_obj = IonObject(RT.DataProduct, name='input_data_product_one', description='input test stream one') self.input_dp_one_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) input_dp_obj = IonObject(RT.DataProduct, name='input_data_product_two', description='input test stream two') self.input_dp_two_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) #retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_one_id, PRED.hasStream, RT.Stream, True) self.stream_one_id = stream_ids[0] stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_two_id, PRED.hasStream, RT.Stream, True) self.stream_two_id = stream_ids[0] dpd_id = self.create_data_process_definition() dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products( ) first_dp_id = self.create_data_process_one(dpd_id, dp1_func_output_dp_id) second_dp_id = self.create_data_process_two(dpd_id, self.input_dp_two_id, dp2_func_output_dp_id) #retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects( subject=first_dp_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) #create subscription to stream ONE, create data process and publish granule on stream ONE #create a queue to catch the published granules of stream ONE self.subscription_one_id = self.pubsub_client.create_subscription( name='parsed_subscription_one', stream_ids=[self.stream_one_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_one_id) self.pubsub_client.activate_subscription(self.subscription_one_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_one_id) stream_route_one = self.pubsub_client.read_stream_route( self.stream_one_id) self.publisher_one = StandaloneStreamPublisher( stream_id=self.stream_one_id, 
stream_route=stream_route_one) self.start_event_listener() #data process 1 adds conductivity + pressure and puts the result in salinity rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher_one.publish(msg=rdt.to_granule(), stream_id=self.stream_one_id) #retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects( subject=second_dp_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) #create subscription to stream ONE and TWO, move TW subscription, create data process and publish granule on stream TWO #create a queue to catch the published granules of stream TWO self.subscription_two_id = self.pubsub_client.create_subscription( name='parsed_subscription_one_two', stream_ids=[self.stream_two_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_two_id) self.pubsub_client.activate_subscription(self.subscription_two_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_two_id) stream_route_two = self.pubsub_client.read_stream_route( self.stream_two_id) self.publisher_two = StandaloneStreamPublisher( stream_id=self.stream_two_id, stream_route=stream_route_two) #data process 1 adds conductivity + pressure and puts the result in salinity rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher_one.publish(msg=rdt.to_granule(), stream_id=self.stream_one_id) #data process 2 adds salinity + pressure and puts the result in conductivity rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [22] rdt['pressure'] = [4] rdt['salinity'] = [1] self.publisher_two.publish(msg=rdt.to_granule(), stream_id=self.stream_two_id) self.assertTrue(self.event2_verified.wait(self.wait_time)) self.assertTrue(self.event1_verified.wait(self.wait_time)) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_two_transforms_inline(self): self.dp_list = [] self.event1_verified = Event() self.event2_verified = Event() self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct input_dp_obj = IonObject(RT.DataProduct, name='input_data_product_one', description='input test stream one') self.input_dp_one_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) dpd_id = self.create_data_process_definition() dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products( ) first_dp_id = self.create_data_process_one(dpd_id, dp1_func_output_dp_id) second_dp_id = self.create_data_process_two(dpd_id, dp1_func_output_dp_id, dp2_func_output_dp_id) #retrieve subscription from data process one subscription_objs, _ = self.rrclient.find_objects( 
subject=first_dp_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) #retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_one_id, PRED.hasStream, RT.Stream, True) self.stream_one_id = stream_ids[0] #the input to data process two is the output from data process one stream_ids, assoc_ids = self.rrclient.find_objects( dp1_func_output_dp_id, PRED.hasStream, RT.Stream, True) self.stream_two_id = stream_ids[0] # Run provenance on the output dataproduct of the second data process to see all the links # are as expected output_data_product_provenance = self.dataproductclient.get_data_product_provenance( dp2_func_output_dp_id) # Do a basic check that there are 3 entries in the provenance graph: the input product and the outputs of the two data processes. self.assertTrue(len(output_data_product_provenance) == 3) # confirm that the linking from the output dataproduct to input dataproduct is correct self.assertTrue( dp1_func_output_dp_id in output_data_product_provenance[dp2_func_output_dp_id]['parents']) self.assertTrue( self.input_dp_one_id in output_data_product_provenance[dp1_func_output_dp_id]['parents']) #create subscription to streams ONE and TWO #create a queue to catch the published granules subscription_id = self.pubsub_client.create_subscription( name='parsed_subscription', stream_ids=[self.stream_one_id, self.stream_two_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, subscription_id) self.pubsub_client.activate_subscription(subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id) stream_route_one = self.pubsub_client.read_stream_route( self.stream_one_id) self.publisher_one = StandaloneStreamPublisher( stream_id=self.stream_one_id, stream_route=stream_route_one) #retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects( subject=second_dp_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) #data process 1 adds conductivity + pressure and puts the result in salinity #data process 2 adds salinity + pressure and puts the result in conductivity self.start_event_listener() rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher_one.publish(msg=rdt.to_granule(), stream_id=self.stream_one_id) self.assertTrue(self.event2_verified.wait(self.wait_time)) self.assertTrue(self.event1_verified.wait(self.wait_time)) def create_data_process_definition(self): #two data processes using one transform and one DPD # Set up DPD and DP #2 - array add function tf_obj = IonObject( RT.TransformFunction, name='add_array_func', description='adds values in an array', function='add_arrays', module="ion_example.add_arrays", arguments=['arr1', 'arr2'], function_type=TransformFunctionType.TRANSFORM, uri= 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg' ) add_array_func_id, rev = self.rrclient.create(tf_obj) dpd_obj = IonObject( RT.DataProcessDefinition, name='add_arrays', description='adds the values of two arrays', data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS, ) add_array_dpd_id =
self.dataprocessclient.create_data_process_definition( data_process_definition=dpd_obj, function_id=add_array_func_id) self.dataprocessclient.assign_stream_definition_to_data_process_definition( self.stream_def_id, add_array_dpd_id, binding='add_array_func') return add_array_dpd_id def create_data_process_one(self, data_process_definition_id, output_dataproduct): # Create the data process #data process 1 adds conductivity + pressure and puts the result in salinity argument_map = {"arr1": "conductivity", "arr2": "pressure"} output_param = "salinity" dp1_data_process_id = self.dataprocessclient.create_data_process( data_process_definition_id=data_process_definition_id, inputs=[self.input_dp_one_id], outputs=[output_dataproduct], argument_map=argument_map, out_param_name=output_param) self.damsclient.register_process(dp1_data_process_id) self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id) self.dp_list.append(dp1_data_process_id) return dp1_data_process_id def create_data_process_two(self, data_process_definition_id, input_dataproduct, output_dataproduct): # Create the data process #data process 2 adds salinity + pressure and puts the result in conductivity argument_map = {'arr1': 'salinity', 'arr2': 'pressure'} output_param = 'conductivity' dp2_func_data_process_id = self.dataprocessclient.create_data_process( data_process_definition_id=data_process_definition_id, inputs=[input_dataproduct], outputs=[output_dataproduct], argument_map=argument_map, out_param_name=output_param) self.damsclient.register_process(dp2_func_data_process_id) self.addCleanup(self.dataprocessclient.delete_data_process, dp2_func_data_process_id) self.dp_list.append(dp2_func_data_process_id) return dp2_func_data_process_id def create_output_data_products(self): dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition( name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id) dp1_output_dp_obj = IonObject(RT.DataProduct, name='data_process1_data_product', description='output of add array func') dp1_func_output_dp_id = self.dataproductclient.create_data_product( dp1_output_dp_obj, dp1_outgoing_stream_id) self.addCleanup(self.dataproductclient.delete_data_product, dp1_func_output_dp_id) # Retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id, PRED.hasStream, None, True) self._output_stream_one_id = stream_ids[0] dp2_func_outgoing_stream_id = self.pubsub_client.create_stream_definition( name='dp2_stream', parameter_dictionary_id=self.parameter_dict_id) dp2_func_output_dp_obj = IonObject( RT.DataProduct, name='data_process2_data_product', description='output of add array func') dp2_func_output_dp_id = self.dataproductclient.create_data_product( dp2_func_output_dp_obj, dp2_func_outgoing_stream_id) self.addCleanup(self.dataproductclient.delete_data_product, dp2_func_output_dp_id) # Retrieve the id of the OUTPUT stream from the out Data Product and add to granule logger stream_ids, _ = self.rrclient.find_objects(dp2_func_output_dp_id, PRED.hasStream, None, True) self._output_stream_two_id = stream_ids[0] subscription_id = self.pubsub_client.create_subscription( 'validator', data_product_ids=[dp1_func_output_dp_id, dp2_func_output_dp_id]) self.addCleanup(self.pubsub_client.delete_subscription, subscription_id) def on_granule(msg, route, stream_id): log.debug('recv_packet stream_id: %s route: %s msg: %s', stream_id, route, msg) self.validate_output_granule(msg, route, stream_id) 
validator = StandaloneStreamSubscriber('validator', callback=on_granule) validator.start() self.addCleanup(validator.stop) self.pubsub_client.activate_subscription(subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id) return dp1_func_output_dp_id, dp2_func_output_dp_id def validate_event(self, *args, **kwargs): """ This method is a callback function for receiving DataProcessStatusEvent. """ data_process_event = args[0] log.debug("DataProcessStatusEvent: %s", str(data_process_event.__dict__)) #if data process already created, check origin if not 'data process assigned to transform worker' in data_process_event.description: self.assertIn(data_process_event.origin, self.dp_list) def validate_output_granule(self, msg, route, stream_id): self.assertTrue( stream_id in [self._output_stream_one_id, self._output_stream_two_id]) rdt = RecordDictionaryTool.load_from_granule(msg) log.debug('validate_output_granule stream_id: %s', stream_id) if stream_id == self._output_stream_one_id: sal_val = rdt['salinity'] log.debug('validate_output_granule sal_val: %s', sal_val) np.testing.assert_array_equal(sal_val, np.array([3])) self.event1_verified.set() else: cond_val = rdt['conductivity'] log.debug('validate_output_granule cond_val: %s', cond_val) np.testing.assert_array_equal(cond_val, np.array([5])) self.event2_verified.set() def start_event_listener(self): es = EventSubscriber(event_type=OT.DataProcessStatusEvent, callback=self.validate_event) es.start() self.addCleanup(es.stop)
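# For reference, a sketch of the transform function these data processes execute. The TransformFunction resource above
# points at ion_example.add_arrays.add_arrays, and the granule checks in validate_output_granule expect element-wise
# addition (conductivity [1] + pressure [2] -> salinity [3]); the packaged egg may differ in detail.
import numpy as np

def add_arrays(a, b):
    """Element-wise addition of two arrays, as the 'add_array_func' transform is described above."""
    return np.asarray(a) + np.asarray(b)

# e.g. add_arrays([1], [2]) -> array([3]) and add_arrays([3], [2]) -> array([5]),
# matching the salinity and conductivity values asserted in validate_output_granule.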
def test_stream_processing(self): #-------------------------------------------------------------------------------- #Test that streams are processed by the transforms according to a provided algorithm #-------------------------------------------------------------------------------- #todo: In this simple implementation, we are checking if the stream has the word, PUBLISH, #todo(contd) and if the word VALUE=<number> exists and that number is less than something #todo later on we are going to use complex algorithms to make this prototype powerful #------------------------------------------------------------------------------------- # Start a subscriber to listen for an alert event from the Stream Alert Transform #------------------------------------------------------------------------------------- queue = gevent.queue.Queue() def event_received(message, headers): queue.put(message) event_subscriber = EventSubscriber( origin="StreamAlertTransform", event_type="DeviceEvent", callback=event_received) event_subscriber.start() self.addCleanup(event_subscriber.stop) #------------------------------------------------------------------------------------- # The configuration for the Stream Alert Transform... set up the event types to listen to #------------------------------------------------------------------------------------- config = { 'process':{ 'queue_name': 'a_queue', 'value': 10, 'event_type':'DeviceEvent' } } #------------------------------------------------------------------------------------- # Create the process #------------------------------------------------------------------------------------- pid = TransformPrototypeIntTest.create_process( name= 'transform_data_process', module='ion.processes.data.transforms.event_alert_transform', class_name='StreamAlertTransform', configuration= config) self.addCleanup(self.process_dispatcher.cancel_process, pid) self.assertIsNotNone(pid) #------------------------------------------------------------------------------------- # Publish streams and make assertions about alerts #------------------------------------------------------------------------------------- exchange_name = 'a_queue' exchange_point = 'test_exchange' routing_key = 'stream_id.stream' stream_route = StreamRoute(exchange_point, routing_key) xn = self.container.ex_manager.create_xn_queue(exchange_name) xp = self.container.ex_manager.create_xp(exchange_point) xn.bind('stream_id.stream', xp) pub = StandaloneStreamPublisher('stream_id', stream_route) message = "A dummy example message containing the word PUBLISH, and with VALUE = 5 . This message" +\ " will trigger an alert event from the StreamAlertTransform because the value provided is "\ "less than 10 that was passed in through the config." pub.publish(message) event = queue.get(timeout=10) self.assertEquals(event.type_, "DeviceEvent") self.assertEquals(event.origin, "StreamAlertTransform")
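# A minimal sketch of the check described in the todo comments above (not the actual
# ion.processes.data.transforms.event_alert_transform implementation): the StreamAlertTransform is expected to raise an
# alert when a message contains the word PUBLISH and a VALUE below the configured threshold.
import re

def should_alert(message, threshold):
    """Return True when 'PUBLISH' appears and the embedded 'VALUE = <number>' is below threshold."""
    if 'PUBLISH' not in message:
        return False
    match = re.search(r'VALUE\s*=\s*([-+]?\d+(?:\.\d+)?)', message)
    return match is not None and float(match.group(1)) < threshold

# should_alert(message, 10) -> True for the test message above (VALUE = 5), so a DeviceEvent is expected.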
def test_ctdbp_L0_all(self): """ Test packets processed by the ctdbp_L0_all transform """ #----------- Data Process Definition -------------------------------- dpd_obj = IonObject( RT.DataProcessDefinition, name='CTDBP_L0_all', description= 'Take parsed stream and put the C, T and P into three separate L0 streams.', module='ion.processes.data.transforms.ctdbp.ctdbp_L0', class_name='CTDBP_L0_all') dprocdef_id = self.data_process_management.create_data_process_definition( dpd_obj) self.addCleanup( self.data_process_management.delete_data_process_definition, dprocdef_id) log.debug("created data process definition: id = %s", dprocdef_id) #----------- Data Products -------------------------------- # Construct temporal and spatial Coordinate Reference System objects tdom, sdom = time_series_domain() sdom = sdom.dump() tdom = tdom.dump() input_param_dict = self._create_input_param_dict_for_test( parameter_dict_name='fictitious_ctdp_param_dict') # Get the stream definition for the stream using the parameter dictionary # input_param_dict = self.dataset_management.read_parameter_dictionary_by_name('ctdbp_cdef_sample', id_only=True) input_stream_def_dict = self.pubsub.create_stream_definition( name='parsed', parameter_dictionary_id=input_param_dict) self.addCleanup(self.pubsub.delete_stream_definition, input_stream_def_dict) log.debug("Got the parsed parameter dictionary: id: %s", input_param_dict) log.debug("Got the stream def for parsed input: %s", input_stream_def_dict) # Input data product parsed_stream_dp_obj = IonObject( RT.DataProduct, name='parsed_stream', description='Parsed stream input to CTBP L0 transform', temporal_domain=tdom, spatial_domain=sdom) input_dp_id = self.dataproduct_management.create_data_product( data_product=parsed_stream_dp_obj, stream_definition_id=input_stream_def_dict) self.addCleanup(self.dataproduct_management.delete_data_product, input_dp_id) # output data product L0_stream_dp_obj = IonObject( RT.DataProduct, name='L0_stream', description='L0_stream output of CTBP L0 transform', temporal_domain=tdom, spatial_domain=sdom) L0_stream_dp_id = self.dataproduct_management.create_data_product( data_product=L0_stream_dp_obj, stream_definition_id=input_stream_def_dict) self.addCleanup(self.dataproduct_management.delete_data_product, L0_stream_dp_id) # We need the key name here to be "L0_stream", since when the data process is launched, this name goes into # the config as in config.process.publish_streams.L0_stream when the config is used to launch the data process out_stream_ids, _ = self.resource_registry.find_objects( L0_stream_dp_id, PRED.hasStream, RT.Stream, True) self.assertTrue(len(out_stream_ids)) output_stream_id = out_stream_ids[0] dproc_id = self.data_process_management.create_data_process( data_process_definition_id=dprocdef_id, in_data_product_ids=[input_dp_id], out_data_product_ids=[L0_stream_dp_id], configuration=None) self.addCleanup(self.data_process_management.delete_data_process, dproc_id) log.debug("Created a data process for ctdbp_L0. 
id: %s", dproc_id) # Activate the data process self.data_process_management.activate_data_process(dproc_id) self.addCleanup(self.data_process_management.deactivate_data_process, dproc_id) #----------- Find the stream that is associated with the input data product when it was created by create_data_product() -------------------------------- stream_ids, _ = self.resource_registry.find_objects( input_dp_id, PRED.hasStream, RT.Stream, True) self.assertTrue(len(stream_ids)) input_stream_id = stream_ids[0] stream_route = self.pubsub.read_stream_route(input_stream_id) log.debug("The input stream for the L0 transform: %s", input_stream_id) #----------- Create a subscriber that will listen to the transform's output -------------------------------- ar = gevent.event.AsyncResult() def subscriber(m, r, s): ar.set(m) sub = StandaloneStreamSubscriber(exchange_name='sub', callback=subscriber) sub_id = self.pubsub.create_subscription('subscriber_to_transform', stream_ids=[output_stream_id], exchange_name='sub') self.addCleanup(self.pubsub.delete_subscription, sub_id) self.pubsub.activate_subscription(sub_id) self.addCleanup(self.pubsub.deactivate_subscription, sub_id) sub.start() self.addCleanup(sub.stop) #----------- Publish on that stream so that the transform can receive it -------------------------------- pub = StandaloneStreamPublisher(input_stream_id, stream_route) publish_granule = self._get_new_ctd_packet( stream_definition_id=input_stream_def_dict, length=5) pub.publish(publish_granule) log.debug("Published the following granule: %s", publish_granule) granule_from_transform = ar.get(timeout=20) log.debug("Got the following granule from the transform: %s", granule_from_transform) # Check that the granule published by the L0 transform has the right properties self._check_granule_from_transform(granule_from_transform)
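# An illustrative sketch of the split that the CTDBP_L0_all description above refers to (the real transform operates on
# granules via the coverage model; plain dicts are used here only to show the routing): conductivity, temperature and
# pressure from the parsed packet go to separate L0 outputs.
def split_parsed_l0(parsed_record):
    """Route the C, T and P values of a parsed CTD record to three L0 records."""
    return {
        'conductivity': {'time': parsed_record['time'], 'conductivity': parsed_record['conductivity']},
        'temperature': {'time': parsed_record['time'], 'temp': parsed_record['temp']},
        'pressure': {'time': parsed_record['time'], 'pressure': parsed_record['pressure']},
    }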
def execute_acquire_sample(self, *args): """ Creates a copy of self._dh_config, creates a publisher, and spawns a greenlet to perform a data acquisition cycle If args[0] is a dict, any entries keyed with one of the 'PATCHABLE_CONFIG_KEYS' are used to patch the config Greenlet binds to BaseDataHandler._acquire_sample and passes the publisher and config Disallows multiple "new data" (unconstrained) requests using BaseDataHandler._semaphore lock Called from: InstrumentAgent._handler_observatory_execute_resource |--> ExternalDataAgent._handler_streaming_execute_resource @parameter args First argument can be a config dictionary @throws IndexError if first argument is not a dictionary @throws ConfigurationError if required members aren't present @retval New ResourceAgentState (COMMAND) """ log.debug('Executing acquire_sample: args = {0}'.format(args)) # Make a copy of the config to ensure no cross-pollution config = self._dh_config.copy() # Patch the config if mods are passed in try: config_mods = args[0] if not isinstance(config_mods, dict): raise IndexError() log.debug('Configuration modifications provided: {0}'.format(config_mods)) for k in self._params['PATCHABLE_CONFIG_KEYS']: p = get_safe(config_mods, k) if p is not None: config[k] = p except IndexError: log.info('No configuration modifications were provided') # Verify that there is a stream_id member in the config stream_id = get_safe(config, 'stream_id', None) if not stream_id: raise ConfigurationError('Configuration does not contain required \'stream_id\' member') stream_route = get_safe(config, 'stream_route', None) if not stream_route: raise ConfigurationError('Configuration does not contain required \'stream_route\' member') isNew = get_safe(config, 'constraints') is None if isNew and not self._semaphore.acquire(blocking=False): log.warn('Already acquiring new data - action not duplicated') return ndc = None if isNew: # Get the NewDataCheck attachment and add its content to the config ext_ds_id = get_safe(config, 'external_dataset_res_id') if ext_ds_id: ndc = self._find_new_data_check_attachment(ext_ds_id) config['new_data_check'] = ndc # Create a publisher to pass into the greenlet publisher = StandaloneStreamPublisher(stream_id=stream_id, stream_route=stream_route) # Spawn a greenlet to do the data acquisition and publishing g = spawn(self._acquire_sample, config, publisher, self._unlock_new_data_callback, self._update_new_data_check_attachment) log.debug('** Spawned {0}'.format(g)) self._glet_queue.append(g) return ResourceAgentState.COMMAND, None
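# A hypothetical example of the configuration shape execute_acquire_sample expects; every key name below appears in the
# method body, while the id values are placeholders:
example_config = {
    'stream_id': 'stream-0000',             # required; checked via get_safe(config, 'stream_id')
    'stream_route': None,                   # required; a StreamRoute object in practice
    'external_dataset_res_id': 'eds-0000',  # optional; used to look up the new-data-check attachment
    # 'constraints': {...},                 # present -> constrained request; absent -> "new data" request
}
# Entries whose keys are listed in self._params['PATCHABLE_CONFIG_KEYS'] can be overridden by passing a dict as the
# first positional argument, e.g. execute_acquire_sample(config_mods).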
class TestTransformWorker(IonIntegrationTestCase): def setUp(self): self._start_container() self.container.start_rel_from_url('res/deploy/r2deploy.yml') # Instantiate a process to represent the test process = TransformWorkerTestProcess() self.dataset_management_client = DatasetManagementServiceClient( node=self.container.node) self.pubsub_client = PubsubManagementServiceClient( node=self.container.node) self.dataproductclient = DataProductManagementServiceClient( node=self.container.node) self.dataprocessclient = DataProcessManagementServiceClient( node=self.container.node) self.processdispatchclient = ProcessDispatcherServiceClient( node=self.container.node) self.damsclient = DataAcquisitionManagementServiceClient( node=self.container.node) self.rrclient = ResourceRegistryServiceClient(node=self.container.node) self.imsclient = InstrumentManagementServiceProcessClient( node=self.container.node, process=process) self.time_dom, self.spatial_dom = time_series_domain() self.ph = ParameterHelper(self.dataset_management_client, self.addCleanup) self.wait_time = CFG.get_safe('endpoint.receive.timeout', 10) def push_granule(self, data_product_id): ''' Publishes and monitors that the granule arrived ''' datasets, _ = self.rrclient.find_objects(data_product_id, PRED.hasDataset, id_only=True) dataset_monitor = DatasetMonitor(datasets[0]) rdt = self.ph.rdt_for_data_product(data_product_id) self.ph.fill_parsed_rdt(rdt) self.ph.publish_rdt_to_data_product(data_product_id, rdt) assert dataset_monitor.wait() dataset_monitor.stop() @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_transform_worker(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched. 
# verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.dp_list = [] self.data_process_objs = [] self._output_stream_ids = [] self.granule_verified = Event() self.worker_assigned_event_verified = Event() self.dp_created_event_verified = Event() self.heartbeat_event_verified = Event() self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( name='ctd_parsed_param_dict', id_only=True) # create the StreamDefinition self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject(RT.DataProduct, name='input_data_product', description='input test stream') self.input_dp_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] self.start_event_listener() # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process( ) self.dp_list.append(dataprocess_id) # validate the repository for data product algorithms persists the new resources NEW SA-1 # create_data_process call created one of each dpd_ids, _ = self.rrclient.find_resources( restype=OT.DataProcessDefinition, id_only=False) # there will be more than one because of the DPDs that represent the PFs in the data product above self.assertTrue(dpd_ids is not None) dp_ids, _ = self.rrclient.find_resources(restype=OT.DataProcess, id_only=False) # only one DP because the PFs that are in the code dataproduct above are not activated yet. self.assertEquals(len(dp_ids), 1) # validate the name and version label NEW SA - 2 dataprocessdef_obj = self.dataprocessclient.read_data_process_definition( dataprocessdef_id) self.assertEqual(dataprocessdef_obj.version_label, '1.0a') self.assertEqual(dataprocessdef_obj.name, 'add_arrays') # validate that the DPD has an attachment NEW SA - 21 attachment_ids, assoc_ids = self.rrclient.find_objects( dataprocessdef_id, PRED.hasAttachment, RT.Attachment, True) self.assertEqual(len(attachment_ids), 1) attachment_obj = self.rrclient.read_attachment(attachment_ids[0]) log.debug('attachment: %s', attachment_obj) # validate that the data process resource has input and output data products associated # L4-CI-SA-RQ-364 and NEW SA-3 outproduct_ids, assoc_ids = self.rrclient.find_objects( dataprocess_id, PRED.hasOutputProduct, RT.DataProduct, True) self.assertEqual(len(outproduct_ids), 1) inproduct_ids, assoc_ids = self.rrclient.find_objects( dataprocess_id, PRED.hasInputProduct, RT.DataProduct, True) self.assertEqual(len(inproduct_ids), 1) # Test for provenance. Get Data product produced by the data processes output_data_product_id, _ = self.rrclient.find_objects( subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance( output_data_product_id[0]) # Do a basic check that there are 2 entries in the provenance graph: the child (output) and the parent (input). The # DataProcessDefinition that created the child is recorded on the parent link.
self.assertTrue(len(output_data_product_provenance) == 2) self.assertTrue(self.input_dp_id in output_data_product_provenance[ output_data_product_id[0]]['parents']) self.assertTrue(output_data_product_provenance[ output_data_product_id[0]]['parents'][self.input_dp_id] ['data_process_definition_id'] == dataprocessdef_id) # NEW SA - 4 | Data processing shall include the appropriate data product algorithm name and version number in # the metadata of each output data product created by the data product algorithm. output_data_product_obj, _ = self.rrclient.find_objects( subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=False) self.assertTrue(output_data_product_obj[0].name != None) self.assertTrue(output_data_product_obj[0]._rev != None) # retrieve subscription from data process subscription_objs, _ = self.rrclient.find_objects( subject=dataprocess_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False) log.debug('test_transform_worker subscription_obj: %s', subscription_objs[0]) # create a queue to catch the published granules self.subscription_id = self.pubsub_client.create_subscription( name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name) self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id) self.pubsub_client.activate_subscription(self.subscription_id) self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id) stream_route = self.pubsub_client.read_stream_route(self.stream_id) self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route) for n in range(1, 101): rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id) rdt['time'] = [0] # time should always come first rdt['conductivity'] = [1] rdt['pressure'] = [2] rdt['salinity'] = [8] self.publisher.publish(rdt.to_granule()) # validate that the output granule is received and the updated value is correct self.assertTrue(self.granule_verified.wait(self.wait_time)) # validate that the data process loaded into worker event is received (L4-CI-SA-RQ-182) self.assertTrue( self.worker_assigned_event_verified.wait(self.wait_time)) # validate that the data process create (with data product ids) event is received (NEW SA -42) self.assertTrue(self.dp_created_event_verified.wait(self.wait_time)) # validate that the data process heartbeat event is received (for every hundred granules processed) (L4-CI-SA-RQ-182) #this takes a while so set wait limit to large value self.assertTrue(self.heartbeat_event_verified.wait(200)) # validate that the code from the transform function can be retrieve via inspect_data_process_definition src = self.dataprocessclient.inspect_data_process_definition( dataprocessdef_id) self.assertIn('def add_arrays(a, b)', src) # now delete the DPD and DP then verify that the resources are retired so that information required for provenance are still available self.dataprocessclient.delete_data_process(dataprocess_id) self.dataprocessclient.delete_data_process_definition( dataprocessdef_id) in_dp_objs, _ = self.rrclient.find_objects( subject=dataprocess_id, predicate=PRED.hasInputProduct, object_type=RT.DataProduct, id_only=True) self.assertTrue(in_dp_objs is not None) dpd_objs, _ = self.rrclient.find_subjects( subject_type=RT.DataProcessDefinition, predicate=PRED.hasDataProcess, object=dataprocess_id, id_only=True) self.assertTrue(dpd_objs is not None) @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip 
test while in CEI LAUNCH mode') def test_transform_worker_with_instrumentdevice(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched. # verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # Create CTD Parsed as the initial data product # create a stream definition for the data from the ctd simulator self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( 'ctd_parsed_param_dict', id_only=True) self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject(RT.DataProduct, name='input_data_product', description='input test stream') self.input_dp_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id) # only ever need one device for testing purposes. instDevice_obj, _ = self.rrclient.find_resources( restype=RT.InstrumentDevice, name='test_ctd_device') if instDevice_obj: instDevice_id = instDevice_obj[0]._id else: instDevice_obj = IonObject(RT.InstrumentDevice, name='test_ctd_device', description="test_ctd_device", serial_number="12345") instDevice_id = self.imsclient.create_instrument_device( instrument_device=instDevice_obj) self.damsclient.assign_data_product(input_resource_id=instDevice_id, data_product_id=self.input_dp_id) # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process( ) self.addCleanup(self.dataprocessclient.delete_data_process, dataprocess_id) self.addCleanup(self.dataprocessclient.delete_data_process_definition, dataprocessdef_id) # Test for provenance. Get Data product produced by the data processes output_data_product_id, _ = self.rrclient.find_objects( subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance( output_data_product_id[0]) # Do a basic check to see if there were 3 entries in the provenance graph. Parent and Child and the # DataProcessDefinition creating the child from the parent. self.assertTrue(len(output_data_product_provenance) == 3) self.assertTrue(self.input_dp_id in output_data_product_provenance[ output_data_product_id[0]]['parents']) self.assertTrue(instDevice_id in output_data_product_provenance[ self.input_dp_id]['parents']) self.assertTrue(output_data_product_provenance[instDevice_id]['type'] == 'InstrumentDevice') @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_transform_worker_with_platformdevice(self): # test that a data process (type: data-product-in / data-product-out) can be defined and launched. 
# verify that the output granule fields are correctly populated # test that the input and output data products are linked to facilitate provenance self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # Create CTD Parsed as the initial data product # create a stream definition for the data from the ctd simulator self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name( 'ctd_parsed_param_dict', id_only=True) self.stream_def_id = self.pubsub_client.create_stream_definition( name='stream_def', parameter_dictionary_id=self.parameter_dict_id) # create the DataProduct that is the input to the data processes input_dp_obj = IonObject(RT.DataProduct, name='input_data_product', description='input test stream') self.input_dp_id = self.dataproductclient.create_data_product( data_product=input_dp_obj, stream_definition_id=self.stream_def_id) # retrieve the Stream for this data product stream_ids, assoc_ids = self.rrclient.find_objects( self.input_dp_id, PRED.hasStream, RT.Stream, True) self.stream_id = stream_ids[0] log.debug('new ctd_parsed_data_product_id = %s' % self.input_dp_id) # only ever need one device for testing purposes. platform_device_obj, _ = self.rrclient.find_resources( restype=RT.PlatformDevice, name='TestPlatform') if platform_device_obj: platform_device_id = platform_device_obj[0]._id else: platform_device_obj = IonObject(RT.PlatformDevice, name='TestPlatform', description="TestPlatform", serial_number="12345") platform_device_id = self.imsclient.create_platform_device( platform_device=platform_device_obj) self.damsclient.assign_data_product( input_resource_id=platform_device_id, data_product_id=self.input_dp_id) # create the DPD, DataProcess and output DataProduct dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process( ) self.addCleanup(self.dataprocessclient.delete_data_process, dataprocess_id) self.addCleanup(self.dataprocessclient.delete_data_process_definition, dataprocessdef_id) # Test for provenance. Get Data product produced by the data processes output_data_product_id, _ = self.rrclient.find_objects( subject=dataprocess_id, object_type=RT.DataProduct, predicate=PRED.hasOutputProduct, id_only=True) output_data_product_provenance = self.dataproductclient.get_data_product_provenance( output_data_product_id[0]) # Do a basic check to see if there were 3 entries in the provenance graph. Parent and Child and the # DataProcessDefinition creating the child from the parent. self.assertTrue(len(output_data_product_provenance) == 3) self.assertTrue(self.input_dp_id in output_data_product_provenance[ output_data_product_id[0]]['parents']) self.assertTrue(platform_device_id in output_data_product_provenance[ self.input_dp_id]['parents']) self.assertTrue(output_data_product_provenance[platform_device_id] ['type'] == 'PlatformDevice') @attr('LOCOINT') @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False), 'Skip test while in CEI LAUNCH mode') def test_event_transform_worker(self): self.data_process_objs = [] self._output_stream_ids = [] self.event_verified = Event() # test that a data process (type: data-product-in / event-out) can be defined and launched. 
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_event_transform_worker(self):
        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()

        # test that a data process (type: data-product-in / event-out) can be defined and launched.
        # verify that event fields are correctly populated

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        # create the DPD and the data process
        self.event_data_process_id = self.create_event_data_processes()

        # retrieve the subscription from the data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=self.event_data_process_id, predicate=PRED.hasSubscription,
            object_type=RT.Subscription, id_only=False)
        log.debug('test_event_transform_worker subscription_obj: %s', subscription_objs[0])

        # create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(
            name='parsed_subscription', stream_ids=[self.stream_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id,
                                                   stream_route=stream_route)

        self.start_event_transform_listener()
        self.data_modified = Event()

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher.publish(rdt.to_granule())

        self.assertTrue(self.event_verified.wait(self.wait_time))
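    # The 'validate_salinity_array' transform function referenced by create_event_data_processes()
    # lives in ion.processes.data.transforms.test.test_transform_worker and is not shown in this
    # section. Based on the listener wired up by start_event_transform_listener(), it is assumed
    # to inspect the incoming salinity array and cause a DeviceStatusAlertEvent to be published
    # whose origin is the input stream and whose values carry the data process id. A minimal
    # sketch under those assumptions (the 'context' argument and the bounds are hypothetical,
    # and event publication may actually be handled by the transform worker itself):
    #
    #   def validate_salinity_array(a, context=None):
    #       a = np.asanyarray(a)
    #       # flag the granule if any salinity value falls outside an assumed plausible range
    #       return bool(np.any((a < 0) | (a > 50)))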
    @attr('LOCOINT')
    @unittest.skipIf(os.getenv('CEI_LAUNCH_TEST', False),
                     'Skip test while in CEI LAUNCH mode')
    def test_bad_argument_map(self):
        self._output_stream_ids = []

        # test that a data process (type: data-product-in / data-product-out) parameter mapping is validated during
        # data process creation and that the correct exception is raised for both input and output.

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # create the output data product for the data process
        dp1_func_output_dp_id = self.create_output_data_product()

        # set up the DPD and transform function - array add function
        tf_obj = IonObject(RT.TransformFunction,
                           name='add_array_func',
                           description='adds values in an array',
                           function='add_arrays',
                           module="ion_example.add_arrays",
                           arguments=['arr1', 'arr2'],
                           function_type=TransformFunctionType.TRANSFORM,
                           uri='http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg')
        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(RT.DataProcessDefinition,
                            name='add_arrays',
                            description='adds the values of two arrays',
                            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS)
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(
            data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            self.stream_def_id, add_array_dpd_id, binding='add_array_func')

        # create the data process with an invalid argument map
        argument_map = {"arr1": "foo", "arr2": "bar"}
        output_param = "salinity"
        with self.assertRaises(BadRequest) as cm:
            dp1_data_process_id = self.dataprocessclient.create_data_process(
                data_process_definition_id=add_array_dpd_id,
                inputs=[self.input_dp_id],
                outputs=[dp1_func_output_dp_id],
                argument_map=argument_map,
                out_param_name=output_param)
        ex = cm.exception
        log.debug(' exception raised: %s', cm)
        self.assertEqual(
            ex.message,
            "Input data product does not contain the parameters defined in argument map")

        # create the data process with an invalid output parameter name
        argument_map = {"arr1": "conductivity", "arr2": "pressure"}
        output_param = "foo"
        with self.assertRaises(BadRequest) as cm:
            dp1_data_process_id = self.dataprocessclient.create_data_process(
                data_process_definition_id=add_array_dpd_id,
                inputs=[self.input_dp_id],
                outputs=[dp1_func_output_dp_id],
                argument_map=argument_map,
                out_param_name=output_param)
        ex = cm.exception
        log.debug(' exception raised: %s', cm)
        self.assertEqual(
            ex.message,
            "Output data product does not contain the output parameter name provided")
    def create_event_data_processes(self):
        # one data process using one transform function and one DPD
        argument_map = {"a": "salinity"}

        # set up the DPD and transform function - salinity array validator
        tf_obj = IonObject(RT.TransformFunction,
                           name='validate_salinity_array',
                           description='validate_salinity_array',
                           function='validate_salinity_array',
                           module="ion.processes.data.transforms.test.test_transform_worker",
                           arguments=['a'],
                           function_type=TransformFunctionType.TRANSFORM)
        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(RT.DataProcessDefinition,
                            name='validate_salinity_array',
                            description='validate_salinity_array',
                            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS)
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(
            data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            self.stream_def_id, add_array_dpd_id, binding='validate_salinity_array')

        # create the data process
        dp1_data_process_id = self.dataprocessclient.create_data_process(
            data_process_definition_id=add_array_dpd_id,
            inputs=[self.input_dp_id],
            outputs=None,
            argument_map=argument_map)
        self.damsclient.register_process(dp1_data_process_id)
        self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id)
        return dp1_data_process_id

    def create_data_process(self):
        # one data process using one transform function and one DPD
        dp1_func_output_dp_id = self.create_output_data_product()
        argument_map = {"arr1": "conductivity", "arr2": "pressure"}
        output_param = "salinity"

        # set up the DPD and transform function - array add function
        tf_obj = IonObject(RT.TransformFunction,
                           name='add_array_func',
                           description='adds values in an array',
                           function='add_arrays',
                           module="ion_example.add_arrays",
                           arguments=['arr1', 'arr2'],
                           function_type=TransformFunctionType.TRANSFORM,
                           uri='http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg')
        add_array_func_id, rev = self.rrclient.create(tf_obj)

        dpd_obj = IonObject(RT.DataProcessDefinition,
                            name='add_arrays',
                            description='adds the values of two arrays',
                            data_process_type=DataProcessTypeEnum.TRANSFORM_PROCESS,
                            version_label='1.0a')
        add_array_dpd_id = self.dataprocessclient.create_data_process_definition(
            data_process_definition=dpd_obj, function_id=add_array_func_id)
        self.dataprocessclient.assign_stream_definition_to_data_process_definition(
            self.stream_def_id, add_array_dpd_id, binding='add_array_func')

        # create the data process
        dp1_data_process_id = self.dataprocessclient.create_data_process(
            data_process_definition_id=add_array_dpd_id,
            inputs=[self.input_dp_id],
            outputs=[dp1_func_output_dp_id],
            argument_map=argument_map,
            out_param_name=output_param)
        self.damsclient.register_process(dp1_data_process_id)
        #self.addCleanup(self.dataprocessclient.delete_data_process, dp1_data_process_id)

        # add an attachment object to this DPD to test new SA-21
        import msgpack
        attachment_content = 'foo bar'
        attachment_obj = IonObject(RT.Attachment,
                                   name='test_attachment',
                                   attachment_type=AttachmentType.ASCII,
                                   content_type='text/plain',
                                   content=msgpack.packb(attachment_content))
        att_id = self.rrclient.create_attachment(add_array_dpd_id, attachment_obj)
        self.addCleanup(self.rrclient.delete_attachment, att_id)

        return add_array_dpd_id, dp1_data_process_id, dp1_func_output_dp_id

    def create_output_data_product(self):
        dp1_outgoing_stream_id = self.pubsub_client.create_stream_definition(
            name='dp1_stream', parameter_dictionary_id=self.parameter_dict_id)
        dp1_output_dp_obj = IonObject(RT.DataProduct,
                                      name='data_process1_data_product',
                                      description='output of add array func')
        dp1_func_output_dp_id = self.dataproductclient.create_data_product(
            dp1_output_dp_obj, dp1_outgoing_stream_id)
        self.addCleanup(self.dataproductclient.delete_data_product, dp1_func_output_dp_id)

        # retrieve the id of the OUTPUT stream from the output Data Product and add it to the granule logger
        stream_ids, _ = self.rrclient.find_objects(dp1_func_output_dp_id, PRED.hasStream, None, True)
        self._output_stream_ids.append(stream_ids[0])

        subscription_id = self.pubsub_client.create_subscription(
            'validator', data_product_ids=[dp1_func_output_dp_id])
        self.addCleanup(self.pubsub_client.delete_subscription, subscription_id)

        def on_granule(msg, route, stream_id):
            log.debug('recv_packet stream_id: %s route: %s msg: %s', stream_id, route, msg)
            self.validate_output_granule(msg, route, stream_id)
            self.granule_verified.set()

        validator = StandaloneStreamSubscriber('validator', callback=on_granule)
        validator.start()
        self.addCleanup(validator.stop)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, subscription_id)

        return dp1_func_output_dp_id

    def validate_event(self, *args, **kwargs):
        """Callback for DataProcessStatusEvent."""
        data_process_event = args[0]
        log.debug("DataProcessStatusEvent: %s", str(data_process_event.__dict__))

        # if the data process has already been created, check the origin
        if self.dp_list:
            self.assertIn(data_process_event.origin, self.dp_list)

        # if this is a heartbeat event then 100 granules have been processed
        if 'data process status update.' in data_process_event.description:
            self.heartbeat_event_verified.set()
        elif 'Data process assigned to transform worker' in data_process_event.description:
            # otherwise check whether this is the assign event or the create event
            self.worker_assigned_event_verified.set()
        elif 'Data process created for data product' in data_process_event.description:
            self.dp_created_event_verified.set()

    def validate_output_granule(self, msg, route, stream_id):
        self.assertIn(stream_id, self._output_stream_ids)
        rdt = RecordDictionaryTool.load_from_granule(msg)
        log.debug('validate_output_granule rdt: %s', rdt)
        sal_val = rdt['salinity']
        np.testing.assert_array_equal(sal_val, np.array([3]))

    def start_event_listener(self):
        es = EventSubscriber(event_type=OT.DataProcessStatusEvent, callback=self.validate_event)
        es.start()
        self.addCleanup(es.stop)

    def validate_transform_event(self, *args, **kwargs):
        """Callback for DeviceStatusAlertEvent."""
        status_alert_event = args[0]
        np.testing.assert_array_equal(status_alert_event.origin, self.stream_id)
        np.testing.assert_array_equal(status_alert_event.values,
                                      np.array([self.event_data_process_id]))
        log.debug("DeviceStatusAlertEvent: %s", str(status_alert_event.__dict__))
        self.event_verified.set()

    def start_event_transform_listener(self):
        es = EventSubscriber(event_type=OT.DeviceStatusAlertEvent,
                             callback=self.validate_transform_event)
        es.start()
        self.addCleanup(es.stop)

    def test_download(self):
        egg_url = 'http://sddevrepo.oceanobservatories.org/releases/ion_example-0.1-py2.7.egg'
        egg_path = TransformWorker.download_egg(egg_url)

        import pkg_resources
        pkg_resources.working_set.add_entry(egg_path)

        from ion_example.add_arrays import add_arrays
        a = add_arrays(1, 2)
        self.assertEquals(a, 3)
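    # The ion_example egg downloaded above is not part of this repository. Based on
    # test_download (add_arrays(1, 2) == 3) and validate_output_granule (salinity == [3] when
    # conductivity=[1] and pressure=[2] are published), the packaged add_arrays function is
    # presumably an element-wise sum. The helper below is an illustrative sketch under that
    # assumption, not the code shipped in the egg, and is not used by the tests.
    @staticmethod
    def _example_add_arrays(arr1, arr2):
        # element-wise addition of two array-like inputs
        return np.asanyarray(arr1) + np.asanyarray(arr2)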
    def test_ctd_L2_salinity(self):
        ''' Test that packets are processed by the ctd_L2_salinity transform '''

        #---------------------------------------------------------------------------------------------
        # Launch a ctd transform
        #---------------------------------------------------------------------------------------------
        # Create the process definition
        process_definition = ProcessDefinition(
            name='SalinityTransform',
            description='For testing SalinityTransform')
        process_definition.executable['module'] = 'ion.processes.data.transforms.ctd.ctd_L2_salinity'
        process_definition.executable['class'] = 'SalinityTransform'
        ctd_transform_proc_def_id = self.process_dispatcher.create_process_definition(
            process_definition=process_definition)

        # Build the config
        config = DotDict()
        config.process.queue_name = self.exchange_name
        config.process.exchange_point = self.exchange_point

        pdict_id = self.dataset_management.read_parameter_dictionary_by_name(
            'ctd_parsed_param_dict', id_only=True)
        stream_def_id = self.pubsub.create_stream_definition(
            'sal_stream_def', parameter_dictionary_id=pdict_id)
        sal_stream_id, _ = self.pubsub.create_stream(
            'test_salinity', stream_definition_id=stream_def_id, exchange_point='science_data')

        config.process.publish_streams.salinity = sal_stream_id

        # Schedule the process
        self.process_dispatcher.schedule_process(
            process_definition_id=ctd_transform_proc_def_id, configuration=config)

        #---------------------------------------------------------------------------------------------
        # Create a subscriber that will receive the salinity granule from the ctd transform
        #---------------------------------------------------------------------------------------------
        ar_sal = gevent.event.AsyncResult()

        def subscriber3(m, r, s):
            ar_sal.set(m)

        sub_sal = StandaloneStreamSubscriber('sub_sal', subscriber3)
        self.addCleanup(sub_sal.stop)
        sub_sal_id = self.pubsub.create_subscription(
            'subscription_sal', stream_ids=[sal_stream_id], exchange_name='sub_sal')
        self.pubsub.activate_subscription(sub_sal_id)
        self.queue_cleanup.append(sub_sal.xn.queue)
        sub_sal.start()

        #------------------------------------------------------------------------------------------------------
        # Use a StandaloneStreamPublisher to publish a packet that can then be picked up by a ctd transform
        #------------------------------------------------------------------------------------------------------
        # Do all the routing needed for publishing
        routing_key = 'stream_id.stream'
        stream_route = StreamRoute(self.exchange_point, routing_key)

        xn = self.container.ex_manager.create_xn_queue(self.exchange_name)
        xp = self.container.ex_manager.create_xp(self.exchange_point)
        xn.bind('stream_id.stream', xp)

        pub = StandaloneStreamPublisher('stream_id', stream_route)

        # Build a packet that can be published
        self.px_ctd = SimpleCtdPublisher()
        publish_granule = self._get_new_ctd_packet(stream_definition_id=stream_def_id, length=5)

        # Publish the packet
        pub.publish(publish_granule)

        #------------------------------------------------------------------------------------------------------
        # Make assertions about whether the ctd transform executed its algorithm and published the correct
        # granules
        #------------------------------------------------------------------------------------------------------
        # Get the granule that is published by the ctd transform post processing
        result = ar_sal.get(timeout=10)
        self.assertTrue(isinstance(result, Granule))

        rdt = RecordDictionaryTool.load_from_granule(result)
        self.assertTrue('salinity' in rdt)

        self.check_salinity_algorithm_execution(publish_granule, result)
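    # The SalinityTransform process launched above lives in
    # ion.processes.data.transforms.ctd.ctd_L2_salinity; its algorithm is not shown in this file,
    # and check_salinity_algorithm_execution() presumably recomputes the expected salinity from
    # the published conductivity/temperature/pressure arrays and compares it with the received
    # granule. As a rough, hedged sketch only (assuming the transform follows the PSS-78
    # practical-salinity relationship and that a TEOS-10 style helper such as
    # gsw.SP_from_C(C, t, p) is available), the comparison might look like:
    #
    #   in_rdt = RecordDictionaryTool.load_from_granule(publish_granule)
    #   out_rdt = RecordDictionaryTool.load_from_granule(result)
    #   expected = gsw.SP_from_C(in_rdt['conductivity'], in_rdt['temp'], in_rdt['pressure'])
    #   np.testing.assert_array_almost_equal(out_rdt['salinity'], expected)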
    def test_sparse_values(self):
        ph = ParameterHelper(self.dataset_management, self.addCleanup)
        pdict_id = ph.create_sparse()
        stream_def_id = self.pubsub_management.create_stream_definition(
            'sparse', parameter_dictionary_id=pdict_id)
        self.addCleanup(self.pubsub_management.delete_stream_definition, stream_def_id)
        stream_id, route = self.pubsub_management.create_stream(
            'example', exchange_point=self.exchange_point_name, stream_definition_id=stream_def_id)
        self.addCleanup(self.pubsub_management.delete_stream, stream_id)
        dataset_id = self.create_dataset(pdict_id)
        self.start_ingestion(stream_id, dataset_id)
        self.addCleanup(self.stop_ingestion, stream_id)

        ntp_now = time.time() + 2208988800
        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [300000]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = [None]
        rdt['lat'] = [45]
        rdt['conductivity'] = [4341400]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['lon'] = [-71]
        rdt['pressure'] = [256.8]

        publisher = StandaloneStreamPublisher(stream_id, route)
        dataset_monitor = DatasetMonitor(dataset_id)
        self.addCleanup(dataset_monitor.stop)
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.event.wait(30))
        dataset_monitor.event.clear()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)

        np.testing.assert_array_almost_equal(rdt_out['time'], rdt['time'])
        np.testing.assert_array_almost_equal(rdt_out['temp'], rdt['temp'])
        np.testing.assert_array_almost_equal(rdt_out['lat'], np.array([45]))
        np.testing.assert_array_almost_equal(rdt_out['lon'], np.array([-71]))
        np.testing.assert_array_almost_equal(rdt_out['conductivity_L1'], np.array([42.914]))
        np.testing.assert_array_almost_equal(rdt_out['temp_L1'], np.array([20.]))
        np.testing.assert_array_almost_equal(rdt_out['pressure_L1'], np.array([3.068]))
        np.testing.assert_array_almost_equal(
            rdt_out['density'], np.array([1021.7144739593881], dtype='float32'))
        np.testing.assert_array_almost_equal(
            rdt_out['salinity'], np.array([30.935132729668283], dtype='float32'))

        rdt = ph.get_rdt(stream_def_id)
        rdt['lat'] = [46]
        rdt['lon'] = [-73]
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.event.wait(30))
        dataset_monitor.event.clear()

        rdt = ph.get_rdt(stream_def_id)
        rdt['lat'] = [1000]
        rdt['lon'] = [3]
        publisher.publish(rdt.to_granule())

        rdt = ph.get_rdt(stream_def_id)
        rdt['time'] = [ntp_now]
        rdt['internal_timestamp'] = [ntp_now]
        rdt['temp'] = [300000]
        rdt['preferred_timestamp'] = ['driver_timestamp']
        rdt['port_timestamp'] = [ntp_now]
        rdt['quality_flag'] = [None]
        rdt['conductivity'] = [4341400]
        rdt['driver_timestamp'] = [ntp_now]
        rdt['pressure'] = [256.8]
        publisher.publish(rdt.to_granule())
        self.assertTrue(dataset_monitor.event.wait(30))
        dataset_monitor.event.clear()

        replay_granule = self.data_retriever.retrieve(dataset_id)
        rdt_out = RecordDictionaryTool.load_from_granule(replay_granule)
        np.testing.assert_array_almost_equal(rdt_out['lat'], np.array([45, 46]))
        np.testing.assert_array_almost_equal(rdt_out['lon'], np.array([-71, -73]))
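    # Note on the timestamp arithmetic above: 2208988800 is the number of seconds between the
    # NTP epoch (1900-01-01) and the Unix epoch (1970-01-01), so time.time() + 2208988800
    # converts the current Unix time to an NTP-style timestamp, which is the convention used
    # for the 'time' parameter here. The helper below is an illustrative sketch of that
    # conversion and is not used by the test.
    @staticmethod
    def _example_unix_to_ntp(unix_seconds):
        NTP_UNIX_EPOCH_OFFSET = 2208988800  # seconds from 1900-01-01 to 1970-01-01
        return unix_seconds + NTP_UNIX_EPOCH_OFFSET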